Xmipp  v3.23.11-Nereus
Public Member Functions | Public Attributes | Friends | List of all members

#include <naive_bayes.h>

Collaboration diagram for NaiveBayes:
Collaboration graph
[legend]

Public Member Functions

 NaiveBayes (const std::vector< MultidimArray< double > > &features, const Matrix1D< double > &priorProbs, int discreteLevels)
 Constructor. More...
 
void setCostMatrix (const Matrix2D< double > &cost)
 Set cost matrix. More...
 
int doInference (const MultidimArray< double > &newFeatures, double &cost, Matrix1D< double > &classesProbs, Matrix1D< double > &allCosts)
 

Public Attributes

int K
 Number of classes. More...
 
int Nfeatures
 Number of features. More...
 
Matrix1D< double > __priorProbsLog10
 Log10 of the prior probabilities of the classes. More...
 
MultidimArray< double > __weights
 Weight of each feature. More...
 
std::vector< LeafNode > __leafs
 The vector containing the Leafs (as many leafs as features) More...
 
Matrix2D< double > __cost
 

Friends

std::ostream & operator<< (std::ostream &_out, const NaiveBayes &naive)
 Show. More...
 

Detailed Description

Naive Bayes classifier class.

Definition at line 80 of file naive_bayes.h.

Constructor & Destructor Documentation

◆ NaiveBayes()

NaiveBayes::NaiveBayes ( const std::vector< MultidimArray< double > > &  features,
const Matrix1D< double > &  priorProbs,
int  discreteLevels 
)

Constructor.

Definition at line 280 of file naive_bayes.cpp.

284 {
285  K = features.size();
286  Nfeatures=XSIZE(features[0]);
289  VEC_ELEM(__priorProbsLog10,i)=log10(VEC_ELEM(priorProbs,i));
290 
291  // Create a dummy leaf for features that cannot classify
292  std::vector < MultidimArray<double> > aux(K);
293  auto dummyLeaf = LeafNode(aux,0);
294 
295  // Build a leafnode for each feature and assign a weight
297  for (int f=0; f<Nfeatures; f++)
298  {
299  for (int k=0; k<K; k++)
300  features[k].getCol(f, aux[k]);
301  auto leaf = LeafNode(aux,discreteLevels);
302  if (leaf.__discreteLevels>0)
303  {
304  __leafs.push_back(leaf);
305  DIRECT_A1D_ELEM(__weights,f)=__leafs[f].computeWeight();
306  }
307  else
308  {
309  __leafs.push_back(dummyLeaf);
311  }
312 #ifdef DEBUG_WEIGHTS
313 
314  if(debugging == true)
315  {
316  std::cout << "Node " << f << std::endl;
317  std::cout << *(__leafs[f]) << std::endl;
318  //char c;
319  //std::cin >> c;
320  }
321 #endif
322 
323  }
324  double norm=__weights.computeMax();
325  if (norm>0)
326  __weights *= 1.0/norm;
327 
328  // Set default cost matrix
329  __cost.resizeNoCopy(K,K);
330  __cost.initConstant(1);
331  for (int i=0; i<K; i++)
332  MAT_ELEM(__cost,i,i)=0;
333 }
#define VEC_ELEM(v, i)
Definition: matrix1d.h:245
Matrix2D< double > __cost
Definition: naive_bayes.h:101
void initConstant(T val)
Definition: matrix2d.h:602
int Nfeatures
Number of features.
Definition: naive_bayes.h:87
T norm(const std::vector< T > &v)
Definition: vector_ops.h:399
#define i
ql0001_ & k(htemp+1),(cvec+1),(atemp+1),(bj+1),(bl+1),(bu+1),(x+1),(clamda+1), &iout, infoqp, &zero,(w+1), &lenw,(iw+1), &leniw, &glob_grd.epsmac
void resizeNoCopy(int Ydim, int Xdim)
Definition: matrix2d.h:534
#define MAT_ELEM(m, i, j)
Definition: matrix2d.h:116
#define FOR_ALL_ELEMENTS_IN_MATRIX1D(v)
Definition: matrix1d.h:72
#define DIRECT_A1D_ELEM(v, i)
bool debugging
Definition: naive_bayes.cpp:31
double * f
int K
Number of classes.
Definition: naive_bayes.h:84
#define XSIZE(v)
void log10(Image< double > &op)
void initZeros()
Definition: matrix1d.h:592
void initZeros(const MultidimArray< T1 > &op)
std::vector< LeafNode > __leafs
The vector containing the Leafs (as many leafs as features)
Definition: naive_bayes.h:96
MultidimArray< double > __weights
Weight of each feature.
Definition: naive_bayes.h:93
Matrix1D< double > __priorProbsLog10
Prior probabilities of the classes.
Definition: naive_bayes.h:90
T computeMax() const

Member Function Documentation

◆ doInference()

int NaiveBayes::doInference ( const MultidimArray< double > &  newFeatures,
double &  cost,
Matrix1D< double > &  classesProbs,
Matrix1D< double > &  allCosts 
)

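Returns the class with the largest probability given a set of features. More precisely, the returned class is the one with the lowest entry in allCosts (computed as __cost*classesProbs), and cost receives the log10 of that entry. classesProbs and allCosts are auxiliary vectors supplied by the caller so that memory does not have to be allocated on every call.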

Definition at line 345 of file naive_bayes.cpp.

347 {
348  classesProbs=__priorProbsLog10;
349  for(int f=0; f<Nfeatures; f++)
350  {
351  const LeafNode &leaf_f = __leafs[f];
352  double newFeatures_f=DIRECT_A1D_ELEM(newFeatures,f);
353  for (int k=0; k<K; k++)
354  {
355  double p = leaf_f.assignProbability(newFeatures_f, k);
356 
357  if (fabs(p) < 1e-2)
358  VEC_ELEM(classesProbs,k) += -2*DIRECT_A1D_ELEM(__weights,f);
359  else
360  VEC_ELEM(classesProbs,k) += DIRECT_A1D_ELEM(__weights,f)*std::log10(p);
361 
362 #ifdef DEBUG_FINE_CLASSIFICATION
363 
364  if(debugging == true)
365  {
366  std::cout << "Feature " << f
367  << " Probability for class " << k << " = "
368  << classesProbs(k) << " increase= " << p
369  << std::endl;
370  char c;
371  // COSS std::cin >> c;
372  // if (c=='q') debugging = false;
373  }
374 #endif
375 
376  }
377  }
378 
379  classesProbs-=classesProbs.computeMax();
380  // std::cout << "classesProbs " << classesProbs.transpose() << std::endl;
381 
382  for (int k=0; k<K; k++)
383  VEC_ELEM(classesProbs,k)=pow(10.0,VEC_ELEM(classesProbs,k));
384  classesProbs*=1.0/classesProbs.sum();
385  // std::cout << "classesProbs norm " << classesProbs.transpose() << std::endl;
386 
387  allCosts=__cost*classesProbs;
388  // std::cout << "allCosts " << allCosts.transpose() << std::endl;
389 
390  int bestk=0;
391  cost=VEC_ELEM(allCosts,0)=std::log10(VEC_ELEM(allCosts,0));
392  for (int k=1; k<K; k++)
393  {
394  VEC_ELEM(allCosts,k)=std::log10(VEC_ELEM(allCosts,k));
395  if (VEC_ELEM(allCosts,k)<cost)
396  {
397  cost=VEC_ELEM(allCosts,k);
398  bestk=k;
399  }
400  }
401 
402 #ifdef DEBUG_CLASSIFICATION
403  if(debugging == true)
404  {
405  for (int k=0; k<K; k++)
406  classesProbs(k)=log10(classesProbs(k));
407  std::cout << "Class probababilities=" << classesProbs.transpose()
408  << "\n costs=" << allCosts.transpose()
409  << " best class=" << bestk << " cost=" << cost << std::endl;
410  char c;
411  // COSS std::cin >> c;
412  // if (c=='q') debugging = false;
413  }
414 #endif
415  return bestk;
416 }
#define VEC_ELEM(v, i)
Definition: matrix1d.h:245
Matrix2D< double > __cost
Definition: naive_bayes.h:101
doublereal * c
int Nfeatures
Number of features.
Definition: naive_bayes.h:87
ql0001_ & k(htemp+1),(cvec+1),(atemp+1),(bj+1),(bl+1),(bu+1),(x+1),(clamda+1), &iout, infoqp, &zero,(w+1), &lenw,(iw+1), &leniw, &glob_grd.epsmac
Matrix1D< T > transpose() const
Definition: matrix1d.cpp:644
#define DIRECT_A1D_ELEM(v, i)
bool debugging
Definition: naive_bayes.cpp:31
T computeMax() const
Definition: matrix1d.cpp:558
double * f
int K
Number of classes.
Definition: naive_bayes.h:84
double sum(bool average=false) const
Definition: matrix1d.cpp:652
void log10(Image< double > &op)
double assignProbability(double value, int k) const
Assign probability to a value within the PDF of a given class.
double cost
Cost, scaled between 0 and 1.
Definition: micrograph.h:66
std::vector< LeafNode > __leafs
The vector containing the Leafs (as many leafs as features)
Definition: naive_bayes.h:96
MultidimArray< double > __weights
Weight of each feature.
Definition: naive_bayes.h:93
Matrix1D< double > __priorProbsLog10
Prior probabilities of the classes.
Definition: naive_bayes.h:90

◆ setCostMatrix()

void NaiveBayes::setCostMatrix ( const Matrix2D< double > &  cost)

Set cost matrix.

Definition at line 336 of file naive_bayes.cpp.

337 {
338  auto iK=(size_t) K;
339  if (MAT_XSIZE(cost)!=iK || MAT_YSIZE(cost)!=iK)
340  REPORT_ERROR(ERR_MULTIDIM_SIZE,"Cost matrix does not have the appropriate size");
341  __cost=cost;
342 }
#define MAT_YSIZE(m)
Definition: matrix2d.h:124
Matrix2D< double > __cost
Definition: naive_bayes.h:101
#define REPORT_ERROR(nerr, ErrormMsg)
Definition: xmipp_error.h:211
Incorrect MultidimArray size.
Definition: xmipp_error.h:174
int K
Number of classes.
Definition: naive_bayes.h:84
#define MAT_XSIZE(m)
Definition: matrix2d.h:120
double cost
Cost, scaled between 0 and 1.
Definition: micrograph.h:66

Friends And Related Function Documentation

◆ operator<<

std::ostream& operator<< ( std::ostream &  _out,
const NaiveBayes &  naive 
)
friend

Show.

Definition at line 419 of file naive_bayes.cpp.

420 {
421  for (int f=0; f<naive.Nfeatures; f++)
422  {
423  _out << "Node " << f << std::endl;
424  _out << naive.__leafs[f] << std::endl;
425  }
426  return _out;
427 }
int Nfeatures
Number of features.
Definition: naive_bayes.h:87
double * f
std::vector< LeafNode > __leafs
The vector containing the Leafs (as many leafs as features)
Definition: naive_bayes.h:96

Member Data Documentation

◆ __cost

Matrix2D<double> NaiveBayes::__cost

Cost matrix C(i,j) is the cost of predicting class j when the true class is class i.

Definition at line 101 of file naive_bayes.h.

◆ __leafs

std::vector<LeafNode> NaiveBayes::__leafs

The vector containing the Leafs (as many leafs as features)

Definition at line 96 of file naive_bayes.h.

◆ __priorProbsLog10

Matrix1D<double> NaiveBayes::__priorProbsLog10

Log10 of the prior probabilities of the classes.

Definition at line 90 of file naive_bayes.h.

◆ __weights

MultidimArray<double> NaiveBayes::__weights

Weight of each feature.

Definition at line 93 of file naive_bayes.h.

◆ K

int NaiveBayes::K

Number of classes.

Definition at line 84 of file naive_bayes.h.

◆ Nfeatures

int NaiveBayes::Nfeatures

Number of features.

Definition at line 87 of file naive_bayes.h.


The documentation for this class was generated from the following files: