#include <naive_bayes.h>

Collaboration diagram for NaiveBayes:

Public Member Functions
	NaiveBayes (const std::vector< MultidimArray< double > > &features, const Matrix1D< double > &priorProbs, int discreteLevels)
	Constructor. More...

void	setCostMatrix (const Matrix2D< double > &cost)
	Set cost matrix. More...

int	doInference (const MultidimArray< double > &newFeatures, double &cost, Matrix1D< double > &classesProbs, Matrix1D< double > &allCosts)

Public Attributes
int	K
	Number of classes. More...

int	Nfeatures
	Number of features. More...

Matrix1D< double >	__priorProbsLog10
	Base-10 logarithms of the prior probabilities of the classes. More...

MultidimArray< double >	__weights
	Weight of each feature. More...

std::vector< LeafNode >	__leafs
	The vector containing the Leafs (as many leafs as features) More...

Matrix2D< double >	__cost

Friends
std::ostream &	operator<< (std::ostream &_out, const NaiveBayes &naive)
	Show. More...

Detailed Description

Naive Bayes classifier class.

Definition at line 80 of file naive_bayes.h.

Constructor & Destructor Documentation

◆ NaiveBayes()

NaiveBayes::NaiveBayes	(	const std::vector< MultidimArray< double > > &	features,
		const Matrix1D< double > &	priorProbs,
		int	discreteLevels
	)

Constructor.

Definition at line 280 of file naive_bayes.cpp.

 {
     K = features.size();
     Nfeatures=XSIZE(features[0]);
     __priorProbsLog10.initZeros(K);
     FOR_ALL_ELEMENTS_IN_MATRIX1D(__priorProbsLog10)
     VEC_ELEM(__priorProbsLog10,i)=log10(VEC_ELEM(priorProbs,i));
 
     // Create a dummy leaf for features that cannot classify
     std::vector < MultidimArray<double> > aux(K);
     auto dummyLeaf = LeafNode(aux,0);
 
     // Build a leafnode for each feature and assign a weight
     __weights.initZeros(Nfeatures);
     for (int f=0; f<Nfeatures; f++)
     {
         for (int k=0; k<K; k++)
             features[k].getCol(f, aux[k]);
         auto leaf = LeafNode(aux,discreteLevels);
         if (leaf.__discreteLevels>0)
         {
             __leafs.push_back(leaf);
             DIRECT_A1D_ELEM(__weights,f)=__leafs[f].computeWeight();
         }
         else
         {
             __leafs.push_back(dummyLeaf);
             DIRECT_A1D_ELEM(__weights,f)=0;
         }
 #ifdef DEBUG_WEIGHTS
 
         if(debugging == true)
         {
             std::cout << "Node " << f << std::endl;
             std::cout << *(__leafs[f]) << std::endl;
             //char c;
             //std::cin >> c;
         }
 #endif
 
     }
     double norm=__weights.computeMax();
     if (norm>0)
         __weights *= 1.0/norm;
 
     // Set default cost matrix
     __cost.resizeNoCopy(K,K);
     __cost.initConstant(1);
     for (int i=0; i<K; i++)
         MAT_ELEM(__cost,i,i)=0;
 }

Member Function Documentation

◆ doInference()

int NaiveBayes::doInference	(	const MultidimArray< double > &	newFeatures,
		double &	cost,
		Matrix1D< double > &	classesProbs,
		Matrix1D< double > &	allCosts
	)

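Returns the class with the lowest cost given a set of features; with the default cost matrix this is the class with the largest probability. classesProbs and allCosts are caller-provided vectors that are overwritten on every call (with the normalized class probabilities and the log10 costs, respectively), so that memory is not reallocated each time.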

Definition at line 345 of file naive_bayes.cpp.

 {
     classesProbs=__priorProbsLog10;
     for(int f=0; f<Nfeatures; f++)
     {
         const LeafNode &leaf_f = __leafs[f];
         double newFeatures_f=DIRECT_A1D_ELEM(newFeatures,f);
         for (int k=0; k<K; k++)
         {
             double p = leaf_f.assignProbability(newFeatures_f, k);
 
             if (fabs(p) < 1e-2)
                 VEC_ELEM(classesProbs,k) += -2*DIRECT_A1D_ELEM(__weights,f);
             else
                 VEC_ELEM(classesProbs,k) += DIRECT_A1D_ELEM(__weights,f)*std::log10(p);
 
 #ifdef DEBUG_FINE_CLASSIFICATION
 
             if(debugging == true)
             {
                 std::cout << "Feature " << f
                 << " Probability for class " << k << " = "
                 << classesProbs(k) << " increase= " << p
                 << std::endl;
                 char c;
                 // COSS                    std::cin >> c;
                 //                    if (c=='q') debugging = false;
             }
 #endif
 
         }
     }
 
     classesProbs-=classesProbs.computeMax();
     //    std::cout << "classesProbs " << classesProbs.transpose() << std::endl;
 
     for (int k=0; k<K; k++)
         VEC_ELEM(classesProbs,k)=pow(10.0,VEC_ELEM(classesProbs,k));
     classesProbs*=1.0/classesProbs.sum();
     //    std::cout << "classesProbs norm " << classesProbs.transpose() << std::endl;
 
     allCosts=__cost*classesProbs;
     //    std::cout << "allCosts " << allCosts.transpose() << std::endl;
 
     int bestk=0;
     cost=VEC_ELEM(allCosts,0)=std::log10(VEC_ELEM(allCosts,0));
     for (int k=1; k<K; k++)
     {
         VEC_ELEM(allCosts,k)=std::log10(VEC_ELEM(allCosts,k));
         if (VEC_ELEM(allCosts,k)<cost)
         {
             cost=VEC_ELEM(allCosts,k);
             bestk=k;
         }
     }
 
 #ifdef DEBUG_CLASSIFICATION
     if(debugging == true)
     {
         for (int k=0; k<K; k++)
             classesProbs(k)=log10(classesProbs(k));
         std::cout << "Class probababilities=" << classesProbs.transpose()
         << "\n  costs=" << allCosts.transpose()
         << "  best class=" << bestk << " cost=" << cost << std::endl;
         char c;
         // COSS std::cin >> c;
         // if (c=='q') debugging = false;
     }
 #endif
     return bestk;
 }

◆ setCostMatrix()

void NaiveBayes::setCostMatrix ( const Matrix2D< double > & cost )

Set cost matrix.

Definition at line 336 of file naive_bayes.cpp.

 {
     auto iK=(size_t) K;
     if (MAT_XSIZE(cost)!=iK || MAT_YSIZE(cost)!=iK)
         REPORT_ERROR(ERR_MULTIDIM_SIZE,"Cost matrix does not have the appropriate size");
     __cost=cost;
 }

Friends And Related Function Documentation

◆ operator<<

std::ostream& operator<<	(	std::ostream &	_out,
		const NaiveBayes &	naive
	)

friend

Show.

Definition at line 419 of file naive_bayes.cpp.

 {
     for (int f=0; f<naive.Nfeatures; f++)
     {
         _out << "Node " << f << std::endl;
         _out << naive.__leafs[f] << std::endl;
     }
     return _out;
 }

Member Data Documentation

◆ __cost

Matrix2D<double> NaiveBayes::__cost

Cost matrix C(i,j) is the cost of predicting class j when the true class is class i.

Definition at line 101 of file naive_bayes.h.

◆ __leafs

std::vector<LeafNode> NaiveBayes::__leafs

The vector containing the leaves (one leaf per feature).

Definition at line 96 of file naive_bayes.h.

◆ __priorProbsLog10

Matrix1D<double> NaiveBayes::__priorProbsLog10

Base-10 logarithms of the prior probabilities of the classes.

Definition at line 90 of file naive_bayes.h.

◆ __weights

MultidimArray<double> NaiveBayes::__weights

Weight of each feature.

Definition at line 93 of file naive_bayes.h.

◆ K

int NaiveBayes::K

Number of classes.

Definition at line 84 of file naive_bayes.h.

◆ Nfeatures

int NaiveBayes::Nfeatures

Number of features.

Definition at line 87 of file naive_bayes.h.

The documentation for this class was generated from the following files:

xmipp/libraries/classification/naive_bayes.h
xmipp/libraries/classification/naive_bayes.cpp

Public Member Functions

Public Attributes

Friends