Xmipp  v3.23.11-Nereus
Public Member Functions | Public Attributes | Friends | List of all members

#include <naive_bayes.h>

Public Member Functions

 LeafNode (const std::vector< MultidimArray< double > > &leafFeatures, int _discrete_levels=8)
 Constructor. More...
 
double assignProbability (double value, int k) const
 Assign probability to a value within the PDF of a given class. More...
 
double computeWeight () const
 Compute weight of this leaf as a classifier. More...
 

Public Attributes

int __discreteLevels
 
std::vector< IrregularHistogram1D > __leafPDF
 
int K
 

Friends

std::ostream & operator<< (std::ostream &_out, const LeafNode &leaf)
 Show. More...
 

Detailed Description

Leaf node class.

This class is for a single feature. After receiving a set of features, it discretizes the features and builds the probability density function in an irregular histogram. There is an irregular histogram for each class.

Definition at line 45 of file naive_bayes.h.

Constructor & Destructor Documentation

◆ LeafNode()

LeafNode::LeafNode ( const std::vector< MultidimArray< double > > &  leafFeatures,
int  _discrete_levels = 8 
)

Constructor.

Definition at line 127 of file naive_bayes.cpp.

129 {
130  __discreteLevels = discrete_levels;
131  K = leafFeatures.size();
132  if (__discreteLevels==0)
133  {
134  // This is a dummy node for features that cannot classify
135  MultidimArray<int> newBins(1);
136  A1D_ELEM(newBins,0)=0;
137  Histogram1D hist;
138  hist.resize(1);
139  A1D_ELEM(hist,0)=1;
140  IrregularHistogram1D irregHist;
141  for (int k=0; k<K; k++)
142  {
143  irregHist.init(hist, newBins);
144  irregHist.selfNormalize();
145  __leafPDF.push_back(irregHist);
146  }
147  }
148  else
149  {
150  // Compute the minimum and maximum of each class
151  double minval=0.;
152  double maxval=0.;
153  for(int k=0; k<K; k++)
154  {
155  double minvalk=0.;
156  double maxvalk=0.;
157  leafFeatures[k].computeDoubleMinMax(minvalk, maxvalk);
158  if (k==0)
159  {
160  minval=minvalk;
161  maxval=maxvalk;
162  }
163  else
164  {
165  minval=std::min(minval,minvalk);
166  maxval=std::max(maxval,maxvalk);
167  }
168  }
169  if (minval==maxval)
170  {
172  return;
173  }
174 
175  // Compute the PDF of each class
176  std::vector<Histogram1D> hist(K);
177  for (int k=0; k<K; k++)
178  {
179  // There is variation of this feature for this class
180  compute_hist(leafFeatures[k], hist[k], minval, maxval, 100);
181  hist[k] += 1; // Apply Laplace correction
182  }
183 
184  // Split the histograms into discrete_level (power of 2) bins
185  std::queue< Matrix1D<int> > intervals;
186  std::queue< Matrix1D<int> > splittedIntervals;
187  Matrix1D<int> limits(2);
188  VECTOR_R2(limits,0,99);
189  intervals.push(limits);
190  int imax=ROUND(log2(__discreteLevels));
191  for (int i=0; i<imax; i++)
192  {
193  // Split all the intervals in the queue
194  while (!intervals.empty())
195  {
196  Matrix1D<int> currentInterval = intervals.front();
197  intervals.pop();
198  int lsplit = splitHistogramsUsingEntropy(hist,
199  currentInterval(0), currentInterval(1));
200  VECTOR_R2(limits,currentInterval(0),lsplit);
201  splittedIntervals.push(limits);
202  VECTOR_R2(limits,lsplit+1, currentInterval(1));
203  splittedIntervals.push(limits);
204  }
205 
206  // Copy the splitted intervals to the interval list
207  while (!splittedIntervals.empty())
208  {
209  intervals.push(splittedIntervals.front());
210  splittedIntervals.pop();
211  }
212  }
213 
214  // Compute the bins of the split
216  imax=intervals.size();
217  for (int i=0; i<imax; i++)
218  {
219  if (i<__discreteLevels)
220  A1D_ELEM(newBins,i) = intervals.front()(1);
221  intervals.pop();
222  }
223 
224  // Compute now the irregular histograms
225  IrregularHistogram1D irregHist;
226  for (int k=0; k<K; k++)
227  {
228  irregHist.init(hist[k], newBins);
229  irregHist.selfNormalize();
230  __leafPDF.push_back(irregHist);
231  }
232  }
233 }
#define VECTOR_R2(v, x, y)
Definition: matrix1d.h:112
double log2(double x, ae_state *_state)
void min(Image< double > &op1, const Image< double > &op2)
void resize(size_t Ndim, size_t Zdim, size_t Ydim, size_t Xdim, bool copy=true)
int __discreteLevels
Definition: naive_bayes.h:49
void init(const Histogram1D &oldHistogram, const MultidimArray< int > &bins)
Initialize class.
Definition: histogram.cpp:393
#define A1D_ELEM(v, i)
int splitHistogramsUsingEntropy(const std::vector< Histogram1D > &hist, size_t l0, size_t lF)
Definition: naive_bayes.cpp:43
#define i
ql0001_ & k(htemp+1),(cvec+1),(atemp+1),(bj+1),(bl+1),(bu+1),(x+1),(clamda+1), &iout, infoqp, &zero,(w+1), &lenw,(iw+1), &leniw, &glob_grd.epsmac
void compute_hist(const MultidimArrayGeneric &array, Histogram1D &hist, int no_steps)
Definition: histogram.cpp:572
void max(Image< double > &op1, const Image< double > &op2)
#define ROUND(x)
Definition: xmipp_macros.h:210
void selfNormalize()
Normalize to be a probability density function.
Definition: histogram.cpp:451
std::vector< IrregularHistogram1D > __leafPDF
Definition: naive_bayes.h:52

Member Function Documentation

◆ assignProbability()

double LeafNode::assignProbability ( double  value,
int  k 
) const

Assign probability to a value within the PDF of a given class.

Definition at line 244 of file naive_bayes.cpp.

245 {
246  const IrregularHistogram1D& hist=__leafPDF[k];
247  int index = hist.val2Index(value);
248  return DIRECT_A1D_ELEM(hist.__hist,index);
249 }
ql0001_ & k(htemp+1),(cvec+1),(atemp+1),(bj+1),(bl+1),(bu+1),(x+1),(clamda+1), &iout, infoqp, &zero,(w+1), &lenw,(iw+1), &leniw, &glob_grd.epsmac
int val2Index(double value) const
Return the index corresponding to a certain value.
Definition: histogram.cpp:411
#define DIRECT_A1D_ELEM(v, i)
viol index
std::vector< IrregularHistogram1D > __leafPDF
Definition: naive_bayes.h:52
Histogram1D __hist
Definition: histogram.h:389

◆ computeWeight()

double LeafNode::computeWeight ( ) const

Compute weight of this leaf as a classifier.

Definition at line 252 of file naive_bayes.cpp.

253 {
254  double retval=0;
255  for (int k1=0; k1<K; k1++)
256  for (int k2=0; k2<K; k2++)
257  {
258  if (k1==k2)
259  continue;
260  retval+=KLDistance(
261  __leafPDF[k1].getHistogram(),
262  __leafPDF[k2].getHistogram());
263  }
264  return (retval/(K*K-K));
265 }
double KLDistance(const Histogram1D &h1, const Histogram1D &h2)
Definition: histogram.cpp:377
std::vector< IrregularHistogram1D > __leafPDF
Definition: naive_bayes.h:52

Friends And Related Function Documentation

◆ operator<<

std::ostream& operator<< ( std::ostream &  _out,
const LeafNode &  leaf 
)
friend

Show.

Definition at line 268 of file naive_bayes.cpp.

269 {
270  for (int k=0; k<leaf.K; k++)
271  _out << "Histogram of class " << k << "=\n"
272  << leaf.__leafPDF[k] << std::endl;
273  _out << "Classification power=" << leaf.computeWeight() << std::endl;
274  return _out;
275 }
ql0001_ & k(htemp+1),(cvec+1),(atemp+1),(bj+1),(bl+1),(bu+1),(x+1),(clamda+1), &iout, infoqp, &zero,(w+1), &lenw,(iw+1), &leniw, &glob_grd.epsmac
double computeWeight() const
Compute weight of this leaf as a classifier.
std::vector< IrregularHistogram1D > __leafPDF
Definition: naive_bayes.h:52

Member Data Documentation

◆ __discreteLevels

int LeafNode::__discreteLevels

Definition at line 49 of file naive_bayes.h.

◆ __leafPDF

std::vector< IrregularHistogram1D > LeafNode::__leafPDF

Definition at line 52 of file naive_bayes.h.

◆ K

int LeafNode::K

Definition at line 55 of file naive_bayes.h.


The documentation for this class was generated from the following files: