Xmipp  v3.23.11-Nereus
knn_classifier.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  *
3  * Authors: Vahid Abrishami (vabrishami@cnb.csic.es)
4  *
5  * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20  * 02111-1307 USA
21  *
22  * All comments concerning this program package may be sent to the
23  * e-mail address 'xmipp@cnb.csic.es'
24  ***************************************************************************/
25 
26 #include "knn_classifier.h"
27 
28 KNN::KNN(int k)
29 {
30  setK(k);
31  neighborsIndex.resize(1,1,1,K);
32 }
33 
35  MultidimArray<double> &labelset)
36 {
37  __dataset=dataset;
38  __dataLabel=dataLabel;
39  __labelSet=labelset;
40 }
41 
43 {
44  double maximum;
45  double distance;
46  int maximumIndex;
47  maxDist.resize(1,1,1,K);
49  {
50  DIRECT_A1D_ELEM(maxDist,i)=euclideanDistance(sample,i,-1.0);
51  DIRECT_A1D_ELEM(neighborsIndex,i)=i;
52  }
53  maximumIndex=findMaxIndex(maxDist);
54  maximum=DIRECT_A1D_ELEM(maxDist,maximumIndex);
55  for (size_t i=K;i<YSIZE(__dataset);i++)
56  {
57  distance=euclideanDistance(sample,i,maximum);
58  if (distance==-1)
59  continue;
60  if (distance<maximum)
61  {
62  DIRECT_A1D_ELEM(maxDist,maximumIndex)=distance;
63  DIRECT_A1D_ELEM(neighborsIndex,maximumIndex)=i;
64  maximumIndex=findMaxIndex(maxDist);
65  maximum=DIRECT_A1D_ELEM(maxDist,maximumIndex);
66  }
67  }
68 }
69 
70 int KNN::predict(MultidimArray<double> &sample,double &score)
71 {
72  MultidimArray<double> voteArray;
73  int index;
74  voteArray.initZeros(XSIZE(__labelSet));
75  KNearestNeighbors(sample);
77  {
78  index=DIRECT_A1D_ELEM(neighborsIndex,i);
79  for (size_t j=0;j<XSIZE(__labelSet);++j)
80  if (DIRECT_A1D_ELEM(__labelSet,j)==DIRECT_A1D_ELEM(__dataLabel,index))
81  DIRECT_A1D_ELEM(voteArray,j)+=1;
82  }
83  index=findMaxIndex(voteArray);
84  score=DIRECT_A1D_ELEM(voteArray,index)/double(K);
85  if (DIRECT_A1D_ELEM(voteArray,index)>(K*0.5))
86  return (int)DIRECT_A1D_ELEM(__labelSet,index);
87  index=findMinIndex(maxDist);
88  return (int)DIRECT_A1D_ELEM(__dataLabel,DIRECT_A1D_ELEM(neighborsIndex,index));
89 }
90 
91 int KNN::findMaxIndex(MultidimArray<double> &inputArray)
92 {
93  double maximum;
94  int maximumIndex;
95  maximum=DIRECT_A1D_ELEM(inputArray,0);
96  maximumIndex=0;
97  for (size_t i=1;i<XSIZE(inputArray);i++)
98  if (maximum<DIRECT_A1D_ELEM(inputArray,i))
99  {
100  maximum=DIRECT_A1D_ELEM(inputArray,i);
101  maximumIndex=i;
102  }
103  return maximumIndex;
104 }
105 
106 int KNN::findMinIndex(MultidimArray<double> &inputArray)
107 {
108  double minimum;
109  int minimumIndex;
110  minimum=DIRECT_A1D_ELEM(inputArray,0);
111  minimumIndex=0;
112  for (size_t i=1;i<XSIZE(inputArray);i++)
113  if (minimum>DIRECT_A1D_ELEM(inputArray,i))
114  {
115  minimum=DIRECT_A1D_ELEM(inputArray,i);
116  minimumIndex=i;
117  }
118  return minimumIndex;
119 }
120 
121 double KNN::euclideanDistance(MultidimArray<double> &sample,int index,double maximumDist)
122 {
123  double dist=0;
125  {
126  double tmp=DIRECT_A1D_ELEM(sample,i)-DIRECT_A2D_ELEM(__dataset,index,i);
127  dist+=tmp*tmp;
128  if (maximumDist>0 && sqrt(dist)>maximumDist)
129  return -1.0;
130  }
131  return sqrt(dist);
132 }
133 
134 #ifdef UNUSED // detected as unused 29.6.2018
135 double KNN::cityBlockDistance(MultidimArray<double> &sample,int index,double maximumDist)
136 {
137  double dist=0;
139  {
140  double tmp=fabs(DIRECT_A1D_ELEM(sample,i)-DIRECT_A2D_ELEM(__dataset,index,i));
141  dist+=tmp;
142  if (maximumDist>0 && dist>maximumDist)
143  return -1.0;
144  }
145  return dist;
146 }
147 
148 void KNN::saveModel(const FileName &fn)
149 {
150  std::ofstream fh;
151  fh.open(fn.c_str());
152  fh<<XSIZE(__labelSet)<<std::endl;
154  fh<<DIRECT_A1D_ELEM(__labelSet,i)<<" ";
155  fh<<std::endl;
156  fh<<YSIZE(__dataset)<<" "<<XSIZE(__dataset)<<std::endl;
157  for (size_t i=0;i<YSIZE(__dataset);++i)
158  {
159  fh<<DIRECT_A1D_ELEM(__dataLabel,i)<<" ";
160  for (size_t j=0;j<XSIZE(__dataset);++j)
161  {
162  fh<<DIRECT_A2D_ELEM(__dataset,i,j)<<" ";
163  }
164  fh<<std::endl;
165  }
166  fh.close();
167 }
168 
169 void KNN::loadModel(const FileName &fn)
170 {
171  std::ifstream fh;
172  fh.open(fn.c_str());
173  int x,y;
174  fh>>x;
175  __labelSet.resize(1,1,1,x);
177  fh>>DIRECT_A1D_ELEM(__labelSet,i);
178  fh>>y>>x;
179  __dataset.resize(1,1,y,x);
180  __dataLabel.resize(1,1,1,y);
181  for (size_t i=0;i<YSIZE(__dataset);++i)
182  {
183  fh>>DIRECT_A1D_ELEM(__dataLabel,i);
184  for (size_t j=0;j<XSIZE(__dataset);++j)
185  {
186  fh>>DIRECT_A2D_ELEM(__dataset,i,j);
187  }
188  }
189  fh.close();
190 }
191 #endif
192 
193 void KNN::setK(int k)
194 {
195  K=k;
196 }
197 
198 
#define YSIZE(v)
void resize(size_t Ndim, size_t Zdim, size_t Ydim, size_t Xdim, bool copy=true)
void sqrt(Image< double > &op)
static double * y
#define DIRECT_A2D_ELEM(v, i, j)
void train(MultidimArray< double > &dataset, MultidimArray< double > &dataLabel, MultidimArray< double > &labelset)
#define FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY1D(v)
doublereal * x
#define i
ql0001_ & k(htemp+1),(cvec+1),(atemp+1),(bj+1),(bl+1),(bu+1),(x+1),(clamda+1), &iout, infoqp, &zero,(w+1), &lenw,(iw+1), &leniw, &glob_grd.epsmac
#define DIRECT_A1D_ELEM(v, i)
viol index
void setK(int k)
Method for setting the K.
#define XSIZE(v)
void KNearestNeighbors(MultidimArray< double > &sample)
Compute the K nearest neighbors to the sample.
#define j
TYPE distance(struct Point_T *p, struct Point_T *q)
Definition: point.cpp:28
KNN(int k)
void initZeros(const MultidimArray< T1 > &op)
int predict(MultidimArray< double > &sample, double &score)