52 <<
"Input selfile: " <<
fnSel << std::endl
53 <<
"Output selfile: " <<
fnOut << std::endl
54 <<
"Number of clusters: " <<
K << std::endl
55 <<
"Filename with clusters: " <<
fnClusters << std::endl
56 <<
"Filename with points: " <<
fnPoints << std::endl
57 <<
"Threshold for number of particles: " <<
maxObjects << std::endl
65 addParamsLine(
" -i <selfile> : Selfile containing images to be clustered");
66 addParamsLine(
" [-o <image=\"output.xmd\">] : Output selfile");
68 addParamsLine(
" [-c <image=\"clusters.xmd\">] : Filename with clusters");
69 addParamsLine(
" [-p <image=\"points.xmd\">] : Filename with points");
70 addParamsLine(
" [-m <int=\"-1\">] : Threshold for number of particles after which the position of clusters will be fixed");
79 std::vector<double> values;
83 KPoint(
int id_point, std::vector<double>& values)
85 this->id_point = id_point;
86 total_values = values.size();
88 for (
int i = 0;
i < total_values;
i++)
89 this->values.push_back(values[
i]);
101 this->id_cluster = id_cluster;
111 return values[
index];
121 values.push_back(value);
130 std::vector<double> central_values;
131 std::vector<KPoint> points;
136 this->id_cluster = id_cluster;
140 for (
int i = 0;
i < total_values;
i++)
141 central_values.push_back(point.
getValue(
i));
143 points.push_back(point);
148 points.push_back(point);
153 int total_points = points.size();
155 for (
int i = 0;
i < total_points;
i++)
157 if(points[
i].getID() == id_point)
159 points.erase(points.begin() +
i);
168 return central_values[
index];
173 central_values[
index] = value;
178 return points[
index];
183 return points.size();
197 int total_values, total_points, maxIterations,
maxObjects;
198 std::vector<Cluster> clusters;
203 double sum = 0.0, min_dist;
204 int id_cluster_center = 0;
206 for (
int i = 0;
i < total_values;
i++)
207 sum += ((clusters[0].getCentralValue(
i) - point.
getValue(
i)) *
208 (clusters[0].getCentralValue(
i) - point.
getValue(
i)));
210 min_dist =
sqrt(sum);
212 for (
int i = 1;
i <
K;
i++)
217 for (
int j = 0;
j < total_values;
j++)
218 sum += ((clusters[
i].getCentralValue(
j) - point.
getValue(
j)) *
219 (clusters[
i].getCentralValue(
j) - point.
getValue(
j)));
226 id_cluster_center =
i;
230 return id_cluster_center;
234 KMeans(
int K,
int total_points,
int total_values,
int maxIterations,
238 this->total_points = total_points;
239 this->total_values = total_values;
240 this->maxIterations = maxIterations;
244 std::vector<Cluster>
run(std::vector<KPoint> & points,
248 std::vector<int> prohibited_indexes;
249 std::random_device rd;
250 std::mt19937 gen(rd());
251 std::uniform_int_distribution<> udistr(0,total_points-1);
253 if (total_points == 0)
255 std::ostringstream msg;
256 msg <<
"Division by zero: total_points == 0";
257 throw std::runtime_error(msg.str());
260 for (
int i = 0;
i <
K;
i++)
264 int index_point = udistr(gen);
266 if (
find(prohibited_indexes.begin(), prohibited_indexes.end(),
267 index_point) == prohibited_indexes.end())
269 prohibited_indexes.push_back(index_point);
270 points[index_point].setCluster(
i);
271 Cluster cluster(
i, points[index_point]);
272 clusters.push_back(cluster);
279 std::fstream savedClusters(fnClusters.c_str());
280 if (savedClusters.good())
283 for (
int i = 0;
i <
K;
i++)
285 std::getline(savedClusters, line);
286 std::stringstream ss(line);
288 for (
int j = 0;
j < total_values;
j++)
291 clusters[
i].setCentralValue(
j, point_value);
303 if ((maxObjects != -1) && (total_points > maxObjects))
305 for (
int i = 0;
i < total_points;
i++)
307 int nearest_center = getIDNearestCenter(points[
i]);
308 points[
i].setCluster(nearest_center);
309 clusters[nearest_center].addPoint(points[i]);
315 for (
int i = 0;
i < total_points;
i++)
317 int old_cluster = points[
i].getCluster();
318 int nearest_center = getIDNearestCenter(points[
i]);
320 if (old_cluster != nearest_center)
322 if (old_cluster != -1)
323 clusters[old_cluster].removePoint(points[i].getID());
325 points[
i].setCluster(nearest_center);
326 clusters[nearest_center].addPoint(points[i]);
332 for (
int i = 0;
i <
K;
i++)
334 for (
int j = 0;
j < total_values;
j++)
336 int total_points = clusters[
i].getTotalPoints();
339 if (total_points > 0)
341 for (
int p = 0; p < total_points; p++)
342 sum += clusters[
i].getPoint(p).getValue(
j);
344 clusters[
i].setCentralValue(
j, sum / total_points);
349 if (done ==
true || iter >= maxIterations)
break;
391 std::ofstream saveData;
394 saveData.open(fnClusters.c_str());
395 for (
int i = 0;
i <
K;
i++)
397 for (
int j = 0;
j < total_values;
j++)
398 saveData << clusters[
i].getCentralValue(
j) <<
" ";
399 saveData << std::endl;
404 saveData.open(fnPoints.c_str());
405 for (
int i = 0;
i < total_points;
i++)
407 for (
int j = 0;
j < total_values;
j++)
408 saveData << points[
i].getValue(
j) <<
" ";
409 saveData << std::endl;
424 std::vector<std::vector<double> > fvs;
425 std::vector<double> fv, fv_temp;
426 std::vector<KPoint> points;
427 std::vector<Cluster> clusters;
429 srand (time(
nullptr));
433 for (
size_t objId : SF.
ids())
437 I().setXmippOrigin();
450 double min_item, max_item;
451 for(
int i = 0;
i < fv.size();
i++)
454 for (
int j = 0;
j < fvs.size();
j++)
455 fv_temp.push_back(fvs[
j][
i]);
457 max_item = *std::max_element(fv_temp.begin(), fv_temp.end());
458 min_item = *std::min_element(fv_temp.begin(), fv_temp.end());
459 for (
int j = 0;
j < fvs.size();
j++)
460 fvs[
j][i] = (fvs[
j][i] - min_item) / (max_item - min_item);
464 for (
size_t objId : SF.
ids())
467 KPoint p(allItems, fvs.front());
469 fvs.erase(fvs.begin());
473 std::size_t extraPath =
fnSel.find_last_of(
"/");
477 fnallDone =
fnSel.substr(0, extraPath+1) +
"allDone.xmd";
480 std::vector<double> fv_load;
481 std::fstream savedPoints(
fnPoints.c_str());
483 while (savedPoints.good())
485 std::getline(savedPoints, line);
486 if (line.size() < 2)
break;
488 std::stringstream ss(line);
491 for (
int j = 0;
j < fv.size();
j++)
494 fv_load.push_back(point_value);
496 KPoint p(allItems, fv_load);
505 for (
int i = 0;
i < clusters.size();
i++)
507 size_t total_points_cluster = clusters[
i].getTotalPoints();
513 std::ostringstream clusterValues;
514 clusterValues <<
"[";
515 for (
int j = 0;
j < fv.size()-1;
j++)
516 clusterValues << clusters[
i].getCentralValue(
j) <<
", ";
517 clusterValues << clusters[
i].getCentralValue(fv.size()-1) <<
"]";
523 std::ifstream
f(fnallDone.c_str());
524 if (
f.good()) MDallDone.
read(fnallDone);
526 for (
int j = 0;
j < total_points_cluster;
j++)
529 MDallDone.
getRow(row, clusters[
i].getPoint(
j).getID());
530 size_t recId = MDclass.
addRow(row);
double getCentralValue(int index)
void centerImageTranslationally(MultidimArray< double > &I, CorrelationAux &aux)
void sqrt(Image< double > &op)
std::vector< SelLine >::iterator find(std::vector< SelLine > &text, const std::string &img_name)
KMeans(int K, int total_points, int total_values, int maxIterations, int maxObjects)
void readParams()
Read argument.
KPoint(int id_point, std::vector< double > &values)
KPoint getPoint(int index)
void addPoint(KPoint point)
const char * getParam(const char *param, int arg=0)
void setCentralValue(int index, double value)
bool removePoint(int id_point)
double getValue(int index)
int verbose
Verbosity level.
void defineParams()
Define parameters.
void addValue(double value)
String formatString(const char *format,...)
int read(const FileName &name, DataMode datamode=DATA, size_t select_img=ALL_IMAGES, bool mapData=false, int mode=WRITE_READONLY)
Cluster(int id_cluster, KPoint point)
void addUsageLine(const char *line, bool verbatim=false)
std::vector< Cluster > run(std::vector< KPoint > &points, FileName fnClusters, FileName fnPoints)
int getIntParam(const char *param, int arg=0)
void setCluster(int id_cluster)
void addParamsLine(const String &line)