Xmipp  v3.23.11-Nereus
selfile.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  *
3  * Authors: Carlos Oscar S. Sorzano (coss@cnb.csic.es)
4  *
5  * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC
6  *
7  * Part of this module has been developed by Lorenzo Zampighi and Nelson Tang
8  * Dept. Physiology of the David Geffen School of Medicine
9  * Univ. of California, Los Angeles.
10  *
11  * This program is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License as published by
13  * the Free Software Foundation; either version 2 of the License, or
14  * (at your option) any later version.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  * GNU General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License
22  * along with this program; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
24  * 02111-1307 USA
25  *
26  * All comments concerning this program package may be sent to the
27  * e-mail address 'xmipp@cnb.csic.es'
28  ***************************************************************************/
29 #include <fstream>
30 #include <stdio.h>
31 #include <string.h>
32 #include <algorithm>
33 
34 #include <core/xmipp_funcs.h>
35 #include "selfile.h"
36 #include <core/xmipp_image.h>
37 
38 /*****************************************************************************/
39 /* SEL FILE LINE */
40 /*****************************************************************************/
41 void SelLine::assign(const SelLine &SL)
42 {
43  *this = SL;
44 }
45 
46 // Returns false if the comparison is nonsense, ie, if line_types are not the
47 // same or the lines are to be deleted or are not assigned.
48 // Returns TRUE if l1<l2 otherwise false
49 bool operator < (const SelLine &l1, const SelLine &l2)
50 {
51  if (l1.line_type < l2.line_type)
52  return 1;
53  else if (l1.line_type > l2.line_type)
54  return 0;
55  else
56  return l1.text < l2.text;
57 }
58 
59 std::ostream& operator << (std::ostream& o, const SelLine &SFL)
60 {
61  switch (SFL.line_type)
62  {
63  case SelLine::DATALINE:
64  o << SFL.text << " " << SFL.label << std::endl;
65  break;
66  case SelLine::COMMENT:
67  o << SFL.text << std::endl;
68  break;
69  default:
70  break;
71  }
72  return o;
73 }
74 
76 {
77  std::string line;
78  char img_name[1024];
79  int no_elements_read;
80  int label;
81 
82  // Get line
83  getline(o, line);
84 
85  // Initialise target
86  SFL.line_type = SelLine::NOT_ASSIGNED;
87  SFL.text = "";
88  SFL.label = SelLine::DISCARDED;
89  if (line.length() == 0)
90  return o;
91 
92  // Check if comment or empty line
93  if (line[0] == '#' || line[0] == '\0' || line[0] == ';')
94  {
95  line[line.length()-1] = '\0';
96  SFL.line_type = SelLine::COMMENT;
97  SFL.text = line;
98 
99  // Check if a true "filename label" line
100  }
101  else
102  {
103  no_elements_read = sscanf(line.c_str(), "%s %d", img_name, &label);
104  // *** THE SSCANF CAN BE REPLACED BY A STRING I/O OPERATION
105  if (no_elements_read == 2)
106  {
107  SFL.line_type = SelLine::DATALINE;
108  SFL.text = img_name;
109  SFL.label = (label >= 0) ? SelLine::ACTIVE : SelLine::DISCARDED;
110  SFL.number = label;
111  }
112  else
113  REPORT_ERROR(ERR_SELFILE, "Format error when reading Selection line");
114  }
115  return o;
116 }
117 
118 /*****************************************************************************/
119 /* SEL FILE */
120 /*****************************************************************************/
121 /* Constructor ------------------------------------------------------------- */
123 {
124  fn_sel = "Unnamed";
125  no_imgs = 0;
126  current_line = text_line.begin();
127 }
128 
129 /* Clear ------------------------------------------------------------------- */
131 {
132  fn_sel = "Unnamed";
133  text_line.erase(text_line.begin(), text_line.end());
134  no_imgs = 0;
135  current_line = text_line.begin();
136 }
137 
138 /* Another function for assignment ----------------------------------------- */
139 void SelFile::assign(const SelFile &SF)
140 {
141  *this = SF;
142 }
143 
144 /* Show Sel file ----------------------------------------------------------- */
145 std::ostream& operator << (std::ostream& o, const SelFile &SF)
146 {
147  std::vector<SelLine>::const_iterator current = SF.text_line.begin();
148  std::vector<SelLine>::const_iterator last = SF.text_line.end();
149  while (current != last)
150  {
151  o << *current;
152  current++;
153  }
154  return o;
155 }
156 
157 /* Clean ------------------------------------------------------------------- */
159 {
160  std::vector<SelLine>::iterator current = text_line.begin();
161  while (current != text_line.end())
162  {
163  if ((*current).line_type == SelLine::DATALINE &&
164  (*current).label == SelLine::DISCARDED)
165  {
166  text_line.erase(current);
167  }
168  else
169  current++;
170  }
171  current_line = text_line.begin();
172 }
173 
174 /* Clean comments ---------------------------------------------------------- */
176 {
177  std::vector<SelLine>::iterator current = text_line.begin();
178  std::vector<SelLine>::iterator last = text_line.end();
179  std::vector<SelLine>::iterator temp;
180  while (current != last)
181  {
182  if ((*current).line_type == SelLine::COMMENT)
183  {
184  temp = current;
185  temp++;
186  text_line.erase(current);
187  current = temp;
188  }
189  else
190  current++;
191  }
192  current_line = text_line.begin();
193 }
194 
195 /* Read -------------------------------------------------------------------- */
196 void SelFile::read(const FileName &sel_name, int overriding)
197 {
198  SelLine temp;
199  std::ifstream fh_sel;
200  int line_no = 1;
201 
202  // Empties current SelFile
203  if (overriding)
204  clear();
205 
206  // Open file
207  else
208  {
209  // Read normal selfile
210  fh_sel.open(sel_name.c_str(), std::ios::in);
211  if (!fh_sel)
212  REPORT_ERROR(ERR_IO_NOTEXIST, sel_name);
213 
214  // Read each line and keep it in the list of the SelFile object
215  fh_sel.peek();
216  while (!fh_sel.eof())
217  {
218  try
219  {
220  fh_sel >> temp;
221  }
222  catch (XmippError &e)
223  {
224  std::cout << "Sel file: Line " << line_no << " is skipped due to an error\n";
225  }
226  switch (temp.line_type)
227  {
228  case SelLine::NOT_ASSIGNED: break; // Line with an error
229  case SelLine::DATALINE:
230  if (temp.label != SelLine::DISCARDED)
231  no_imgs++;
232  text_line.push_back(temp);
233  break;
234  case SelLine::COMMENT:
235  text_line.push_back(temp);
236  break;
237  default:
238  break;
239  }
240  line_no++;
241  fh_sel.peek();
242  }
243 
244  // Close file
245  fh_sel.close();
246  }
247 
248  // Set "pointer" to the beginning of the file
249  if (overriding)
250  fn_sel = sel_name;
251  go_first_ACTIVE();
252 }
253 /* Merge ------------------------------------------------------------------- */
254 void SelFile::merge(const FileName &sel_name)
255 {
256  SelFile SF(sel_name);
257  *this = *this + SF;
258  go_first_ACTIVE();
259 }
260 
261 /* Write ------------------------------------------------------------------- */
262 void SelFile::write(const FileName &sel_name)
263 {
264  std::ofstream fh_sel;
265  std::vector<SelLine>::iterator current = text_line.begin();
266  std::vector<SelLine>::iterator last = text_line.end();
267 
268  if (strcmp(sel_name.c_str(), "") != 0)
269  fn_sel = sel_name;
270  // Don't use sel_name=="" because it wastes memory
271 #ifdef NEVEREVERROB
272  if (sel_name.find(IMAGIC_TAG) == 0)
273  {
274  // Write Imagic selfile
275  const FileName hed_fname = sel_name.substr(IMAGIC_TAG_LEN);
276  std::vector<Image *> imgs;
277  for (; current != last; current++)
278  {
279  Image *img;
280  if (current->Is_data() && (current->get_label() == SelLine::ACTIVE) &&
281  (img = Image::LoadImage(current->get_text())))
282  imgs.push_back(img);
283  }
284  if (!ImagicWriteImagicFile(hed_fname, imgs))
285  REPORT_ERROR(1553, "Error writing selfile to Imagic file " + sel_name);
286  for (std::vector<Image *>::iterator i = imgs.begin(); i != imgs.end(); i++)
287  delete(*i);
288  }
289  else
290 #endif
291  {
292  // Write Xmipp selfile
293  // Open file
294  fh_sel.open(fn_sel.c_str(), std::ios::out);
295  if (!fh_sel)
296  REPORT_ERROR(ERR_IO_NOWRITE, fn_sel);
297 
298  // Read each line and keep it in the list of the SelFile object
299  while (current != last)
300  fh_sel << *(current++);
301 
302  // Close file
303  fh_sel.close();
304  }
305 }
306 
307 /* Merging with another selfile -------------------------------------------- */
309 {
310  std::vector<SelLine>::iterator current = SF.text_line.begin();
311  std::vector<SelLine>::iterator last = SF.text_line.end();
312  std::vector<SelLine>::iterator found;
313 
314  SelLine discrepancy;
315  discrepancy.line_type = SelLine::COMMENT;
316  discrepancy.text = "# There were discrepancy in the tags for next line, the "
317  "ACTIVE state is kept";
318 
319  while (current != last)
320  {
321  if ((*current).line_type != SelLine::DATALINE)
322  {
323  current++;
324  continue;
325  }
326  if ((found = find((*current).text)) == text_line.end())
327  {
328  // New image not found in the whole Sel File.
329  // Add it if it is not discarded
330  if ((*current).label != SelLine::DISCARDED)
331  {
332  text_line.push_back(*current);
333  no_imgs++;
334  }
335  }
336  else
337  // New image is found, check that its line is not going
338  // to be removed, if it is add it again; else, check if
339  // there is a discrepancy between them
340  if ((*found).label != (*current).label)
341  {
342  if ((*found).label < (*current).label)
343  {
344  (*found).label = SelLine::ACTIVE;
345  no_imgs++;
346  }
347  text_line.insert(found, 1, discrepancy);
348  }
349  current++;
350  }
351 }
352 
353 /* Merging, operator + ----------------------------------------------------- */
354 // If the same file is in both Sel Files the label in the first is kept
356 {
357  SelFile result;
358  result = *this;
359  result.merge(SF);
360  return result;
361 }
362 
363 /* Split randomly in two equally large selfiles ---------------------------- */
365 {
366  SelFile SFtmp;
367  SF1 = *this;
368  SFtmp = SF1.randomize();
369  SF1.clear();
370  int N = SFtmp.ImgNo();
371  SF1.reserve(N);
372  SF2.reserve(N);
373  int half = N / 2;
374  SFtmp.go_beginning();
375  for (int i = 0;i < N; i++)
376  {
377  if (i < half)
378  SF1.insert(SFtmp.current());
379  else
380  SF2.insert(SFtmp.current());
381  if (i < N - 1)
382  SFtmp.NextImg();
383  }
384  SFtmp = SF1.sort_by_filenames();
385  SF1 = SFtmp;
386  SFtmp = SF2.sort_by_filenames();
387  SF2 = SFtmp;
388 }
389 
390 /* Split randomly in N equally large selfiles ------------------------------ */
391 void SelFile::split_in_N(int N, std::vector<SelFile> &SF)
392 {
393  // Randomize input data
394  SelFile SFtmp, SFrnd;
395  SFrnd = *this;
396  SFtmp = SFrnd.randomize();
397  SFtmp.go_beginning();
398  int Nimg = SFtmp.ImgNo();
399  SF.clear();
400 
401  // Create space for all SelFiles
402  for (int n = 0; n < N; n++)
403  {
404  auto *ptr_SF = new SelFile;
405  ptr_SF->reserve(CEIL(Nimg / N));
406  SF.push_back(*ptr_SF);
407  }
408 
409  // Distribute images
410  int n = 0;
411  for (int i = 0;i < Nimg; i++)
412  {
413  SF[n].insert(SFtmp.current());
414  n = (n + 1) % N;
415  if (i < Nimg - 1)
416  SFtmp.NextImg();
417  }
418 
419  // Sort the Selfiles
420  for (int n = 0; n < N; n++)
421  SF[n] = SF[n].sort_by_filenames();
422 }
423 
424 /* Select only part of the selfile for parallel MPI-runs ------------------ */
425 void SelFile::mpi_select_part(int rank, int size, int &num_img_tot)
426 {
427 
428  (*this).clean_comments();
429  (*this).clean();
430  num_img_tot = (*this).ImgNo();
431  int remaining = num_img_tot % size;
432  int Npart = (int)(num_img_tot - remaining) / size;
433  int myFirst, myLast;
434  if (rank < remaining)
435  {
436  myFirst = rank * (Npart + 1);
437  myLast = myFirst + Npart;
438  }
439  else
440  {
441  myFirst = rank * Npart + remaining;
442  myLast = myFirst + Npart - 1;
443  }
444  // Now discard all images in Selfile that are outside myFirst-myLast
445  (*this).go_beginning();
446  SelFile SFpart = *this;
447  SFpart.clear();
448  for (int nr = myFirst; nr <= myLast; nr++)
449  {
450  (*this).go_beginning();
451  (*this).jump_lines(nr);
452  SFpart.insert((*this).current());
453  }
454  *this = SFpart;
455 
456 }
457 /* Select only part of the selfile for parallel MPI-runs ------------------ */
458 void SelFile::mpi_select_part2(int jobNumber,
459  int numberJobs,
460  int &totalNumImg,
461  int mpi_job_size)
462 { // jobNumber process number
463  // total number of processes
464  // total number of images
465 
466  (*this).clean_comments();
467  (*this).clean();
468  totalNumImg = (*this).ImgNo();
469  int myFirst = jobNumber * mpi_job_size;
470  int myLast = myFirst + mpi_job_size-1;
471  while ((myLast+1) > totalNumImg)
472  {
473  myLast = totalNumImg-1;
474  }
475  // Now discard all images in Selfile that are outside myFirst-myLast
476  (*this).go_beginning();
477  SelFile SFpart = *this;
478  SFpart.clear();
479  for (int nr = myFirst; nr <= myLast; nr++)
480  {
481  (*this).go_beginning();
482  (*this).jump_lines(nr);
483  SFpart.insert((*this).current());
484  }
485  *this = SFpart;
486 }
487 
488 /* Choose subset ----------------------------------------------------------- */
489 void SelFile::chooseSubset(int firstImage, int lastImage, SelFile &SFsubset)
490 {
491  SFsubset.clear();
492  go_beginning();
493  jump(firstImage);
494  for (int i=firstImage; i<=lastImage; i++)
495  {
496  if (!eof()) SFsubset.insert(current());
497  next();
498  }
499 }
500 
501 /* Adjust to label --------------------------------------------------------- */
502 void SelFile::adjust_to_label(SelLine::Label label)
503 {
504  if (current_line == text_line.end())
505  return;
506  while ((*current_line).line_type != SelLine::DATALINE ||
507  (*current_line).label != label)
508  {
509  current_line++;
510  if (current_line == text_line.end())
511  return;
512  }
513 }
514 
515 /* Next Image with a certain label ----------------------------------------- */
516 const std::string& SelFile::NextImg(SelLine::Label label)
517 {
518  adjust_to_label(label);
519  static const std::string emptyString;
520  if (current_line != text_line.end())
521  return (*current_line++).text;
522  else
523  return emptyString;
524 }
525 
526 /* Jump over a certain number of data lines (disregarding any label) ------- */
527 bool SelFile::jump_lines(int how_many)
528 {
529  for (int i = 0; i < how_many; i++)
530  {
531  if (current_line != text_line.end())
532  current_line++;
533  else
534  return false;
535  }
536  return true;
537 }
538 
539 /* Jump over images with a certain label ----------------------------------- */
540 void SelFile::jump(int how_many, SelLine::Label label)
541 {
542  adjust_to_label(label);
543  for (int i = 0; i < how_many; i++)
544  if (current_line != text_line.end())
545  {
546  current_line++;
547  adjust_to_label(label);
548  }
549 }
550 
551 /* Find an image (inside the list) ----------------------------------------- */
552 // It returns a pointer to past-last element if the image is not inside
553 std::vector<SelLine>::iterator find(std::vector<SelLine> &text,
554  const std::string &img_name)
555 {
556  std::vector<SelLine>::iterator current = text.begin();
557  std::vector<SelLine>::iterator last = text.end();
558 
559  while (current != last)
560  {
561  if ((*current).line_type == SelLine::DATALINE &&
562  (*current).text == img_name)
563  return current;
564  current++;
565  }
566  return current;
567 }
568 
569 /* Find an image (inside the Sel File) ------------------------------------- */
570 // It returns a pointer to past-last element if the image is not inside
571 // *** THIS SHOULD USE THE PREVIOUS FUNCTION BUT I CANNOT MAKE IT TO COMPILE
572 std::vector<SelLine>::iterator SelFile::find(const std::string &img_name)
573 {
574  std::vector<SelLine>::iterator current = text_line.begin();
575  std::vector<SelLine>::iterator last = text_line.end();
576 
577  while (current != last)
578  {
579  if ((*current).line_type == SelLine::DATALINE &&
580  (*current).text == img_name)
581  return current;
582  current++;
583  }
584  return current;
585 }
586 
587 /* Number of images with a certain label ----------------------------------- */
588 // If the label is 0 it means any valid image
590 {
591  int N = 0;
592  std::vector<SelLine>::const_iterator current = text_line.begin();
593  std::vector<SelLine>::const_iterator last = text_line.end();
594  while (current != last)
595  {
596  if ((*current).line_type == SelLine::DATALINE &&
597  (*current).label == label)
598  N++;
599  current++;
600  }
601  return N;
602 }
603 
604 /* Number of lines within file --------------------------------------------- */
606 {
607  int N = 0;
608  std::vector<SelLine>::iterator current = text_line.begin();
609  std::vector<SelLine>::iterator last = text_line.end();
610  while (current != last)
611  {
612  N++;
613  current++;
614  }
615  return N;
616 }
617 
618 
619 /* File Extension ---------------------------------------------------------- */
621 {
622  std::vector<SelLine>::iterator aux = current_line;
623  go_first_ACTIVE();
624  FileName ext = (*current_line).text;
625  ext = ext.getExtension();
626  current_line = aux;
627  return ext;
628 }
629 
630 
631 /* Maximum filename length ------------------------------------------------- */
633 {
634  std::vector<SelLine>::iterator aux = current_line;
635  size_t max_length = 0;
636  go_first_ACTIVE();
637  while (!eof())
638  {
639  FileName fn = NextImg();
640  max_length = XMIPP_MAX(max_length, fn.length());
641  }
642  current_line = aux;
643  return max_length;
644 }
645 
646 /* Get current filename ---------------------------------------------------- */
647 const std::string SelFile::get_current_file()
648 {
649  if (current_line == text_line.end())
650  return "";
651  if ((*current_line).line_type != SelLine::DATALINE)
652  return "";
653  return (*current_line).text;
654 }
655 
656 /* Get filename number i --------------------------------------------------- */
657 const std::string SelFile::get_file_number(int i)
658 {
659  if (i < 0)
660  return "";
661  std::vector<SelLine>::iterator current = text_line.begin();
662  std::vector<SelLine>::iterator last = text_line.end();
663 
664  int currenti = 0;
665  while (current != last)
666  {
667  if ((*current).line_type == SelLine::DATALINE &&
668  (*current).label == SelLine::ACTIVE)
669  currenti++;
670  if (currenti > i)
671  return (*current).text;
672  current++;
673  }
674  return "";
675 }
676 
677 /* Remove a certain file --------------------------------------------------- */
678 void SelFile::remove(const std::string &img_name)
679 {
680  std::vector<SelLine>::iterator aux = find(img_name);
681  std::vector<SelLine>::iterator temp;
682  if (aux != text_line.end())
683  {
684  if (aux == current_line)
685  {
686  temp = current_line;
687  temp++;
688  }
689  else
690  temp = current_line;
691  if ((*aux).line_type == SelLine::DATALINE)
692  no_imgs--;
693  text_line.erase(aux);
694  current_line = temp;
695  }
696 }
697 
698 /* Remove current line ----------------------------------------------------- */
700 {
701  if (current_line != text_line.end())
702  {
703  std::vector<SelLine>::iterator temp;
704  temp = current_line;
705  temp++;
706  if ((*current_line).line_type == SelLine::DATALINE)
707  no_imgs--;
708  text_line.erase(current_line);
709  current_line = temp;
710  }
711 }
712 
713 /* Append a file or change label ------------------------------------------- */
714 void SelFile::set(const std::string& img_name, SelLine::Label label)
715 {
716  SelLine temp;
717  std::vector<SelLine>::iterator aux = find(img_name);
718  if (aux == text_line.end())
719  {
720  temp.line_type = SelLine::DATALINE;
721  temp.text = img_name;
722  temp.label = label;
723  text_line.push_back(temp);
724  if (label != SelLine::DISCARDED)
725  no_imgs++;
726  }
727  else
728  {
729  if ((*aux).label != label)
730  {
731  (*aux).label = label;
732  if (label != SelLine::DISCARDED)
733  no_imgs++;
734  }
735  }
736 }
737 
738 /* Append a file or change label ------------------------------------------- */
740 {
741  if ((*current_line).label != label)
742  {
743  (*current_line).label = label;
744  if (label != SelLine::DISCARDED)
745  no_imgs++;
746  }
747 }
748 
749 /* Change current filename ------------------------------------------- */
751 {
752  if ((*current_line).line_type == SelLine::DATALINE)
753  {
754  (*current_line).text = fn_new;
755  }
756 }
757 
758 /* Insert image before current line ---------------------------------------- */
759 void SelFile::insert(const std::string& img_name, SelLine::Label label)
760 {
761  SelLine temp;
762  temp.line_type = SelLine::DATALINE;
763  temp.text = img_name;
764  temp.label = label;
765  if (label != SelLine::DISCARDED)
766  no_imgs++;
767 
768  // Insert and updates current_line
769  current_line = text_line.insert(current_line, temp);
770  current_line++;
771 }
772 
773 /* Insert line before current line ----------------------------------------- */
774 void SelFile::insert(const SelLine &_selline)
775 {
776  if (_selline.line_type != SelLine::DATALINE &&
777  _selline.line_type != SelLine::COMMENT)
778  REPORT_ERROR(ERR_SELFILE, "SelFile::insert(SelLine): SelLine type not valid");
779  if (_selline.line_type == SelLine::DATALINE)
780  if (_selline.label != SelLine::DISCARDED &&
781  _selline.label != SelLine::ACTIVE)
782  REPORT_ERROR(ERR_SELFILE, "SelFile::insert(SelLine): SelLine label not valid");
783 
784  // Sjors 18sep06: added next line
785  if (_selline.label != SelLine::DISCARDED)
786  no_imgs++;
787 
788  // Insert and updates current_line
789  current_line = text_line.insert(current_line, _selline);
790  current_line++;
791 }
792 
793 /* Insert a comment before current line ------------------------------------ */
794 void SelFile::insert_comment(const std::string& comment)
795 {
796  SelLine temp;
797  temp.line_type = SelLine::COMMENT;
798  temp.text = "# " + comment;
799  temp.label = SelLine::DISCARDED;
800 
801  // Insert and updates current_line
802  current_line = text_line.insert(current_line, temp);
803  current_line++;
804 }
805 
806 /* Sort -------------------------------------------------------------------- */
808 {
809  SelFile result(*this);
810  sort(result.text_line.begin(), result.text_line.end());
811  result.current_line = result.text_line.begin();
812  return result;
813 }
814 
815 /* Randomize --------------------------------------------------------------- */
817 {
818  SelFile result, aux;
819  int i;
820  int rnd_indx;
821 
823  if (no_imgs == 0)
824  return aux;
825  aux = *this;
826  for (i = no_imgs; i > 0; i--)
827  {
828  // Jump a random number from the beginning
829  rnd_indx = (int) rnd_unif(0, i);
830  aux.go_first_ACTIVE();
831  aux.jump(rnd_indx);
832  result.text_line.push_back(*(aux.current_line));
833  (*aux.current_line).line_type = SelLine::NOT_CONSIDERED;
834  }
835 
836  // Adjust remaining fields
837  result.no_imgs = no_imgs;
838  result.current_line = result.text_line.begin();
839  return result;
840 }
841 
842 
843 /* Discard randomly a set of images ---------------------------------------- */
845 {
846  SelFile result;
847  int i, rnd_indx;
848 
850  result = *this;
851  N = std::min(N, no_imgs);
852  for (i = 0; i < N; i++)
853  {
854  // Jump a random number from the beginning
855  rnd_indx = (int) rnd_unif(0, result.no_imgs);
856  result.go_first_ACTIVE();
857  result.jump(rnd_indx, label);
858 
859  // Discard that image
860  (*(result.current_line)).label = SelLine::DISCARDED;
861 
862  // Decrease the number of images such that next time
863  result.no_imgs--;
864  }
865 
866  result.go_beginning();
867  return result;
868 }
869 
870 /* Compare ----------------------------------------------------------------- */
871 // Only img_files with the active label are considered
872 SelFile compare(SelFile &SF1, SelFile &SF2, const int mode)
873 {
874  std::vector<SelLine> only_in_SF1;
875  std::vector<SelLine> only_in_SF2;
876  std::vector<SelLine> in_both;
877  SelFile result;
878  SelLine temp;
879  int SF1_discarded = 0, SF2_discarded = 0;
880  int maxLen = 15;
881  char str[maxLen + 1]; // + terminating null character
882 
883  // Search in File 1
884  std::vector<SelLine>::iterator current = SF1.text_line.begin();
885  std::vector<SelLine>::iterator last = SF1.text_line.end();
886  std::vector<SelLine>::iterator last_SF = SF2.text_line.end();
887  std::vector<SelLine>::iterator found;
888 
889  while (current != last)
890  {
891  // Skip if not active
892  if ((*current).line_type != SelLine::DATALINE)
893  {
894  current++;
895  continue;
896  }
897  if ((*current).label == SelLine::DISCARDED)
898  {
899  SF1_discarded++;
900  current++;
901  continue;
902  }
903 
904  // Try to find this archive into Sel File 2
905  found = SF2.find((*current).text);
906  if (found == last_SF)
907  only_in_SF1.push_back(*current);
908  else
909  if ((*found).label == SelLine::DISCARDED)
910  only_in_SF1.push_back(*current);
911  else
912  in_both.push_back(*current);
913  current++;
914  }
915 
916  // Search in File 2
917  current = SF2.text_line.begin();
918  last = SF2.text_line.end();
919 
920  while (current != last)
921  {
922  // Skip if not active
923  if ((*current).line_type != SelLine::DATALINE)
924  {
925  current++;
926  continue;
927  }
928  if ((*current).label == SelLine::DISCARDED)
929  {
930  SF2_discarded++;
931  current++;
932  continue;
933  }
934 
935  // Try to find this archive into Sel File 2
936  found = find(in_both, (*current).text);
937  if (found != in_both.end())
938  {
939  current++;
940  continue;
941  }
942  only_in_SF2.push_back(*current);
943  current++;
944  }
945 
946  // Write Statistics
947  if (mode < 0)
948  {
949  temp.line_type = SelLine::COMMENT;
950  temp.label = SelLine::DISCARDED;
951  temp.text = "# Statistics of comparison";
952  result.text_line.push_back(temp);
953  temp.text = "# -------------------------------------------------------------";
954  result.text_line.push_back(temp);
955  snprintf(str, maxLen, "%6d", SF1.no_imgs);
956  temp.text = "# File 1: " + SF1.fn_sel + "(VALID: " + str;
957  snprintf(str, maxLen, "%6d", SF1_discarded);
958  temp.text += (std::string) " DISCARDED: " + str + ")";
959  result.text_line.push_back(temp);
960  snprintf(str, maxLen, "%6d", SF2.no_imgs);
961  temp.text = "# File 2: " + SF2.fn_sel + "(VALID: " + str;
962  snprintf(str, maxLen, "%6d", SF2_discarded);
963  temp.text += (std::string) " DISCARDED: " + str + ")";
964  result.text_line.push_back(temp);
965  temp.text = "";
966  result.text_line.push_back(temp);
967  snprintf(str, maxLen, "%6lu", (unsigned long int)in_both.size());
968  temp.text = (std::string)"# Matching Files: " + str;
969  result.text_line.push_back(temp);
970  snprintf(str, maxLen, "%6lu", (unsigned long int)only_in_SF1.size());
971  temp.text = (std::string)"# Only in file 1: " + str;
972  result.text_line.push_back(temp);
973  snprintf(str, maxLen, "%6lu", (unsigned long int)only_in_SF2.size());
974  temp.text = (std::string)"# Only in file 2: " + str;
975  result.text_line.push_back(temp);
976  temp.text = "# -------------------------------------------------------------";
977  result.text_line.push_back(temp);
978 
979  // Write files in both
980  temp.text = "";
981  result.text_line.push_back(temp);
982  temp.text = "# Files in both .sel files";
983  result.text_line.push_back(temp);
984  }
985  if (mode<0 || mode==0)
986  {
987  current = in_both.begin();
988  last = in_both.end();
989  while (current != last)
990  result.text_line.push_back(*current++);
991  }
992 
993  if (mode<0)
994  {
995  // Write files only in Sel File 1
996  temp.text = "";
997  result.text_line.push_back(temp);
998  temp.text = "# Files only in the first file";
999  result.text_line.push_back(temp);
1000  }
1001  if (mode<0 || mode==1)
1002  {
1003  current = only_in_SF1.begin();
1004  last = only_in_SF1.end();
1005  while (current != last)
1006  result.text_line.push_back(*current++);
1007  }
1008 
1009  if (mode<0)
1010  {
1011  // Write files only in Sel File 2
1012  temp.text = "";
1013  result.text_line.push_back(temp);
1014  temp.text = "# Files only in the second file";
1015  result.text_line.push_back(temp);
1016  }
1017  if (mode<0 || mode==2)
1018  {
1019  current = only_in_SF2.begin();
1020  last = only_in_SF2.end();
1021  while (current != last)
1022  result.text_line.push_back(*current++);
1023  }
1024  // Adjust the remaining fields
1025  if (mode<0)
1026  result.no_imgs = in_both.size() + only_in_SF1.size() + only_in_SF2.size();
1027  else if (mode==0)
1028  result.no_imgs = in_both.size();
1029  else if (mode==1)
1030  result.no_imgs = only_in_SF1.size();
1031  else if (mode==2)
1032  result.no_imgs = only_in_SF2.size();
1033  result.current_line = result.text_line.begin();
1034 
1035  return result;
1036 }
friend std::ostream & operator<<(std::ostream &o, const SelLine &line)
Definition: selfile.cpp:59
void clear()
Definition: selfile.cpp:130
void min(Image< double > &op1, const Image< double > &op2)
#define XMIPP_MAX(x, y)
Definition: xmipp_macros.h:193
void split_in_two(SelFile &sel1, SelFile &sel2)
Definition: selfile.cpp:364
SelFile random_discard(int N)
Definition: selfile.cpp:844
#define REPORT_ERROR(nerr, ErrormMsg)
Definition: xmipp_error.h:211
void remove_current()
Definition: selfile.cpp:699
int ImgNo(SelLine::Label label=SelLine::ACTIVE) const
Definition: selfile.cpp:589
int MaxFileNameLength()
Definition: selfile.cpp:632
void set_current(SelLine::Label label)
Definition: selfile.cpp:739
const SelLine & current()
Definition: selfile.h:526
SelFile randomize()
Definition: selfile.cpp:816
Couldn't write to file.
Definition: xmipp_error.h:140
void insert(const std::string &img_name, SelLine::Label label=SelLine::ACTIVE)
Definition: selfile.cpp:759
void chooseSubset(int firstImage, int lastImage, SelFile &SFsubset)
Definition: selfile.cpp:489
void merge(const FileName &name)
Definition: selfile.cpp:254
void go_first_ACTIVE()
Definition: selfile.h:377
void set(const std::string &img_name, SelLine::Label label)
Definition: selfile.cpp:714
void write(const FileName &sel_name="")
Definition: selfile.cpp:262
void set_current_filename(const FileName &fn_new)
Definition: selfile.cpp:750
FileName FileExtension()
Definition: selfile.cpp:620
void mpi_select_part2(int rank, int size, int &num_img_tot, int mpi_job_size)
Definition: selfile.cpp:458
void remove(const std::string &img_name)
Definition: selfile.cpp:678
bool jump_lines(int count)
Definition: selfile.cpp:527
void assign(const SelFile &sel)
Definition: selfile.cpp:139
friend SelFile compare(SelFile &SF1, SelFile &SF2, const int mode)
Definition: selfile.cpp:872
#define i
Error in docfile format.
Definition: xmipp_error.h:186
const std::string & NextImg(SelLine::Label label=SelLine::ACTIVE)
Definition: selfile.cpp:516
int LineNo()
Definition: selfile.cpp:605
String getExtension() const
SelFile()
Definition: selfile.cpp:122
double rnd_unif()
SelFile operator+(SelFile &sel)
Definition: selfile.cpp:355
void read(const FileName &name, int overrinding=1)
Definition: selfile.cpp:196
SelFile sort_by_filenames()
Definition: selfile.cpp:807
#define CEIL(x)
Definition: xmipp_macros.h:225
int in
const std::string get_current_file()
Definition: selfile.cpp:647
void clean_comments()
Definition: selfile.cpp:175
File or directory does not exist.
Definition: xmipp_error.h:136
void go_beginning()
Definition: selfile.h:364
void mpi_select_part(int rank, int size, int &num_img_tot)
Definition: selfile.cpp:425
void mode
basic_istream< char, std::char_traits< char > > istream
Definition: utilities.cpp:815
void sort(struct DCEL_T *dcel)
Definition: sorting.cpp:18
friend std::vector< SelLine >::iterator find(std::vector< SelLine > &text, const std::string &img_name)
Definition: selfile.cpp:553
void split_in_N(int N, std::vector< SelFile > &parts)
Definition: selfile.cpp:391
void insert_comment(const std::string &comment)
Definition: selfile.cpp:794
void reserve(int n)
Definition: selfile.h:219
void clean()
Definition: selfile.cpp:158
void assign(const SelLine &line)
Definition: selfile.cpp:41
const std::string get_file_number(int i)
Definition: selfile.cpp:657
friend class SelFile
Definition: selfile.h:73
void jump(int count, SelLine::Label label=SelLine::ACTIVE)
Definition: selfile.cpp:540
unsigned int randomize_random_generator()
friend std::istream & operator>>(std::istream &i, SelLine &lin)
Definition: selfile.cpp:75
int * n
friend bool operator<(const SelLine &l1, const SelLine &l2)
Definition: selfile.cpp:49