Xmipp  v3.23.11-Nereus
List of all members

#include <selfile.h>

Public Member Functions

Selfile constructors
 SelFile ()
 
 SelFile (const FileName &name)
 
void reserve (int n)
 
void clear ()
 
Managing files in disk
void read (const FileName &name, int overrinding=1)
 
void append (const FileName &name)
 
void merge (const FileName &name)
 
void merge (SelFile &sel)
 
SelFile operator+ (SelFile &sel)
 
void split_in_two (SelFile &sel1, SelFile &sel2)
 
void split_in_N (int N, std::vector< SelFile > &parts)
 
void mpi_select_part (int rank, int size, int &num_img_tot)
 
void mpi_select_part2 (int rank, int size, int &num_img_tot, int mpi_job_size)
 
void chooseSubset (int firstImage, int lastImage, SelFile &SFsubset)
 
void write (const FileName &sel_name="")
 
Moving the current line pointer
void go_beginning ()
 
void go_first_ACTIVE ()
 
const std::string & operator() (int i)
 
const std::string & NextImg (SelLine::Label label=SelLine::ACTIVE)
 
void next ()
 
void jump (int count, SelLine::Label label=SelLine::ACTIVE)
 
bool jump_lines (int count)
 
void search (const std::string &img_name)
 
int eof ()
 
Getting information
FileName name () const
 
int Is_ACTIVE () const
 
int Is_DISCARDED () const
 
int Is_COMMENT () const
 
const SelLine & current ()
 
void get_current (SelLine &_SL)
 
int exists (const std::string &img_name)
 
int ImgNo (SelLine::Label label=SelLine::ACTIVE) const
 
int LineNo ()
 
void ImgSize (int &Ydim, int &Xdim)
 
FileName FileExtension ()
 
int MaxFileNameLength ()
 
const std::string get_current_file ()
 
SelLine get_current_line ()
 
const std::string get_file_number (int i)
 
Modifying the selection file
void remove (const std::string &img_name)
 
void remove_current ()
 
void set (const std::string &img_name, SelLine::Label label)
 
void set_current (SelLine::Label label)
 
void set_current_filename (const FileName &fn_new)
 
void insert (const std::string &img_name, SelLine::Label label=SelLine::ACTIVE)
 
void insert (const SelLine &_selline)
 
void insert_comment (const std::string &comment)
 
void clean ()
 
void clean_comments ()
 

Some operators

void assign (const SelFile &sel)
 
std::ostream & operator<< (std::ostream &o, const SelFile &sel)
 

Helpful procedures

SelFile sort_by_filenames ()
 
SelFile randomize ()
 
SelFile randomSubset (int subsetN, bool withReplacement=true)
 
SelFile random_discard (int N)
 
SelFile compare (SelFile &SF1, SelFile &SF2, const int mode)
 

Detailed Description

Selection File

The SelFile is an object which keeps in memory all the information associated to a .sel file.

Definition at line 168 of file selfile.h.

Constructor & Destructor Documentation

◆ SelFile() [1/2]

SelFile::SelFile ( )

Empty constructor.

There is no file associated yet.

SelFile sel;

Definition at line 122 of file selfile.cpp.

123 {
124  fn_sel = "Unnamed";
125  no_imgs = 0;
126  current_line = text_line.begin();
127 }

◆ SelFile() [2/2]

SelFile::SelFile ( const FileName & name)
inline

Constructor with filename, read from disk.

The given name is loaded (method read) as a selection file.

SelFile sel("g1t.sel");

Definition at line 209 of file selfile.h.

210  {
211  read(name);
212  }
void read(const FileName &name, int overrinding=1)
Definition: selfile.cpp:196

Member Function Documentation

◆ append()

void SelFile::append ( const FileName & name)
inline

Append a file from disk to an already read one.

The old information on the variable is not lost. All lines in the selection file to be read are appended at the end of the already read one without any kind of check.

sel.read("g1t.sel");
sel.append("g2t.sel");

Definition at line 280 of file selfile.h.

281  {
282  read(name, 0);
283  }
void read(const FileName &name, int overrinding=1)
Definition: selfile.cpp:196

◆ assign()

void SelFile::assign ( const SelFile & sel)

Another function for assignment.

Definition at line 139 of file selfile.cpp.

140 {
141  *this = SF;
142 }

◆ chooseSubset()

void SelFile::chooseSubset ( int  firstImage,
int  lastImage,
SelFile & SFsubset
)

Choose a subset of images.

Definition at line 489 of file selfile.cpp.

490 {
491  SFsubset.clear();
492  go_beginning();
493  jump(firstImage);
494  for (int i=firstImage; i<=lastImage; i++)
495  {
496  if (!eof()) SFsubset.insert(current());
497  next();
498  }
499 }
void clear()
Definition: selfile.cpp:130
const SelLine & current()
Definition: selfile.h:526
void insert(const std::string &img_name, SelLine::Label label=SelLine::ACTIVE)
Definition: selfile.cpp:759
int eof()
Definition: selfile.h:485
#define i
void go_beginning()
Definition: selfile.h:364
void next()
Definition: selfile.h:429
void jump(int count, SelLine::Label label=SelLine::ACTIVE)
Definition: selfile.cpp:540

◆ clean()

void SelFile::clean ( )

Deletes all DISCARDED images from the selection file.

The current line "pointer" is moved to the beginning of the file.

sel.clean();

Definition at line 158 of file selfile.cpp.

159 {
160  std::vector<SelLine>::iterator current = text_line.begin();
161  while (current != text_line.end())
162  {
163  if ((*current).line_type == SelLine::DATALINE &&
164  (*current).label == SelLine::DISCARDED)
165  {
166  text_line.erase(current);
167  }
168  else
169  current++;
170  }
171  current_line = text_line.begin();
172 }
const SelLine & current()
Definition: selfile.h:526

◆ clean_comments()

void SelFile::clean_comments ( )

Deletes all comments from the selection file.

The current line "pointer" is moved to the beginning of the file.

sel.clean_comments();

Definition at line 175 of file selfile.cpp.

176 {
177  std::vector<SelLine>::iterator current = text_line.begin();
178  std::vector<SelLine>::iterator last = text_line.end();
179  std::vector<SelLine>::iterator temp;
180  while (current != last)
181  {
182  if ((*current).line_type == SelLine::COMMENT)
183  {
184  temp = current;
185  temp++;
186  text_line.erase(current);
187  current = temp;
188  }
189  else
190  current++;
191  }
192  current_line = text_line.begin();
193 }
const SelLine & current()
Definition: selfile.h:526

◆ clear()

void SelFile::clear ( )

Empties the object.

sel.clear();

Definition at line 130 of file selfile.cpp.

131 {
132  fn_sel = "Unnamed";
133  text_line.erase(text_line.begin(), text_line.end());
134  no_imgs = 0;
135  current_line = text_line.begin();
136 }

◆ current()

const SelLine& SelFile::current ( )
inline

Returns current line as a Sel Line.

Definition at line 526 of file selfile.h.

527  {
528  return *current_line;
529  }

◆ eof()

int SelFile::eof ( )
inline

True if current line "pointer" is at the end of file.

if (sel.eof())
std::cout << "The selection file is over\n";

Definition at line 485 of file selfile.h.

486  {
487  return current_line == text_line.end();
488  }

◆ exists()

int SelFile::exists ( const std::string &  img_name)
inline

True if the image name is inside the selection file.

The current line "pointer" is not modified. If an image is discarded in the selection file, this function still will say that it exists, although it is discarded.

if (sel.exists("g1ta0001"))
std::cout << "g1ta0001 exists in the selection file\n";

Definition at line 549 of file selfile.h.

550  {
551  return find(img_name) != text_line.end();
552  }

◆ FileExtension()

FileName SelFile::FileExtension ( )

Returns the extension of the files inside.

This function returns the extension of the first active file.

Definition at line 620 of file selfile.cpp.

621 {
622  std::vector<SelLine>::iterator aux = current_line;
623  go_first_ACTIVE();
624  FileName ext = (*current_line).text;
625  ext = ext.getExtension();
626  current_line = aux;
627  return ext;
628 }
void go_first_ACTIVE()
Definition: selfile.h:377
String getExtension() const

◆ get_current()

void SelFile::get_current ( SelLine & _SL)
inline

Another function to get a SelLine.

Definition at line 533 of file selfile.h.

534  {
535  _SL = current();
536  }
const SelLine & current()
Definition: selfile.h:526

◆ get_current_file()

const std::string SelFile::get_current_file ( )

Get the filename of the current line.

If the current line "pointer" is at the end of the file or is pointing to a comment then an empty string is returned.

fn = sel.get_current_file();

Definition at line 647 of file selfile.cpp.

648 {
649  if (current_line == text_line.end())
650  return "";
651  if ((*current_line).line_type != SelLine::DATALINE)
652  return "";
653  return (*current_line).text;
654 }

◆ get_current_line()

SelLine SelFile::get_current_line ( )
inline

Get current line.

Definition at line 621 of file selfile.h.

622  {
623  return *current_line;
624  }

◆ get_file_number()

const std::string SelFile::get_file_number ( int  i)

Get the filename at the ACTIVE line number i.

The first file is number 0. If i is greater than the total number of ACTIVE files, then "" is returned.

fn = sel.get_file_number(i);

Definition at line 657 of file selfile.cpp.

658 {
659  if (i < 0)
660  return "";
661  std::vector<SelLine>::iterator current = text_line.begin();
662  std::vector<SelLine>::iterator last = text_line.end();
663 
664  int currenti = 0;
665  while (current != last)
666  {
667  if ((*current).line_type == SelLine::DATALINE &&
668  (*current).label == SelLine::ACTIVE)
669  currenti++;
670  if (currenti > i)
671  return (*current).text;
672  current++;
673  }
674  return "";
675 }
const SelLine & current()
Definition: selfile.h:526
#define i

◆ go_beginning()

void SelFile::go_beginning ( )
inline

Go to the beginning of the file.

Moves the pointer to the first line of the file, whether it is a comment, an active image or a discarded one.

sel.go_beginning();

Definition at line 364 of file selfile.h.

365  {
366  current_line = text_line.begin();
367  }

◆ go_first_ACTIVE()

void SelFile::go_first_ACTIVE ( )
inline

Go to the first ACTIVE image.

Moves the pointer to the first active image in the file.

sel.go_first_ACTIVE();

Definition at line 377 of file selfile.h.

378  {
379  go_beginning();
380  adjust_to_label(SelLine::ACTIVE);
381  }
void go_beginning()
Definition: selfile.h:364

◆ ImgNo()

int SelFile::ImgNo ( SelLine::Label  label = SelLine::ACTIVE) const

Number of images inside a selection file with a certain label.

This function returns the number of images inside the selection file with a given label. By default this label is ACTIVE.

std::cout << "There are " << sel.ImgNo() << " active images\n";
std::cout << "There are " << sel.ImgNo(SelLine::ACTIVE)
<< " active images\n";
std::cout << "There are " << sel.ImgNo(SelLine::DISCARDED)
<< " discarded images\n";

Definition at line 589 of file selfile.cpp.

590 {
591  int N = 0;
592  std::vector<SelLine>::const_iterator current = text_line.begin();
593  std::vector<SelLine>::const_iterator last = text_line.end();
594  while (current != last)
595  {
596  if ((*current).line_type == SelLine::DATALINE &&
597  (*current).label == label)
598  N++;
599  current++;
600  }
601  return N;
602 }
const SelLine & current()
Definition: selfile.h:526

◆ ImgSize()

void SelFile::ImgSize ( int &  Ydim,
int &  Xdim 
)

Returns the size of the images inside.

The filenames within a selection file are supposed to be for SPIDER images, this function opens one of the images (an active one) and returns the size of that image, supposed to be the same for the rest of the images in the selection file.

An exception is thrown if the first valid image in the selfile does not exist on disk or is not an XMIPP image.

sel.ImgSize(y, x);

◆ insert() [1/2]

void SelFile::insert ( const std::string &  img_name,
SelLine::Label  label = SelLine::ACTIVE 
)

Insert image before current line.

There is no checking for the previous existence of the img. The current line is still pointing to the same line as it was before entering the function.

sel.insert("g1ta0000");
sel.insert("g1ta0000", SelLine::DISCARDED);

Definition at line 759 of file selfile.cpp.

760 {
761  SelLine temp;
762  temp.line_type = SelLine::DATALINE;
763  temp.text = img_name;
764  temp.label = label;
765  if (label != SelLine::DISCARDED)
766  no_imgs++;
767 
768  // Insert and updates current_line
769  current_line = text_line.insert(current_line, temp);
770  current_line++;
771 }

◆ insert() [2/2]

void SelFile::insert ( const SelLine & _selline)

Insert line before current line.

It is checked that the line is either a comment or a data line and, in the latter case, that the label is valid too. The current line is still pointing to the same line as it was before entering the function.

sel.insert("g1ta0000", SelLine::ACTIVE);

Definition at line 774 of file selfile.cpp.

775 {
776  if (_selline.line_type != SelLine::DATALINE &&
777  _selline.line_type != SelLine::COMMENT)
778  REPORT_ERROR(ERR_SELFILE, "SelFile::insert(SelLine): SelLine type not valid");
779  if (_selline.line_type == SelLine::DATALINE)
780  if (_selline.label != SelLine::DISCARDED &&
781  _selline.label != SelLine::ACTIVE)
782  REPORT_ERROR(ERR_SELFILE, "SelFile::insert(SelLine): SelLine label not valid");
783 
784  // Sjors 18sep06: added next line
785  if (_selline.label != SelLine::DISCARDED)
786  no_imgs++;
787 
788  // Insert and updates current_line
789  current_line = text_line.insert(current_line, _selline);
790  current_line++;
791 }
#define REPORT_ERROR(nerr, ErrormMsg)
Definition: xmipp_error.h:211
Error in docfile format.
Definition: xmipp_error.h:186

◆ insert_comment()

void SelFile::insert_comment ( const std::string &  comment)

Insert a comment before the current line.

Comments must not start with any special character since a "#" is automatically added at the beginning of the line. The current line is still pointing to the same line as it was before entering the function.

sel.insert_comment("This is a comment");

Definition at line 794 of file selfile.cpp.

795 {
796  SelLine temp;
797  temp.line_type = SelLine::COMMENT;
798  temp.text = "# " + comment;
799  temp.label = SelLine::DISCARDED;
800 
801  // Insert and updates current_line
802  current_line = text_line.insert(current_line, temp);
803  current_line++;
804 }

◆ Is_ACTIVE()

int SelFile::Is_ACTIVE ( ) const
inline

True if current line is a data line and it is active.

Definition at line 503 of file selfile.h.

504  {
505  return current_line->Is_data() &&
506  current_line->get_label() == SelLine::ACTIVE;
507  }

◆ Is_COMMENT()

int SelFile::Is_COMMENT ( ) const
inline

True if current line is a comment.

Definition at line 519 of file selfile.h.

520  {
521  return current_line->Is_comment();
522  }

◆ Is_DISCARDED()

int SelFile::Is_DISCARDED ( ) const
inline

True if current line is a data line and it is discarded.

Definition at line 511 of file selfile.h.

512  {
513  return current_line->Is_data() &&
514  current_line->get_label() == SelLine::DISCARDED;
515  }

◆ jump()

void SelFile::jump ( int  count,
SelLine::Label  label = SelLine::ACTIVE 
)

Jump over a number of lines with a given label.

Starting from the current_line "pointer" this function skips a given number of entries with a certain label. For instance, jump over 1 active image is to jump to the next active image. Jump over 2 active images is to jump to the next of the next active image, and so on. You can give as label DISCARDED, too. The number of images to jump must always be positive, the jump cannot be done backwards.

sel.jump(2); // Jump over 2 active images
sel.jump(2, SelLine::ACTIVE); // The same
sel.jump(2, SelLine::DISCARDED) // Jump over 2 discarded images

Definition at line 540 of file selfile.cpp.

541 {
542  adjust_to_label(label);
543  for (int i = 0; i < how_many; i++)
544  if (current_line != text_line.end())
545  {
546  current_line++;
547  adjust_to_label(label);
548  }
549 }
#define i

◆ jump_lines()

bool SelFile::jump_lines ( int  count)

Jump over a number of lines disregarding the label.

Returns false if the end of the file is reached before the requested number of line jumps has been made.

Definition at line 527 of file selfile.cpp.

528 {
529  for (int i = 0; i < how_many; i++)
530  {
531  if (current_line != text_line.end())
532  current_line++;
533  else
534  return false;
535  }
536  return true;
537 }
#define i

◆ LineNo()

int SelFile::LineNo ( )

Returns the number of lines within a file.

This function gives the total number of lines (including comments) within a file.

std::cout << "There are " << sel.LineNo() << " lines in this file\n";

Definition at line 605 of file selfile.cpp.

606 {
607  int N = 0;
608  std::vector<SelLine>::iterator current = text_line.begin();
609  std::vector<SelLine>::iterator last = text_line.end();
610  while (current != last)
611  {
612  N++;
613  current++;
614  }
615  return N;
616 }
const SelLine & current()
Definition: selfile.h:526

◆ MaxFileNameLength()

int SelFile::MaxFileNameLength ( )

Returns the maximum length of an active filename inside the selfile.

The current pointer is not moved.

Definition at line 632 of file selfile.cpp.

633 {
634  std::vector<SelLine>::iterator aux = current_line;
635  size_t max_length = 0;
636  go_first_ACTIVE();
637  while (!eof())
638  {
639  FileName fn = NextImg();
640  max_length = XMIPP_MAX(max_length, fn.length());
641  }
642  current_line = aux;
643  return max_length;
644 }
#define XMIPP_MAX(x, y)
Definition: xmipp_macros.h:193
void go_first_ACTIVE()
Definition: selfile.h:377
int eof()
Definition: selfile.h:485
const std::string & NextImg(SelLine::Label label=SelLine::ACTIVE)
Definition: selfile.cpp:516

◆ merge() [1/2]

void SelFile::merge ( const FileName & name)

Merge a file from disk with an already read one.

All lines (except the comments) in the selection file to be read are handled as follows: they are added at the end of the already read one if they are not present in it; they are ignored if they are already present; or they are marked with a comment if the corresponding image name is present in both files but with different labels (active, discarded). In this last case the image remains active, but a comment in the preceding line informs you of the situation.

sel.read("g1t.sel");
sel.merge("g2t.sel");

Definition at line 254 of file selfile.cpp.

255 {
256  SelFile SF(sel_name);
257  *this = *this + SF;
258  go_first_ACTIVE();
259 }
void go_first_ACTIVE()
Definition: selfile.h:377

◆ merge() [2/2]

void SelFile::merge ( SelFile & sel)

Merge this file with another selfile.

Definition at line 308 of file selfile.cpp.

309 {
310  std::vector<SelLine>::iterator current = SF.text_line.begin();
311  std::vector<SelLine>::iterator last = SF.text_line.end();
312  std::vector<SelLine>::iterator found;
313 
314  SelLine discrepancy;
315  discrepancy.line_type = SelLine::COMMENT;
316  discrepancy.text = "# There were discrepancy in the tags for next line, the "
317  "ACTIVE state is kept";
318 
319  while (current != last)
320  {
321  if ((*current).line_type != SelLine::DATALINE)
322  {
323  current++;
324  continue;
325  }
326  if ((found = find((*current).text)) == text_line.end())
327  {
328  // New image not found in the whole Sel File.
329  // Add it if it is not discarded
330  if ((*current).label != SelLine::DISCARDED)
331  {
332  text_line.push_back(*current);
333  no_imgs++;
334  }
335  }
336  else
337  // New image is found, check that its line is not going
338  // to be removed, if it is add it again; else, check if
339  // there is a discrepancy between them
340  if ((*found).label != (*current).label)
341  {
342  if ((*found).label < (*current).label)
343  {
344  (*found).label = SelLine::ACTIVE;
345  no_imgs++;
346  }
347  text_line.insert(found, 1, discrepancy);
348  }
349  current++;
350  }
351 }
const SelLine & current()
Definition: selfile.h:526

◆ mpi_select_part()

void SelFile::mpi_select_part ( int  rank,
int  size,
int &  num_img_tot 
)

For MPI-parallelized runs: select relevant part of selfile for that rank.

Definition at line 425 of file selfile.cpp.

426 {
427 
428  (*this).clean_comments();
429  (*this).clean();
430  num_img_tot = (*this).ImgNo();
431  int remaining = num_img_tot % size;
432  int Npart = (int)(num_img_tot - remaining) / size;
433  int myFirst, myLast;
434  if (rank < remaining)
435  {
436  myFirst = rank * (Npart + 1);
437  myLast = myFirst + Npart;
438  }
439  else
440  {
441  myFirst = rank * Npart + remaining;
442  myLast = myFirst + Npart - 1;
443  }
444  // Now discard all images in Selfile that are outside myFirst-myLast
445  (*this).go_beginning();
446  SelFile SFpart = *this;
447  SFpart.clear();
448  for (int nr = myFirst; nr <= myLast; nr++)
449  {
450  (*this).go_beginning();
451  (*this).jump_lines(nr);
452  SFpart.insert((*this).current());
453  }
454  *this = SFpart;
455 
456 }
void clear()
Definition: selfile.cpp:130
void insert(const std::string &img_name, SelLine::Label label=SelLine::ACTIVE)
Definition: selfile.cpp:759

◆ mpi_select_part2()

void SelFile::mpi_select_part2 ( int  rank,
int  size,
int &  num_img_tot,
int  mpi_job_size 
)

For MPI-parallelized runs: select relevant part of selfile for that rank. I just do not understand the previous function

Definition at line 458 of file selfile.cpp.

462 { // jobNumber process number
463  // total number of processes
464  // total number of images
465 
466  (*this).clean_comments();
467  (*this).clean();
468  totalNumImg = (*this).ImgNo();
469  int myFirst = jobNumber * mpi_job_size;
470  int myLast = myFirst + mpi_job_size-1;
471  while ((myLast+1) > totalNumImg)
472  {
473  myLast = totalNumImg-1;
474  }
475  // Now discard all images in Selfile that are outside myFirst-myLast
476  (*this).go_beginning();
477  SelFile SFpart = *this;
478  SFpart.clear();
479  for (int nr = myFirst; nr <= myLast; nr++)
480  {
481  (*this).go_beginning();
482  (*this).jump_lines(nr);
483  SFpart.insert((*this).current());
484  }
485  *this = SFpart;
486 }
void clear()
Definition: selfile.cpp:130
void insert(const std::string &img_name, SelLine::Label label=SelLine::ACTIVE)
Definition: selfile.cpp:759

◆ name()

FileName SelFile::name ( ) const
inline

Returns the name of the file.

Definition at line 496 of file selfile.h.

497  {
498  return fn_sel;
499  }

◆ next()

void SelFile::next ( )
inline

Move the current pointer to the next image, disregarding its label.

It doesn't matter if next image is ACTIVE or DISCARDED, this function moves the current pointer to it.

sel.go_beginning();
while (!sel.eof())
{
std::cout << sel.current();
sel.next();
}

Definition at line 429 of file selfile.h.

430  {
431  current_line++;
432  }

◆ NextImg()

const std::string & SelFile::NextImg ( SelLine::Label  label = SelLine::ACTIVE)

Returns the name of the next image with a certain label.

The default label is ACTIVE, ie, by default this function returns the name of the next ACTIVE image. But you can give as label DISCARDED and the function will return the name of the next DISCARDED image starting at the current position of the current_line "pointer". If the "pointer" is at the end of the selection file, "" is returned. After this function the "pointer" is actually pointing to the next line following the returned image name.

name = sel.NextImg(); // Next active image
name = sel.NextImg(SelLine::DISCARDED); // Next discarded image

Definition at line 516 of file selfile.cpp.

517 {
518  adjust_to_label(label);
519  static const std::string emptyString;
520  if (current_line != text_line.end())
521  return (*current_line++).text;
522  else
523  return emptyString;
524 }

◆ operator()()

const std::string& SelFile::operator() ( int  i)
inline

Get the line i in the file

Get the line i in the file for reading. If it is a comment the comment text is returned. If it is an image, the image filename. The first line in the file is number 0.

std::cout << sel(0) << std::endl;

Definition at line 393 of file selfile.h.

394  {
395  return text_line[i].text;
396  }
#define i

◆ operator+()

SelFile SelFile::operator+ ( SelFile & sel)

Merge two already read files.

sel1.read("g1t.sel");
sel2.merge("g2t.sel");
sel1 = sel1 + sel2;

Definition at line 355 of file selfile.cpp.

356 {
357  SelFile result;
358  result = *this;
359  result.merge(SF);
360  return result;
361 }
void merge(const FileName &name)
Definition: selfile.cpp:254

◆ random_discard()

SelFile SelFile::random_discard ( int  N)

Discard randomly N images.

A set of N images are discarded from the actual selection file. If N is equal to or greater than the actual number of images within the file, all images are discarded. Comments are kept at their original positions. The current line of the resulting selection file is placed at the beginning of the file.

sel2 = sel1.random_discard(3);

Definition at line 844 of file selfile.cpp.

845 {
846  SelFile result;
847  int i, rnd_indx;
848 
850  result = *this;
851  N = std::min(N, no_imgs);
852  for (i = 0; i < N; i++)
853  {
854  // Jump a random number from the beginning
855  rnd_indx = (int) rnd_unif(0, result.no_imgs);
856  result.go_first_ACTIVE();
857  result.jump(rnd_indx, label);
858 
859  // Discard that image
860  (*(result.current_line)).label = SelLine::DISCARDED;
861 
862  // Decrease the number of images such that next time
863  result.no_imgs--;
864  }
865 
866  result.go_beginning();
867  return result;
868 }
void min(Image< double > &op1, const Image< double > &op2)
void go_first_ACTIVE()
Definition: selfile.h:377
#define i
double rnd_unif()
void go_beginning()
Definition: selfile.h:364
void jump(int count, SelLine::Label label=SelLine::ACTIVE)
Definition: selfile.cpp:540

◆ randomize()

SelFile SelFile::randomize ( )

Alter order in sel file.

A new selection file with all the images of the actual object (whether they are active or discarded) is created, but this time all images are in a random order. The comments of the original selection file are lost in the new copy. The current line of the resulting selection file is placed at the beginning of the file.

sel2 = sel1.randomize();

Definition at line 816 of file selfile.cpp.

817 {
818  SelFile result, aux;
819  int i;
820  int rnd_indx;
821 
823  if (no_imgs == 0)
824  return aux;
825  aux = *this;
826  for (i = no_imgs; i > 0; i--)
827  {
828  // Jump a random number from the beginning
829  rnd_indx = (int) rnd_unif(0, i);
830  aux.go_first_ACTIVE();
831  aux.jump(rnd_indx);
832  result.text_line.push_back(*(aux.current_line));
833  (*aux.current_line).line_type = SelLine::NOT_CONSIDERED;
834  }
835 
836  // Adjust remaining fields
837  result.no_imgs = no_imgs;
838  result.current_line = result.text_line.begin();
839  return result;
840 }
void go_first_ACTIVE()
Definition: selfile.h:377
#define i
double rnd_unif()
void jump(int count, SelLine::Label label=SelLine::ACTIVE)
Definition: selfile.cpp:540
unsigned int randomize_random_generator()

◆ randomSubset()

SelFile SelFile::randomSubset ( int  subsetN,
bool  withReplacement = true 
)

Random subset.

A new selection file is created with the number of images specified in subsetN. The new subset can be created with or without replacement.

◆ read()

void SelFile::read ( const FileName & name,
int  overrinding = 1 
)

Read a file from disk.

The old information on the variable is overwritten. An exception is thrown if the file doesn't exist. Lines which do not fit the comment structure or the "image-label" structure are ignored. The image name is limited to MAX_FILENAME_LENGTH characters. After reading the selfile pointer is moved to the first ACTIVE image.

sel.read("g2t.sel");

Definition at line 196 of file selfile.cpp.

197 {
198  SelLine temp;
199  std::ifstream fh_sel;
200  int line_no = 1;
201 
202  // Empties current SelFile
203  if (overriding)
204  clear();
205 
206  // Open file
207  else
208  {
209  // Read normal selfile
210  fh_sel.open(sel_name.c_str(), std::ios::in);
211  if (!fh_sel)
212  REPORT_ERROR(ERR_IO_NOTEXIST, sel_name);
213 
214  // Read each line and keep it in the list of the SelFile object
215  fh_sel.peek();
216  while (!fh_sel.eof())
217  {
218  try
219  {
220  fh_sel >> temp;
221  }
222  catch (XmippError &e)
223  {
224  std::cout << "Sel file: Line " << line_no << " is skipped due to an error\n";
225  }
226  switch (temp.line_type)
227  {
228  case SelLine::NOT_ASSIGNED: break; // Line with an error
229  case SelLine::DATALINE:
230  if (temp.label != SelLine::DISCARDED)
231  no_imgs++;
232  text_line.push_back(temp);
233  break;
234  case SelLine::COMMENT:
235  text_line.push_back(temp);
236  break;
237  default:
238  break;
239  }
240  line_no++;
241  fh_sel.peek();
242  }
243 
244  // Close file
245  fh_sel.close();
246  }
247 
248  // Set "pointer" to the beginning of the file
249  if (overriding)
250  fn_sel = sel_name;
251  go_first_ACTIVE();
252 }
void clear()
Definition: selfile.cpp:130
#define REPORT_ERROR(nerr, ErrormMsg)
Definition: xmipp_error.h:211
void go_first_ACTIVE()
Definition: selfile.h:377
int in
File or directory does not exist.
Definition: xmipp_error.h:136

◆ remove()

void SelFile::remove ( const std::string &  img_name)

Removes an image from the selection file.

This function searches for an image in the selection file, if it is found then the corresponding line is deleted. If the image is actually being pointed to by the current line, then the current line is now the following line.

sel.remove("g1ta0001");

Definition at line 678 of file selfile.cpp.

679 {
680  std::vector<SelLine>::iterator aux = find(img_name);
681  std::vector<SelLine>::iterator temp;
682  if (aux != text_line.end())
683  {
684  if (aux == current_line)
685  {
686  temp = current_line;
687  temp++;
688  }
689  else
690  temp = current_line;
691  if ((*aux).line_type == SelLine::DATALINE)
692  no_imgs--;
693  text_line.erase(aux);
694  current_line = temp;
695  }
696 }

◆ remove_current()

void SelFile::remove_current ( )

Removes actual line.

This function removes the current line, whether it is a comment or an image. The current line "pointer" is moved to the following line in the file.

sel.remove_current();

Definition at line 699 of file selfile.cpp.

700 {
701  if (current_line != text_line.end())
702  {
703  std::vector<SelLine>::iterator temp;
704  temp = current_line;
705  temp++;
706  if ((*current_line).line_type == SelLine::DATALINE)
707  no_imgs--;
708  text_line.erase(current_line);
709  current_line = temp;
710  }
711 }

◆ reserve()

void SelFile::reserve ( int  n)
inline

Reserve memory for N entries.

It doesn't matter if entries are comments or data. The current line is set to the beginning of the SelFile

Definition at line 219 of file selfile.h.

220  {
221  text_line.reserve(n);
222  current_line = text_line.begin();
223  }
int * n

◆ search()

void SelFile::search ( const std::string &  img_name)
inline

Move "pointer" to a certain image filename.

This function searches for an image name within the file, and sets the current line "pointer" to that line. If the image name is not present (it is not the same "not present" and "discarded") in the selection file, then the pointer is moved to the end of the selection file. You can check this situation using eof(). It doesn't matter if the current line "pointer" before the function call is after the line where the image name is, this function makes a search all over the file, regardless of the previous position of the current line "pointer".

sel.search("g1ta0001");

Definition at line 473 of file selfile.h.

474  {
475  current_line = find(img_name);
476  }

◆ set()

void SelFile::set ( const std::string &  img_name,
SelLine::Label  label 
)

Set label of an image.

This function searches for an image inside the selection file and sets its label to the given label. If the image is not found in the file, then it is added at the end with the given label. The current line pointer is not modified.

sel.set("g1ta0001", SelLine::ACTIVE);

Definition at line 714 of file selfile.cpp.

715 {
716  SelLine temp;
717  std::vector<SelLine>::iterator aux = find(img_name);
718  if (aux == text_line.end())
719  {
720  temp.line_type = SelLine::DATALINE;
721  temp.text = img_name;
722  temp.label = label;
723  text_line.push_back(temp);
724  if (label != SelLine::DISCARDED)
725  no_imgs++;
726  }
727  else
728  {
729  if ((*aux).label != label)
730  {
731  (*aux).label = label;
732  if (label != SelLine::DISCARDED)
733  no_imgs++;
734  }
735  }
736 }

◆ set_current()

void SelFile::set_current ( SelLine::Label  label)

Set the label of the current file.

The same as the previous function, but the label is set on the file currently pointed to.

Definition at line 739 of file selfile.cpp.

740 {
741  if ((*current_line).label != label)
742  {
743  (*current_line).label = label;
744  if (label != SelLine::DISCARDED)
745  no_imgs++;
746  }
747 }

◆ set_current_filename()

void SelFile::set_current_filename ( const FileName &  fn_new)

Change current filename.

This function changes the current filename to a new one if it is not a comment. If it is a comment line, nothing is done.

Definition at line 750 of file selfile.cpp.

751 {
752  if ((*current_line).line_type == SelLine::DATALINE)
753  {
754  (*current_line).text = fn_new;
755  }
756 }

◆ sort_by_filenames()

SelFile SelFile::sort_by_filenames ( )

Sort images in ascending order.

All images are sorted in ascending order, whether they are active or discarded. All comments are gathered at the end of the resulting selection file. The current line of the resulting selection file is placed at the beginning of the file.

sel2 = sel1.sort_by_filenames();

Definition at line 807 of file selfile.cpp.

808 {
809  SelFile result(*this);
810  sort(result.text_line.begin(), result.text_line.end());
811  result.current_line = result.text_line.begin();
812  return result;
813 }
void sort(struct DCEL_T *dcel)
Definition: sorting.cpp:18

◆ split_in_N()

void SelFile::split_in_N ( int  N,
std::vector< SelFile > &  parts 
)

Split this file into N random parts of approximately the same size.

Definition at line 391 of file selfile.cpp.

392 {
393  // Randomize input data
394  SelFile SFtmp, SFrnd;
395  SFrnd = *this;
396  SFtmp = SFrnd.randomize();
397  SFtmp.go_beginning();
398  int Nimg = SFtmp.ImgNo();
399  SF.clear();
400 
401  // Create space for all SelFiles
402  for (int n = 0; n < N; n++)
403  {
404  auto *ptr_SF = new SelFile;
405  ptr_SF->reserve(CEIL(Nimg / N));
406  SF.push_back(*ptr_SF);
407  }
408 
409  // Distribute images
410  int n = 0;
411  for (int i = 0;i < Nimg; i++)
412  {
413  SF[n].insert(SFtmp.current());
414  n = (n + 1) % N;
415  if (i < Nimg - 1)
416  SFtmp.NextImg();
417  }
418 
419  // Sort the Selfiles
420  for (int n = 0; n < N; n++)
421  SF[n] = SF[n].sort_by_filenames();
422 }
int ImgNo(SelLine::Label label=SelLine::ACTIVE) const
Definition: selfile.cpp:589
const SelLine & current()
Definition: selfile.h:526
SelFile randomize()
Definition: selfile.cpp:816
#define i
const std::string & NextImg(SelLine::Label label=SelLine::ACTIVE)
Definition: selfile.cpp:516
SelFile()
Definition: selfile.cpp:122
SelFile sort_by_filenames()
Definition: selfile.cpp:807
#define CEIL(x)
Definition: xmipp_macros.h:225
void go_beginning()
Definition: selfile.h:364
int * n

◆ split_in_two()

void SelFile::split_in_two ( SelFile &  sel1,
SelFile &  sel2 
)

Split this file into two random halves.

Definition at line 364 of file selfile.cpp.

365 {
366  SelFile SFtmp;
367  SF1 = *this;
368  SFtmp = SF1.randomize();
369  SF1.clear();
370  int N = SFtmp.ImgNo();
371  SF1.reserve(N);
372  SF2.reserve(N);
373  int half = N / 2;
374  SFtmp.go_beginning();
375  for (int i = 0;i < N; i++)
376  {
377  if (i < half)
378  SF1.insert(SFtmp.current());
379  else
380  SF2.insert(SFtmp.current());
381  if (i < N - 1)
382  SFtmp.NextImg();
383  }
384  SFtmp = SF1.sort_by_filenames();
385  SF1 = SFtmp;
386  SFtmp = SF2.sort_by_filenames();
387  SF2 = SFtmp;
388 }
void clear()
Definition: selfile.cpp:130
int ImgNo(SelLine::Label label=SelLine::ACTIVE) const
Definition: selfile.cpp:589
const SelLine & current()
Definition: selfile.h:526
SelFile randomize()
Definition: selfile.cpp:816
#define i
const std::string & NextImg(SelLine::Label label=SelLine::ACTIVE)
Definition: selfile.cpp:516
SelFile sort_by_filenames()
Definition: selfile.cpp:807
void go_beginning()
Definition: selfile.h:364

◆ write()

void SelFile::write ( const FileName &  sel_name = "")

Write a selection file to disk.

If you give a name then it becomes like a "Save as ..." and from this point on the name of the selection file has changed.

sel.write(); // Save
sel.write("g3t.sel"); // Save as ...

Definition at line 262 of file selfile.cpp.

263 {
264  std::ofstream fh_sel;
265  std::vector<SelLine>::iterator current = text_line.begin();
266  std::vector<SelLine>::iterator last = text_line.end();
267 
268  if (strcmp(sel_name.c_str(), "") != 0)
269  fn_sel = sel_name;
270  // Don't use sel_name=="" because it wastes memory
271 #ifdef NEVEREVERROB
272  if (sel_name.find(IMAGIC_TAG) == 0)
273  {
274  // Write Imagic selfile
275  const FileName hed_fname = sel_name.substr(IMAGIC_TAG_LEN);
276  std::vector<Image *> imgs;
277  for (; current != last; current++)
278  {
279  Image *img;
280  if (current->Is_data() && (current->get_label() == SelLine::ACTIVE) &&
281  (img = Image::LoadImage(current->get_text())))
282  imgs.push_back(img);
283  }
284  if (!ImagicWriteImagicFile(hed_fname, imgs))
285  REPORT_ERROR(1553, "Error writing selfile to Imagic file " + sel_name);
286  for (std::vector<Image *>::iterator i = imgs.begin(); i != imgs.end(); i++)
287  delete(*i);
288  }
289  else
290 #endif
291  {
292  // Write Xmipp selfile
293  // Open file
294  fh_sel.open(fn_sel.c_str(), std::ios::out);
295  if (!fh_sel)
296  REPORT_ERROR(ERR_IO_NOWRITE, fn_sel);
297 
298  // Read each line and keep it in the list of the SelFile object
299  while (current != last)
300  fh_sel << *(current++);
301 
302  // Close file
303  fh_sel.close();
304  }
305 }
#define REPORT_ERROR(nerr, ErrormMsg)
Definition: xmipp_error.h:211
const SelLine & current()
Definition: selfile.h:526
Couldn't write to file.
Definition: xmipp_error.h:140
#define i

Friends And Related Function Documentation

◆ compare

SelFile compare ( SelFile &  SF1,
SelFile &  SF2,
const int  mode 
)
friend

Compare two selection files.

The result is another selection file. At the beginning of it there is information about the number of active and discarded images in both input selection files, about the number of matching files (a file is said to match if it is active in both selection files), the number of active files which are only in the first selection file, and the number of active files which are only in the second. Then come the list of matching files, the list of files only in SF1 and the list of files only in SF2. There are enough comments to know where things start and finish, and what the numbers at the beginning are. If a file is active in one file and discarded in the other, then it is said to match and it is kept as active; a preceding comment warns of this situation.

If mode<0, the output file will contain all information; if mode=0, the output will be a selfile with the overlapping images; if mode=1, the output will be a selfile with the images only in file 1; if mode=2, the output will be a selfile with the images only in file 2.

sel3 = compare(sel1, sel2);

Definition at line 872 of file selfile.cpp.

873 {
874  std::vector<SelLine> only_in_SF1;
875  std::vector<SelLine> only_in_SF2;
876  std::vector<SelLine> in_both;
877  SelFile result;
878  SelLine temp;
879  int SF1_discarded = 0, SF2_discarded = 0;
880  int maxLen = 15;
881  char str[maxLen + 1]; // + terminating null character
882 
883  // Search in File 1
884  std::vector<SelLine>::iterator current = SF1.text_line.begin();
885  std::vector<SelLine>::iterator last = SF1.text_line.end();
886  std::vector<SelLine>::iterator last_SF = SF2.text_line.end();
887  std::vector<SelLine>::iterator found;
888 
889  while (current != last)
890  {
891  // Skip if not active
892  if ((*current).line_type != SelLine::DATALINE)
893  {
894  current++;
895  continue;
896  }
897  if ((*current).label == SelLine::DISCARDED)
898  {
899  SF1_discarded++;
900  current++;
901  continue;
902  }
903 
904  // Try to find this archive into Sel File 2
905  found = SF2.find((*current).text);
906  if (found == last_SF)
907  only_in_SF1.push_back(*current);
908  else
909  if ((*found).label == SelLine::DISCARDED)
910  only_in_SF1.push_back(*current);
911  else
912  in_both.push_back(*current);
913  current++;
914  }
915 
916  // Search in File 2
917  current = SF2.text_line.begin();
918  last = SF2.text_line.end();
919 
920  while (current != last)
921  {
922  // Skip if not active
923  if ((*current).line_type != SelLine::DATALINE)
924  {
925  current++;
926  continue;
927  }
928  if ((*current).label == SelLine::DISCARDED)
929  {
930  SF2_discarded++;
931  current++;
932  continue;
933  }
934 
935  // Try to find this archive into Sel File 2
936  found = find(in_both, (*current).text);
937  if (found != in_both.end())
938  {
939  current++;
940  continue;
941  }
942  only_in_SF2.push_back(*current);
943  current++;
944  }
945 
946  // Write Statistics
947  if (mode < 0)
948  {
949  temp.line_type = SelLine::COMMENT;
950  temp.label = SelLine::DISCARDED;
951  temp.text = "# Statistics of comparison";
952  result.text_line.push_back(temp);
953  temp.text = "# -------------------------------------------------------------";
954  result.text_line.push_back(temp);
955  snprintf(str, maxLen, "%6d", SF1.no_imgs);
956  temp.text = "# File 1: " + SF1.fn_sel + "(VALID: " + str;
957  snprintf(str, maxLen, "%6d", SF1_discarded);
958  temp.text += (std::string) " DISCARDED: " + str + ")";
959  result.text_line.push_back(temp);
960  snprintf(str, maxLen, "%6d", SF2.no_imgs);
961  temp.text = "# File 2: " + SF2.fn_sel + "(VALID: " + str;
962  snprintf(str, maxLen, "%6d", SF2_discarded);
963  temp.text += (std::string) " DISCARDED: " + str + ")";
964  result.text_line.push_back(temp);
965  temp.text = "";
966  result.text_line.push_back(temp);
967  snprintf(str, maxLen, "%6lu", (unsigned long int)in_both.size());
968  temp.text = (std::string)"# Matching Files: " + str;
969  result.text_line.push_back(temp);
970  snprintf(str, maxLen, "%6lu", (unsigned long int)only_in_SF1.size());
971  temp.text = (std::string)"# Only in file 1: " + str;
972  result.text_line.push_back(temp);
973  snprintf(str, maxLen, "%6lu", (unsigned long int)only_in_SF2.size());
974  temp.text = (std::string)"# Only in file 2: " + str;
975  result.text_line.push_back(temp);
976  temp.text = "# -------------------------------------------------------------";
977  result.text_line.push_back(temp);
978 
979  // Write files in both
980  temp.text = "";
981  result.text_line.push_back(temp);
982  temp.text = "# Files in both .sel files";
983  result.text_line.push_back(temp);
984  }
985  if (mode<0 || mode==0)
986  {
987  current = in_both.begin();
988  last = in_both.end();
989  while (current != last)
990  result.text_line.push_back(*current++);
991  }
992 
993  if (mode<0)
994  {
995  // Write files only in Sel File 1
996  temp.text = "";
997  result.text_line.push_back(temp);
998  temp.text = "# Files only in the first file";
999  result.text_line.push_back(temp);
1000  }
1001  if (mode<0 || mode==1)
1002  {
1003  current = only_in_SF1.begin();
1004  last = only_in_SF1.end();
1005  while (current != last)
1006  result.text_line.push_back(*current++);
1007  }
1008 
1009  if (mode<0)
1010  {
1011  // Write files only in Sel File 2
1012  temp.text = "";
1013  result.text_line.push_back(temp);
1014  temp.text = "# Files only in the second file";
1015  result.text_line.push_back(temp);
1016  }
1017  if (mode<0 || mode==2)
1018  {
1019  current = only_in_SF2.begin();
1020  last = only_in_SF2.end();
1021  while (current != last)
1022  result.text_line.push_back(*current++);
1023  }
1024  // Adjust the remaining fields
1025  if (mode<0)
1026  result.no_imgs = in_both.size() + only_in_SF1.size() + only_in_SF2.size();
1027  else if (mode==0)
1028  result.no_imgs = in_both.size();
1029  else if (mode==1)
1030  result.no_imgs = only_in_SF1.size();
1031  else if (mode==2)
1032  result.no_imgs = only_in_SF2.size();
1033  result.current_line = result.text_line.begin();
1034 
1035  return result;
1036 }
const SelLine & current()
Definition: selfile.h:526
void mode

◆ operator<<

std::ostream& operator<< ( std::ostream &  o,
const SelFile &  sel 
)
friend

Show a selection file.

Shows all the lines, whether they are comments, active images or discarded images. A new line is printed at the end.

std::cout << sel;

Definition at line 145 of file selfile.cpp.

146 {
147  std::vector<SelLine>::const_iterator current = SF.text_line.begin();
148  std::vector<SelLine>::const_iterator last = SF.text_line.end();
149  while (current != last)
150  {
151  o << *current;
152  current++;
153  }
154  return o;
155 }
const SelLine & current()
Definition: selfile.h:526

The documentation for this class was generated from the following files: