34 #include <shared_mutex> 36 namespace fs = std::filesystem;
47 case bond_type::sing:
return "sing";
48 case bond_type::doub:
return "doub";
49 case bond_type::trip:
return "trip";
50 case bond_type::quad:
return "quad";
51 case bond_type::arom:
return "arom";
52 case bond_type::poly:
return "poly";
53 case bond_type::delo:
return "delo";
56 throw std::invalid_argument(
"Invalid bondType");
62 return bond_type::sing;
64 return bond_type::doub;
66 return bond_type::trip;
68 return bond_type::quad;
70 return bond_type::arom;
72 return bond_type::poly;
74 return bond_type::delo;
77 throw std::invalid_argument(
"Invalid bondType: " + bondType);
85 bool operator()(
const compound_atom &
a,
const compound_atom &
b)
const 87 int d = a.id.compare(b.id);
89 d = a.type_symbol - b.type_symbol;
96 bool operator()(
const compound_bond &
a,
const compound_bond &
b)
const 98 int d = a.atom_id[0].compare(b.atom_id[0]);
100 d = a.atom_id[1].compare(b.atom_id[1]);
102 d =
static_cast<int>(a.type) - static_cast<int>(b.type);
110 compound::compound(cif::datablock &db)
112 auto &chemComp = db[
"chem_comp"];
114 if (chemComp.size() != 1)
115 throw std::runtime_error(
"Invalid compound file, chem_comp should contain a single row");
117 cif::tie(m_id, m_name, m_type, m_formula, m_formula_weight, m_formal_charge) =
118 chemComp.front().get(
"id",
"name",
"type",
"formula",
"formula_weight",
"pdbx_formal_charge");
123 m_group =
"non-polymer";
125 auto &chemCompAtom = db[
"chem_comp_atom"];
126 for (
auto row : chemCompAtom)
129 std::string type_symbol;
130 cif::tie(atom.id, type_symbol, atom.charge, atom.aromatic, atom.leaving_atom, atom.stereo_config, atom.x, atom.y, atom.z) =
131 row.get(
"atom_id",
"type_symbol",
"charge",
"pdbx_aromatic_flag",
"pdbx_leaving_atom_flag",
"pdbx_stereo_config",
132 "model_Cartn_x",
"model_Cartn_y",
"model_Cartn_z");
133 atom.type_symbol = atom_type_traits(type_symbol).type();
134 m_atoms.push_back(std::move(atom));
137 auto &chemCompBond = db[
"chem_comp_bond"];
138 for (
auto row : chemCompBond)
141 std::string valueOrder;
142 cif::tie(bond.atom_id[0], bond.atom_id[1], valueOrder, bond.aromatic, bond.stereo_config) = row.get(
"atom_id_1",
"atom_id_2",
"value_order",
"pdbx_aromatic_flag",
"pdbx_stereo_config");
144 m_bonds.push_back(std::move(bond));
148 compound::compound(cif::datablock &db,
const std::string &
id,
const std::string &name,
const std::string &
type,
const std::string &group)
154 auto &chemCompAtom = db[
"chem_comp_atom"];
155 for (
auto row : chemCompAtom)
158 std::string type_symbol;
159 cif::tie(atom.id, type_symbol, atom.charge, atom.x, atom.y, atom.z) =
160 row.get(
"atom_id",
"type_symbol",
"charge",
"x",
"y",
"z");
161 atom.type_symbol = atom_type_traits(type_symbol).type();
163 m_formal_charge += atom.charge;
164 m_formula_weight += atom_type_traits(atom.type_symbol).weight();
166 m_atoms.push_back(std::move(atom));
169 auto &chemCompBond = db[
"chem_comp_bond"];
170 for (
auto row : chemCompBond)
174 cif::tie(bond.atom_id[0], bond.atom_id[1], btype, bond.aromatic) = row.get(
"atom_id_1",
"atom_id_2",
"type",
"aromatic");
179 bond.type = bond_type::sing;
180 else if (
iequals(btype,
"double"))
181 bond.type = bond_type::doub;
182 else if (
iequals(btype,
"triple"))
183 bond.type = bond_type::trip;
185 bond.type = bond_type::delo;
189 std::cerr <<
"Unimplemented chem_comp_bond.type " << btype <<
" in " <<
id << std::endl;
190 bond.type = bond_type::sing;
192 m_bonds.push_back(std::move(bond));
196 compound_atom compound::get_atom_by_atom_id(
const std::string &atom_id)
const 198 compound_atom result = {};
199 for (
auto &
a : m_atoms)
208 if (result.id != atom_id)
209 throw std::out_of_range(
"No atom " + atom_id +
" in compound " + m_id);
214 bool compound::atoms_bonded(
const std::string &atomId_1,
const std::string &atomId_2)
const 216 auto i = find_if(m_bonds.begin(), m_bonds.end(),
217 [&](
const compound_bond &
b)
219 return (
b.atom_id[0] == atomId_1 and
b.atom_id[1] == atomId_2)
or (
b.atom_id[0] == atomId_2 and
b.atom_id[1] == atomId_1);
222 return i != m_bonds.end();
225 float compound::bond_length(
const std::string &atomId_1,
const std::string &atomId_2)
const 227 auto i = find_if(m_bonds.begin(), m_bonds.end(),
228 [&](
const compound_bond &
b)
230 return (
b.atom_id[0] == atomId_1 and
b.atom_id[1] == atomId_2)
or (
b.atom_id[0] == atomId_2 and
b.atom_id[1] == atomId_1);
235 if (
i != m_bonds.end())
237 auto a = get_atom_by_atom_id(atomId_1);
238 auto b = get_atom_by_atom_id(atomId_2);
250 const std::map<std::string, char> compound_factory::kAAMap{
275 const std::map<std::string, char> compound_factory::kBaseMap{
299 for (
auto c : m_compounds)
303 compound *
get(std::string id)
307 std::shared_lock lock(mMutex);
309 compound *result =
nullptr;
312 for (
auto impl = shared_from_this(); impl; impl = impl->m_next)
314 for (
auto cmp : impl->m_compounds)
327 if (result ==
nullptr and m_missing.count(
id) == 0)
329 for (
auto impl = shared_from_this(); impl; impl = impl->m_next)
331 result = impl->create(
id);
332 if (result !=
nullptr)
336 if (result ==
nullptr)
337 m_missing.insert(
id);
343 std::shared_ptr<compound_factory_impl>
next()
const 350 return m_known_peptides.count(resName)
or 351 (m_next and m_next->is_known_peptide(resName));
356 return m_known_bases.count(resName)
or 357 (m_next and m_next->is_known_base(resName));
361 virtual compound *
create(
const std::string &
id)
373 std::shared_ptr<compound_factory_impl>
m_next;
381 for (
const auto &[key, value] : compound_factory::kAAMap)
384 for (
const auto &[key, value] : compound_factory::kBaseMap)
391 cif::file cifFile(file);
393 if (cifFile.contains(
"comp_list"))
395 auto &compList = cifFile[
"comp_list"];
396 auto &chemComp = compList[
"chem_comp"];
398 for (
const auto &[
id, name, group] : chemComp.rows<std::string, std::string, std::string>(
"id",
"name",
"group"))
422 type =
"peptide linking";
424 type =
"L-peptide linking";
426 type =
"DNA linking";
428 type =
"RNA linking";
430 type =
"non-polymer";
432 auto &db = cifFile[
"comp_" + id];
434 m_compounds.push_back(
new compound(db,
id, name, type, group));
442 cifFile.load_dictionary(
"mmcif_pdbx.dic");
444 if (not cifFile.is_valid())
446 std::cerr <<
"The components file " << file <<
" is not valid" << std::endl;
448 std::cerr <<
"(use --verbose to see why)" << std::endl;
451 catch (
const std::exception &e)
453 std::cerr <<
"When trying to load the components file " << file <<
" there was an exception:" << std::endl
454 << e.what() << std::endl;
457 for (
auto &db : cifFile)
470 , mCompoundsFile(file)
479 compound *
create(
const std::string &
id)
override;
487 compound *result =
nullptr;
489 std::unique_ptr<std::istream> ccd;
491 if (mCompoundsFile.empty())
496 std::cerr <<
"Could not locate the CCD components.cif file, please make sure the software is installed properly and/or use the update-libcifpp-data to fetch the data." << std::endl;
501 ccd.reset(
new std::ifstream(mCompoundsFile));
509 std::cout <<
"Creating component index " 514 cif::parser parser(*ccd, file);
515 mIndex = parser.index_datablocks();
518 std::cout <<
" done" << std::endl;
521 if (mCompoundsFile.empty())
525 throw std::runtime_error(
"Could not locate the CCD components.cif file, please make sure the software is installed properly and/or use the update-libcifpp-data to fetch the data.");
528 ccd.reset(
new std::ifstream(mCompoundsFile));
533 std::cout <<
"Loading component " <<
id <<
"...";
537 cif::parser parser(*ccd, file);
538 parser.parse_single_datablock(
id, mIndex);
541 std::cout <<
" done" << std::endl;
543 if (not file.empty())
545 auto &db = file.front();
548 result =
new compound(db);
550 std::shared_lock lock(
mMutex);
556 std::cerr <<
"Could not locate compound " <<
id <<
" in the CCD components file" << std::endl;
569 compound *
create(
const std::string &
id)
override;
573 fs::path m_CLIBD_MON;
578 , m_file((clibd_mon /
"list" /
"mon_lib_list.cif").string())
579 , m_CLIBD_MON(clibd_mon)
581 const std::regex peptideRx(
"(?:[lmp]-)?peptide", std::regex::icase);
583 auto &chemComps = m_file[
"comp_list"][
"chem_comp"];
585 for (
const auto &[group, threeLetterCode] : chemComps.rows<std::string, std::string>(
"group",
"three_letter_code"))
587 if (std::regex_match(group, peptideRx))
596 compound *result =
nullptr;
598 auto &cat = m_file[
"comp_list"][
"chem_comp"];
600 auto rs = cat.find(cif::key(
"three_letter_code") ==
id);
604 auto row = rs.front();
606 std::string name, group;
607 uint32_t numberAtomsAll, numberAtomsNh;
608 cif::tie(name, group, numberAtomsAll, numberAtomsNh) =
609 row.get(
"name",
"group",
"number_atoms_all",
"number_atoms_nh");
613 if (not fs::exists(resFile) and (
id ==
"COM" or id ==
"CON" or "PRN"))
614 resFile = m_CLIBD_MON /
cif::to_lower_copy(
id.substr(0, 1)) / (
id +
'_' +
id +
".cif");
616 if (fs::exists(resFile))
618 cif::file cf(resFile.string());
621 auto &db = cf[
"comp_" + id];
645 type =
"peptide linking";
647 type =
"L-peptide linking";
649 type =
"DNA linking";
651 type =
"RNA linking";
653 type =
"non-polymer";
655 m_compounds.push_back(
new compound(db,
id, name, type, group));
665 std::unique_ptr<compound_factory> compound_factory::s_instance;
666 thread_local std::unique_ptr<compound_factory> compound_factory::tl_instance;
667 bool compound_factory::s_use_thread_local_instance;
669 void compound_factory::init(
bool useThreadLocalInstanceOnly)
671 s_use_thread_local_instance = useThreadLocalInstanceOnly;
674 compound_factory::compound_factory()
679 m_impl = std::make_shared<CCD_compound_factory_impl>(m_impl);
681 std::cerr <<
"CCD components.cif file was not found" << std::endl;
683 const char *clibd_mon = getenv(
"CLIBD_MON");
684 if (clibd_mon !=
nullptr and fs::is_directory(clibd_mon))
685 m_impl = std::make_shared<CCP4_compound_factory_impl>(clibd_mon, m_impl);
687 std::cerr <<
"CCP4 monomers library not found, CLIBD_MON is not defined" << std::endl;
690 compound_factory::~compound_factory()
694 compound_factory &compound_factory::instance()
696 if (s_use_thread_local_instance)
699 tl_instance.reset(
new compound_factory());
705 s_instance.reset(
new compound_factory());
710 void compound_factory::clear()
712 if (s_use_thread_local_instance)
713 tl_instance.reset(
nullptr);
718 void compound_factory::set_default_dictionary(
const fs::path &inDictFile)
720 if (not fs::exists(inDictFile))
721 throw std::runtime_error(
"file not found: " + inDictFile.string());
727 catch (
const std::exception &)
729 std::throw_with_nested(std::runtime_error(
"Error loading dictionary " + inDictFile.string()));
733 void compound_factory::push_dictionary(
const fs::path &inDictFile)
735 if (not fs::exists(inDictFile))
736 throw std::runtime_error(
"file not found: " + inDictFile.string());
742 catch (
const std::exception &)
744 std::throw_with_nested(std::runtime_error(
"Error loading dictionary " + inDictFile.string()));
748 void compound_factory::pop_dictionary()
751 m_impl = m_impl->next();
754 const compound *compound_factory::create(std::string
id)
756 return m_impl ? m_impl->get(
id) :
nullptr;
759 bool compound_factory::is_known_peptide(
const std::string &resName)
const 761 return m_impl ? m_impl->is_known_peptide(resName) : kAAMap.count(resName) > 0;
764 bool compound_factory::is_known_base(
const std::string &resName)
const 766 return m_impl ? m_impl->is_known_base(resName) : kBaseMap.count(resName) > 0;
void to_upper(std::string &s)
CCD_compound_factory_impl(std::shared_ptr< compound_factory_impl > next)
void replace_all(std::string &s, std::string_view what, std::string_view with)
bool is_known_base(const std::string &resName)
compound * create(const std::string &id) override
bool is_known_peptide(const std::string &resName)
CCP4_compound_factory_impl(const fs::path &clibd_mon, std::shared_ptr< compound_factory_impl > next=nullptr)
CCD_compound_factory_impl(std::shared_ptr< compound_factory_impl > next, const fs::path &file)
std::vector< compound * > m_compounds
std::unique_ptr< std::istream > load_resource(std::filesystem::path name)
bool iequals(std::string_view a, std::string_view b)
std::set< std::string > m_missing
compound_factory_impl(std::shared_ptr< compound_factory_impl > next)
std::string to_lower_copy(std::string_view s)
compound * create(const std::string &id) override
void max(Image< double > &op1, const Image< double > &op2)
std::set< std::string > m_known_peptides
std::set< std::string > m_known_bases
TYPE distance(struct Point_T *p, struct Point_T *q)
bool operator()(const compound_bond &a, const compound_bond &b) const
std::shared_ptr< compound_factory_impl > m_next
virtual compound * create(const std::string &id)
std::shared_ptr< compound_factory_impl > next() const
virtual ~compound_factory_impl()
std::shared_timed_mutex mMutex
std::string to_string(bond_type bondType)
bool operator()(const compound_atom &a, const compound_atom &b) const
bond_type from_string(const std::string &bondType)
cif::parser::datablock_index mIndex