Xmipp  v3.23.11-Nereus
pdb2cif_remark_3.cpp
Go to the documentation of this file.
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "cif++.hpp"
28 #include "cif++/pdb/pdb2cif_remark_3.hpp"
29 
30 #include <map>
31 #include <set>
32 
33 namespace cif::pdb
34 {
35 
36 // --------------------------------------------------------------------
37 
39 {
 // One entry of a REMARK 3 template table: a pattern plus the names under which
 // its captures are filed. Presumably capture group N of `rx` is stored in the
 // N-th name of `items` (the tables pair equal numbers of groups and names); the
 // consuming code is not part of this listing.
 // NOTE(review): every table initializer passes an integer as its second field
 // (e.g. `1`, `0`, `84 - 98`), but no member line for it appears here between
 // `rx` and `category` -- confirm against the original source.

 // Regular-expression text for one template line; the tables supply raw-string patterns.
40  const char *rx;
 // Category name for the captured values, e.g. "refine" or "refine_ls_shell".
 // Entries that only match a heading line leave it out.
42  const char *category;
 // Item names within `category`, listed in the order of the capture groups in `rx`.
43  std::initializer_list<const char *> items;
 // Restraint type label such as "t_bond_d" or "c_bond_d". In the visible tables it is
 // non-null only on "refine_ls_restr" entries.
44  const char *lsRestrType = nullptr;
 // No default member initializer: entries that omit it get `false` through aggregate
 // initialization. Presumably `true` asks for a fresh row instead of reusing one -- TODO confirm.
45  bool createNew;
46 };
47 
48 // --------------------------------------------------------------------
49 
51  /* 0 */ { R"(DATA USED IN REFINEMENT\.)", 1 },
52  /* 1 */ { R"(RESOLUTION RANGE HIGH \(ANGSTROMS\) :\s+(.+?))", 1, "refine", { "ls_d_res_high" } },
53  /* 2 */ { R"(RESOLUTION RANGE LOW \(ANGSTROMS\) :\s+(.+?))", 1, "refine", { "ls_d_res_low" } },
54  /* 3 */ { R"(DATA CUTOFF \(SIGMA\(F\)\) :\s+(.+?))", 1, "refine", { "pdbx_ls_sigma_F" } },
55  /* 4 */ { R"(COMPLETENESS FOR RANGE \(%\) :\s+(.+?))", 1, "refine", { "ls_percent_reflns_obs" } },
56  /* 5 */ { R"(NUMBER OF REFLECTIONS :\s+(.+?))", 1, "refine", { "ls_number_reflns_obs" } },
57  /* 6 */ { R"(FIT TO DATA USED IN REFINEMENT\.)", 1 },
58  /* 7 */ { R"(CROSS-VALIDATION METHOD :\s+(.+?))", 1, "refine", { "pdbx_ls_cross_valid_method" } },
59  /* 8 */ { R"(FREE R VALUE TEST SET SELECTION :\s+(.+?))", 1, "refine", { "pdbx_R_Free_selection_details" } },
60  /* 9 */ { R"(R VALUE \(WORKING ?\+ ?TEST SET\) :\s+(.+?))", 1, "refine", { "ls_R_factor_obs" } },
61  /* 10 */ { R"(R VALUE \(WORKING SET\) :\s+(.+?))", 1, "refine", { "ls_R_factor_R_work" } },
62  /* 11 */ { R"(FREE R VALUE :\s+(.+?))", 1, "refine", { "ls_R_factor_R_free" } },
63  /* 12 */ { R"(FREE R VALUE TEST SET SIZE \(%\) :\s+(.+?))", 1, "refine", { "ls_percent_reflns_R_free" } },
64  /* 13 */ { R"(FREE R VALUE TEST SET COUNT :\s+(.+?))", 1, "refine", { "ls_number_reflns_R_free" } },
65  /* 14 */ { R"(ESTIMATED ERROR OF FREE R VALUE :\s+(.+?))", 1, "refine", { "ls_R_factor_R_free_error" } },
66  /* 15 */ { R"(FIT IN THE HIGHEST RESOLUTION BIN\.)", 1 },
67  /* 16 */ { R"(TOTAL NUMBER OF BINS USED :\s+(.+?))", 1, "refine_ls_shell", { "pdbx_total_number_of_bins_used" } },
68  /* 17 */ { R"(BIN RESOLUTION RANGE HIGH \(A(?:NGSTROMS)?\) :\s+(.+?))", 1, "refine_ls_shell", { "d_res_high" } },
69  /* 18 */ { R"(BIN RESOLUTION RANGE LOW \(A(?:NGSTROMS)?\) :\s+(.+?))", 1, "refine_ls_shell", { "d_res_low" } },
70  /* 19 */ { R"(BIN COMPLETENESS \(WORKING\+TEST\) \(%\) :\s+(.+?))", 1, "refine_ls_shell", { "percent_reflns_obs" } },
71  /* 20 */ { R"(REFLECTIONS IN BIN \(WORKING ?\+ ?TEST(?: SET)?\) :\s+(.+?))", 1, "refine_ls_shell", { "number_reflns_all" } },
72  /* 21 */ { R"(BIN R VALUE \(WORKING ?\+ ?TEST(?: SET)?\) :\s+(.+?))", 1, "refine_ls_shell", { "R_factor_all" } },
73  /* 22 */ { R"(REFLECTIONS IN BIN \(WORKING SET\) :\s+(.+?))", 1, "refine_ls_shell", { "number_reflns_R_work" } },
74  /* 23 */ { R"(BIN R VALUE \(WORKING SET\) :\s+(.+?))", 1, "refine_ls_shell", { "R_factor_R_work" } },
75  /* 24 */ { R"(BIN FREE R VALUE :\s+(.+?))", 1, "refine_ls_shell", { "R_factor_R_free" } },
76  /* 25 */ { R"(BIN FREE R VALUE TEST SET SIZE \(%\) :\s+(.+?))", 1, "refine_ls_shell", { "percent_reflns_R_free" } },
77  /* 26 */ { R"(BIN FREE R VALUE TEST SET COUNT :\s+(.+?))", 1, "refine_ls_shell", { "number_reflns_R_free" } },
78  /* 27 */ { R"(ESTIMATED ERROR OF BIN FREE R VALUE :\s+(.+?))", 1, "refine_ls_shell", { "R_factor_R_free_error" } },
79  /* 28 */ { R"(NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT\.)", 1 },
80  /* 29 */ { R"(PROTEIN ATOMS :\s+(.+?))", 1, "refine_hist", { "pdbx_number_atoms_protein" } },
81  /* 30 */ { R"(NUCLEIC ACID ATOMS :\s+(.+?))", 1, "refine_hist", { "pdbx_number_atoms_nucleic_acid" } },
82  /* 31 */ { R"(HETEROGEN ATOMS :\s+(.+?))", 1, "refine_hist", { "pdbx_number_atoms_ligand" } },
83  /* 32 */ { R"(SOLVENT ATOMS :\s+(.+?))", 1, "refine_hist", { "number_atoms_solvent" } },
84  /* 33 */ { R"(B VALUES\.)", 1 },
85  /* 34 */ { R"(B VALUE TYPE :\s+(.+?))", 1, "refine", { "pdbx_TLS_residual_ADP_flag" } },
86  /* 35 */ { R"(FROM WILSON PLOT \(A\*\*2\) :\s+(.+?))", 1, "reflns", { "B_iso_Wilson_estimate" } },
87  /* 36 */ { R"(MEAN B VALUE \(OVERALL, A\*\*2\) :\s+(.+?))", 1, "refine", { "B_iso_mean" } },
88  /* 37 */ { R"(OVERALL ANISOTROPIC B VALUE\.)", 1 },
89  /* 38 */ { R"(B11 \(A\*\*2\) :\s+(.+?))", 1, "refine", { "aniso_B[1][1]" } },
90  /* 39 */ { R"(B22 \(A\*\*2\) :\s+(.+?))", 1, "refine", { "aniso_B[2][2]" } },
91  /* 40 */ { R"(B33 \(A\*\*2\) :\s+(.+?))", 1, "refine", { "aniso_B[3][3]" } },
92  /* 41 */ { R"(B12 \(A\*\*2\) :\s+(.+?))", 1, "refine", { "aniso_B[1][2]" } },
93  /* 42 */ { R"(B13 \(A\*\*2\) :\s+(.+?))", 1, "refine", { "aniso_B[1][3]" } },
94  /* 43 */ { R"(B23 \(A\*\*2\) :\s+(.+?))", 1, "refine", { "aniso_B[2][3]" } },
95  /* 44 */ { R"(ESTIMATED COORDINATE ERROR\.)", 1 },
96  /* 45 */ { R"(ESD FROM LUZZATI PLOT \(A\) :\s+(.+?))", 1, "refine_analyze", { "Luzzati_coordinate_error_obs" } },
97  /* 46 */ { R"(DPI \(BLOW EQ-10\) BASED ON R VALUE \(A\) :\s+(.+?))", 1, "refine", { "pdbx_overall_SU_R_Blow_DPI" } },
98  /* 47 */ { R"(DPI \(BLOW EQ-9\) BASED ON FREE R VALUE \(A\) :\s+(.+?))", 1, "refine", { "pdbx_overall_SU_R_free_Blow_DPI" } },
99  /* 48 */ { R"(DPI \(CRUICKSHANK\) BASED ON R VALUE \(A\) :\s+(.+?))", 1, "refine", { "overall_SU_R_Cruickshank_DPI" } },
100  /* 49 */ { R"(DPI \(CRUICKSHANK\) BASED ON FREE R VALUE \(A\) :\s+(.+?))", 1, "refine", { "pdbx_overall_SU_R_free_Cruickshank_DPI" } },
101  /* 50 */ { R"(REFERENCES: BLOW.+)", 1 },
102  /* 51 */ { R"(CORRELATION COEFFICIENTS\.)", 1 },
103  /* 52 */ { R"(CORRELATION COEFFICIENT FO-FC :\s+(.+?))", 1, "refine", { "correlation_coeff_Fo_to_Fc" } },
104  /* 53 */ { R"(CORRELATION COEFFICIENT FO-FC FREE :\s+(.+?))", 1, "refine", { "correlation_coeff_Fo_to_Fc_free" } },
105  /* 54 */ { R"(NUMBER OF GEOMETRIC FUNCTION TERMS DEFINED : 15)", 1 },
106  /* 55 */ { R"(TERM COUNT WEIGHT FUNCTION\.)", 1 },
107  /* 56 */ { R"(BOND LENGTHS :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_bond_d", true },
108  /* 57 */ { R"(BOND ANGLES :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_angle_deg", true },
109  /* 58 */ { R"(TORSION ANGLES :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_dihedral_angle_d", true },
110  /* 59 */ { R"(TRIGONAL CARBON PLANES :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_trig_c_planes", true },
111  /* 60 */ { R"(GENERAL PLANES :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_gen_planes", true },
112  /* 61 */ { R"(ISOTROPIC THERMAL FACTORS :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_it", true },
113  /* 62 */ { R"(BAD NON-BONDED CONTACTS :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_nbd", true },
114  /* 63 */ { R"(IMPROPER TORSIONS :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_improper_torsion", true },
115  /* 64 */ { R"(PSEUDOROTATION ANGLES :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_pseud_angle", true },
116  /* 65 */ { R"(CHIRAL IMPROPER TORSION :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_chiral_improper_torsion", true },
117  /* 66 */ { R"(SUM OF OCCUPANCIES :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_sum_occupancies", true },
118  /* 67 */ { R"(UTILITY DISTANCES :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_utility_distance", true },
119  /* 68 */ { R"(UTILITY ANGLES :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_utility_angle", true },
120  /* 69 */ { R"(UTILITY TORSION :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_utility_torsion", true },
121  /* 70 */ { R"(IDEAL-DIST CONTACT TERM :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_ideal_dist_contact", true },
122  /* 71 */ { R"(RMS DEVIATIONS FROM IDEAL VALUES\.)", 1 },
123  /* 72 */ { R"(BOND LENGTHS \(A\) :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_bond_d", false },
124  /* 73 */ { R"(BOND ANGLES \(DEGREES\) :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_angle_deg", false },
125  /* 74 */ { R"(TORSION ANGLES \(DEGREES\) :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_dihedral_angle_d", false },
126  /* 75 */ { R"(PSEUDO ROTATION ANGLES \(DEGREES\) :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_pseud_angle", false },
127  /* 76 */ { R"(TRIGONAL CARBON PLANES \(A\) :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_trig_c_planes", false },
128  /* 77 */ { R"(GENERAL PLANES \(A\) :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_gen_planes", false },
129  /* 78 */ { R"(ISOTROPIC THERMAL FACTORS \(A\*\*2\) :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_it", false },
130  /* 79 */ { R"(NON-BONDED CONTACTS \(A\) :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_nbd", false },
131  /* 80 */ { R"(PEPTIDE OMEGA TORSION ANGLES \(DEGREES\) :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_omega_torsion", false },
132  /* 81 */ { R"(OTHER TORSION ANGLES \(DEGREES\) :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_other_torsion", false },
133  /* 82 */ { R"(TLS DETAILS\.?)", 1 },
134  /* 83 */ { R"(NUMBER OF TLS GROUPS :.+)", 1 },
135  /* 84 */ { R"(TLS GROUP :\s*(\d+))", 1, "pdbx_refine_tls", { "id" }, nullptr, true },
136  /* 85 */ { R"((?:SELECTION|SET) *:\s+(.+?))", 1, "pdbx_refine_tls_group", { "selection_details" }, nullptr, true },
137  /* 86 */ { R"(ORIGIN FOR THE GROUP \(A\):\s+(.+?)\s+(.+?)\s+(.+?))", 1, "pdbx_refine_tls", { "origin_x", "origin_y", "origin_z" } },
138  /* 87 */ { R"(T TENSOR)", 1 },
139  /* 88 */ { R"(T11:\s+(.+?) T22:\s+(.+?))", 1, "pdbx_refine_tls", { "T[1][1]", "T[2][2]" } },
140  /* 89 */ { R"(T33:\s+(.+?) T12:\s+(.+?))", 1, "pdbx_refine_tls", { "T[3][3]", "T[1][2]" } },
141  /* 90 */ { R"(T13:\s+(.+?) T23:\s+(.+?))", 1, "pdbx_refine_tls", { "T[1][3]", "T[2][3]" } },
142  /* 91 */ { R"(L TENSOR)", 1 },
143  /* 92 */ { R"(L11:\s+(.+?) L22:\s+(.+?))", 1, "pdbx_refine_tls", { "L[1][1]", "L[2][2]" } },
144  /* 93 */ { R"(L33:\s+(.+?) L12:\s+(.+?))", 1, "pdbx_refine_tls", { "L[3][3]", "L[1][2]" } },
145  /* 94 */ { R"(L13:\s+(.+?) L23:\s+(.+?))", 1, "pdbx_refine_tls", { "L[1][3]", "L[2][3]" } },
146  /* 95 */ { R"(S TENSOR)", 1 },
147  /* 96 */ { R"(S11:\s+(.+?) S12:\s+(.+?) S13:\s+(.+?))", 1, "pdbx_refine_tls", { "S[1][1]", "S[1][2]", "S[1][3]" } },
148  /* 97 */ { R"(S21:\s+(.+?) S22:\s+(.+?) S23:\s+(.+?))", 1, "pdbx_refine_tls", { "S[2][1]", "S[2][2]", "S[2][3]" } },
149  /* 98 */ { R"(S31:\s+(.+?) S32:\s+(.+?) S33:\s+(.+?))", 84 - 98, "pdbx_refine_tls", { "S[3][1]", "S[3][2]", "S[3][3]" } },
150 };
151 
// Remark3Parser specialisation wired to the BUSTER-TNT template table.
// It adds no state or behaviour of its own; the constructor only selects the
// table and the program-name pattern handed to the base class.
152 class BUSTER_TNT_Remark3Parser : public Remark3Parser
153 {
154  public:
 // Forwards name, expMethod, r and db unchanged to Remark3Parser, together with:
 //  - kBusterTNT_Template and its element count (array size / sizeof(TemplateLine));
 //  - a regex accepting "BUSTER" or "BUSTER-TNT" (group 1), optionally followed by a
 //    space and a version (group 2) that starts with digits and may continue with
 //    '.' plus any further characters.
 // NOTE(review): presumably the regex is matched against the program name from the
 // REMARK 3 record -- the base class is not visible here, confirm.
155  BUSTER_TNT_Remark3Parser(const std::string &name, const std::string &expMethod, PDBRecord *r, cif::datablock &db)
156  : Remark3Parser(name, expMethod, r, db,
157  kBusterTNT_Template, sizeof(kBusterTNT_Template) / sizeof(TemplateLine),
158  std::regex(R"((BUSTER(?:-TNT)?)(?: (\d+(?:\..+)?))?)"))
159  {
160  }
161 };
162 
164  /* 0 */ { R"(REFINEMENT TARGET\s*:\s*(.+))", 1, "refine", { "pdbx_stereochemistry_target_values" } },
165  /* 1 */ { R"(DATA USED IN REFINEMENT\.)", 1 },
166  /* 2 */ { R"(RESOLUTION RANGE HIGH \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_high" } },
167  /* 3 */ { R"(RESOLUTION RANGE LOW \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_low" } },
168  /* 4 */ { R"(DATA CUTOFF \(SIGMA\(F\)\)\s*:\s*(.+))", 1, "refine", { "pdbx_ls_sigma_F" } },
169  /* 5 */ { R"(DATA CUTOFF HIGH \(ABS\(F\)\)\s*:\s*(.+))", 1, "refine", { "pdbx_data_cutoff_high_absF" } },
170  /* 6 */ { R"(DATA CUTOFF LOW \(ABS\(F\)\)\s*:\s*(.+))", 1, "refine", { "pdbx_data_cutoff_low_absF" } },
171  /* 7 */ { R"(COMPLETENESS \(WORKING\+TEST\) \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_obs" } },
172  /* 8 */ { R"(NUMBER OF REFLECTIONS\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_obs" } },
173  /* 9 */ { R"(FIT TO DATA USED IN REFINEMENT\.)", 1 },
174  /* 10 */ { R"(CROSS-VALIDATION METHOD\s*:\s*(.+))", 1, "refine", { "pdbx_ls_cross_valid_method" } },
175  /* 11 */ { R"(FREE R VALUE TEST SET SELECTION\s*:\s*(.+))", 1, "refine", { "pdbx_R_Free_selection_details" } },
176  /* 12 */ { R"(R VALUE \(WORKING \+ TEST SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_obs" } },
177  /* 13 */ { R"(R VALUE \(WORKING SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_work" } },
178  /* 14 */ { R"(FREE R VALUE\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_free" } },
179  /* 15 */ { R"(FREE R VALUE TEST SET SIZE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_R_free" } },
180  /* 16 */ { R"(FREE R VALUE TEST SET COUNT\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_R_free" } },
181  /* 17 */ { R"(ESTIMATED ERROR OF FREE R VALUE\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_free_error" } },
182  /* 18 */ { R"(FIT/AGREEMENT OF MODEL WITH ALL DATA\.)", 1 },
183  /* 19 */ { R"(R VALUE \(WORKING \+ TEST SET, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "R_factor_all_no_cutoff" } },
184  /* 20 */ { R"(R VALUE \(WORKING SET, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "R_factor_obs_no_cutoff" } },
185  /* 21 */ { R"(FREE R VALUE \(NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_factor_no_cutoff" } },
186  /* 22 */ { R"(FREE R VALUE TEST SET SIZE \(%, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_size_perc_no_cutoff" } },
187  /* 23 */ { R"(FREE R VALUE TEST SET COUNT \(NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_ct_no_cutoff" } },
188  /* 24 */ { R"(ESTIMATED ERROR OF FREE R VALUE \(NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_error_no_cutoff" } },
189  /* 25 */ { R"(TOTAL NUMBER OF REFLECTIONS \(NO CUTOFF\)\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_all" } },
190  /* 26 */ { R"(FIT IN THE HIGHEST RESOLUTION BIN\.)", 1 },
191  /* 27 */ { R"(TOTAL NUMBER OF BINS USED\s*:\s*(.+))", 1, "refine_ls_shell", { "pdbx_total_number_of_bins_used" } },
192  /* 28 */ { R"(BIN RESOLUTION RANGE HIGH \(A\)\s*:\s*(.+))", 1, "refine_ls_shell", { "d_res_high" } },
193  /* 29 */ { R"(BIN RESOLUTION RANGE LOW \(A\)\s*:\s*(.+))", 1, "refine_ls_shell", { "d_res_low" } },
194  /* 30 */ { R"(BIN COMPLETENESS \(WORKING\+TEST\) \(%\)\s*:\s*(.+))", 1, "refine_ls_shell", { "percent_reflns_obs" } },
195  /* 31 */ { R"(REFLECTIONS IN BIN \(WORKING SET\)\s*:\s*(.+))", 1, "refine_ls_shell", { "number_reflns_R_work" } },
196  /* 32 */ { R"(BIN R VALUE \(WORKING SET\)\s*:\s*(.+))", 1, "refine_ls_shell", { "R_factor_R_work" } },
197  /* 33 */ { R"(BIN FREE R VALUE\s*:\s*(.+))", 1, "refine_ls_shell", { "R_factor_R_free" } },
198  /* 34 */ { R"(BIN FREE R VALUE TEST SET SIZE \(%\)\s*:\s*(.+))", 1, "refine_ls_shell", { "percent_reflns_R_free" } },
199  /* 35 */ { R"(BIN FREE R VALUE TEST SET COUNT\s*:\s*(.+))", 1, "refine_ls_shell", { "number_reflns_R_free" } },
200  /* 36 */ { R"(ESTIMATED ERROR OF BIN FREE R VALUE\s*:\s*(.+))", 1, "refine_ls_shell", { "R_factor_R_free_error" } },
201  /* 37 */ { R"(NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT\.)", 1 },
202  /* 38 */ { R"(PROTEIN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_protein" } },
203  /* 39 */ { R"(NUCLEIC ACID ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_nucleic_acid" } },
204  /* 40 */ { R"(HETEROGEN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_ligand" } },
205  /* 41 */ { R"(SOLVENT ATOMS\s*:\s*(.+))", 1, "refine_hist", { "number_atoms_solvent" } },
206  /* 42 */ { R"(B VALUES\.)", 1 },
207  /* 43 */ { R"(B VALUE TYPE\s*:\s*(.+))", 1, "refine", { "pdbx_TLS_residual_ADP_flag" } },
208  /* 44 */ { R"(FROM WILSON PLOT \(A\*\*2\)\s*:\s*(.+))", 1, "reflns", { "B_iso_Wilson_estimate" } },
209  /* 45 */ { R"(MEAN B VALUE \(OVERALL, A\*\*2\)\s*:\s*(.+))", 1, "refine", { "B_iso_mean" } },
210  /* 46 */ { R"(OVERALL ANISOTROPIC B VALUE\.)", 1 },
211  /* 47 */ { R"(B11 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][1]" } },
212  /* 48 */ { R"(B22 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][2]" } },
213  /* 49 */ { R"(B33 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[3][3]" } },
214  /* 50 */ { R"(B12 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][2]" } },
215  /* 51 */ { R"(B13 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][3]" } },
216  /* 52 */ { R"(B23 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][3]" } },
217  /* 53 */ { R"(ESTIMATED COORDINATE ERROR\.)", 1 },
218  /* 54 */ { R"(ESD FROM LUZZATI PLOT \(A\)\s*:\s*(.+))", 1, "refine_analyze", { "Luzzati_coordinate_error_obs" } },
219  /* 55 */ { R"(ESD FROM SIGMAA \(A\)\s*:\s*(.+))", 1, "refine_analyze", { "Luzzati_sigma_a_obs" } },
220  /* 56 */ { R"(LOW RESOLUTION CUTOFF \(A\)\s*:\s*(.+))", 1, "refine_analyze", { "Luzzati_d_res_low_obs" } },
221  /* 57 */ { R"(CROSS-VALIDATED ESTIMATED COORDINATE ERROR\.)", 1 },
222  /* 58 */ { R"(ESD FROM C-V LUZZATI PLOT \(A\)\s*:\s*(.+))", 1, "refine_analyze", { "Luzzati_coordinate_error_free" } },
223  /* 59 */ { R"(ESD FROM C-V SIGMAA \(A\)\s*:\s*(.+))", 1, "refine_analyze", { "Luzzati_sigma_a_free" } },
224  /* 60 */ { R"(RMS DEVIATIONS FROM IDEAL VALUES\.)", 1 },
225  /* 61 */ { R"(BOND LENGTHS \(A\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "c_bond_d", false },
226  /* 62 */ { R"(BOND ANGLES \(DEGREES\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "c_angle_deg", false },
227  /* 63 */ { R"(DIHEDRAL ANGLES \(DEGREES\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "c_dihedral_angle_d", false },
228  /* 64 */ { R"(IMPROPER ANGLES \(DEGREES\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "c_improper_angle_d", false },
229  /* 65 */ { R"(ISOTROPIC THERMAL MODEL\s*:\s*(.+))", 1, "refine", { "pdbx_isotropic_thermal_model" } },
230  /* 66 */ { R"(ISOTROPIC THERMAL FACTOR RESTRAINTS\. RMS SIGMA)", 1 },
231  /* 67 */ { R"(MAIN-CHAIN BOND \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "c_mcbond_it", false },
232  /* 68 */ { R"(MAIN-CHAIN ANGLE \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "c_mcangle_it", false },
233  /* 69 */ { R"(SIDE-CHAIN BOND \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "c_scbond_it", false },
234  /* 70 */ { R"(SIDE-CHAIN ANGLE \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "c_scangle_it", false },
235  /* 71 */ { R"(BULK SOLVENT MODELING\.)", 1 },
236  /* 72 */ { R"(METHOD USED\s*:\s*(.+))", 1, "refine", { "solvent_model_details" } },
237  /* 73 */ { R"(KSOL\s*:\s*(.+))", 1, "refine", { "solvent_model_param_ksol" } },
238  /* 74 */ { R"(BSOL\s*:\s*(.+))", 1, "refine", { "solvent_model_param_bsol" } },
239  /* 75 */ { R"(NCS MODEL\s*:\s*(.+))", 1, /* "refine_ls_restr_ncs", { "ncs_model_details" } */ },
240  /* 76 */ { R"(NCS RESTRAINTS\. RMS SIGMA/WEIGHT)", 1 },
241  /* 77 */ { R"(GROUP (\d+) POSITIONAL \(A\)\s*:\s*(.+))", 1, /* "refine_ls_restr_ncs", { "dom_id", "rms_dev_position", "weight_position" } */ },
242  /* 78 */ { R"(GROUP (\d+) B-FACTOR \(A\*\*2\)\s*:\s*(.+))", 1, /* "refine_ls_restr_ncs", { "dom_id", "rms_dev_B_iso", "weight_B_iso" } */ },
243  /* 79 */ { R"(PARAMETER FILE (\d+) :\s+(.+))", 1, /* "pdbx_xplor_file", { "serial_no", "param_file" } */ },
244  /* 80 */ { R"(TOPOLOGY FILE (\d+) :\s+(.+))", 1, /* "pdbx_xplor_file", { "serial_no", "topol_file" } */ },
245 };
246 
// Remark3Parser specialisation wired to the CNS template table.
// It adds no state or behaviour of its own; the constructor only selects the
// table and the program-name pattern handed to the base class.
247 class CNS_Remark3Parser : public Remark3Parser
248 {
249  public:
 // Forwards name, expMethod, r and db unchanged to Remark3Parser, together with:
 //  - kCNS_Template and its element count (array size / sizeof(TemplateLine));
 //  - a regex accepting "CNS" or "CNX" (group 1), optionally followed by a space
 //    and a numeric version of the form digits or digits.digits (group 2).
 // NOTE(review): presumably the regex is matched against the program name from the
 // REMARK 3 record -- the base class is not visible here, confirm.
250  CNS_Remark3Parser(const std::string &name, const std::string &expMethod, PDBRecord *r, cif::datablock &db)
251  : Remark3Parser(name, expMethod, r, db, kCNS_Template,
252  sizeof(kCNS_Template) / sizeof(TemplateLine), std::regex(R"((CN[SX])(?: (\d+(?:\.\d+)?))?)"))
253  {
254  }
255 };
256 
258  /* 0 */ { R"(REFINEMENT TARGET\s*:\s*(.+))", 1, "refine", { "pdbx_stereochemistry_target_values" } },
259  /* 1 */ { R"(DATA USED IN REFINEMENT\.)", 1 },
260  /* 2 */ { R"(RESOLUTION RANGE HIGH \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_high" } },
261  /* 3 */ { R"(RESOLUTION RANGE LOW \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_low" } },
262  /* 4 */ { R"(MIN\(FOBS/SIGMA_FOBS\)\s*:\s*(.+))", 1, "refine", { "pdbx_ls_sigma_F" } },
263  /* 5 */ { R"(COMPLETENESS FOR RANGE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_obs" } },
264  /* 6 */ { R"(NUMBER OF REFLECTIONS\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_obs" } },
265  /* 7 */ { R"(FIT TO DATA USED IN REFINEMENT\.)", 1 },
266  /* 8 */ { R"(R VALUE \(WORKING \+ TEST SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_obs" } },
267  /* 9 */ { R"(R VALUE \(WORKING SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_work" } },
268  /* 10 */ { R"(FREE R VALUE\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_free" } },
269  /* 11 */ { R"(FREE R VALUE TEST SET SIZE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_R_free" } },
270  /* 12 */ { R"(FREE R VALUE TEST SET COUNT\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_R_free" } },
271  /* 13 */ { R"(FIT TO DATA USED IN REFINEMENT \(IN BINS\)\.)", 1 },
272  /* 14 */ { R"(BIN RESOLUTION RANGE COMPL\. NWORK NFREE RWORK RFREE)", 1 },
273  /* 15 */ { R"(\d+ (\d+(?:\.\d+)?) - (\d+(?:\.\d+)?) (\d+(?:\.\d+)?) (\d+) (\d+) (\d+(?:\.\d+)?) (\d+(?:\.\d+)?))", 0, "refine_ls_shell", { "d_res_low", "d_res_high", "percent_reflns_obs", "number_reflns_R_work", "number_reflns_R_free", "R_factor_R_work", "R_factor_R_free" }, nullptr, true },
274  /* 16 */ { R"(BULK SOLVENT MODELLING\.)", 1 },
275  /* 17 */ { R"(METHOD USED\s*:\s*(.+))", 1, "refine", { "solvent_model_details" } },
276  /* 18 */ { R"(SOLVENT RADIUS\s*:\s*(.+))", 1, "refine", { "pdbx_solvent_vdw_probe_radii" } },
277  /* 19 */ { R"(SHRINKAGE RADIUS\s*:\s*(.+))", 1, "refine", { "pdbx_solvent_shrinkage_radii" } },
278  /* 20 */ { R"(K_SOL\s*:\s*(.+))", 1, "refine", { "solvent_model_param_ksol" } },
279  /* 21 */ { R"(B_SOL\s*:\s*(.+))", 1, "refine", { "solvent_model_param_bsol" } },
280  /* 22 */ { R"(ERROR ESTIMATES\.)", 1 },
281  /* 23 */ { R"(COORDINATE ERROR \(MAXIMUM-LIKELIHOOD BASED\)\s*:\s*(.+))", 1, "refine", { "overall_SU_ML" } },
282  /* 24 */ { R"(PHASE ERROR \(DEGREES, MAXIMUM-LIKELIHOOD BASED\)\s*:\s*(.+))", 1, "refine", { "pdbx_overall_phase_error" } },
283  /* 25 */ { R"(B VALUES\.)", 1 },
284  /* 26 */ { R"(B VALUE TYPE\s*:\s*(.+))", 1, "refine", { "pdbx_TLS_residual_ADP_flag" } },
285  /* 27 */ { R"(FROM WILSON PLOT \(A\*\*2\)\s*:\s*(.+))", 1, "reflns", { "B_iso_Wilson_estimate" } },
286  /* 28 */ { R"(MEAN B VALUE \(OVERALL, A\*\*2\)\s*:\s*(.+))", 1, "refine", { "B_iso_mean" } },
287  /* 29 */ { R"(OVERALL ANISOTROPIC B VALUE\.)", 1 },
288  /* 30 */ { R"(B11 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][1]" } },
289  /* 31 */ { R"(B22 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][2]" } },
290  /* 32 */ { R"(B33 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[3][3]" } },
291  /* 33 */ { R"(B12 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][2]" } },
292  /* 34 */ { R"(B13 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][3]" } },
293  /* 35 */ { R"(B23 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][3]" } },
294  /* 36 */ { R"(TWINNING INFORMATION\.)", 1 },
295  /* 37 */ { R"(FRACTION:\s*(.+))", 1, "pdbx_reflns_twin", { "fraction" } },
296  /* 38 */ { R"(OPERATOR:\s*(.+))", 1, "pdbx_reflns_twin", { "operator" } },
297  /* 39 */ { R"(DEVIATIONS FROM IDEAL VALUES\.)", 1 },
298  /* 40 */ { R"(RMSD COUNT)", 1 },
299  /* 41 */ { R"(BOND\s*:\s*(\d+(?:\.\d+))\s+(\d+))", 1, "refine_ls_restr", { "dev_ideal", "number" }, "f_bond_d", false },
300  /* 42 */ { R"(ANGLE\s*:\s*(\d+(?:\.\d+))\s+(\d+))", 1, "refine_ls_restr", { "dev_ideal", "number" }, "f_angle_d", false },
301  /* 43 */ { R"(CHIRALITY\s*:\s*(\d+(?:\.\d+))\s+(\d+))", 1, "refine_ls_restr", { "dev_ideal", "number" }, "f_chiral_restr", false },
302  /* 44 */ { R"(PLANARITY\s*:\s*(\d+(?:\.\d+))\s+(\d+))", 1, "refine_ls_restr", { "dev_ideal", "number" }, "f_plane_restr", false },
303  /* 45 */ { R"(DIHEDRAL\s*:\s*(\d+(?:\.\d+))\s+(\d+))", 1, "refine_ls_restr", { "dev_ideal", "number" }, "f_dihedral_angle_d", false },
304  /* 46 */ { R"(TLS DETAILS)", 1 },
305  /* 47 */ { R"(NUMBER OF TLS GROUPS\s*:\s*(.+))", 1 },
306  /* 48 */ { R"(TLS GROUP\s*:\s*(.+))", 1, "pdbx_refine_tls", { "id" }, nullptr, true },
307  /* 49 */ { R"(SELECTION:\s*(.+))", 1, "pdbx_refine_tls_group", { "selection_details" }, nullptr, true },
308  /* 50 */ { R"(ORIGIN FOR THE GROUP(?:\s*\(A\))?\s*:\s*(\S+)\s+(\S+)\s+(\S+))", 1, "pdbx_refine_tls", { "origin_x", "origin_y", "origin_z" } },
309  /* 51 */ { R"(T TENSOR)", 1 },
310  /* 52 */ { R"(T11\s*:\s*(.+) T22\s*:\s*(.+))", 1, "pdbx_refine_tls", { "T[1][1]", "T[2][2]" } },
311  /* 53 */ { R"(T33\s*:\s*(.+) T12\s*:\s*(.+))", 1, "pdbx_refine_tls", { "T[3][3]", "T[1][2]" } },
312  /* 54 */ { R"(T13\s*:\s*(.+) T23\s*:\s*(.+))", 1, "pdbx_refine_tls", { "T[1][3]", "T[2][3]" } },
313  /* 55 */ { R"(L TENSOR)", 1 },
314  /* 56 */ { R"(L11\s*:\s*(.+) L22\s*:\s*(.+))", 1, "pdbx_refine_tls", { "L[1][1]", "L[2][2]" } },
315  /* 57 */ { R"(L33\s*:\s*(.+) L12\s*:\s*(.+))", 1, "pdbx_refine_tls", { "L[3][3]", "L[1][2]" } },
316  /* 58 */ { R"(L13\s*:\s*(.+) L23\s*:\s*(.+))", 1, "pdbx_refine_tls", { "L[1][3]", "L[2][3]" } },
317  /* 59 */ { R"(S TENSOR)", 1 },
318  /* 60 */ { R"(S11\s*:\s*(.+) S12\s*:\s*(.+) S13\s*:\s*(.+))", 1, "pdbx_refine_tls", { "S[1][1]", "S[1][2]", "S[1][3]" } },
319  /* 61 */ { R"(S21\s*:\s*(.+) S22\s*:\s*(.+) S23\s*:\s*(.+))", 1, "pdbx_refine_tls", { "S[2][1]", "S[2][2]", "S[2][3]" } },
320  /* 62 */ { R"(S31\s*:\s*(.+) S32\s*:\s*(.+) S33\s*:\s*(.+))", 48 - 62, "pdbx_refine_tls", { "S[3][1]", "S[3][2]", "S[3][3]" } },
321  /* 63 */ { R"(ANOMALOUS SCATTERER GROUPS DETAILS\.)", 1 },
322  /* 64 */ { R"(NUMBER OF ANOMALOUS SCATTERER GROUPS\s*:\s*\d+)", 1 },
323  /* 65 */ { R"(ANOMALOUS SCATTERER GROUP\s*:\s*\d+)", 1 },
324  /* 66 */ { R"(SELECTION: .+)", 1 },
325  /* 67 */ { R"(fp\s*:\s*.+)", 1 },
326  /* 68 */ { R"(fdp\s*:\s*.+)", 63 - 68 },
327  /* 69 */ { R"(NCS DETAILS)", 1 },
328  /* 70 */ { R"(NUMBER OF NCS GROUPS\s*:\s*(.+))", 1 },
329 };
330 
// Remark3Parser specialisation wired to the PHENIX template table.
// Besides selecting the table and program-name pattern it declares fixup(),
// which is defined outside the class body.
331 class PHENIX_Remark3Parser : public Remark3Parser
332 {
333  public:
 // Forwards name, expMethod, r and db unchanged to Remark3Parser, together with:
 //  - kPHENIX_Template and its element count (array size / sizeof(TemplateLine));
 //  - a regex accepting "PHENIX (PHENIX.REFINE: <version>" with an optional closing
 //    ')'. Group 1 is "PHENIX"; group 2 is the version, which starts with digits and
 //    may continue with '.' plus any characters other than ')'.
 // The " (PHENIX.REFINE:" part is not optional in this pattern, so a bare "PHENIX"
 // does not match.
334  PHENIX_Remark3Parser(const std::string &name, const std::string &expMethod, PDBRecord *r, cif::datablock &db)
335  : Remark3Parser(name, expMethod, r, db, kPHENIX_Template, sizeof(kPHENIX_Template) / sizeof(TemplateLine),
336  std::regex(R"((PHENIX)(?: \(PHENIX\.REFINE:) (\d+(?:\.[^)]+)?)\)?)"))
337  {
338  }
339 
 // Post-processing step for PHENIX output.
 // NOTE(review): if Remark3Parser declares fixup() as virtual with this signature,
 // spelling this `void fixup() override;` would let the compiler check it -- the base
 // class is not visible here, confirm before changing.
340  virtual void fixup();
341 };
342 
344 {
 // Presumably the body of PHENIX_Remark3Parser::fixup(), declared in the class just
 // above; the signature line is not part of this listing.
 // For every row of the "refine_ls_shell" category in mDb, percent_reflns_obs is read
 // as a float, multiplied by 100 and written back as an int.
 // NOTE(review): this looks like a fraction-to-percent conversion (PHENIX shell
 // completeness given as 0..1) -- TODO confirm.
 // `r` is taken by value; presumably it is a lightweight row handle, so the assignment
 // below reaches the stored data -- confirm.
345  for (auto r : mDb["refine_ls_shell"])
346  {
347  try
348  {
349  float val = r["percent_reflns_obs"].as<float>();
 // static_cast<int> truncates toward zero, so e.g. 99.9 becomes 99 and any
 // fractional percentage is dropped.
350  int perc = static_cast<int>(val * 100);
351  r["percent_reflns_obs"] = perc;
352  }
 // Best effort: any exception raised while handling a row is swallowed without
 // reporting and the loop moves on to the next row.
353  catch (...)
354  {
355  }
356  }
357 }
358 
360  /* 0 */ { R"(DATA USED IN REFINEMENT\.)", 1 },
361  /* 1 */ { R"(RESOLUTION RANGE HIGH \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_high" } },
362  /* 2 */ { R"(RESOLUTION RANGE LOW \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_low" } },
363  /* 3 */ { R"(DATA CUTOFF \(SIGMA\(F\)\)\s*:\s*(.+))", 1, "refine", { "pdbx_ls_sigma_F" } },
364  /* 4 */ { R"(COMPLETENESS FOR RANGE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_obs" } },
365  /* 5 */ { R"(NUMBER OF REFLECTIONS\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_obs" } },
366  /* 6 */ { R"(FIT TO DATA USED IN REFINEMENT\.)", 1 },
367  /* 7 */ { R"(CROSS-VALIDATION METHOD\s*:\s*(.+))", 1, "refine", { "pdbx_ls_cross_valid_method" } },
368  /* 8 */ { R"(FREE R VALUE TEST SET SELECTION\s*:\s*(.+))", 1, "refine", { "pdbx_R_Free_selection_details" } },
369  /* 9 */ { R"(R VALUE \(WORKING \+ TEST SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_obs" } },
370  /* 10 */ { R"(R VALUE \(WORKING SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_work" } },
371  /* 11 */ { R"(FREE R VALUE\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_free" } },
372  /* 12 */ { R"(FREE R VALUE TEST SET SIZE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_R_free" } },
373  /* 13 */ { R"(FREE R VALUE TEST SET COUNT\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_R_free" } },
374  /* 14 */ { R"(FIT/AGREEMENT OF MODEL WITH ALL DATA\.)", 1 },
375  /* 15 */ { R"(R VALUE \(WORKING \+ TEST SET, NO CUTOFF\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_all" } },
376  /* 16 */ { R"(R VALUE \(WORKING SET, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "R_factor_obs_no_cutoff" } },
377  /* 17 */ { R"(FREE R VALUE \(NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_factor_no_cutoff" } },
378  /* 18 */ { R"(FREE R VALUE TEST SET SIZE \(%, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_size_perc_no_cutoff" } },
379  /* 19 */ { R"(FREE R VALUE TEST SET COUNT \(NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_ct_no_cutoff" } },
380  /* 20 */ { R"(TOTAL NUMBER OF REFLECTIONS \(NO CUTOFF\)\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_all" } },
381  /* 21 */ { R"(NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT\.)", 1 },
382  /* 22 */ { R"(PROTEIN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_protein" } },
383  /* 23 */ { R"(NUCLEIC ACID ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_nucleic_acid" } },
384  /* 24 */ { R"(HETEROGEN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_ligand" } },
385  /* 25 */ { R"(SOLVENT ATOMS\s*:\s*(.+))", 1, "refine_hist", { "number_atoms_solvent" } },
386  /* 26 */ { R"(B VALUES\.)", 1 },
387  /* 27 */ { R"(B VALUE TYPE\s*:\s*(.+))", 1, "refine", { "pdbx_TLS_residual_ADP_flag" } },
388  /* 28 */ { R"(FROM WILSON PLOT \(A\*\*2\)\s*:\s*(.+))", 1, "reflns", { "B_iso_Wilson_estimate" } },
389  /* 29 */ { R"(MEAN B VALUE \(OVERALL, A\*\*2\)\s*:\s*(.+))", 1, "refine", { "B_iso_mean" } },
390  /* 30 */ { R"(OVERALL ANISOTROPIC B VALUE\.)", 1 },
391  /* 31 */ { R"(B11 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][1]" } },
392  /* 32 */ { R"(B22 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][2]" } },
393  /* 33 */ { R"(B33 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[3][3]" } },
394  /* 34 */ { R"(B12 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][2]" } },
395  /* 35 */ { R"(B13 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][3]" } },
396  /* 36 */ { R"(B23 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][3]" } },
397  /* 37 */ { R"(ESTIMATED COORDINATE ERROR\.)", 1 },
398  /* 38 */ { R"(ESD FROM LUZZATI PLOT \(A\)\s*:\s*(.+))", 1, "refine_analyze", { "Luzzati_coordinate_error_obs" } },
399  /* 39 */ { R"(ESD FROM SIGMAA \(A\)\s*:\s*(.+))", 1, "refine_analyze", { "Luzzati_sigma_a_obs" } },
400  /* 40 */ { R"(LOW RESOLUTION CUTOFF \(A\)\s*:\s*(.+))", 1, "refine_analyze", { "Luzzati_d_res_low_obs" } },
401  /* 41 */ { R"(RMS DEVIATIONS FROM IDEAL VALUES\.)", 1 },
402  /* 42 */ { R"(DISTANCE RESTRAINTS\. RMS SIGMA)", 1 },
403  /* 43 */ { R"(SUGAR-BASE BOND DISTANCE \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "n_sugar_bond_d", false },
404  /* 44 */ { R"(SUGAR-BASE BOND ANGLE DISTANCE \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "n_sugar_bond_angle_d", false },
405  /* 45 */ { R"(PHOSPHATE BONDS DISTANCE \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "n_phos_bond_d", false },
406  /* 46 */ { R"(PHOSPHATE BOND ANGLE, H-BOND \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "n_phos_bond_angle_d", false },
407  /* 47 */ { R"(PLANE RESTRAINT \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "n_plane_restr", false },
408  /* 48 */ { R"(CHIRAL-CENTER RESTRAINT \(A\*\*3\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "n_chiral_restr", false },
409  /* 49 */ { R"(NON-BONDED CONTACT RESTRAINTS\.)", 1 },
410  /* 50 */ { R"(SINGLE TORSION \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "n_singtor_nbd", false },
411  /* 51 */ { R"(MULTIPLE TORSION \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "n_multtor_nbd", false },
412  /* 59 */ { R"(ISOTROPIC THERMAL FACTOR RESTRAINTS\. RMS SIGMA)", 1 },
413  /* 60 */ { R"(SUGAR-BASE BONDS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "n_sugar_bond_it", false },
414  /* 61 */ { R"(SUGAR-BASE ANGLES \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "n_sugar_angle_it", false },
415  /* 62 */ { R"(PHOSPHATE BONDS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "n_phos_bond_it", false },
416  /* 63 */ { R"(PHOSPHATE BOND ANGLE, H-BOND \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "n_phos_angle_it", false },
417 };
418 
419 class NUCLSQ_Remark3Parser : public Remark3Parser
420 {
421  public:
422  NUCLSQ_Remark3Parser(const std::string &name, const std::string &expMethod, PDBRecord *r, cif::datablock &db)
423  : Remark3Parser(name, expMethod, r, db, kNUCLSQ_Template, sizeof(kNUCLSQ_Template) / sizeof(TemplateLine),
424  std::regex(R"((NUCLSQ)(?: (\d+(?:\.\d+)?))?)"))
425  {
426  }
427 
428  virtual void fixup()
429  {
430  for (auto r : mDb["refine_hist"])
431  {
432  try
433  {
434  int p, n, h, s;
435  cif::tie(p, n, h, s) = r.get("pdbx_number_atoms_protein", "pdbx_number_atoms_nucleic_acid", "pdbx_number_atoms_ligand", "number_atoms_solvent");
436  r["number_atoms_total"] = p + n + h + s;
437  }
438  catch (...)
439  {
440  }
441  }
442  }
443 };
444 
// Rows of kPROLSQ_Template, the table PROLSQ_Remark3Parser hands to the
// Remark3Parser base constructor. The /* n */ comment in front of each row
// numbers the rows in order.
//
// Each row is brace-initialised in TemplateLine member order:
//   1. rx          - regular expression for one REMARK 3 line,
//   2. an integer  - NOTE(review): the member declaration is not visible in
//                    this listing; presumably the offset to the next expected
//                    row, confirm against TemplateLine,
//   3. category    - target category name,
//   4. items       - item names, one per capture group of rx,
//   5. lsRestrType - restraint type name, given on the refine_ls_restr rows,
//   6. createNew   - false on every refine_ls_restr row in this table.
// Rows that give only the first two members are section headings; their
// expressions contain no capture group.
446  /* 0 */ { R"(DATA USED IN REFINEMENT\.)", 1 },
447  /* 1 */ { R"(RESOLUTION RANGE HIGH \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_high" } },
448  /* 2 */ { R"(RESOLUTION RANGE LOW \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_low" } },
449  /* 3 */ { R"(DATA CUTOFF \(SIGMA\(F\)\)\s*:\s*(.+))", 1, "refine", { "pdbx_ls_sigma_F" } },
450  /* 4 */ { R"(COMPLETENESS FOR RANGE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_obs" } },
451  /* 5 */ { R"(NUMBER OF REFLECTIONS\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_obs" } },
452  /* 6 */ { R"(FIT TO DATA USED IN REFINEMENT\.)", 1 },
453  /* 7 */ { R"(CROSS-VALIDATION METHOD\s*:\s*(.+))", 1, "refine", { "pdbx_ls_cross_valid_method" } },
454  /* 8 */ { R"(FREE R VALUE TEST SET SELECTION\s*:\s*(.+))", 1, "refine", { "pdbx_R_Free_selection_details" } },
455  /* 9 */ { R"(R VALUE \(WORKING \+ TEST SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_obs" } },
456  /* 10 */ { R"(R VALUE \(WORKING SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_work" } },
457  /* 11 */ { R"(FREE R VALUE\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_free" } },
458  /* 12 */ { R"(FREE R VALUE TEST SET SIZE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_R_free" } },
459  /* 13 */ { R"(FREE R VALUE TEST SET COUNT\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_R_free" } },
460  /* 14 */ { R"(FIT/AGREEMENT OF MODEL WITH ALL DATA\.)", 1 },
461  /* 15 */ { R"(R VALUE \(WORKING \+ TEST SET, NO CUTOFF\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_all" } },
462  /* 16 */ { R"(R VALUE \(WORKING SET, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "R_factor_obs_no_cutoff" } },
463  /* 17 */ { R"(FREE R VALUE \(NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_factor_no_cutoff" } },
464  /* 18 */ { R"(FREE R VALUE TEST SET SIZE \(%, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_size_perc_no_cutoff" } },
465  /* 19 */ { R"(FREE R VALUE TEST SET COUNT \(NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_ct_no_cutoff" } },
466  /* 20 */ { R"(TOTAL NUMBER OF REFLECTIONS \(NO CUTOFF\)\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_all" } },
467  /* 21 */ { R"(NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT\.)", 1 },
468  /* 22 */ { R"(PROTEIN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_protein" } },
469  /* 23 */ { R"(NUCLEIC ACID ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_nucleic_acid" } },
470  /* 24 */ { R"(HETEROGEN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_ligand" } },
471  /* 25 */ { R"(SOLVENT ATOMS\s*:\s*(.+))", 1, "refine_hist", { "number_atoms_solvent" } },
472  /* 26 */ { R"(B VALUES\.)", 1 },
473  /* 27 */ { R"(B VALUE TYPE\s*:\s*(.+))", 1, "refine", { "pdbx_TLS_residual_ADP_flag" } },
474  /* 28 */ { R"(FROM WILSON PLOT \(A\*\*2\)\s*:\s*(.+))", 1, "reflns", { "B_iso_Wilson_estimate" } },
475  /* 29 */ { R"(MEAN B VALUE \(OVERALL, A\*\*2\)\s*:\s*(.+))", 1, "refine", { "B_iso_mean" } },
476  /* 30 */ { R"(OVERALL ANISOTROPIC B VALUE\.)", 1 },
477  /* 31 */ { R"(B11 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][1]" } },
478  /* 32 */ { R"(B22 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][2]" } },
479  /* 33 */ { R"(B33 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[3][3]" } },
480  /* 34 */ { R"(B12 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][2]" } },
481  /* 35 */ { R"(B13 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][3]" } },
482  /* 36 */ { R"(B23 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][3]" } },
483  /* 37 */ { R"(ESTIMATED COORDINATE ERROR\.)", 1 },
484  /* 38 */ { R"(ESD FROM LUZZATI PLOT \(A\)\s*:\s*(.+))", 1, "refine_analyze", { "Luzzati_coordinate_error_obs" } },
485  /* 39 */ { R"(ESD FROM SIGMAA \(A\)\s*:\s*(.+))", 1, "refine_analyze", { "Luzzati_sigma_a_obs" } },
486  /* 40 */ { R"(LOW RESOLUTION CUTOFF \(A\)\s*:\s*(.+))", 1, "refine_analyze", { "Luzzati_d_res_low_obs" } },
487  /* 41 */ { R"(RMS DEVIATIONS FROM IDEAL VALUES\.)", 1 },
488  /* 42 */ { R"(DISTANCE RESTRAINTS\. RMS SIGMA)", 1 },
489  /* 43 */ { R"(BOND LENGTH \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_bond_d", false },
490  /* 44 */ { R"(ANGLE DISTANCE \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_angle_d", false },
491  /* 45 */ { R"(INTRAPLANAR 1-4 DISTANCE \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_planar_d", false },
492  /* 46 */ { R"(H-BOND OR METAL COORDINATION \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_hb_or_metal_coord", false },
493  /* 47 */ { R"(PLANE RESTRAINT \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_plane_restr", false },
494  /* 48 */ { R"(CHIRAL-CENTER RESTRAINT \(A\*\*3\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_chiral_restr", false },
495  /* 49 */ { R"(NON-BONDED CONTACT RESTRAINTS\.)", 1 },
496  /* 50 */ { R"(SINGLE TORSION \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_singtor_nbd", false },
497  /* 51 */ { R"(MULTIPLE TORSION \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_multtor_nbd", false },
498  /* 52 */ { R"(H-BOND \(X\.\.\.Y\) \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_xyhbond_nbd", false },
499  /* 53 */ { R"(H-BOND \(X-H\.\.\.Y\) \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_xhyhbond_nbd", false },
500  /* 54 */ { R"(CONFORMATIONAL TORSION ANGLE RESTRAINTS\.)", 1 },
501  /* 55 */ { R"(SPECIFIED \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_special_tor", false },
502  /* 56 */ { R"(PLANAR \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_planar_tor", false },
503  /* 57 */ { R"(STAGGERED \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_staggered_tor", false },
504  /* 58 */ { R"(TRANSVERSE \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_transverse_tor", false },
505  /* 59 */ { R"(ISOTROPIC THERMAL FACTOR RESTRAINTS\. RMS SIGMA)", 1 },
506  /* 60 */ { R"(MAIN-CHAIN BOND \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_mcbond_it", false },
507  /* 61 */ { R"(MAIN-CHAIN ANGLE \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_mcangle_it", false },
508  /* 62 */ { R"(SIDE-CHAIN BOND \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_scbond_it", false },
509  /* 63 */ { R"(SIDE-CHAIN ANGLE \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_scangle_it", false },
510 };
511 
512 class PROLSQ_Remark3Parser : public Remark3Parser
513 {
514  public:
515  PROLSQ_Remark3Parser(const std::string &name, const std::string &expMethod, PDBRecord *r, cif::datablock &db)
516  : Remark3Parser(name, expMethod, r, db, kPROLSQ_Template, sizeof(kPROLSQ_Template) / sizeof(TemplateLine),
517  std::regex(R"((PROLSQ)(?: (\d+(?:\.\d+)?))?)"))
518  {
519  }
520 
521  virtual void fixup()
522  {
523  for (auto r : mDb["refine_hist"])
524  {
525  try
526  {
527  int p, n, h, s;
528  cif::tie(p, n, h, s) = r.get("pdbx_number_atoms_protein", "pdbx_number_atoms_nucleic_acid", "pdbx_number_atoms_ligand", "number_atoms_solvent");
529  r["number_atoms_total"] = p + n + h + s;
530  }
531  catch (...)
532  {
533  }
534  }
535  }
536 };
537 
539  /* 0 */ { "DATA USED IN REFINEMENT.", 1 },
540  /* 1 */ { R"(RESOLUTION RANGE HIGH \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_high" } },
541  /* 3 */ { R"(RESOLUTION RANGE LOW \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_low" } },
542  /* 4 */ { R"(DATA CUTOFF \(SIGMA\(F\)\)\s*:\s*(.+))", 1, "refine", { "pdbx_ls_sigma_F" } },
543  /* 5 */ { R"(COMPLETENESS FOR RANGE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_obs" } },
544  /* 6 */ { R"(NUMBER OF REFLECTIONS\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_obs" } },
545  /* 7 */ { R"(FIT TO DATA USED IN REFINEMENT.)", 1 },
546  /* 8 */ { R"(CROSS-VALIDATION METHOD\s*:\s*(.+))", 1, "refine", { "pdbx_ls_cross_valid_method" } },
547  /* 9 */ { R"(FREE R VALUE TEST SET SELECTION\s*:\s*(.+))", 1, "refine", { "pdbx_R_Free_selection_details" } },
548  /* 10 */ { R"(R VALUE \(WORKING \+ TEST SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_obs" } },
549  /* 11 */ { R"(R VALUE \(WORKING SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_work" } },
550  /* 12 */ { R"(FREE R VALUE\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_free" } },
551  /* 13 */ { R"(FREE R VALUE TEST SET SIZE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_R_free" } },
552  /* 14 */ { R"(FREE R VALUE TEST SET COUNT\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_R_free" } },
553  /* 15 */ { R"(NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT.)", 1 },
554  /* 16 */ { R"(PROTEIN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_protein" } },
555  /* 17 */ { R"(NUCLEIC ACID ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_nucleic_acid" } },
556  /* 18 */ { R"(HETEROGEN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_ligand" } },
557  /* 19 */ { R"(SOLVENT ATOMS\s*:\s*(.+))", 1, "refine_hist", { "number_atoms_solvent" } },
558  /* 20 */ { R"(ALL ATOMS\s*:\s*(.+))", 1, /* "refine_hist", "pdbx_number_atoms_protein" */ },
559  /* 21 */ { R"(B VALUES\..*)", 1 },
560  /* 22 */ { R"(B VALUE TYPE\s*:\s*(.+))", 1, "refine", { "pdbx_TLS_residual_ADP_flag" } },
561  /* 23 */ { R"(FROM WILSON PLOT \(A\*\*2\)\s*:\s*(.+))", 1, "reflns", { "B_iso_Wilson_estimate" } },
562  /* 24 */ { R"(MEAN B VALUE \(OVERALL, A\*\*2\)\s*:\s*(.+))", 1, "refine", { "B_iso_mean" } },
563  /* 25 */ { R"(OVERALL ANISOTROPIC B VALUE.)", 1 },
564  /* 26 */ { R"(B11 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][1]" } },
565  /* 27 */ { R"(B22 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][2]" } },
566  /* 28 */ { R"(B33 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[3][3]" } },
567  /* 29 */ { R"(B12 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][2]" } },
568  /* 30 */ { R"(B13 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][3]" } },
569  /* 31 */ { R"(B23 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][3]" } },
570  /* 32 */ { R"(ESTIMATED OVERALL COORDINATE ERROR.)", 1 },
571  /* 33 */ { R"(ESU BASED ON R VALUE(?:\s*\(A\))?\s*:\s*(.+))", 1, "refine", { "pdbx_overall_ESU_R" } },
572  /* 34 */ { R"(ESU BASED ON FREE R VALUE(?:\s*\(A\))?\s*:\s*(.+))", 1, "refine", { "pdbx_overall_ESU_R_Free" } },
573  /* 35 */ { R"(ESU BASED ON MAXIMUM LIKELIHOOD(?:\s*\(A\))?\s*:\s*(.+))", 1, "refine", { "overall_SU_ML" } },
574  /* 36 */ { R"(ESU FOR B VALUES BASED ON MAXIMUM LIKELIHOOD \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "overall_SU_B" } },
575  /* 37 */ { R"(RMS DEVIATIONS FROM IDEAL VALUES.)", 1 },
576  /* 38 */ { R"(DISTANCE RESTRAINTS. RMS SIGMA)", 1 },
577  /* 39 */ { R"(BOND LENGTH \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_bond_d", false },
578  /* 40 */ { R"(ANGLE DISTANCE \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_angle_d", false },
579  /* 41 */ { R"(INTRAPLANAR 1-4 DISTANCE \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_planar_d", false },
580  /* 42 */ { R"(H-BOND OR METAL COORDINATION \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_hb_or_metal_coord", false },
581  /* 43 */ { R"(PLANE RESTRAINT \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_plane_restr", false },
582  /* 44 */ { R"(CHIRAL-CENTER RESTRAINT \(A\*\*3\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_chiral_restr", false },
583  /* 45 */ { R"(NON-BONDED CONTACT RESTRAINTS.)", 1 },
584  /* 46 */ { R"(SINGLE TORSION \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_singtor_nbd", false },
585  /* 47 */ { R"(MULTIPLE TORSION \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_multtor_nbd", false },
586  /* 48 */ { R"(H-BOND \(X\.\..Y\) \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_xyhbond_nbd", false },
587  /* 49 */ { R"(H-BOND \(X-H\.\.\.Y\) \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_xhyhbond_nbd", false },
588  /* 50 */ { R"(CONFORMATIONAL TORSION ANGLE RESTRAINTS.)", 1 },
589  /* 51 */ { R"(SPECIFIED \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_special_tor", false },
590  /* 52 */ { R"(PLANAR \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_planar_tor", false },
591  /* 53 */ { R"(STAGGERED \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_staggered_tor", false },
592  /* 54 */ { R"(TRANSVERSE \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_transverse_tor", false },
593  /* 55 */ { R"(ISOTROPIC THERMAL FACTOR RESTRAINTS. RMS SIGMA)", 1 },
594  /* 56 */ { R"(MAIN-CHAIN BOND \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_mcbond_it", false },
595  /* 57 */ { R"(MAIN-CHAIN ANGLE \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_mcangle_it", false },
596  /* 58 */ { R"(SIDE-CHAIN BOND \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_scbond_it", false },
597  /* 59 */ { R"(SIDE-CHAIN ANGLE \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_scangle_it", false },
598 };
599 
600 class REFMAC_Remark3Parser : public Remark3Parser
601 {
602  public:
603  REFMAC_Remark3Parser(const std::string &name, const std::string &expMethod, PDBRecord *r, cif::datablock &db)
604  : Remark3Parser(name, expMethod, r, db, kREFMAC_Template, sizeof(kREFMAC_Template) / sizeof(TemplateLine),
605  std::regex(".+"))
606  {
607  }
608 
609  virtual std::string program() { return "REFMAC"; }
610  virtual std::string version() { return ""; }
611 };
612 
614  /* 0 */ { R"(REFINEMENT TARGET\s*:\s*(.+))", 1, "refine", { "pdbx_stereochemistry_target_values" } },
615  /* 1 */ { R"(DATA USED IN REFINEMENT\.)", 1 },
616  /* 2 */ { R"(RESOLUTION RANGE HIGH \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_high" } },
617  /* 3 */ { R"(RESOLUTION RANGE LOW \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_low" } },
618  /* 4 */ { R"(DATA CUTOFF \(SIGMA\(F\)\)\s*:\s*(.+))", 1, "refine", { "pdbx_ls_sigma_F" } },
619  /* 5 */ { R"(COMPLETENESS FOR RANGE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_obs" } },
620  /* 6 */ { R"(NUMBER OF REFLECTIONS\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_obs" } },
621  /* 7 */ { R"(FIT TO DATA USED IN REFINEMENT.)", 1 },
622  /* 8 */ { R"(CROSS-VALIDATION METHOD\s*:\s*(.+))", 1, "refine", { "pdbx_ls_cross_valid_method" } },
623  /* 9 */ { R"(FREE R VALUE TEST SET SELECTION\s*:\s*(.+))", 1, "refine", { "pdbx_R_Free_selection_details" } },
624  /* 10 */ { R"(R VALUE \(WORKING \+ TEST SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_obs" } },
625  /* 11 */ { R"(R VALUE \(WORKING SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_work" } },
626  /* 12 */ { R"(FREE R VALUE\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_free" } },
627  /* 13 */ { R"(FREE R VALUE TEST SET SIZE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_R_free" } },
628  /* 14 */ { R"(FREE R VALUE TEST SET COUNT\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_R_free" } },
629  /* 15 */ { R"(FIT IN THE HIGHEST RESOLUTION BIN.)", 1 },
630  /* 16 */ { R"(TOTAL NUMBER OF BINS USED\s*:\s*(.+))", 1, "refine_ls_shell", { "pdbx_total_number_of_bins_used" } },
631  /* 17 */ { R"(BIN RESOLUTION RANGE HIGH(?:\s*\(A\))?\s*:\s*(.+))", 1, "refine_ls_shell", { "d_res_high" } },
632  /* 18 */ { R"(BIN RESOLUTION RANGE LOW(?:\s*\(A\))?\s*:\s*(.+))", 1, "refine_ls_shell", { "d_res_low" } },
633  /* 19 */ { R"(REFLECTION IN BIN \(WORKING SET\)\s*:\s*(.+))", 1, "refine_ls_shell", { "number_reflns_R_work" } },
634  /* 20 */ { R"(BIN COMPLETENESS \(WORKING\+TEST\) \(%\)\s*:\s*(.+))", 1, "refine_ls_shell", { "percent_reflns_obs" } },
635  /* 21 */ { R"(BIN R VALUE \(WORKING SET\)\s*:\s*(.+))", 1, "refine_ls_shell", { "R_factor_R_work" } },
636  /* 22 */ { R"(BIN FREE R VALUE SET COUNT\s*:\s*(.+))", 1, "refine_ls_shell", { "number_reflns_R_free" } },
637  /* 23 */ { R"(BIN FREE R VALUE\s*:\s*(.+))", 1, "refine_ls_shell", { "R_factor_R_free" } },
638  /* 24 */ { R"(NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT.)", 1 },
639  /* 25 */ { R"(PROTEIN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_protein" } },
640  /* 26 */ { R"(NUCLEIC ACID ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_nucleic_acid" } },
641  /* 27 */ { R"(HETEROGEN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_ligand" } },
642  /* 28 */ { R"(SOLVENT ATOMS\s*:\s*(.+))", 1, "refine_hist", { "number_atoms_solvent" } },
643  /* 29 */ { R"(ALL ATOMS\s*:\s*(.+))", 1, /* "refine_hist", { "pdbx_number_atoms_protein" } */ },
644  /* 30 */ { R"(B VALUES\..*)", 1 },
645  /* 31 */ { R"(B VALUE TYPE\s*:\s*(.+))", 1, "refine", { "pdbx_TLS_residual_ADP_flag" } },
646  /* 32 */ { R"(FROM WILSON PLOT \(A\*\*2\)\s*:\s*(.+))", 1, "reflns", { "B_iso_Wilson_estimate" } },
647  /* 33 */ { R"(MEAN B VALUE \(OVERALL, A\*\*2\)\s*:\s*(.+))", 1, "refine", { "B_iso_mean" } },
648  /* 34 */ { R"(OVERALL ANISOTROPIC B VALUE.)", 1 },
649  /* 35 */ { R"(B11 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][1]" } },
650  /* 36 */ { R"(B22 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][2]" } },
651  /* 37 */ { R"(B33 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[3][3]" } },
652  /* 38 */ { R"(B12 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][2]" } },
653  /* 39 */ { R"(B13 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][3]" } },
654  /* 40 */ { R"(B23 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][3]" } },
655  /* 41 */ { R"(ESTIMATED OVERALL COORDINATE ERROR.)", 1 },
656  /* 42 */ { R"(ESU BASED ON R VALUE(?:\s*\(A\))?\s*:\s*(.+))", 1, "refine", { "pdbx_overall_ESU_R" } },
657  /* 43 */ { R"(ESU BASED ON FREE R VALUE(?:\s*\(A\))?\s*:\s*(.+))", 1, "refine", { "pdbx_overall_ESU_R_Free" } },
658  /* 44 */ { R"(ESU BASED ON MAXIMUM LIKELIHOOD(?:\s*\(A\))?\s*:\s*(.+))", 1, "refine", { "overall_SU_ML" } },
659  /* 45 */ { R"(ESU FOR B VALUES BASED ON MAXIMUM LIKELIHOOD \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "overall_SU_B" } },
660  /* 46 */ { R"(CORRELATION COEFFICIENTS.)", 1 },
661  /* 47 */ { R"(CORRELATION COEFFICIENT FO-FC\s*:\s*(.+))", 1, "refine", { "correlation_coeff_Fo_to_Fc" } },
662  /* 48 */ { R"(CORRELATION COEFFICIENT FO-FC FREE\s*:\s*(.+))", 1, "refine", { "correlation_coeff_Fo_to_Fc_free" } },
663  /* 49 */ { R"(RMS DEVIATIONS FROM IDEAL VALUES COUNT RMS WEIGHT)", 1 },
664  /* 50 */ { R"(BOND LENGTHS REFINED ATOMS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_bond_refined_d", false },
665  /* 51 */ { R"(BOND LENGTHS OTHERS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_bond_other_d", false },
666  /* 52 */ { R"(BOND ANGLES REFINED ATOMS \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_angle_refined_deg", false },
667  /* 53 */ { R"(BOND ANGLES OTHERS \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_angle_other_deg", false },
668  /* 54 */ { R"(TORSION ANGLES, PERIOD 1 \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_dihedral_angle_1_deg", false },
669  /* 55 */ { R"(TORSION ANGLES, PERIOD 2 \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_dihedral_angle_2_deg", false },
670  /* 56 */ { R"(TORSION ANGLES, PERIOD 3 \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_dihedral_angle_3_deg", false },
671  /* 57 */ { R"(TORSION ANGLES, PERIOD 4 \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_dihedral_angle_4_deg", false },
672  /* 58 */ { R"(CHIRAL-CENTER RESTRAINTS \(A\*\*3\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_chiral_restr", false },
673  /* 59 */ { R"(GENERAL PLANES REFINED ATOMS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_gen_planes_refined", false },
674  /* 60 */ { R"(GENERAL PLANES OTHERS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_gen_planes_other", false },
675  /* 61 */ { R"(NON-BONDED CONTACTS REFINED ATOMS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_nbd_refined", false },
676  /* 62 */ { R"(NON-BONDED CONTACTS OTHERS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_nbd_other", false },
677  /* 63 */ { R"(NON-BONDED TORSION REFINED ATOMS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_nbtor_refined", false },
678  /* 64 */ { R"(NON-BONDED TORSION OTHERS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_nbtor_other", false },
679  /* 65 */ { R"(H-BOND \(X...Y\) REFINED ATOMS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_xyhbond_nbd_refined", false },
680  /* 66 */ { R"(H-BOND \(X...Y\) OTHERS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_xyhbond_nbd_other", false },
681  /* 67 */ { R"(POTENTIAL METAL-ION REFINED ATOMS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_metal_ion_refined", false },
682  /* 68 */ { R"(POTENTIAL METAL-ION OTHERS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_metal_ion_other", false },
683  /* 69 */ { R"(SYMMETRY VDW REFINED ATOMS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_symmetry_vdw_refined", false },
684  /* 70 */ { R"(SYMMETRY VDW OTHERS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_symmetry_vdw_other", false },
685  /* 71 */ { R"(SYMMETRY H-BOND REFINED ATOMS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_symmetry_hbond_refined", false },
686  /* 72 */ { R"(SYMMETRY H-BOND OTHERS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_symmetry_hbond_other", false },
687  /* 73 */ { R"(SYMMETRY METAL-ION REFINED ATOMS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_symmetry_metal_ion_refined", false },
688  /* 74 */ { R"(SYMMETRY METAL-ION OTHERS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_symmetry_metal_ion_other", false },
689  /* 75 */ { R"(ISOTROPIC THERMAL FACTOR RESTRAINTS. COUNT RMS WEIGHT)", 1 },
690  /* 76 */ { R"(MAIN-CHAIN BOND REFINED ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_mcbond_it", false },
691  /* 77 */ { R"(MAIN-CHAIN BOND OTHER ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_mcbond_other", false },
692  /* 78 */ { R"(MAIN-CHAIN ANGLE REFINED ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_mcangle_it", false },
693  /* 79 */ { R"(MAIN-CHAIN ANGLE OTHER ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_mcangle_other", false },
694  /* 80 */ { R"(SIDE-CHAIN BOND REFINED ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_scbond_it", false },
695  /* 81 */ { R"(SIDE-CHAIN BOND OTHER ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_scbond_other", false },
696  /* 82 */ { R"(SIDE-CHAIN ANGLE REFINED ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_scangle_it", false },
697  /* 83 */ { R"(SIDE-CHAIN ANGLE OTHER ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_scangle_other", false },
698  /* 84 */ { R"(LONG RANGE B REFINED ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_long_range_B_refined", false },
699  /* 85 */ { R"(LONG RANGE B OTHER ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_long_range_B_other", false },
700  /* 86 */ { R"(ANISOTROPIC THERMAL FACTOR RESTRAINTS. COUNT RMS WEIGHT)", 1 },
701  /* 87 */ { R"(RIGID-BOND RESTRAINTS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_rigid_bond_restr", false },
702  /* 88 */ { R"(SPHERICITY; FREE ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_sphericity_free", false },
703  /* 89 */ { R"(SPHERICITY; BONDED ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_sphericity_bonded", false },
704  // Simply ignore NCS, you can ask Robbie why
705  /* 90 */ { R"(NCS RESTRAINTS STATISTICS)", 1 },
706  /* 91 */ { R"(NUMBER OF DIFFERENT NCS GROUPS\s*:\s*(.+))", 1 },
707  /* 92 */ { R"(NCS GROUP NUMBER\s*:\s*(\d+))", 1, /*"struct_ncs_dom", { "pdbx_ens_id" }*/ },
708  /* 93 */ { R"(CHAIN NAMES\s*:\s*(.+))", 1, /*"struct_ncs_dom", { "details" }*/ },
709  /* 94 */ { R"(NUMBER OF COMPONENTS NCS GROUP\s*:\s*(\d+))", 1 },
710  /* 95 */ { R"(COMPONENT C SSSEQI TO C SSSEQI CODE)", 1 },
712  /* 97 */ { R"((\d+)\s+(.)\s+(\d+)(.)\s+(.)\s+(\d+)(.)\s+(.+))", 0 }, //, "struct_ncs_dom_lim", { "pdbx_component_id", "beg_auth_asym_id", "beg_auth_seq_id", "beg_auth_icode", "end_auth_asym_id", "end_auth_seq_id", "end_auth_icode", "pdbx_refine_code" }, {}, 1 },
713  /* 98 */ { R"((\d+)\s+(.)\s+(\d+)\s+(.)\s+(\d+)\s+(.+))", 0 }, //, "struct_ncs_dom_lim", { "pdbx_component_id", "beg_auth_asym_id", "beg_auth_seq_id", "end_auth_asym_id", "end_auth_seq_id", "pdbx_refine_code" }, {}, 1 },
714  /* 96 */ { R"(GROUP CHAIN COUNT RMS WEIGHT)", 1 }, /*, "refine_ls_restr_ncs", { "pdbx_type", "dom_id", "pdbx_auth_asym_id", "pdbx_number", "rms_dev_position", "weight_position", }*/
715  /* 99 */ { R"(TIGHT POSITIONAL\s+\d+\s+(.)\s+\(A\):\s+(\d+)\s*;\s*(\d+(?:\.\d*)?)\s*;\s*(\d+(?:\.\d*)?))", 0 }, // , "refine_ls_restr_ncs", {"pdbx_auth_asym_id", "pdbx_number", "rms_dev_position", "weight_position"}, { "pdbx_type", "tight positional"}, 1 },
716  /* 100 */ { R"(MEDIUM POSITIONAL\s+\d+\s+(.)\s+\(A\):\s+(\d+)\s*;\s*(\d+(?:\.\d*)?)\s*;\s*(\d+(?:\.\d*)?))", 0 }, // , "refine_ls_restr_ncs", {"pdbx_auth_asym_id", "pdbx_number", "rms_dev_position", "weight_position"}, { "pdbx_type", "medium positional"}, 1 },
717  /* 101 */ { R"(LOOSE POSITIONAL\s+\d+\s+(.)\s+\(A\):\s+(\d+)\s*;\s*(\d+(?:\.\d*)?)\s*;\s*(\d+(?:\.\d*)?))", 0 }, // , "refine_ls_restr_ncs", {"pdbx_auth_asym_id", "pdbx_number", "rms_dev_position", "weight_position"}, { "pdbx_type", "loose positional"}, 1 },
718  /* 102 */ { R"(TIGHT THERMAL\s+\d+\s+(.)\s+\(A\*\*2\):\s+(\d+)\s*;\s*(\d+(?:\.\d*)?)\s*;\s*(\d+(?:\.\d*)?))", 0 }, // , "refine_ls_restr_ncs", {"pdbx_auth_asym_id", "pdbx_number", "rms_dev_position", "weight_position"}, { "pdbx_type", "tight thermal", }, 1 },
719  /* 103 */ { R"(MEDIUM THERMAL\s+\d+\s+(.)\s+\(A\*\*2\):\s+(\d+)\s*;\s*(\d+(?:\.\d*)?)\s*;\s*(\d+(?:\.\d*)?))", 0 }, // , "refine_ls_restr_ncs", {"pdbx_auth_asym_id", "pdbx_number", "rms_dev_position", "weight_position"}, { "pdbx_type", "medium thermal", }, 1 },
720  /* 104 */ { R"(LOOSE THERMAL\s+\d+\s+(.)\s+\(A\*\*2\):\s+(\d+)\s*;\s*(\d+(?:\.\d*)?)\s*;\s*(\d+(?:\.\d*)?))", 0 }, // , "refine_ls_restr_ncs", {"pdbx_auth_asym_id", "pdbx_number", "rms_dev_position", "weight_position"}, { "pdbx_type", "loose thermal", }, 10 },
721  /* 105 */ { R"(NCS GROUP NUMBER\s*:\s*(\d+))", 93 - 105, /*"struct_ncs_dom", { "pdbx_ens_id" }*/ },
722  /* 106 */ { R"(TWIN DETAILS)", 1 },
723  /* 107 */ { R"(NUMBER OF TWIN DOMAINS\s*:\s*(\d*))", 1 },
724  /* 108 */ { R"(TWIN DOMAIN\s*:\s*(.+))", 1, "pdbx_reflns_twin", { "domain_id" }, nullptr, true },
725  /* 109 */ { R"(TWIN OPERATOR\s*:\s*(.+))", 1, "pdbx_reflns_twin", { "operator" } },
726  /* 110 */ { R"(TWIN FRACTION\s*:\s*(.+))", 108 - 110, "pdbx_reflns_twin", { "fraction" } },
727  /* 111 */ { R"(TLS DETAILS)", 1 },
728  /* 112 */ { R"(NUMBER OF TLS GROUPS\s*:\s*(.+))", 1 },
729  /* 113 */ { R"(TLS GROUP\s*:\s*(.+))", 1, "pdbx_refine_tls", { "id" }, nullptr, true },
730  /* 114 */ { R"(NUMBER OF COMPONENTS GROUP\s*:\s*(.+))", 1 },
731  /* 115 */ { R"(COMPONENTS C SSSEQI TO C SSSEQI)", 1 },
732  /* 116 */ { R"(RESIDUE RANGE\s*:\s+(\S+)\s+(\d*\S)\s+(\S+)\s+(\d*\S))", 0, "pdbx_refine_tls_group", { "beg_auth_asym_id", "beg_auth_seq_id", "end_auth_asym_id", "end_auth_seq_id" }, nullptr, true },
733  /* 117 */ { R"(ORIGIN FOR THE GROUP(?:\s*\(A\))?\s*:\s*([-+]?\d+(?:\.\d+)?)\s*([-+]?\d+(?:\.\d+)?)\s*([-+]?\d+(?:\.\d+)?))", 1, "pdbx_refine_tls", { "origin_x", "origin_y", "origin_z" } },
734  /* 118 */ { R"(T TENSOR)", 1 },
735  /* 119 */ { R"(T11\s*:\s*(.+) T22\s*:\s*(.+))", 1, "pdbx_refine_tls", { "T[1][1]", "T[2][2]" } },
736  /* 120 */ { R"(T33\s*:\s*(.+) T12\s*:\s*(.+))", 1, "pdbx_refine_tls", { "T[3][3]", "T[1][2]" } },
737  /* 121 */ { R"(T13\s*:\s*(.+) T23\s*:\s*(.+))", 1, "pdbx_refine_tls", { "T[1][3]", "T[2][3]" } },
738  /* 122 */ { R"(L TENSOR)", 1 },
739  /* 123 */ { R"(L11\s*:\s*(.+) L22\s*:\s*(.+))", 1, "pdbx_refine_tls", { "L[1][1]", "L[2][2]" } },
740  /* 124 */ { R"(L33\s*:\s*(.+) L12\s*:\s*(.+))", 1, "pdbx_refine_tls", { "L[3][3]", "L[1][2]" } },
741  /* 125 */ { R"(L13\s*:\s*(.+) L23\s*:\s*(.+))", 1, "pdbx_refine_tls", { "L[1][3]", "L[2][3]" } },
742  /* 126 */ { R"(S TENSOR)", 1 },
743  /* 127 */ { R"(S11\s*:\s*(.+) S12\s*:\s*(.+) S13\s*:\s*(.+))", 1, "pdbx_refine_tls", { "S[1][1]", "S[1][2]", "S[1][3]" } },
744  /* 128 */ { R"(S21\s*:\s*(.+) S22\s*:\s*(.+) S23\s*:\s*(.+))", 1, "pdbx_refine_tls", { "S[2][1]", "S[2][2]", "S[2][3]" } },
745  /* 129 */ { R"(S31\s*:\s*(.+) S32\s*:\s*(.+) S33\s*:\s*(.+))", 113 - 129, "pdbx_refine_tls", { "S[3][1]", "S[3][2]", "S[3][3]" } },
746  /* 130 */ { R"(BULK SOLVENT MODELLING.)", 1 },
747  /* 131 */ { R"(METHOD USED\s*:\s*(.+))", 1, "refine", { "solvent_model_details" } },
748  /* 132 */ { R"(PARAMETERS FOR MASK CALCULATION)", 1 },
749  /* 133 */ { R"(VDW PROBE RADIUS\s*:\s*(.+))", 1, "refine", { "pdbx_solvent_vdw_probe_radii" } },
750  /* 134 */ { R"(ION PROBE RADIUS\s*:\s*(.+))", 1, "refine", { "pdbx_solvent_ion_probe_radii" } },
751  /* 135 */ { R"(SHRINKAGE RADIUS\s*:\s*(.+))", 1, "refine", { "pdbx_solvent_shrinkage_radii" } },
752 };
753 
754 class REFMAC5_Remark3Parser : public Remark3Parser
755 {
756  public:
757  REFMAC5_Remark3Parser(const std::string &name, const std::string &expMethod, PDBRecord *r, cif::datablock &db)
758  : Remark3Parser(name, expMethod, r, db, kREFMAC5_Template, sizeof(kREFMAC5_Template) / sizeof(TemplateLine),
759  std::regex(R"((REFMAC)(?: (\d+(?:\..+)?))?)"))
760  {
761  }
762 };
763 
765  /* 0 */ { R"(DATA USED IN REFINEMENT\.)", 1 },
766  /* 1 */ { R"(RESOLUTION RANGE HIGH \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_high" } },
767  /* 2 */ { R"(RESOLUTION RANGE LOW \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_low" } },
768  /* 3 */ { R"(DATA CUTOFF \(SIGMA\(F\)\)\s*:\s*(.+))", 1, "refine", { "pdbx_ls_sigma_F" } },
769  /* 4 */ { R"(COMPLETENESS FOR RANGE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_obs" } },
770  /* 5 */ { R"(CROSS-VALIDATION METHOD\s*:\s*(.+))", 1, "refine", { "pdbx_ls_cross_valid_method" } },
771  /* 6 */ { R"(FREE R VALUE TEST SET SELECTION\s*:\s*(.+))", 1, "refine", { "pdbx_R_Free_selection_details" } },
772  /* 7 */ { R"(FIT TO DATA USED IN REFINEMENT \(NO CUTOFF\)\.)", 1 },
773  /* 8 */ { R"(R VALUE \(WORKING \+ TEST SET, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "R_factor_all_no_cutoff" } },
774  /* 9 */ { R"(R VALUE \(WORKING SET, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "R_factor_obs_no_cutoff" } },
775  /* 10 */ { R"(FREE R VALUE \(NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_factor_no_cutoff" } },
776  /* 11 */ { R"(FREE R VALUE TEST SET SIZE \(%, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_size_perc_no_cutoff" } },
777  /* 12 */ { R"(FREE R VALUE TEST SET COUNT \(NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_ct_no_cutoff" } },
778  /* 13 */ { R"(TOTAL NUMBER OF REFLECTIONS \(NO CUTOFF\)\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_all" } },
779  /* 14 */ { R"(FIT/AGREEMENT OF MODEL FOR DATA WITH F>4SIG\(F\)\.)", 1 },
780  /* 15 */ { R"(R VALUE \(WORKING \+ TEST SET, F>4SIG\(F\)\)\s*:\s*(.+))", 1, "pdbx_refine", { "R_factor_all_4sig_cutoff" } },
781  /* 16 */ { R"(R VALUE \(WORKING SET, F>4SIG\(F\)\)\s*:\s*(.+))", 1, "pdbx_refine", { "R_factor_obs_4sig_cutoff" } },
782  /* 17 */ { R"(FREE R VALUE \(F>4SIG\(F\)\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_factor_4sig_cutoff" } },
783  /* 18 */ { R"(FREE R VALUE TEST SET SIZE \(%, F>4SIG\(F\)\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_size_perc_4sig_cutoff" } },
784  /* 19 */ { R"(FREE R VALUE TEST SET COUNT \(F>4SIG\(F\)\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_ct_4sig_cutoff" } },
785  /* 20 */ { R"(TOTAL NUMBER OF REFLECTIONS \(F>4SIG\(F\)\)\s*:\s*(.+))", 1, "pdbx_refine", { "number_reflns_obs_4sig_cutoff" } },
786  /* 21 */ { R"(NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT\.)", 1 },
787  /* 22 */ { R"(PROTEIN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_protein" } },
788  /* 23 */ { R"(NUCLEIC ACID ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_nucleic_acid" } },
789  /* 24 */ { R"(HETEROGEN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_ligand" } },
790  /* 25 */ { R"(SOLVENT ATOMS\s*:\s*(.+))", 1, "refine_hist", { "number_atoms_solvent" } },
791  /* 26 */ { R"(MODEL REFINEMENT\.)", 1 },
792  /* 27 */ { R"(OCCUPANCY SUM OF NON-HYDROGEN ATOMS\s*:\s*(.+))", 1, "refine_analyze", { "occupancy_sum_non_hydrogen" } },
793  /* 28 */ { R"(OCCUPANCY SUM OF HYDROGEN ATOMS\s*:\s*(.+))", 1, "refine_analyze", { "occupancy_sum_hydrogen" } },
794  /* 29 */ { R"(NUMBER OF DISCRETELY DISORDERED RESIDUES\s*:\s*(.+))", 1, "refine_analyze", { "number_disordered_residues" } },
795  /* 30 */ { R"(NUMBER OF LEAST-SQUARES PARAMETERS\s*:\s*(.+))", 1, "refine", { "ls_number_parameters" } },
796  /* 31 */ { R"(NUMBER OF RESTRAINTS\s*:\s*(.+))", 1, "refine", { "ls_number_restraints" } },
797  /* 32 */ { R"(RMS DEVIATIONS FROM RESTRAINT TARGET VALUES\.)", 1 },
798  /* 33 */ { R"(BOND LENGTHS \(A\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "s_bond_d", false },
799  /* 34 */ { R"(ANGLE DISTANCES \(A\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "s_angle_d", false },
800  /* 35 */ { R"(SIMILAR DISTANCES \(NO TARGET VALUES\) \(A\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "s_similar_dist", false },
801  /* 36 */ { R"(DISTANCES FROM RESTRAINT PLANES \(A\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "s_from_restr_planes", false },
802  /* 37 */ { R"(ZERO CHIRAL VOLUMES \(A\*\*3\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "s_zero_chiral_vol", false },
803  /* 38 */ { R"(NON-ZERO CHIRAL VOLUMES \(A\*\*3\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "s_non_zero_chiral_vol", false },
804  /* 39 */ { R"(ANTI-BUMPING DISTANCE RESTRAINTS \(A\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "s_anti_bump_dis_restr", false },
805  /* 40 */ { R"(RIGID-BOND ADP COMPONENTS \(A\*\*2\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "s_rigid_bond_adp_cmpnt", false },
806  /* 41 */ { R"(SIMILAR ADP COMPONENTS \(A\*\*2\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "s_similar_adp_cmpnt", false },
807  /* 42 */ { R"(APPROXIMATELY ISOTROPIC ADPS \(A\*\*2\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "s_approx_iso_adps", false },
808  /* 43 */ { R"(BULK SOLVENT MODELING\.)", 1 },
809  /* 44 */ { R"(METHOD USED\s*:\s*(.+))", 1, "refine", { "solvent_model_details" } },
810  /* 45 */ { R"(STEREOCHEMISTRY TARGET VALUES\s*:\s*(.+))", 1, "refine", { "pdbx_stereochemistry_target_values" } },
811  /* 46 */ { R"(SPECIAL CASE\s*:\s*(.+))", 1, "refine", { "pdbx_stereochem_target_val_spec_case" } },
812 };
813 
814 class SHELXL_Remark3Parser : public Remark3Parser
815 {
816  public:
817  SHELXL_Remark3Parser(const std::string &name, const std::string &expMethod, PDBRecord *r, cif::datablock &db)
818  : Remark3Parser(name, expMethod, r, db, kSHELXL_Template, sizeof(kSHELXL_Template) / sizeof(TemplateLine),
819  std::regex(R"((SHELXL)(?:-(\d+(?:\..+)?)))"))
820  {
821  }
822 };
823 
825  /* 0 */ { R"(DATA USED IN REFINEMENT\.)", 1 },
826  /* 1 */ { R"(RESOLUTION RANGE HIGH \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_high" } },
827  /* 2 */ { R"(RESOLUTION RANGE LOW \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_low" } },
828  /* 3 */ { R"(DATA CUTOFF \(SIGMA\(F\)\)\s*:\s*(.+))", 1, "refine", { "pdbx_ls_sigma_F" } },
829  /* 4 */ { R"(COMPLETENESS FOR RANGE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_obs" } },
830  /* 5 */ { R"(NUMBER OF REFLECTIONS\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_obs" } },
831  /* 6 */ { R"(USING DATA ABOVE SIGMA CUTOFF\.)", 1 },
832  /* 7 */ { R"(CROSS-VALIDATION METHOD\s*:\s*(.+))", 1, "refine", { "pdbx_ls_cross_valid_method" } },
833  /* 8 */ { R"(FREE R VALUE TEST SET SELECTION\s*:\s*(.+))", 1, "refine", { "pdbx_R_Free_selection_details" } },
834  /* 9 */ { R"(R VALUE \(WORKING \+ TEST SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_obs" } },
835  /* 10 */ { R"(R VALUE \(WORKING SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_work" } },
836  /* 11 */ { R"(FREE R VALUE\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_free" } },
837  /* 12 */ { R"(FREE R VALUE TEST SET SIZE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_R_free" } },
838  /* 13 */ { R"(FREE R VALUE TEST SET COUNT\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_R_free" } },
839  /* 14 */ { R"(USING ALL DATA, NO SIGMA CUTOFF\.)", 1 },
840  /* 15 */ { R"(R VALUE \(WORKING \+ TEST SET, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "R_factor_all_no_cutoff" } },
841  /* 16 */ { R"(R VALUE \(WORKING SET, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "R_factor_obs_no_cutoff" } },
842  /* 17 */ { R"(FREE R VALUE \(NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_factor_no_cutoff" } },
843  /* 18 */ { R"(FREE R VALUE TEST SET SIZE \(%, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_size_perc_no_cutoff" } },
844  /* 19 */ { R"(FREE R VALUE TEST SET COUNT \(NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_ct_no_cutoff" } },
845  /* 20 */ { R"(TOTAL NUMBER OF REFLECTIONS \(NO CUTOFF\)\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_all" } },
846  /* 21 */ { R"(NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT\.)", 1 },
847  /* 22 */ { R"(PROTEIN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_protein" } },
848  /* 23 */ { R"(NUCLEIC ACID ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_nucleic_acid" } },
849  /* 24 */ { R"(HETEROGEN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_ligand" } },
850  /* 25 */ { R"(SOLVENT ATOMS\s*:\s*(.+))", 1, "refine_hist", { "number_atoms_solvent" } },
851  /* 26 */ { R"(WILSON B VALUE \(FROM FCALC, A\*\*2\)\s*:\s*(.+))", 1, "reflns", { "B_iso_Wilson_estimate" } },
852  /* 27 */ { R"(RMS DEVIATIONS FROM IDEAL VALUES\. RMS WEIGHT COUNT)", 1 },
853  /* 28 */ { R"(BOND LENGTHS \(A\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_bond_d", false },
854  /* 29 */ { R"(BOND ANGLES \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_angle_deg", false },
855  /* 30 */ { R"(TORSION ANGLES \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_dihedral_angle_d", false },
856  /* 31 */ { R"(PSEUDOROTATION ANGLES \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_pseud_angle", false },
857  /* 32 */ { R"(TRIGONAL CARBON PLANES \(A\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_trig_c_planes", false },
858  /* 33 */ { R"(GENERAL PLANES \(A\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_gen_planes", false },
859  /* 34 */ { R"(ISOTROPIC THERMAL FACTORS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_it", false },
860  /* 35 */ { R"(NON-BONDED CONTACTS \(A\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_nbd", false },
861  /* 36 */ { R"(INCORRECT CHIRAL-CENTERS \(COUNT\)\s*:\s*(.+)\s*)", 1, "refine_ls_restr", { "number" }, "t_incorr_chiral_ct", false },
862  /* 37 */ { R"(BULK SOLVENT MODELING\.)", 1 },
863  /* 38 */ { R"(METHOD USED\s*:\s*(.+))", 1, "refine", { "solvent_model_details" } },
864  /* 39 */ { R"(KSOL\s*:\s*(.+))", 1, "refine", { "solvent_model_param_ksol" } },
865  /* 40 */ { R"(BSOL\s*:\s*(.+))", 1, "refine", { "solvent_model_param_bsol" } },
866  /* 41 */ { R"(RESTRAINT LIBRARIES\.)", 1 },
867  /* 42 */ { R"(STEREOCHEMISTRY\s*:\s*(.+))", 1, "refine", { "pdbx_stereochemistry_target_values" } },
868  /* 43 */ { R"(ISOTROPIC THERMAL FACTOR RESTRAINTS\s*:\s*(.+))", 1, "refine", { "pdbx_isotropic_thermal_model" } },
869 };
870 
871 class TNT_Remark3Parser : public Remark3Parser
872 {
873  public:
874  TNT_Remark3Parser(const std::string &name, const std::string &expMethod, PDBRecord *r, cif::datablock &db)
875  : Remark3Parser(name, expMethod, r, db, kTNT_Template, sizeof(kTNT_Template) / sizeof(TemplateLine),
876  std::regex(R"((TNT)(?: V. (\d+.+)?)?)"))
877  {
878  }
879 };
880 
882  /* 0 */ { R"(DATA USED IN REFINEMENT\.)", 1 },
883  /* 1 */ { R"(RESOLUTION RANGE HIGH \(ANGSTROMS\) :\s+(.+))", 1, "refine", { "ls_d_res_high" } },
884  /* 2 */ { R"(RESOLUTION RANGE LOW \(ANGSTROMS\) :\s+(.+))", 1, "refine", { "ls_d_res_low" } },
885  /* 3 */ { R"(DATA CUTOFF \(SIGMA\(F\)\) :\s+(.+))", 1, "refine", { "pdbx_ls_sigma_F" } },
886  /* 4 */ { R"(DATA CUTOFF HIGH \(ABS\(F\)\) :\s+(.+))", 1, "refine", { "pdbx_data_cutoff_high_absF" } },
887  /* 5 */ { R"(DATA CUTOFF LOW \(ABS\(F\)\) :\s+(.+))", 1, "refine", { "pdbx_data_cutoff_low_absF" } },
888  /* 6 */ { R"(COMPLETENESS \(WORKING\+TEST\) \(%\) :\s+(.+))", 1, "refine", { "ls_percent_reflns_obs" } },
889  /* 7 */ { R"(NUMBER OF REFLECTIONS :\s+(.+))", 1, "refine", { "ls_number_reflns_obs" } },
890  /* 8 */ { R"(FIT TO DATA USED IN REFINEMENT\.)", 1 },
891  /* 9 */ { R"(CROSS-VALIDATION METHOD :\s+(.+))", 1, "refine", { "pdbx_ls_cross_valid_method" } },
892  /* 10 */ { R"(FREE R VALUE TEST SET SELECTION :\s+(.+))", 1, "refine", { "pdbx_R_Free_selection_details" } },
893  /* 11 */ { R"(R VALUE \(WORKING SET\) :\s+(.+))", 1, "refine", { "ls_R_factor_R_work" } },
894  /* 12 */ { R"(FREE R VALUE :\s+(.+))", 1, "refine", { "ls_R_factor_R_free" } },
895  /* 13 */ { R"(FREE R VALUE TEST SET SIZE \(%\) :\s+(.+))", 1, "refine", { "ls_percent_reflns_R_free" } },
896  /* 14 */ { R"(FREE R VALUE TEST SET COUNT :\s+(.+))", 1, "refine", { "ls_number_reflns_R_free" } },
897  /* 15 */ { R"(ESTIMATED ERROR OF FREE R VALUE :\s+(.+))", 1, "refine", { "ls_R_factor_R_free_error" } },
898  /* 16 */ { R"(FIT IN THE HIGHEST RESOLUTION BIN\.)", 1 },
899  /* 17 */ { R"(TOTAL NUMBER OF BINS USED :\s+(.+))", 1, "refine_ls_shell", { "pdbx_total_number_of_bins_used" } },
900  /* 18 */ { R"(BIN RESOLUTION RANGE HIGH \(A\) :\s+(.+))", 1, "refine_ls_shell", { "d_res_high" } },
901  /* 19 */ { R"(BIN RESOLUTION RANGE LOW \(A\) :\s+(.+))", 1, "refine_ls_shell", { "d_res_low" } },
902  /* 20 */ { R"(BIN COMPLETENESS \(WORKING\+TEST\) \(%\) :\s+(.+))", 1, "refine_ls_shell", { "percent_reflns_obs" } },
903  /* 21 */ { R"(REFLECTIONS IN BIN \(WORKING SET\) :\s+(.+))", 1, "refine_ls_shell", { "number_reflns_R_work" } },
904  /* 22 */ { R"(BIN R VALUE \(WORKING SET\) :\s+(.+))", 1, "refine_ls_shell", { "R_factor_R_work" } },
905  /* 23 */ { R"(BIN FREE R VALUE :\s+(.+))", 1, "refine_ls_shell", { "R_factor_R_free" } },
906  /* 24 */ { R"(BIN FREE R VALUE TEST SET SIZE \(%\) :\s+(.+))", 1, "refine_ls_shell", { "percent_reflns_R_free" } },
907  /* 25 */ { R"(BIN FREE R VALUE TEST SET COUNT :\s+(.+))", 1, "refine_ls_shell", { "number_reflns_R_free" } },
908  /* 26 */ { R"(ESTIMATED ERROR OF BIN FREE R VALUE :\s+(.+))", 1, "refine_ls_shell", { "R_factor_R_free_error" } },
909  /* 27 */ { R"(NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT\.)", 1 },
910  /* 28 */ { R"(PROTEIN ATOMS :\s+(.+))", 1, "refine_hist", { "pdbx_number_atoms_protein" } },
911  /* 29 */ { R"(NUCLEIC ACID ATOMS :\s+(.+))", 1, "refine_hist", { "pdbx_number_atoms_nucleic_acid" } },
912  /* 30 */ { R"(HETEROGEN ATOMS :\s+(.+))", 1, "refine_hist", { "pdbx_number_atoms_ligand" } },
913  /* 31 */ { R"(SOLVENT ATOMS :\s+(.+))", 1, "refine_hist", { "number_atoms_solvent" } },
914  /* 32 */ { R"(B VALUES\.)", 1 },
915  /* 33 */ { R"(B VALUE TYPE :\s+(.+))", 1, "refine", { "pdbx_TLS_residual_ADP_flag" } },
916  /* 34 */ { R"(FROM WILSON PLOT \(A\*\*2\) :\s+(.+))", 1, "reflns", { "B_iso_Wilson_estimate" } },
917  /* 35 */ { R"(MEAN B VALUE \(OVERALL, A\*\*2\) :\s+(.+))", 1, "refine", { "B_iso_mean" } },
918  /* 36 */ { R"(OVERALL ANISOTROPIC B VALUE\.)", 1 },
919  /* 37 */ { R"(B11 \(A\*\*2\) :\s+(.+))", 1, "refine", { "aniso_B[1][1]" } },
920  /* 38 */ { R"(B22 \(A\*\*2\) :\s+(.+))", 1, "refine", { "aniso_B[2][2]" } },
921  /* 39 */ { R"(B33 \(A\*\*2\) :\s+(.+))", 1, "refine", { "aniso_B[3][3]" } },
922  /* 40 */ { R"(B12 \(A\*\*2\) :\s+(.+))", 1, "refine", { "aniso_B[1][2]" } },
923  /* 41 */ { R"(B13 \(A\*\*2\) :\s+(.+))", 1, "refine", { "aniso_B[1][3]" } },
924  /* 42 */ { R"(B23 \(A\*\*2\) :\s+(.+))", 1, "refine", { "aniso_B[2][3]" } },
925  /* 43 */ { R"(ESTIMATED COORDINATE ERROR\.)", 1 },
926  /* 44 */ { R"(ESD FROM LUZZATI PLOT \(A\) :\s+(.+))", 1, "refine_analyze", { "Luzzati_coordinate_error_obs" } },
927  /* 45 */ { R"(ESD FROM SIGMAA \(A\) :\s+(.+))", 1, "refine_analyze", { "Luzzati_sigma_a_obs" } },
928  /* 46 */ { R"(LOW RESOLUTION CUTOFF \(A\) :\s+(.+))", 1, "refine_analyze", { "Luzzati_d_res_low_obs" } },
929  /* 47 */ { R"(CROSS-VALIDATED ESTIMATED COORDINATE ERROR\.)", 1 },
930  /* 48 */ { R"(ESD FROM C-V LUZZATI PLOT \(A\) :\s+(.+))", 1, "refine_analyze", { "Luzzati_coordinate_error_free" } },
931  /* 49 */ { R"(ESD FROM C-V SIGMAA \(A\) :\s+(.+))", 1, "refine_analyze", { "Luzzati_sigma_a_free" } },
932  /* 50 */ { R"(RMS DEVIATIONS FROM IDEAL VALUES\..*)", 1 },
933  /* 51 */ { R"(BOND LENGTHS \(A\) :\s+(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "x_bond_d", false },
934  /* 52 */ { R"(BOND ANGLES \(DEGREES\) :\s+(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "x_angle_deg", false },
935  /* 53 */ { R"(DIHEDRAL ANGLES \(DEGREES\) :\s+(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "x_dihedral_angle_d", false },
936  /* 54 */ { R"(IMPROPER ANGLES \(DEGREES\) :\s+(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "x_improper_angle_d", false },
937  /* 55 */ { R"(ISOTROPIC THERMAL MODEL :\s+(.+))", 1, "refine", { "pdbx_isotropic_thermal_model" } },
938  /* 56 */ { R"(ISOTROPIC THERMAL FACTOR RESTRAINTS\. RMS SIGMA)", 1 },
939  /* 57 */ { R"(MAIN-CHAIN BOND \(A\*\*2\) :\s+(.+?);\s+(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "x_mcbond_it", false },
940  /* 58 */ { R"(MAIN-CHAIN ANGLE \(A\*\*2\) :\s+(.+?);\s+(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "x_mcangle_it", false },
941  /* 59 */ { R"(SIDE-CHAIN BOND \(A\*\*2\) :\s+(.+?);\s+(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "x_scbond_it", false },
942  /* 60 */ { R"(SIDE-CHAIN ANGLE \(A\*\*2\) :\s+(.+?);\s+(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "x_scangle_it", false },
943  /* 61 */ { R"(NCS MODEL :\s+(.+))", 1, /* "refine_ls_restr_ncs", { "ncs_model_details" } */ },
944  /* 62 */ { R"(NCS RESTRAINTS\. RMS SIGMA/WEIGHT)", 1 },
945  /* 63 */ { R"(GROUP (\d+) POSITIONAL \(A\) :\s+(.+?);\s+(.+))", 1, /* "refine_ls_restr_ncs", { ":dom_id", "rms_dev_position", "weight_position" } */ },
946  /* 64 */ { R"(GROUP (\d+) B-FACTOR \(A\*\*2\) :\s+(.+?);\s+(.+))", 63 - 64, /* "refine_ls_restr_ncs", { ":dom_id", "rms_dev_B_iso", "weight_B_iso" } */ },
947  /* 65 */ { R"(PARAMETER FILE (\d+) :\s+(.+))", 0, /* "pdbx_xplor_file", { "serial_no", "param_file" } */ },
948  /* 66 */ { R"(TOPOLOGY FILE (\d+) :\s+(.+))", 0, /* "pdbx_xplor_file", { "serial_no", "topol_file" } */ },
949 };
950 
951 class XPLOR_Remark3Parser : public Remark3Parser
952 {
953  public:
954  XPLOR_Remark3Parser(const std::string &name, const std::string &expMethod, PDBRecord *r, cif::datablock &db)
955  : Remark3Parser(name, expMethod, r, db, kXPLOR_Template, sizeof(kXPLOR_Template) / sizeof(TemplateLine),
956  std::regex(R"((X-PLOR)(?: (\d+(?:\.\d+)?))?)"))
957  {
958  }
959 };
960 
961 // --------------------------------------------------------------------
962 
963 Remark3Parser::Remark3Parser(const std::string &name, const std::string &expMethod, PDBRecord *r, cif::datablock &db,
964  const TemplateLine templatelines[], uint32_t templateLineCount, std::regex programversion)
965  : mName(name)
966  , mExpMethod(expMethod)
967  , mRec(r)
968  , mDb(db.name())
969  , mTemplate(templatelines)
970  , mTemplateCount(templateLineCount)
971  , mProgramVersion(programversion)
972 {
973  mDb.set_validator(db.get_validator());
974 }
975 
976 std::string Remark3Parser::nextLine()
977 {
978  mLine.clear();
979 
980  while (mRec != nullptr and mRec->is("REMARK 3"))
981  {
982  size_t valueIndent = 0;
983  for (size_t i = 4; i < mRec->mVlen; ++i)
984  {
985  if (mRec->mValue[i] == ' ')
986  continue;
987 
988  if (mRec->mValue[i] == ':')
989  {
990  valueIndent = i;
991  while (valueIndent < mRec->mVlen and mRec->mValue[i] == ' ')
992  ++valueIndent;
993  break;
994  }
995  }
996 
997  mLine = mRec->vS(12);
998  mRec = mRec->mNext;
999 
1000  if (mLine.empty())
1001  continue;
1002 
1003  // concatenate value that is wrapped over multiple lines (tricky code...)
1004 
1005  if (valueIndent > 4)
1006  {
1007  std::string indent(valueIndent - 4, ' ');
1008 
1009  while (mRec->is("REMARK 3") and mRec->mVlen > valueIndent)
1010  {
1011  std::string v(mRec->mValue + 4, mRec->mValue + mRec->mVlen);
1012  if (not cif::starts_with(v, indent))
1013  break;
1014 
1015  mLine += ' ';
1016  mLine.append(mRec->mValue + valueIndent, mRec->mValue + mRec->mVlen);
1017 
1018  mRec = mRec->mNext;
1019  }
1020  }
1021 
1022  // collapse multiple spaces
1023  bool space = false;
1024  auto i = mLine.begin(), j = i;
1025 
1026  while (i != mLine.end())
1027  {
1028  bool nspace = isspace(*i);
1029 
1030  if (nspace == false)
1031  {
1032  if (space)
1033  *j++ = ' ';
1034  *j++ = *i;
1035  }
1036  space = nspace;
1037  ++i;
1038  }
1039  mLine.erase(j, mLine.end());
1040 
1041  break;
1042  }
1043 
1044  if (cif::VERBOSE >= 2)
1045  std::cerr << "RM3: " << mLine << std::endl;
1046 
1047  return mLine;
1048 }
1049 
1050 bool Remark3Parser::match(const char *expr, int nextState)
1051 {
1052  std::regex rx(expr);
1053 
1054  bool result = regex_match(mLine, mM, rx);
1055 
1056  if (result)
1057  mState = nextState;
1058  else if (cif::VERBOSE >= 3)
1059  std::cerr << cif::coloured("No match:", cif::scWHITE, cif::scRED) << " '" << expr << '\'' << std::endl;
1060 
1061  return result;
1062 }
1063 
1064 float Remark3Parser::parse()
1065 {
1066  int lineCount = 0, dropped = 0;
1067  std::string remarks;
1068  mState = 0;
1069 
1070  while (mRec != nullptr)
1071  {
1072  nextLine();
1073 
1074  if (mLine.empty())
1075  break;
1076 
1077  ++lineCount;
1078 
1079  // Skip over AUTHORS lines
1080  if (mState == 0 and match(R"(AUTHORS\s*:.+)", 0))
1081  continue;
1082 
1083  auto state = mState;
1084  for (state = mState; state < mTemplateCount; ++state)
1085  {
1086  const TemplateLine &tmpl = mTemplate[state];
1087 
1088  if (match(tmpl.rx, state + tmpl.nextStateOffset))
1089  {
1090  if (not(tmpl.category == nullptr or tmpl.items.size() == 0))
1091  {
1092  if (tmpl.lsRestrType == nullptr)
1093  storeCapture(tmpl.category, tmpl.items, tmpl.createNew);
1094  else if (tmpl.createNew)
1095  storeRefineLsRestr(tmpl.lsRestrType, tmpl.items);
1096  else
1097  updateRefineLsRestr(tmpl.lsRestrType, tmpl.items);
1098  }
1099  break;
1100  }
1101  }
1102 
1103  if (state < mTemplateCount)
1104  continue;
1105 
1106  if (state == mTemplateCount and match(R"(OTHER REFINEMENT REMARKS\s*:\s*(.*))", mTemplateCount + 1))
1107  {
1108  remarks = mM[1].str();
1109  continue;
1110  }
1111 
1112  if (state == mTemplateCount + 1)
1113  {
1114  remarks = remarks + '\n' + mLine;
1115  continue;
1116  }
1117 
1118  if (cif::VERBOSE >= 2)
1119  std::cerr << cif::coloured("Dropping line:", cif::scWHITE, cif::scRED) << " '" << mLine << '\'' << std::endl;
1120 
1121  ++dropped;
1122  }
1123 
1124  if (not remarks.empty() and not iequals(remarks, "NULL"))
1125  {
1126  if (not mDb["refine"].empty())
1127  mDb["refine"].front()["details"] = remarks;
1128  }
1129 
1130  float score = float(lineCount - dropped) / lineCount;
1131 
1132  return score;
1133 }
1134 
1135 std::string Remark3Parser::program()
1136 {
1137  std::string result = mName;
1138 
1139  std::smatch m;
1140  if (regex_match(mName, m, mProgramVersion))
1141  result = m[1].str();
1142 
1143  return result;
1144 }
1145 
1146 std::string Remark3Parser::version()
1147 {
1148  std::string result;
1149 
1150  std::smatch m;
1151  if (regex_match(mName, m, mProgramVersion))
1152  result = m[2].str();
1153 
1154  return result;
1155 }
1156 
1157 void Remark3Parser::storeCapture(const char *category, std::initializer_list<const char *> items, bool createNew)
1158 {
1159  int capture = 0;
1160  for (auto item : items)
1161  {
1162  ++capture;
1163 
1164  std::string value = mM[capture].str();
1165  cif::trim(value);
1166 
1167  if (iequals(value, "NULL") or iequals(value, "NONE") or iequals(value, "Inf") or iequals(value, "+Inf") or iequals(value, std::string(value.length(), '*')))
1168  continue;
1169 
1170  if (cif::VERBOSE >= 3)
1171  std::cerr << "storing: '" << value << "' in _" << category << '.' << item << std::endl;
1172 
1173  auto &cat = mDb[category];
1174  if (cat.empty() or createNew)
1175  {
1176  if (iequals(category, "refine"))
1177  cat.emplace({ { "pdbx_refine_id", mExpMethod },
1178  { "entry_id", mDb.name() },
1179  //#warning("this diffrn-id is probably not correct?")
1180  { "pdbx_diffrn_id", 1 } });
1181  else if (iequals(category, "refine_analyze") or iequals(category, "pdbx_refine"))
1182  cat.emplace({
1183  { "pdbx_refine_id", mExpMethod },
1184  { "entry_id", mDb.name() },
1185  // { "pdbx_diffrn_id", 1 }
1186  });
1187  else if (iequals(category, "refine_hist"))
1188  {
1189  std::string dResHigh, dResLow;
1190  for (auto r : mDb["refine"])
1191  {
1192  cif::tie(dResHigh, dResLow) = r.get("ls_d_res_high", "ls_d_res_low");
1193  break;
1194  }
1195 
1196  cat.emplace({ { "pdbx_refine_id", mExpMethod },
1197  { "cycle_id", "LAST" },
1198  { "d_res_high", dResHigh.empty() ? "." : dResHigh },
1199  { "d_res_low", dResLow.empty() ? "." : dResLow } });
1200  }
1201  else if (iequals(category, "refine_ls_shell"))
1202  {
1203  cat.emplace({
1204  { "pdbx_refine_id", mExpMethod },
1205  });
1206  }
1207  else if (iequals(category, "pdbx_refine_tls_group"))
1208  {
1209  std::string tlsID;
1210  if (not mDb["pdbx_refine_tls"].empty())
1211  tlsID = mDb["pdbx_refine_tls"].back()["id"].as<std::string>();
1212  std::string tlsGroupID = cat.get_unique_id("");
1213 
1214  cat.emplace({
1215  { "pdbx_refine_id", mExpMethod },
1216  { "id", tlsGroupID },
1217  { "refine_tls_id", tlsID } });
1218  }
1219  else if (iequals(category, "pdbx_refine_tls"))
1220  {
1221  cat.emplace({ { "pdbx_refine_id", mExpMethod },
1222  { "method", "refined" } });
1223  }
1224  // else if (iequals(category, "struct_ncs_dom"))
1225  // {
1226  // size_t id = cat.size() + 1;
1227  //
1228  // cat.emplace({
1229  // { "id", id }
1230  // });
1231  // }
1232  else if (iequals(category, "pdbx_reflns_twin"))
1233  {
1234  cat.emplace({ // #warning("crystal id, diffrn id, what should be put here?")
1235  { "crystal_id", 1 },
1236  { "diffrn_id", 1 } });
1237  }
1238  else if (iequals(category, "reflns"))
1239  cat.emplace({ { "pdbx_ordinal", cat.size() + 1 },
1240  { "entry_id", mDb.name() },
1241  { "pdbx_diffrn_id", 1 } });
1242  else
1243  cat.emplace({});
1244 
1245  createNew = false;
1246  }
1247 
1248  cat.back()[item] = value;
1249  }
1250 }
1251 
1252 void Remark3Parser::storeRefineLsRestr(const char *type, std::initializer_list<const char *> items)
1253 {
1254  cif::row_handle r;
1255  int capture = 0;
1256 
1257  for (auto item : items)
1258  {
1259  ++capture;
1260 
1261  std::string value = mM[capture].str();
1262  cif::trim(value);
1263  if (value.empty() or iequals(value, "NULL") or iequals(value, "Inf") or iequals(value, "+Inf") or iequals(value, std::string(value.length(), '*')))
1264  continue;
1265 
1266  if (r.empty())
1267  {
1268  r = mDb["refine_ls_restr"].emplace({
1269  {"pdbx_refine_id", mExpMethod},
1270  {"type", type}
1271  });
1272  }
1273 
1274  r[item] = value;
1275  }
1276 }
1277 
1278 void Remark3Parser::updateRefineLsRestr(const char *type, std::initializer_list<const char *> items)
1279 {
1280  auto rows = mDb["refine_ls_restr"].find(cif::key("type") == type and cif::key("pdbx_refine_id") == mExpMethod);
1281  if (rows.empty())
1282  storeRefineLsRestr(type, items);
1283  else
1284  {
1285  for (auto r : rows)
1286  {
1287  int capture = 0;
1288  for (auto item : items)
1289  {
1290  ++capture;
1291 
1292  std::string value = mM[capture].str();
1293  cif::trim(value);
1294  if (iequals(value, "NULL") or iequals(value, std::string(value.length(), '*')))
1295  value.clear();
1296 
1297  r[item] = value;
1298  }
1299 
1300  break;
1301  }
1302  }
1303 }
1304 
1305 // --------------------------------------------------------------------
1306 
1307 bool Remark3Parser::parse(const std::string &expMethod, PDBRecord *r, cif::datablock &db)
1308 {
1309  // simple version, only for the first few lines
1310  auto getNextLine = [&]()
1311  {
1312  std::string result;
1313 
1314  while (result.empty() and r != nullptr and r->is("REMARK 3"))
1315  {
1316  result = r->vS(12);
1317  r = r->mNext;
1318  }
1319 
1320  return result;
1321  };
1322 
1323  // All remark 3 records should start with the same data.
1324 
1325  std::string line = getNextLine();
1326 
1327  if (line != "REFINEMENT.")
1328  {
1329  if (cif::VERBOSE > 0)
1330  std::cerr << "Unexpected data in REMARK 3" << std::endl;
1331  return false;
1332  }
1333 
1334  line = getNextLine();
1335 
1336  std::regex rxp(R"(^PROGRAM\s*:\s*(.+))");
1337  std::smatch m;
1338 
1339  if (not std::regex_match(line, m, rxp))
1340  {
1341  if (cif::VERBOSE > 0)
1342  std::cerr << "Expected valid PROGRAM line in REMARK 3" << std::endl;
1343  return false;
1344  }
1345 
1346  line = m[1].str();
1347 
1348  struct programScore
1349  {
1350  programScore(const std::string &program, Remark3Parser *parser, float score)
1351  : program(program)
1352  , parser(parser)
1353  , score(score)
1354  {
1355  }
1356 
1357  std::string program;
1358  std::unique_ptr<Remark3Parser> parser;
1359  float score;
1360 
1361  bool operator<(const programScore &rhs) const
1362  {
1363  return score > rhs.score;
1364  }
1365  };
1366 
1367  std::vector<programScore> scores;
1368 
1369  auto tryParser = [&](Remark3Parser *p)
1370  {
1371  std::unique_ptr<Remark3Parser> parser(p);
1372  float score;
1373 
1374  try
1375  {
1376  score = parser->parse();
1377  }
1378  catch (const std::exception &e)
1379  {
1380  if (cif::VERBOSE >= 0)
1381  std::cerr << "Error parsing REMARK 3 with " << parser->program() << std::endl
1382  << e.what() << '\n';
1383  score = 0;
1384  }
1385 
1386  if (cif::VERBOSE >= 2)
1387  std::cerr << "Score for " << parser->program() << ": " << score << std::endl;
1388 
1389  if (score > 0)
1390  {
1391  std::string program = parser->program();
1392  std::string version = parser->version();
1393 
1394  scores.emplace_back(program, parser.release(), score);
1395  }
1396  };
1397 
1398  for (auto program : cif::split<std::string>(line, ", ", true))
1399  {
1400  if (cif::starts_with(program, "BUSTER"))
1401  tryParser(new BUSTER_TNT_Remark3Parser(program, expMethod, r, db));
1402  else if (cif::starts_with(program, "CNS") or cif::starts_with(program, "CNX"))
1403  tryParser(new CNS_Remark3Parser(program, expMethod, r, db));
1404  else if (cif::starts_with(program, "PHENIX"))
1405  tryParser(new PHENIX_Remark3Parser(program, expMethod, r, db));
1406  else if (cif::starts_with(program, "NUCLSQ"))
1407  tryParser(new NUCLSQ_Remark3Parser(program, expMethod, r, db));
1408  else if (cif::starts_with(program, "PROLSQ"))
1409  tryParser(new PROLSQ_Remark3Parser(program, expMethod, r, db));
1410  else if (cif::starts_with(program, "REFMAC"))
1411  {
1412  // simply try both and take the best
1413  tryParser(new REFMAC_Remark3Parser(program, expMethod, r, db));
1414  tryParser(new REFMAC5_Remark3Parser(program, expMethod, r, db));
1415  }
1416  else if (cif::starts_with(program, "SHELXL"))
1417  tryParser(new SHELXL_Remark3Parser(program, expMethod, r, db));
1418  else if (cif::starts_with(program, "TNT"))
1419  tryParser(new TNT_Remark3Parser(program, expMethod, r, db));
1420  else if (cif::starts_with(program, "X-PLOR"))
1421  tryParser(new XPLOR_Remark3Parser(program, expMethod, r, db));
1422  else if (cif::VERBOSE > 0)
1423  std::cerr << "Skipping unknown program (" << program << ") in REMARK 3" << std::endl;
1424  }
1425 
1426  sort(scores.begin(), scores.end());
1427 
1428  bool guessProgram = scores.empty() or scores.front().score < 0.9f;
1429  if (guessProgram)
1430  {
1431  if (cif::VERBOSE > 0)
1432  std::cerr << "Unknown or untrusted program in REMARK 3, trying all parsers to see if there is a match" << std::endl;
1433 
1434  tryParser(new BUSTER_TNT_Remark3Parser("BUSTER-TNT", expMethod, r, db));
1435  tryParser(new CNS_Remark3Parser("CNS", expMethod, r, db));
1436  tryParser(new PHENIX_Remark3Parser("PHENIX", expMethod, r, db));
1437  tryParser(new NUCLSQ_Remark3Parser("NUCLSQ", expMethod, r, db));
1438  tryParser(new PROLSQ_Remark3Parser("PROLSQ", expMethod, r, db));
1439  tryParser(new REFMAC_Remark3Parser("REFMAC", expMethod, r, db));
1440  tryParser(new REFMAC5_Remark3Parser("REFMAC5", expMethod, r, db));
1441  tryParser(new SHELXL_Remark3Parser("SHELXL", expMethod, r, db));
1442  tryParser(new TNT_Remark3Parser("TNT", expMethod, r, db));
1443  tryParser(new XPLOR_Remark3Parser("X-PLOR", expMethod, r, db));
1444  }
1445 
1446  bool result = false;
1447 
1448  if (not scores.empty())
1449  {
1450  result = true;
1451 
1452  sort(scores.begin(), scores.end());
1453 
1454  auto &best = scores.front();
1455 
1456  if (cif::VERBOSE > 0)
1457  std::cerr << "Choosing " << best.parser->program() << " version '" << best.parser->version() << "' as refinement program. Score = " << best.score << std::endl;
1458 
1459  auto &software = db["software"];
1460  std::string program = best.parser->program();
1461  std::string version = best.parser->version();
1462 
1463  software.emplace({ { "name", program },
1464  { "classification", "refinement" },
1465  { "version", version },
1466  { "pdbx_ordinal", software.size() + 1 } });
1467 
1468  best.parser->fixup();
1469 
1470  for (auto &cat1 : best.parser->mDb)
1471  {
1472  auto &cat2 = db[cat1.name()];
1473 
1474  // copy only the values in the first row for the following categories
1475  if (cat1.name() == "reflns" or cat1.name() == "refine")
1476  {
1477  if (cat2.empty())
1478  cat2.emplace(cat1.front());
1479  else
1480  {
1481 
1482  auto r1 = cat1.front();
1483  auto r2 = cat2.front();
1484 
1485  for (auto column : cat1.key_fields())
1486  r2[column] = r1[column].text();
1487  }
1488  }
1489  else
1490  {
1491  for (auto rs : cat1)
1492  cat2.emplace(rs);
1493  }
1494  }
1495  }
1496 
1497  return result;
1498 }
1499 
1500 } // namespace cif::pdb
BUSTER_TNT_Remark3Parser(const std::string &name, const std::string &expMethod, PDBRecord *r, cif::datablock &db)
CNS_Remark3Parser(const std::string &name, const std::string &expMethod, PDBRecord *r, cif::datablock &db)
const TemplateLine kSHELXL_Template[]
XPLOR_Remark3Parser(const std::string &name, const std::string &expMethod, PDBRecord *r, cif::datablock &db)
void trim(std::string &s)
Definition: text.cpp:205
const TemplateLine kPHENIX_Template[]
std::initializer_list< const char * > items
bool iequals(std::string_view a, std::string_view b)
Definition: text.cpp:59
#define i
SHELXL_Remark3Parser(const std::string &name, const std::string &expMethod, PDBRecord *r, cif::datablock &db)
viol type
PROLSQ_Remark3Parser(const std::string &name, const std::string &expMethod, PDBRecord *r, cif::datablock &db)
TNT_Remark3Parser(const std::string &name, const std::string &expMethod, PDBRecord *r, cif::datablock &db)
const TemplateLine kREFMAC_Template[]
NUCLSQ_Remark3Parser(const std::string &name, const std::string &expMethod, PDBRecord *r, cif::datablock &db)
REFMAC_Remark3Parser(const std::string &name, const std::string &expMethod, PDBRecord *r, cif::datablock &db)
int VERBOSE
Definition: utilities.cpp:58
void sort(struct DCEL_T *dcel)
Definition: sorting.cpp:18
const TemplateLine kNUCLSQ_Template[]
#define j
int space
Definition: rwDM3.cpp:394
int m
const TemplateLine kBusterTNT_Template[]
REFMAC5_Remark3Parser(const std::string &name, const std::string &expMethod, PDBRecord *r, cif::datablock &db)
const TemplateLine kCNS_Template[]
PHENIX_Remark3Parser(const std::string &name, const std::string &expMethod, PDBRecord *r, cif::datablock &db)
const TemplateLine kXPLOR_Template[]
const TemplateLine kREFMAC5_Template[]
float r2
int * n
const TemplateLine kTNT_Template[]
bool operator<(const SparseElement &_x, const SparseElement &_y)
float r1
const TemplateLine kPROLSQ_Template[]