27 #include "cif++/text.hpp" 40 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
41 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
42 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
43 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
44 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
45 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
46 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
47 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
48 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
49 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
50 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
51 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
52 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
53 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
54 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
55 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff};
59 bool iequals(std::string_view
a, std::string_view
b)
61 bool result = a.length() == b.length();
62 for (
auto ai = a.begin(), bi = b.begin(); result and ai != a.end(); ++ai, ++bi)
63 result = kCharToLowerMap[uint8_t(*ai)] == kCharToLowerMap[uint8_t(*bi)];
71 for (; result and *a and *
b; ++
a, ++
b)
72 result = tolower(*a) == tolower(*b);
74 return result and *a == *
b;
80 auto ai = a.begin(), bi = b.begin();
82 for (; d == 0 and ai != a.end() and bi != b.end(); ++ai, ++bi)
83 d = tolower(*ai) - tolower(*bi);
89 else if (bi != b.end())
100 for (; d == 0 and *a != 0 and *b != 0; ++
a, ++
b)
101 d = tolower(*a) - tolower(*b);
122 std::string result(s);
123 for (
auto &
c : result)
131 c =
static_cast<char>(toupper(
c));
134 void replace_all(std::string &s, std::string_view what, std::string_view with)
136 for (std::string::size_type p = s.find(what); p != std::string::npos; p = s.find(what, p))
138 s.replace(p, what.length(), with);
151 while (e != s.begin())
153 auto pe = std::prev(e);
154 if (not std::isspace(*pe))
166 while (e != s.begin())
168 auto pe = std::prev(e);
169 if (not std::isspace(*pe))
174 return {s.begin(), e};
182 if (not std::isspace(*
b))
196 if (not std::isspace(*
b))
202 s.erase(s.begin(),
b);
221 throw std::runtime_error(
"empty tag");
223 throw std::runtime_error(
"tag '" + std::string { tag } +
"' does not start with underscore");
225 auto s = tag.find(
'.');
226 if (s == std::string::npos)
228 return std::tuple<std::string, std::string>{
"", tag.substr(1) };
230 return std::tuple<std::string, std::string>{tag.substr(1, s - 1), tag.substr(s + 1)};
239 if (number >= 26 * 26 * 26)
243 if (number >= 26 * 26)
245 int v = number / (26 * 26);
246 result += char(
'A' - 1 + v);
253 result += char(
'A' - 1 + v);
257 result += char(
'A' + number);
260 assert(not result.empty());
334 std::string::const_iterator
nextLineBreak(std::string::const_iterator text, std::string::const_iterator end)
348 const breakAction brkTable[27][27] = {
350 {PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, CPB, PBK, PBK, PBK, PBK, PBK, PBK},
351 {DBK, PBK, PBK, IBK, IBK, PBK, PBK, PBK, PBK, IBK, IBK, DBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
352 {DBK, PBK, PBK, IBK, IBK, PBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
353 {PBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, IBK},
354 {IBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, IBK},
355 {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
356 {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
357 {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
358 {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
359 {IBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, IBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, IBK},
360 {IBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
361 {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
362 {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
363 {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
364 {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
365 {DBK, PBK, PBK, IBK, DBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
366 {DBK, PBK, PBK, IBK, DBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
367 {IBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, IBK},
368 {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, DBK, DBK, IBK, IBK, DBK, PBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
369 {DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, PBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK},
370 {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
371 {IBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, IBK},
372 {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, IBK, IBK},
373 {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, IBK},
374 {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, DBK},
375 {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, IBK, IBK},
376 {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, IBK},
379 uint8_t ch =
static_cast<uint8_t
>(*text);
387 cls = kASCII_LBTable[ch];
411 ncls = kASCII_LBTable[ch];
416 breakAction brk = brkTable[cls][ncls];
427 std::vector<std::string>
wrapLine(
const std::string &text,
size_t width)
429 std::vector<std::string> result;
430 std::vector<size_t> offsets = {0};
432 auto b = text.begin();
433 while (
b != text.end())
437 offsets.push_back(e - text.begin());
442 size_t count = offsets.size() - 1;
444 std::vector<size_t> minima(count + 1, 1000000);
446 std::vector<size_t> breaks(count + 1, 0);
448 for (
size_t i = 0;
i < count; ++
i)
453 size_t w = offsets[
j] - offsets[
i];
458 while (w > 0 and isspace(text[offsets[
i] + w - 1]))
461 size_t cost = minima[
i];
463 cost += (width -
w) * (width - w);
465 if (cost < minima[j])
478 size_t i = breaks[
j];
479 result.push_back(text.substr(offsets[i], offsets[j] - offsets[i]));
483 reverse(result.begin(), result.end());
488 std::vector<std::string>
word_wrap(
const std::string &text,
size_t width)
490 std::vector<std::string> result;
491 for (
auto p : cif::split<std::string>(text,
"\n"))
495 result.push_back(
"");
500 result.insert(result.end(), lines.begin(), lines.end());
void to_upper(std::string &s)
void to_lower(std::string &s)
std::string trim_copy(std::string_view s)
void replace_all(std::string &s, std::string_view what, std::string_view with)
void trim(std::string &s)
const LineBreakClass kASCII_LBTable[128]
bool icontains(std::string_view s, std::string_view q)
void trim_left(std::string &s)
bool iequals(std::string_view a, std::string_view b)
std::string to_lower_copy(std::string_view s)
std::string trim_left_copy(std::string_view s)
std::tuple< std::string, std::string > split_tag_name(std::string_view tag)
std::vector< std::string > word_wrap(const std::string &text, size_t width)
std::string trim_right_copy(std::string_view s)
std::vector< std::string > wrapLine(const std::string &text, size_t width)
const uint8_t kCharToLowerMap[256]
std::string cif_id_for_number(int number)
void trim_right(std::string &s)
std::string::const_iterator nextLineBreak(std::string::const_iterator text, std::string::const_iterator end)
std::string to_string(bond_type bondType)
int icompare(std::string_view a, std::string_view b)