src/util/string.h

   1 /*
   2 Minetest
   3 Copyright (C) 2010-2013 celeron55, Perttu Ahola <celeron55@gmail.com>
   4
   5 This program is free software; you can redistribute it and/or modify
   6 it under the terms of the GNU Lesser General Public License as published by
   7 the Free Software Foundation; either version 2.1 of the License, or
   8 (at your option) any later version.
   9
  10 This program is distributed in the hope that it will be useful,
  11 but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 GNU Lesser General Public License for more details.
  14
  15 You should have received a copy of the GNU Lesser General Public License along
  16 with this program; if not, write to the Free Software Foundation, Inc.,
  17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  18 */
  19
  20 #pragma once
  21
  22 #include "irrlichttypes_bloated.h"
  23 #include <cstdlib>
  24 #include <string>
  25 #include <cstring>
  26 #include <vector>
  27 #include <map>
  28 #include <sstream>
  29 #include <iomanip>
  30 #include <cctype>
  31 #include <unordered_map>
  32
     // Two-level stringification helpers.
     // STRINGIFY(x) turns the tokens of x into a string literal exactly as
     // written; a macro passed as x is NOT expanded because of the # operator.
  33 #define STRINGIFY(x) #x
     // TOSTRING(x) forwards to STRINGIFY so that x is macro-expanded first,
     // e.g. with "#define N 5", TOSTRING(N) yields "5" while STRINGIFY(N) yields "N".
  34 #define TOSTRING(x) STRINGIFY(x)
  35
  36 // Checks whether a value is an ASCII printable character, i.e. whether it
     // lies in the inclusive range 0x20 (space) .. 0x7e ('~').
     // The value is converted to unsigned int first, so a negative input wraps
     // to a large number and is rejected by the upper bound.
     // NOTE: being a macro, the argument expression is evaluated twice.
  37 #define IS_ASCII_PRINTABLE_CHAR(x)   \
  38         (((unsigned int)(x) >= 0x20) &&  \
  39         ( (unsigned int)(x) <= 0x7e))
  40
  41 // Checks whether a byte is an inner (continuation) byte of a UTF-8
     // multibyte sequence, i.e. lies in the inclusive range 0x80 .. 0xbf.
     // The argument is converted to unsigned char first, so plain (possibly
     // signed) char values can be passed directly.
     // NOTE: being a macro, the argument expression is evaluated twice.
  42 #define IS_UTF8_MULTB_INNER(x)       \
  43         (((unsigned char)(x) >= 0x80) && \
  44         ( (unsigned char)(x) <= 0xbf))
  45
  46 // Checks whether a byte is a start byte of a UTF-8 multibyte sequence,
     // i.e. lies in the inclusive range 0xc2 .. 0xf4. Bytes 0xc0, 0xc1 and
     // 0xf5 .. 0xff fall outside that range and are therefore rejected.
     // The argument is converted to unsigned char first.
     // NOTE: being a macro, the argument expression is evaluated twice.
  47 #define IS_UTF8_MULTB_START(x)       \
  48         (((unsigned char)(x) >= 0xc2) && \
  49         ( (unsigned char)(x) <= 0xf4))
  50
  51 // Given a start byte x for an utf-8 multibyte sequence
  52 // it gives the length of the whole sequence in bytes:
     // 2 for bytes below 0xe0, 3 for bytes below 0xf0, 4 otherwise.
     // The macro does not validate x: any byte below 0xe0 (including ASCII and
     // continuation bytes) yields 2, so check IS_UTF8_MULTB_START(x) first.
     // NOTE: being a macro, the argument expression may be evaluated twice.
  53 #define UTF8_MULTB_START_LEN(x)            \
  54         (((unsigned char)(x) < 0xe0) ? 2 :     \
  55         (((unsigned char)(x) < 0xf0) ? 3 : 4))
  56
  57 typedef std::unordered_map<std::string, std::string> StringMap;
  58
     // One entry of a flag description table, as taken (via const FlagDesc *)
     // by readFlagString() and writeFlagString() declared further down.
  59 struct FlagDesc {
     	// Textual name of the flag (plain pointer; ownership is not visible here).
  60         const char *name;
     	// Numeric value paired with name; presumably a bit mask -- TODO confirm.
  61         u32 flag;
  62 };
  63
  64 // Conversions between UTF-8 (std::string) and wide (std::wstring) strings.
     // Try not to convert between wide/utf8 encodings; this can result in data loss.
  65 // Try to only convert between them when you need to input/output stuff via Irrlicht.
  66 std::wstring utf8_to_wide(const std::string &input);
  67 std::string wide_to_utf8(const std::wstring &input);
  68
     // C-string flavour of utf8_to_wide().
     // NOTE(review): ownership of the returned buffer is not visible in this
     // header -- presumably the caller has to release it; confirm against the
     // definition before freeing.
  69 wchar_t *utf8_to_wide_c(const char *str);
  70
  71 // NEVER use the narrow/wide functions below unless you have a VERY GOOD reason to.
  72 // They just convert between wide and multibyte encoding;
  73 // multibyte encoding depends on the current locale, which is no good, especially on Windows.
  74
  75 // You must free the string returned by narrow_to_wide_c()!
  76 // The returned string is allocated using new
     // (NOTE(review): presumably new[], i.e. to be released with delete[] --
     // confirm against the definition).
  77 wchar_t *narrow_to_wide_c(const char *str);
  78 std::wstring narrow_to_wide(const std::string &mbs);
  79 std::string wide_to_narrow(const std::wstring &wcs);
  80
     // Miscellaneous string helpers. Only the declarations are visible in this
     // header; their exact behaviour is defined elsewhere.
  81 std::string urlencode(const std::string &str);
  82 std::string urldecode(const std::string &str);
     // Flag-string helpers working on a table of FlagDesc entries.
     // NOTE(review): flagmask of readFlagString() is a pointer, presumably an
     // optional out-parameter -- confirm against the definition.
  83 u32 readFlagString(std::string str, const FlagDesc *flagdesc, u32 *flagmask);
  84 std::string writeFlagString(u32 flags, const FlagDesc *flagdesc, u32 flagmask);
     // NOTE(review): presumably portable stand-ins for strlcpy(3) and
     // strtok_r(3) -- confirm against the definitions.
  85 size_t mystrlcpy(char *dst, const char *src, size_t size);
  86 char *mystrtok_r(char *s, const char *sep, char **lasts);
  87 u64 read_seed(const char *str);
     // default_alpha is 0xff when the caller does not pass it.
  88 bool parseColorString(const std::string &value, video::SColor &color, bool quiet,
  89                 unsigned char default_alpha = 0xff);
  90
  91
  92 /**
  93  * Returns a copy of \p str with spaces inserted at the right hand side to ensure
  94  * that the string is \p len characters in length. If \p str is <= \p len then the
  95  * returned string will be identical to str.
  96  */
  97 inline std::string padStringRight(std::string str, size_t len)
  98 {
  99         if (len > str.size())
 100                 str.insert(str.end(), len - str.size(), ' ');
 101
 102         return str;
 103 }
 104
 105 /**
 106  * Returns a version of \p str with the first occurrence of a string
 107  * contained within ends[] removed from the end of the string.
 108  *
 109  * @param str
 110  * @param ends A NULL- or ""- terminated array of strings to remove from s in
 111  *      the copy produced.  Note that once one of these strings is removed
 112  *      that no further postfixes contained within this array are removed.
 113  *
 114  * @return If no end could be removed then "" is returned.
 115  */
 116 inline std::string removeStringEnd(const std::string &str,
 117                 const char *ends[])
 118 {
 119         const char **p = ends;
 120
 121         for (; *p && (*p)[0] != '\0'; p++) {
 122                 std::string end = *p;
 123                 if (str.size() < end.size())
 124                         continue;
 125                 if (str.compare(str.size() - end.size(), end.size(), end) == 0)
 126                         return str.substr(0, str.size() - end.size());
 127         }
 128
 129         return "";
 130 }
 131
 132
 133 /**
 134  * Check two strings for equivalence.  If \p case_insensitive is true
 135  * then the case of the strings is ignored (default is false).
 136  *
 137  * @param s1
 138  * @param s2
 139  * @param case_insensitive
 140  * @return true if the strings match
 141  */
 142 template <typename T>
 143 inline bool str_equal(const std::basic_string<T> &s1,
 144                 const std::basic_string<T> &s2,
 145                 bool case_insensitive = false)
 146 {
 147         if (!case_insensitive)
 148                 return s1 == s2;
 149
 150         if (s1.size() != s2.size())
 151                 return false;
 152
 153         for (size_t i = 0; i < s1.size(); ++i)
 154                 if(tolower(s1[i]) != tolower(s2[i]))
 155                         return false;
 156
 157         return true;
 158 }
 159
 160
 161 /**
 162  * Check whether \p str begins with the string prefix. If \p case_insensitive
 163  * is true then the check is case insensitve (default is false; i.e. case is
 164  * significant).
 165  *
 166  * @param str
 167  * @param prefix
 168  * @param case_insensitive
 169  * @return true if the str begins with prefix
 170  */
 171 template <typename T>
 172 inline bool str_starts_with(const std::basic_string<T> &str,
 173                 const std::basic_string<T> &prefix,
 174                 bool case_insensitive = false)
 175 {
 176         if (str.size() < prefix.size())
 177                 return false;
 178
 179         if (!case_insensitive)
 180                 return str.compare(0, prefix.size(), prefix) == 0;
 181
 182         for (size_t i = 0; i < prefix.size(); ++i)
 183                 if (tolower(str[i]) != tolower(prefix[i]))
 184                         return false;
 185         return true;
 186 }
 187
 188 /**
 189  * Check whether \p str begins with the string prefix. If \p case_insensitive
 190  * is true then the check is case insensitve (default is false; i.e. case is
 191  * significant).
 192  *
 193  * @param str
 194  * @param prefix
 195  * @param case_insensitive
 196  * @return true if the str begins with prefix
 197  */
 198 template <typename T>
 199 inline bool str_starts_with(const std::basic_string<T> &str,
 200                 const T *prefix,
 201                 bool case_insensitive = false)
 202 {
 203         return str_starts_with(str, std::basic_string<T>(prefix),
 204                         case_insensitive);
 205 }
 206
 207
 208 /**
 209  * Check whether \p str ends with the string suffix. If \p case_insensitive
 210  * is true then the check is case insensitve (default is false; i.e. case is
 211  * significant).
 212  *
 213  * @param str
 214  * @param suffix
 215  * @param case_insensitive
 216  * @return true if the str begins with suffix
 217  */
 218 template <typename T>
 219 inline bool str_ends_with(const std::basic_string<T> &str,
 220                 const std::basic_string<T> &suffix,
 221                 bool case_insensitive = false)
 222 {
 223         if (str.size() < suffix.size())
 224                 return false;
 225
 226         size_t start = str.size() - suffix.size();
 227         if (!case_insensitive)
 228                 return str.compare(start, suffix.size(), suffix) == 0;
 229
 230         for (size_t i = 0; i < suffix.size(); ++i)
 231                 if (tolower(str[start + i]) != tolower(suffix[i]))
 232                         return false;
 233         return true;
 234 }
 235
 236
 237 /**
 238  * Check whether \p str ends with the string suffix. If \p case_insensitive
 239  * is true then the check is case insensitve (default is false; i.e. case is
 240  * significant).
 241  *
 242  * @param str
 243  * @param suffix
 244  * @param case_insensitive
 245  * @return true if the str begins with suffix
 246  */
 247 template <typename T>
 248 inline bool str_ends_with(const std::basic_string<T> &str,
 249                 const T *suffix,
 250                 bool case_insensitive = false)
 251 {
 252         return str_ends_with(str, std::basic_string<T>(suffix),
 253                         case_insensitive);
 254 }
 255
 256
 257 /**
 258  * Splits a string into its component parts separated by the character
 259  * \p delimiter.
 260  *
 261  * @return An std::vector<std::basic_string<T> > of the component parts
 262  */
 263 template <typename T>
 264 inline std::vector<std::basic_string<T> > str_split(
 265                 const std::basic_string<T> &str,
 266                 T delimiter)
 267 {
 268         std::vector<std::basic_string<T> > parts;
 269         std::basic_stringstream<T> sstr(str);
 270         std::basic_string<T> part;
 271
 272         while (std::getline(sstr, part, delimiter))
 273                 parts.push_back(part);
 274
 275         return parts;
 276 }
 277
 278
 279 /**
 280  * @param str
 281  * @return A copy of \p str converted to all lowercase characters.
 282  */
 283 inline std::string lowercase(const std::string &str)
 284 {
 285         std::string s2;
 286
 287         s2.reserve(str.size());
 288
 289         for (char i : str)
 290                 s2 += tolower(i);
 291
 292         return s2;
 293 }
 294
 295
 296 /**
 297  * @param str
 298  * @return A copy of \p str with leading and trailing whitespace removed.
 299  */
 300 inline std::string trim(const std::string &str)
 301 {
 302         size_t front = 0;
 303
 304         while (std::isspace(str[front]))
 305                 ++front;
 306
 307         size_t back = str.size();
 308         while (back > front && std::isspace(str[back - 1]))
 309                 --back;
 310
 311         return str.substr(front, back - front);
 312 }
 313
 314
 315 /**
 316  * Returns whether \p str should be regarded as (bool) true.  Case and leading
 317  * and trailing whitespace are ignored.  Values that will return
 318  * true are "y", "yes", "true" and any number that is not 0.
 319  * @param str
 320  */
 321 inline bool is_yes(const std::string &str)
 322 {
 323         std::string s2 = lowercase(trim(str));
 324
 325         return s2 == "y" || s2 == "yes" || s2 == "true" || atoi(s2.c_str()) != 0;
 326 }
 327
 328
 329 /**
 330  * Converts the string \p str to a signed 32-bit integer. The converted value
 331  * is constrained so that min <= value <= max.
 332  *
 333  * @see atoi(3) for limitations
 334  *
 335  * @param str
 336  * @param min Range minimum
 337  * @param max Range maximum
 338  * @return The value converted to a signed 32-bit integer and constrained
 339  *      within the range defined by min and max (inclusive)
 340  */
 341 inline s32 mystoi(const std::string &str, s32 min, s32 max)
 342 {
 343         s32 i = atoi(str.c_str());
 344
 345         if (i < min)
 346                 i = min;
 347         if (i > max)
 348                 i = max;
 349
 350         return i;
 351 }
 352
 353
 354 // MSVC2010 includes its own versions of these
 355 //#if !defined(_MSC_VER) || _MSC_VER < 1600
 356
 357
 358 /**
 359  * Returns a 32-bit value reprensented by the string \p str (decimal).
 360  * @see atoi(3) for further limitations
 361  */
 362 inline s32 mystoi(const std::string &str)
 363 {
 364         return atoi(str.c_str());
 365 }
 366
 367
 368 /**
 369  * Returns s 32-bit value represented by the wide string \p str (decimal).
 370  * @see atoi(3) for further limitations
 371  */
 372 inline s32 mystoi(const std::wstring &str)
 373 {
 374         return mystoi(wide_to_narrow(str));
 375 }
 376
 377
 378 /**
 379  * Returns a float reprensented by the string \p str (decimal).
 380  * @see atof(3)
 381  */
 382 inline float mystof(const std::string &str)
 383 {
 384         return atof(str.c_str());
 385 }
 386
 387 //#endif
 388
     // Redirect the names stoi/stof to the helpers defined above.
     // NOTE: these are object-like macros, so every later occurrence of the
     // tokens stoi / stof in a translation unit that includes this header is
     // rewritten -- including qualified uses such as std::stoi.
 389 #define stoi mystoi
 390 #define stof mystof
 391
 392 /// Returns a value represented by the string \p val.
 393 template <typename T>
 394 inline T from_string(const std::string &str)
 395 {
 396         std::stringstream tmp(str);
 397         T t;
 398         tmp >> t;
 399         return t;
 400 }
 401
 402 /// Returns a 64-bit signed value represented by the string \p str (decimal).
 403 inline s64 stoi64(const std::string &str) { return from_string<s64>(str); }
 404
     // Fallback for compilers that report a pre-C++11 language level: provides
     // std::to_string() and std::to_wstring(), which such standard libraries lack.
     // NOTE(review): other parts of this header already rely on C++11 (range-based
     // for, std::unordered_map), so this branch looks unreachable in practice;
     // adding declarations to namespace std is also not sanctioned by the standard.
 405 #if __cplusplus < 201103L
 406 namespace std {
 407
 408 /// Returns a string representing the value \p val, formatted by operator<<.
 409 template <typename T>
 410 inline string to_string(T val)
 411 {
 412         ostringstream oss;
 413         oss << val;
 414         return oss.str();
 415 }
     // Generates a to_string() specialization for the floating point type T that
     // prints in fixed notation with 6 digits after the decimal point.
 416 #define DEFINE_STD_TOSTRING_FLOATINGPOINT(T)            \
 417         template <>                                     \
 418         inline string to_string<T>(T val)               \
 419         {                                               \
 420                 ostringstream oss;                      \
 421                 oss << std::fixed                       \
 422                         << std::setprecision(6)         \
 423                         << val;                         \
 424                 return oss.str();                       \
 425         }
 426 DEFINE_STD_TOSTRING_FLOATINGPOINT(float)
 427 DEFINE_STD_TOSTRING_FLOATINGPOINT(double)
 428 DEFINE_STD_TOSTRING_FLOATINGPOINT(long double)
 429
     // The helper macro is only needed for the three specializations above.
 430 #undef DEFINE_STD_TOSTRING_FLOATINGPOINT
 431
 432 /// Returns a wide string representing the value \p val, built by converting
     /// the result of to_string(val) with utf8_to_wide().
 433 template <typename T>
 434 inline wstring to_wstring(T val)
 435 {
 436       return utf8_to_wide(to_string(val));
 437 }
 438 }
 439 #endif
 440
 441 /// Returns a string representing the decimal value of the 32-bit value \p i.
 442 inline std::string itos(s32 i) { return std::to_string(i); }
 443 /// Returns a string representing the decimal value of the 64-bit value \p i.
 444 inline std::string i64tos(s64 i) { return std::to_string(i); }
 445
 446 // std::to_string uses the '%.6f' conversion, which is inconsistent with
 447 // std::ostream::operator<<() and impractical too.  ftos() uses the
 448 // more generic and std::ostream::operator<<()-compatible '%G' format.
 449 /// Returns a string representing the decimal value of the float value \p f.
 450 inline std::string ftos(float f)
 451 {
 452         std::ostringstream oss;
 453         oss << f;
 454         return oss.str();
 455 }
 456
 457
 458 /**
 459  * Replace all occurrences of \p pattern in \p str with \p replacement.
 460  *
 461  * @param str String to replace pattern with replacement within.
 462  * @param pattern The pattern to replace.
 463  * @param replacement What to replace the pattern with.
 464  */
 465 inline void str_replace(std::string &str, const std::string &pattern,
 466                 const std::string &replacement)
 467 {
 468         std::string::size_type start = str.find(pattern, 0);
 469         while (start != str.npos) {
 470                 str.replace(start, pattern.size(), replacement);
 471                 start = str.find(pattern, start + replacement.size());
 472         }
 473 }
 474
 475 /**
 476  * Escapes characters [ ] \ , ; that can not be used in formspecs
 477  */
 478 inline void str_formspec_escape(std::string &str)
 479 {
 480         str_replace(str, "\\", "\\\\");
 481         str_replace(str, "]", "\\]");
 482         str_replace(str, "[", "\\[");
 483         str_replace(str, ";", "\\;");
 484         str_replace(str, ",", "\\,");
 485 }
 486
 487 /**
 488  * Replace all occurrences of the character \p from in \p str with \p to.
 489  *
      * Character overload of str_replace(); \p str is modified in place and
      * nothing is returned. Only the declaration is part of this header, the
      * definition lives elsewhere.
      *
 490  * @param str The string to (potentially) modify.
 491  * @param from The character in str to replace.
 492  * @param to The replacement character.
 493  */
 494 void str_replace(std::string &str, char from, char to);
 495
 496
 497 /**
 498  * Check that a string only contains whitelisted characters. This is the
 499  * opposite of string_allowed_blacklist().
 500  *
 501  * @param str The string to be checked.
 502  * @param allowed_chars A string containing permitted characters.
 503  * @return true if the string is allowed, otherwise false.
 504  *
 505  * @see string_allowed_blacklist()
 506  */
 507 inline bool string_allowed(const std::string &str, const std::string &allowed_chars)
 508 {
 509         return str.find_first_not_of(allowed_chars) == str.npos;
 510 }
 511
 512
 513 /**
 514  * Check that a string contains no blacklisted characters. This is the
 515  * opposite of string_allowed().
 516  *
 517  * @param str The string to be checked.
 518  * @param blacklisted_chars A string containing prohibited characters.
 519  * @return true if the string is allowed, otherwise false.
 520
 521  * @see string_allowed()
 522  */
 523 inline bool string_allowed_blacklist(const std::string &str,
 524                 const std::string &blacklisted_chars)
 525 {
 526         return str.find_first_of(blacklisted_chars) == str.npos;
 527 }
 528
 529
 530 /**
 531  * Create a string based on \p from where a newline is forcefully inserted
 532  * every \p row_len characters.
 533  *
 534  * @note This function does not honour word wraps and blindy inserts a newline
 535  *      every \p row_len characters whether it breaks a word or not.  It is
 536  *      intended to be used for, for example, showing paths in the GUI.
 537  *
 538  * @note This function doesn't wrap inside utf-8 multibyte sequences and also
 539  *      counts multibyte sequences correcly as single characters.
 540  *
 541  * @param from The (utf-8) string to be wrapped into rows.
 542  * @param row_len The row length (in characters).
 543  * @return A new string with the wrapping applied.
 544  */
 545 inline std::string wrap_rows(const std::string &from,
 546                 unsigned row_len)
 547 {
 548         std::string to;
 549
 550         size_t character_idx = 0;
 551         for (size_t i = 0; i < from.size(); i++) {
 552                 if (!IS_UTF8_MULTB_INNER(from[i])) {
 553                         // Wrap string after last inner byte of char
 554                         if (character_idx > 0 && character_idx % row_len == 0)
 555                                 to += '\n';
 556                         character_idx++;
 557                 }
 558                 to += from[i];
 559         }
 560
 561         return to;
 562 }
 563
 564
 565 /**
 566  * Removes backslashes from an escaped string (FormSpec strings)
 567  */
 568 template <typename T>
 569 inline std::basic_string<T> unescape_string(const std::basic_string<T> &s)
 570 {
 571         std::basic_string<T> res;
 572
 573         for (size_t i = 0; i < s.length(); i++) {
 574                 if (s[i] == '\\') {
 575                         i++;
 576                         if (i >= s.length())
 577                                 break;
 578                 }
 579                 res += s[i];
 580         }
 581
 582         return res;
 583 }
 584
 585 /**
 586  * Remove all escape sequences in \p s.
 587  *
 588  * @param s The string in which to remove escape sequences.
 589  * @return \p s, with escape sequences removed.
 590  */
 591 template <typename T>
 592 std::basic_string<T> unescape_enriched(const std::basic_string<T> &s)
 593 {
 594         std::basic_string<T> output;
 595         size_t i = 0;
 596         while (i < s.length()) {
 597                 if (s[i] == '\x1b') {
 598                         ++i;
 599                         if (i == s.length()) continue;
 600                         if (s[i] == '(') {
 601                                 ++i;
 602                                 while (i < s.length() && s[i] != ')') {
 603                                         if (s[i] == '\\') {
 604                                                 ++i;
 605                                         }
 606                                         ++i;
 607                                 }
 608                                 ++i;
 609                         } else {
 610                                 ++i;
 611                         }
 612                         continue;
 613                 }
 614                 output += s[i];
 615                 ++i;
 616         }
 617         return output;
 618 }
 619
 620 template <typename T>
 621 std::vector<std::basic_string<T> > split(const std::basic_string<T> &s, T delim)
 622 {
 623         std::vector<std::basic_string<T> > tokens;
 624
 625         std::basic_string<T> current;
 626         bool last_was_escape = false;
 627         for (size_t i = 0; i < s.length(); i++) {
 628                 T si = s[i];
 629                 if (last_was_escape) {
 630                         current += '\\';
 631                         current += si;
 632                         last_was_escape = false;
 633                 } else {
 634                         if (si == delim) {
 635                                 tokens.push_back(current);
 636                                 current = std::basic_string<T>();
 637                                 last_was_escape = false;
 638                         } else if (si == '\\') {
 639                                 last_was_escape = true;
 640                         } else {
 641                                 current += si;
 642                                 last_was_escape = false;
 643                         }
 644                 }
 645         }
 646         //push last element
 647         tokens.push_back(current);
 648
 649         return tokens;
 650 }
 651
 652 std::wstring translate_string(const std::wstring &s);
 653
 654 inline std::wstring unescape_translate(const std::wstring &s) {
 655         return unescape_enriched(translate_string(s));
 656 }
 657
 658 /**
 659  * Checks that all characters in \p to_check are a decimal digits.
 660  *
 661  * @param to_check
 662  * @return true if to_check is not empty and all characters in to_check are
 663  *      decimal digits, otherwise false
 664  */
 665 inline bool is_number(const std::string &to_check)
 666 {
 667         for (char i : to_check)
 668                 if (!std::isdigit(i))
 669                         return false;
 670
 671         return !to_check.empty();
 672 }
 673
 674
 675 /**
 676  * Returns a C-string, either "true" or "false", corresponding to \p val.
 677  *
 678  * @return If \p val is true, then "true" is returned, otherwise "false".
 679  */
 680 inline const char *bool_to_cstr(bool val)
 681 {
 682         return val ? "true" : "false";
 683 }
 684
 685 inline const std::string duration_to_string(int sec)
 686 {
 687         int min = sec / 60;
 688         sec %= 60;
 689         int hour = min / 60;
 690         min %= 60;
 691
 692         std::stringstream ss;
 693         if (hour > 0) {
 694                 ss << hour << "h ";
 695         }
 696
 697         if (min > 0) {
 698                 ss << min << "m ";
 699         }
 700
 701         if (sec > 0) {
 702                 ss << sec << "s ";
 703         }
 704
 705         return ss.str();
 706 }
 707
 708 /**
 709  * Joins a vector of strings by the string \p delimiter.
 710  *
 711  * @return A std::string
 712  */
 713 inline std::string str_join(const std::vector<std::string> &list,
 714                 const std::string &delimiter)
 715 {
 716         std::ostringstream oss;
 717         bool first = true;
 718         for (const auto &part : list) {
 719                 if (!first)
 720                         oss << delimiter;
 721                 oss << part;
 722                 first = false;
 723         }
 724         return oss.str();
 725 }