src/irrlicht_changes/irrUString.h

   1 /*
   2    Basic Unicode string class for Irrlicht.
   3    Copyright (c) 2009-2011 John Norman
   4
   5    This software is provided 'as-is', without any express or implied
   6    warranty. In no event will the authors be held liable for any
   7    damages arising from the use of this software.
   8
   9    Permission is granted to anyone to use this software for any
  10    purpose, including commercial applications, and to alter it and
  11    redistribute it freely, subject to the following restrictions:
  12
  13    1. The origin of this software must not be misrepresented; you
  14       must not claim that you wrote the original software. If you use
  15       this software in a product, an acknowledgment in the product
  16       documentation would be appreciated but is not required.
  17
  18    2. Altered source versions must be plainly marked as such, and
  19       must not be misrepresented as being the original software.
  20
  21    3. This notice may not be removed or altered from any source
  22       distribution.
  23
  24    The original version of this class can be located at:
  25    http://irrlicht.suckerfreegames.com/
  26
  27    John Norman
  28    john@suckerfreegames.com
  29 */
  30
  31 #ifndef __IRR_USTRING_H_INCLUDED__
  32 #define __IRR_USTRING_H_INCLUDED__
  33
  34 #if (__cplusplus > 199711L) || (_MSC_VER >= 1600) || defined(__GXX_EXPERIMENTAL_CXX0X__)
  35 #       define USTRING_CPP0X
  36 #       if defined(__GXX_EXPERIMENTAL_CXX0X__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 5)))
  37 #               define USTRING_CPP0X_NEWLITERALS
  38 #       endif
  39 #endif
  40
  41 #include <stdio.h>
  42 #include <string.h>
  43 #include <stdlib.h>
  44 #include <cstddef>
  45
  46 #ifdef _WIN32
  47 #define __BYTE_ORDER 0
  48 #define __LITTLE_ENDIAN 0
  49 #define __BIG_ENDIAN 1
  50 #elif defined(__MACH__) && defined(__APPLE__)
  51 #include <machine/endian.h>
  52 #elif defined(__FreeBSD__)
  53 #include <sys/endian.h>
  54 #else
  55 #include <endian.h>
  56 #endif
  57
  58 #ifdef USTRING_CPP0X
  59 #       include <utility>
  60 #endif
  61
  62 #ifndef USTRING_NO_STL
  63 #       include <string>
  64 #       include <iterator>
  65 #       include <ostream>
  66 #endif
  67
  68 #include "irrTypes.h"
  69 #include "irrAllocator.h"
  70 #include "irrArray.h"
  71 #include "irrMath.h"
  72 #include "irrString.h"
  73 #include "path.h"
  74
  75 //! UTF-16 surrogate start values.
  76 static const irr::u16 UTF16_HI_SURROGATE = 0xD800;
  77 static const irr::u16 UTF16_LO_SURROGATE = 0xDC00;
  78
//! Is a UTF-16 code unit a surrogate (either half, the high half, or the low half)?
  80 #define UTF16_IS_SURROGATE(c)           (((c) & 0xF800) == 0xD800)
  81 #define UTF16_IS_SURROGATE_HI(c)        (((c) & 0xFC00) == 0xD800)
  82 #define UTF16_IS_SURROGATE_LO(c)        (((c) & 0xFC00) == 0xDC00)
  83
  84
  85 namespace irr
  86 {
  87
  88         // Define our character types.
  89 #ifdef USTRING_CPP0X_NEWLITERALS        // C++0x
  90         typedef char32_t uchar32_t;
  91         typedef char16_t uchar16_t;
  92         typedef char uchar8_t;
  93 #else
  94         typedef u32 uchar32_t;
  95         typedef u16 uchar16_t;
  96         typedef u8 uchar8_t;
  97 #endif
  98
  99 namespace core
 100 {
 101
 102 namespace unicode
 103 {
 104
 105 //! The unicode replacement character.  Used to replace invalid characters.
 106 const irr::u16 UTF_REPLACEMENT_CHARACTER = 0xFFFD;
 107
 108 //! Convert a UTF-16 surrogate pair into a UTF-32 character.
 109 //! \param high The high value of the pair.
 110 //! \param low The low value of the pair.
 111 //! \return The UTF-32 character expressed by the surrogate pair.
 112 inline uchar32_t toUTF32(uchar16_t high, uchar16_t low)
 113 {
 114         // Convert the surrogate pair into a single UTF-32 character.
 115         uchar32_t x = ((high & ((1 << 6) -1)) << 10) | (low & ((1 << 10) -1));
 116         uchar32_t wu = ((high >> 6) & ((1 << 5) - 1)) + 1;
 117         return (wu << 16) | x;
 118 }
 119
 120 //! Swaps the endianness of a 16-bit value.
 121 //! \return The new value.
 122 inline uchar16_t swapEndian16(const uchar16_t& c)
 123 {
 124         return ((c >> 8) & 0x00FF) | ((c << 8) & 0xFF00);
 125 }
 126
 127 //! Swaps the endianness of a 32-bit value.
 128 //! \return The new value.
 129 inline uchar32_t swapEndian32(const uchar32_t& c)
 130 {
 131         return  ((c >> 24) & 0x000000FF) |
 132                         ((c >> 8)  & 0x0000FF00) |
 133                         ((c << 8)  & 0x00FF0000) |
 134                         ((c << 24) & 0xFF000000);
 135 }
 136
 137 //! The Unicode byte order mark.
 138 const u16 BOM = 0xFEFF;
 139
 140 //! The size of the Unicode byte order mark in terms of the Unicode character size.
 141 const u8 BOM_UTF8_LEN = 3;
 142 const u8 BOM_UTF16_LEN = 1;
 143 const u8 BOM_UTF32_LEN = 1;
 144
 145 //! Unicode byte order marks for file operations.
 146 const u8 BOM_ENCODE_UTF8[3] = { 0xEF, 0xBB, 0xBF };
 147 const u8 BOM_ENCODE_UTF16_BE[2] = { 0xFE, 0xFF };
 148 const u8 BOM_ENCODE_UTF16_LE[2] = { 0xFF, 0xFE };
 149 const u8 BOM_ENCODE_UTF32_BE[4] = { 0x00, 0x00, 0xFE, 0xFF };
 150 const u8 BOM_ENCODE_UTF32_LE[4] = { 0xFF, 0xFE, 0x00, 0x00 };
 151
//! The size in bytes of the Unicode byte order marks for file operations.
 153 const u8 BOM_ENCODE_UTF8_LEN = 3;
 154 const u8 BOM_ENCODE_UTF16_LEN = 2;
 155 const u8 BOM_ENCODE_UTF32_LEN = 4;
 156
 157 //! Unicode encoding type.
 158 enum EUTF_ENCODE
 159 {
 160         EUTFE_NONE              = 0,
 161         EUTFE_UTF8,
 162         EUTFE_UTF16,
 163         EUTFE_UTF16_LE,
 164         EUTFE_UTF16_BE,
 165         EUTFE_UTF32,
 166         EUTFE_UTF32_LE,
 167         EUTFE_UTF32_BE
 168 };
 169
 170 //! Unicode endianness.
 171 enum EUTF_ENDIAN
 172 {
 173         EUTFEE_NATIVE   = 0,
 174         EUTFEE_LITTLE,
 175         EUTFEE_BIG
 176 };
 177
 178 //! Returns the specified unicode byte order mark in a byte array.
 179 //! The byte order mark is the first few bytes in a text file that signifies its encoding.
 180 /** \param mode The Unicode encoding method that we want to get the byte order mark for.
 181                 If EUTFE_UTF16 or EUTFE_UTF32 is passed, it uses the native system endianness. **/
 182 //! \return An array that contains a byte order mark.
 183 inline core::array<u8> getUnicodeBOM(EUTF_ENCODE mode)
 184 {
 185 #define COPY_ARRAY(source, size) \
 186         memcpy(ret.pointer(), source, size); \
 187         ret.set_used(size)
 188
 189         core::array<u8> ret(4);
 190         switch (mode)
 191         {
 192                 case EUTFE_UTF8:
 193                         COPY_ARRAY(BOM_ENCODE_UTF8, BOM_ENCODE_UTF8_LEN);
 194                         break;
 195                 case EUTFE_UTF16:
 196                         #ifdef __BIG_ENDIAN__
 197                                 COPY_ARRAY(BOM_ENCODE_UTF16_BE, BOM_ENCODE_UTF16_LEN);
 198                         #else
 199                                 COPY_ARRAY(BOM_ENCODE_UTF16_LE, BOM_ENCODE_UTF16_LEN);
 200                         #endif
 201                         break;
 202                 case EUTFE_UTF16_BE:
 203                         COPY_ARRAY(BOM_ENCODE_UTF16_BE, BOM_ENCODE_UTF16_LEN);
 204                         break;
 205                 case EUTFE_UTF16_LE:
 206                         COPY_ARRAY(BOM_ENCODE_UTF16_LE, BOM_ENCODE_UTF16_LEN);
 207                         break;
 208                 case EUTFE_UTF32:
 209                         #ifdef __BIG_ENDIAN__
 210                                 COPY_ARRAY(BOM_ENCODE_UTF32_BE, BOM_ENCODE_UTF32_LEN);
 211                         #else
 212                                 COPY_ARRAY(BOM_ENCODE_UTF32_LE, BOM_ENCODE_UTF32_LEN);
 213                         #endif
 214                         break;
 215                 case EUTFE_UTF32_BE:
 216                         COPY_ARRAY(BOM_ENCODE_UTF32_BE, BOM_ENCODE_UTF32_LEN);
 217                         break;
 218                 case EUTFE_UTF32_LE:
 219                         COPY_ARRAY(BOM_ENCODE_UTF32_LE, BOM_ENCODE_UTF32_LEN);
 220                         break;
 221                 case EUTFE_NONE:
 222                         // TODO sapier: fixed warning only,
 223                         // don't know if something needs to be done here
 224                         break;
 225         }
 226         return ret;
 227
 228 #undef COPY_ARRAY
 229 }
 230
 231 //! Detects if the given data stream starts with a unicode BOM.
 232 //! \param data The data stream to check.
 233 //! \return The unicode BOM associated with the data stream, or EUTFE_NONE if none was found.
 234 inline EUTF_ENCODE determineUnicodeBOM(const char* data)
 235 {
 236         if (memcmp(data, BOM_ENCODE_UTF8, 3) == 0) return EUTFE_UTF8;
 237         if (memcmp(data, BOM_ENCODE_UTF16_BE, 2) == 0) return EUTFE_UTF16_BE;
 238         if (memcmp(data, BOM_ENCODE_UTF16_LE, 2) == 0) return EUTFE_UTF16_LE;
 239         if (memcmp(data, BOM_ENCODE_UTF32_BE, 4) == 0) return EUTFE_UTF32_BE;
 240         if (memcmp(data, BOM_ENCODE_UTF32_LE, 4) == 0) return EUTFE_UTF32_LE;
 241         return EUTFE_NONE;
 242 }
 243
 244 } // end namespace unicode
 245
 246
 247 //! UTF-16 string class.
 248 template <typename TAlloc = irrAllocator<uchar16_t> >
 249 class ustring16
 250 {
 251 public:
 252
 253         ///------------------///
 254         /// iterator classes ///
 255         ///------------------///
 256
 257         //! Access an element in a unicode string, allowing one to change it.
 258         class _ustring16_iterator_access
 259         {
 260                 public:
 261                         _ustring16_iterator_access(const ustring16<TAlloc>* s, u32 p) : ref(s), pos(p) {}
 262
 263                         //! Allow the class to be interpreted as a single UTF-32 character.
 264                         operator uchar32_t() const
 265                         {
 266                                 return _get();
 267                         }
 268
 269                         //! Allow one to change the character in the unicode string.
 270                         //! \param c The new character to use.
 271                         //! \return Myself.
 272                         _ustring16_iterator_access& operator=(const uchar32_t c)
 273                         {
 274                                 _set(c);
 275                                 return *this;
 276                         }
 277
 278                         //! Increments the value by 1.
 279                         //! \return Myself.
 280                         _ustring16_iterator_access& operator++()
 281                         {
 282                                 _set(_get() + 1);
 283                                 return *this;
 284                         }
 285
 286                         //! Increments the value by 1, returning the old value.
 287                         //! \return A unicode character.
 288                         uchar32_t operator++(int)
 289                         {
 290                                 uchar32_t old = _get();
 291                                 _set(old + 1);
 292                                 return old;
 293                         }
 294
 295                         //! Decrements the value by 1.
 296                         //! \return Myself.
 297                         _ustring16_iterator_access& operator--()
 298                         {
 299                                 _set(_get() - 1);
 300                                 return *this;
 301                         }
 302
 303                         //! Decrements the value by 1, returning the old value.
 304                         //! \return A unicode character.
 305                         uchar32_t operator--(int)
 306                         {
 307                                 uchar32_t old = _get();
 308                                 _set(old - 1);
 309                                 return old;
 310                         }
 311
 312                         //! Adds to the value by a specified amount.
 313                         //! \param val The amount to add to this character.
 314                         //! \return Myself.
 315                         _ustring16_iterator_access& operator+=(int val)
 316                         {
 317                                 _set(_get() + val);
 318                                 return *this;
 319                         }
 320
 321                         //! Subtracts from the value by a specified amount.
 322                         //! \param val The amount to subtract from this character.
 323                         //! \return Myself.
 324                         _ustring16_iterator_access& operator-=(int val)
 325                         {
 326                                 _set(_get() - val);
 327                                 return *this;
 328                         }
 329
 330                         //! Multiples the value by a specified amount.
 331                         //! \param val The amount to multiply this character by.
 332                         //! \return Myself.
 333                         _ustring16_iterator_access& operator*=(int val)
 334                         {
 335                                 _set(_get() * val);
 336                                 return *this;
 337                         }
 338
 339                         //! Divides the value by a specified amount.
 340                         //! \param val The amount to divide this character by.
 341                         //! \return Myself.
 342                         _ustring16_iterator_access& operator/=(int val)
 343                         {
 344                                 _set(_get() / val);
 345                                 return *this;
 346                         }
 347
 348                         //! Modulos the value by a specified amount.
 349                         //! \param val The amount to modulo this character by.
 350                         //! \return Myself.
 351                         _ustring16_iterator_access& operator%=(int val)
 352                         {
 353                                 _set(_get() % val);
 354                                 return *this;
 355                         }
 356
 357                         //! Adds to the value by a specified amount.
 358                         //! \param val The amount to add to this character.
 359                         //! \return A unicode character.
 360                         uchar32_t operator+(int val) const
 361                         {
 362                                 return _get() + val;
 363                         }
 364
 365                         //! Subtracts from the value by a specified amount.
 366                         //! \param val The amount to subtract from this character.
 367                         //! \return A unicode character.
 368                         uchar32_t operator-(int val) const
 369                         {
 370                                 return _get() - val;
 371                         }
 372
 373                         //! Multiplies the value by a specified amount.
 374                         //! \param val The amount to multiply this character by.
 375                         //! \return A unicode character.
 376                         uchar32_t operator*(int val) const
 377                         {
 378                                 return _get() * val;
 379                         }
 380
 381                         //! Divides the value by a specified amount.
 382                         //! \param val The amount to divide this character by.
 383                         //! \return A unicode character.
 384                         uchar32_t operator/(int val) const
 385                         {
 386                                 return _get() / val;
 387                         }
 388
 389                         //! Modulos the value by a specified amount.
 390                         //! \param val The amount to modulo this character by.
 391                         //! \return A unicode character.
 392                         uchar32_t operator%(int val) const
 393                         {
 394                                 return _get() % val;
 395                         }
 396
 397                 private:
 398                         //! Gets a uchar32_t from our current position.
 399                         uchar32_t _get() const
 400                         {
 401                                 const uchar16_t* a = ref->c_str();
 402                                 if (!UTF16_IS_SURROGATE(a[pos]))
 403                                         return static_cast<uchar32_t>(a[pos]);
 404                                 else
 405                                 {
 406                                         if (pos + 1 >= ref->size_raw())
 407                                                 return 0;
 408
 409                                         return unicode::toUTF32(a[pos], a[pos + 1]);
 410                                 }
 411                         }
 412
 413                         //! Sets a uchar32_t at our current position.
 414                         void _set(uchar32_t c)
 415                         {
 416                                 ustring16<TAlloc>* ref2 = const_cast<ustring16<TAlloc>*>(ref);
 417                                 const uchar16_t* a = ref2->c_str();
 418                                 if (c > 0xFFFF)
 419                                 {
 420                                         // c will be multibyte, so split it up into the high and low surrogate pairs.
 421                                         uchar16_t x = static_cast<uchar16_t>(c);
 422                                         uchar16_t vh = UTF16_HI_SURROGATE | ((((c >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
 423                                         uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
 424
 425                                         // If the previous position was a surrogate pair, just replace them.  Else, insert the low pair.
 426                                         if (UTF16_IS_SURROGATE_HI(a[pos]) && pos + 1 != ref2->size_raw())
 427                                                 ref2->replace_raw(vl, static_cast<u32>(pos) + 1);
 428                                         else ref2->insert_raw(vl, static_cast<u32>(pos) + 1);
 429
 430                                         ref2->replace_raw(vh, static_cast<u32>(pos));
 431                                 }
 432                                 else
 433                                 {
 434                                         // c will be a single byte.
 435                                         uchar16_t vh = static_cast<uchar16_t>(c);
 436
 437                                         // If the previous position was a surrogate pair, remove the extra byte.
 438                                         if (UTF16_IS_SURROGATE_HI(a[pos]))
 439                                                 ref2->erase_raw(static_cast<u32>(pos) + 1);
 440
 441                                         ref2->replace_raw(vh, static_cast<u32>(pos));
 442                                 }
 443                         }
 444
 445                         const ustring16<TAlloc>* ref;
 446                         u32 pos;
 447         };
 448         typedef typename ustring16<TAlloc>::_ustring16_iterator_access access;
 449
 450
 451         //! Iterator to iterate through a UTF-16 string.
 452 #ifndef USTRING_NO_STL
 453         class _ustring16_const_iterator : public std::iterator<
 454                 std::bidirectional_iterator_tag,        // iterator_category
 455                 access,                                                         // value_type
 456                 ptrdiff_t,                                                      // difference_type
 457                 const access,                                           // pointer
 458                 const access                                            // reference
 459         >
 460 #else
 461         class _ustring16_const_iterator
 462 #endif
 463         {
 464                 public:
 465                         typedef _ustring16_const_iterator _Iter;
 466                         typedef std::iterator<std::bidirectional_iterator_tag, access, ptrdiff_t, const access, const access> _Base;
 467                         typedef const access const_pointer;
 468                         typedef const access const_reference;
 469
 470 #ifndef USTRING_NO_STL
 471                         typedef typename _Base::value_type value_type;
 472                         typedef typename _Base::difference_type difference_type;
 473                         typedef typename _Base::difference_type distance_type;
 474                         typedef typename _Base::pointer pointer;
 475                         typedef const_reference reference;
 476 #else
 477                         typedef access value_type;
 478                         typedef u32 difference_type;
 479                         typedef u32 distance_type;
 480                         typedef const_pointer pointer;
 481                         typedef const_reference reference;
 482 #endif
 483
 484                         //! Constructors.
 485                         _ustring16_const_iterator(const _Iter& i) : ref(i.ref), pos(i.pos) {}
 486                         _ustring16_const_iterator(const ustring16<TAlloc>& s) : ref(&s), pos(0) {}
 487                         _ustring16_const_iterator(const ustring16<TAlloc>& s, const u32 p) : ref(&s), pos(0)
 488                         {
 489                                 if (ref->size_raw() == 0 || p == 0)
 490                                         return;
 491
 492                                 // Go to the appropriate position.
 493                                 u32 i = p;
 494                                 u32 sr = ref->size_raw();
 495                                 const uchar16_t* a = ref->c_str();
 496                                 while (i != 0 && pos < sr)
 497                                 {
 498                                         if (UTF16_IS_SURROGATE_HI(a[pos]))
 499                                                 pos += 2;
 500                                         else ++pos;
 501                                         --i;
 502                                 }
 503                         }
 504
 505                         //! Test for equalness.
 506                         bool operator==(const _Iter& iter) const
 507                         {
 508                                 if (ref == iter.ref && pos == iter.pos)
 509                                         return true;
 510                                 return false;
 511                         }
 512
 513                         //! Test for unequalness.
 514                         bool operator!=(const _Iter& iter) const
 515                         {
 516                                 if (ref != iter.ref || pos != iter.pos)
 517                                         return true;
 518                                 return false;
 519                         }
 520
 521                         //! Switch to the next full character in the string.
 522                         _Iter& operator++()
 523                         {       // ++iterator
 524                                 if (pos == ref->size_raw()) return *this;
 525                                 const uchar16_t* a = ref->c_str();
 526                                 if (UTF16_IS_SURROGATE_HI(a[pos]))
 527                                         pos += 2;                       // TODO: check for valid low surrogate?
 528                                 else ++pos;
 529                                 if (pos > ref->size_raw()) pos = ref->size_raw();
 530                                 return *this;
 531                         }
 532
 533                         //! Switch to the next full character in the string, returning the previous position.
 534                         _Iter operator++(int)
 535                         {       // iterator++
 536                                 _Iter _tmp(*this);
 537                                 ++*this;
 538                                 return _tmp;
 539                         }
 540
 541                         //! Switch to the previous full character in the string.
 542                         _Iter& operator--()
 543                         {       // --iterator
 544                                 if (pos == 0) return *this;
 545                                 const uchar16_t* a = ref->c_str();
 546                                 --pos;
 547                                 if (UTF16_IS_SURROGATE_LO(a[pos]) && pos != 0)  // low surrogate, go back one more.
 548                                         --pos;
 549                                 return *this;
 550                         }
 551
 552                         //! Switch to the previous full character in the string, returning the previous position.
 553                         _Iter operator--(int)
 554                         {       // iterator--
 555                                 _Iter _tmp(*this);
 556                                 --*this;
 557                                 return _tmp;
 558                         }
 559
 560                         //! Advance a specified number of full characters in the string.
 561                         //! \return Myself.
 562                         _Iter& operator+=(const difference_type v)
 563                         {
 564                                 if (v == 0) return *this;
 565                                 if (v < 0) return operator-=(v * -1);
 566
 567                                 if (pos >= ref->size_raw())
 568                                         return *this;
 569
 570                                 // Go to the appropriate position.
 571                                 // TODO: Don't force u32 on an x64 OS.  Make it agnostic.
 572                                 u32 i = (u32)v;
 573                                 u32 sr = ref->size_raw();
 574                                 const uchar16_t* a = ref->c_str();
 575                                 while (i != 0 && pos < sr)
 576                                 {
 577                                         if (UTF16_IS_SURROGATE_HI(a[pos]))
 578                                                 pos += 2;
 579                                         else ++pos;
 580                                         --i;
 581                                 }
 582                                 if (pos > sr)
 583                                         pos = sr;
 584
 585                                 return *this;
 586                         }
 587
 588                         //! Go back a specified number of full characters in the string.
 589                         //! \return Myself.
 590                         _Iter& operator-=(const difference_type v)
 591                         {
 592                                 if (v == 0) return *this;
 593                                 if (v > 0) return operator+=(v * -1);
 594
 595                                 if (pos == 0)
 596                                         return *this;
 597
 598                                 // Go to the appropriate position.
 599                                 // TODO: Don't force u32 on an x64 OS.  Make it agnostic.
 600                                 u32 i = (u32)v;
 601                                 const uchar16_t* a = ref->c_str();
 602                                 while (i != 0 && pos != 0)
 603                                 {
 604                                         --pos;
 605                                         if (UTF16_IS_SURROGATE_LO(a[pos]) != 0 && pos != 0)
 606                                                 --pos;
 607                                         --i;
 608                                 }
 609
 610                                 return *this;
 611                         }
 612
 613                         //! Return a new iterator that is a variable number of full characters forward from the current position.
 614                         _Iter operator+(const difference_type v) const
 615                         {
 616                                 _Iter ret(*this);
 617                                 ret += v;
 618                                 return ret;
 619                         }
 620
 621                         //! Return a new iterator that is a variable number of full characters backward from the current position.
 622                         _Iter operator-(const difference_type v) const
 623                         {
 624                                 _Iter ret(*this);
 625                                 ret -= v;
 626                                 return ret;
 627                         }
 628
 629                         //! Returns the distance between two iterators.
 630                         difference_type operator-(const _Iter& iter) const
 631                         {
 632                                 // Make sure we reference the same object!
 633                                 if (ref != iter.ref)
 634                                         return difference_type();
 635
 636                                 _Iter i = iter;
 637                                 difference_type ret;
 638
 639                                 // Walk up.
 640                                 if (pos > i.pos)
 641                                 {
 642                                         while (pos > i.pos)
 643                                         {
 644                                                 ++i;
 645                                                 ++ret;
 646                                         }
 647                                         return ret;
 648                                 }
 649
 650                                 // Walk down.
 651                                 while (pos < i.pos)
 652                                 {
 653                                         --i;
 654                                         --ret;
 655                                 }
 656                                 return ret;
 657                         }
 658
 659                         //! Accesses the full character at the iterator's position.
 660                         const_reference operator*() const
 661                         {
 662                                 if (pos >= ref->size_raw())
 663                                 {
 664                                         const uchar16_t* a = ref->c_str();
 665                                         u32 p = ref->size_raw();
 666                                         if (UTF16_IS_SURROGATE_LO(a[p]))
 667                                                 --p;
 668                                         reference ret(ref, p);
 669                                         return ret;
 670                                 }
 671                                 const_reference ret(ref, pos);
 672                                 return ret;
 673                         }
 674
 675                         //! Accesses the full character at the iterator's position.
 676                         reference operator*()
 677                         {
 678                                 if (pos >= ref->size_raw())
 679                                 {
 680                                         const uchar16_t* a = ref->c_str();
 681                                         u32 p = ref->size_raw();
 682                                         if (UTF16_IS_SURROGATE_LO(a[p]))
 683                                                 --p;
 684                                         reference ret(ref, p);
 685                                         return ret;
 686                                 }
 687                                 reference ret(ref, pos);
 688                                 return ret;
 689                         }
 690
 691                         //! Accesses the full character at the iterator's position.
 692                         const_pointer operator->() const
 693                         {
 694                                 return operator*();
 695                         }
 696
 697                         //! Accesses the full character at the iterator's position.
 698                         pointer operator->()
 699                         {
 700                                 return operator*();
 701                         }
 702
 703                         //! Is the iterator at the start of the string?
 704                         bool atStart() const
 705                         {
 706                                 return pos == 0;
 707                         }
 708
 709                         //! Is the iterator at the end of the string?
 710                         bool atEnd() const
 711                         {
 712                                 const uchar16_t* a = ref->c_str();
 713                                 if (UTF16_IS_SURROGATE(a[pos]))
 714                                         return (pos + 1) >= ref->size_raw();
 715                                 else return pos >= ref->size_raw();
 716                         }
 717
 718                         //! Moves the iterator to the start of the string.
 719                         void toStart()
 720                         {
 721                                 pos = 0;
 722                         }
 723
 724                         //! Moves the iterator to the end of the string.
 725                         void toEnd()
 726                         {
 727                                 pos = ref->size_raw();
 728                         }
 729
 730                         //! Returns the iterator's position.
 731                         //! \return The iterator's position.
 732                         u32 getPos() const
 733                         {
 734                                 return pos;
 735                         }
 736
 737                 protected:
 738                         const ustring16<TAlloc>* ref;
 739                         u32 pos;
 740         };
 741
 742         //! Iterator to iterate through a UTF-16 string.
 743         class _ustring16_iterator : public _ustring16_const_iterator
 744         {
 745                 public:
 746                         typedef _ustring16_iterator _Iter;
 747                         typedef _ustring16_const_iterator _Base;
 748                         typedef typename _Base::const_pointer const_pointer;
 749                         typedef typename _Base::const_reference const_reference;
 750
 751
 752                         typedef typename _Base::value_type value_type;
 753                         typedef typename _Base::difference_type difference_type;
 754                         typedef typename _Base::distance_type distance_type;
 755                         typedef access pointer;
 756                         typedef access reference;
 757
 758                         using _Base::pos;
 759                         using _Base::ref;
 760
 761                         //! Constructors.
 762                         _ustring16_iterator(const _Iter& i) : _ustring16_const_iterator(i) {}
 763                         _ustring16_iterator(const ustring16<TAlloc>& s) : _ustring16_const_iterator(s) {}
 764                         _ustring16_iterator(const ustring16<TAlloc>& s, const u32 p) : _ustring16_const_iterator(s, p) {}
 765
 766                         //! Accesses the full character at the iterator's position.
 767                         reference operator*() const
 768                         {
 769                                 if (pos >= ref->size_raw())
 770                                 {
 771                                         const uchar16_t* a = ref->c_str();
 772                                         u32 p = ref->size_raw();
 773                                         if (UTF16_IS_SURROGATE_LO(a[p]))
 774                                                 --p;
 775                                         reference ret(ref, p);
 776                                         return ret;
 777                                 }
 778                                 reference ret(ref, pos);
 779                                 return ret;
 780                         }
 781
 782                         //! Accesses the full character at the iterator's position.
 783                         reference operator*()
 784                         {
 785                                 if (pos >= ref->size_raw())
 786                                 {
 787                                         const uchar16_t* a = ref->c_str();
 788                                         u32 p = ref->size_raw();
 789                                         if (UTF16_IS_SURROGATE_LO(a[p]))
 790                                                 --p;
 791                                         reference ret(ref, p);
 792                                         return ret;
 793                                 }
 794                                 reference ret(ref, pos);
 795                                 return ret;
 796                         }
 797
 798                         //! Accesses the full character at the iterator's position.
 799                         pointer operator->() const
 800                         {
 801                                 return operator*();
 802                         }
 803
 804                         //! Accesses the full character at the iterator's position.
 805                         pointer operator->()
 806                         {
 807                                 return operator*();
 808                         }
 809         };
 810
 811         typedef typename ustring16<TAlloc>::_ustring16_iterator iterator;
 812         typedef typename ustring16<TAlloc>::_ustring16_const_iterator const_iterator;
 813
 814         ///----------------------///
 815         /// end iterator classes ///
 816         ///----------------------///
 817
 818         //! Default constructor
 819         ustring16()
 820         : array(0), allocated(1), used(0)
 821         {
 822 #if __BYTE_ORDER == __BIG_ENDIAN
 823                 encoding = unicode::EUTFE_UTF16_BE;
 824 #else
 825                 encoding = unicode::EUTFE_UTF16_LE;
 826 #endif
 827                 array = allocator.allocate(1); // new u16[1];
 828                 array[0] = 0x0;
 829         }
 830
 831
 832         //! Constructor
 833         ustring16(const ustring16<TAlloc>& other)
 834         : array(0), allocated(0), used(0)
 835         {
 836 #if __BYTE_ORDER == __BIG_ENDIAN
 837                 encoding = unicode::EUTFE_UTF16_BE;
 838 #else
 839                 encoding = unicode::EUTFE_UTF16_LE;
 840 #endif
 841                 *this = other;
 842         }
 843
 844
 845         //! Constructor from other string types
 846         template <class B, class A>
 847         ustring16(const string<B, A>& other)
 848         : array(0), allocated(0), used(0)
 849         {
 850 #if __BYTE_ORDER == __BIG_ENDIAN
 851                 encoding = unicode::EUTFE_UTF16_BE;
 852 #else
 853                 encoding = unicode::EUTFE_UTF16_LE;
 854 #endif
 855                 *this = other;
 856         }
 857
 858
 859 #ifndef USTRING_NO_STL
 860         //! Constructor from std::string
 861         template <class B, class A, typename Alloc>
 862         ustring16(const std::basic_string<B, A, Alloc>& other)
 863         : array(0), allocated(0), used(0)
 864         {
 865 #if __BYTE_ORDER == __BIG_ENDIAN
 866                 encoding = unicode::EUTFE_UTF16_BE;
 867 #else
 868                 encoding = unicode::EUTFE_UTF16_LE;
 869 #endif
 870                 *this = other.c_str();
 871         }
 872
 873
 874         //! Constructor from iterator.
 875         template <typename Itr>
 876         ustring16(Itr first, Itr last)
 877         : array(0), allocated(0), used(0)
 878         {
 879 #if __BYTE_ORDER == __BIG_ENDIAN
 880                 encoding = unicode::EUTFE_UTF16_BE;
 881 #else
 882                 encoding = unicode::EUTFE_UTF16_LE;
 883 #endif
 884                 reserve(std::distance(first, last));
 885                 array[used] = 0;
 886
 887                 for (; first != last; ++first)
 888                         append((uchar32_t)*first);
 889         }
 890 #endif
 891
 892
 893 #ifndef USTRING_CPP0X_NEWLITERALS
 894         //! Constructor for copying a character string from a pointer.
 895         ustring16(const char* const c)
 896         : array(0), allocated(0), used(0)
 897         {
 898 #if __BYTE_ORDER == __BIG_ENDIAN
 899                 encoding = unicode::EUTFE_UTF16_BE;
 900 #else
 901                 encoding = unicode::EUTFE_UTF16_LE;
 902 #endif
 903
 904                 loadDataStream(c, strlen(c));
 905                 //append((uchar8_t*)c);
 906         }
 907
 908
 909         //! Constructor for copying a character string from a pointer with a given length.
 910         ustring16(const char* const c, u32 length)
 911         : array(0), allocated(0), used(0)
 912         {
 913 #if __BYTE_ORDER == __BIG_ENDIAN
 914                 encoding = unicode::EUTFE_UTF16_BE;
 915 #else
 916                 encoding = unicode::EUTFE_UTF16_LE;
 917 #endif
 918
 919                 loadDataStream(c, length);
 920         }
 921 #endif
 922
 923
 924         //! Constructor for copying a UTF-8 string from a pointer.
 925         ustring16(const uchar8_t* const c)
 926         : array(0), allocated(0), used(0)
 927         {
 928 #if __BYTE_ORDER == __BIG_ENDIAN
 929                 encoding = unicode::EUTFE_UTF16_BE;
 930 #else
 931                 encoding = unicode::EUTFE_UTF16_LE;
 932 #endif
 933
 934                 append(c);
 935         }
 936
 937
 938         //! Constructor for copying a UTF-8 string from a single char.
 939         ustring16(const char c)
 940         : array(0), allocated(0), used(0)
 941         {
 942 #if __BYTE_ORDER == __BIG_ENDIAN
 943                 encoding = unicode::EUTFE_UTF16_BE;
 944 #else
 945                 encoding = unicode::EUTFE_UTF16_LE;
 946 #endif
 947
 948                 append((uchar32_t)c);
 949         }
 950
 951
 952         //! Constructor for copying a UTF-8 string from a pointer with a given length.
 953         ustring16(const uchar8_t* const c, u32 length)
 954         : array(0), allocated(0), used(0)
 955         {
 956 #if __BYTE_ORDER == __BIG_ENDIAN
 957                 encoding = unicode::EUTFE_UTF16_BE;
 958 #else
 959                 encoding = unicode::EUTFE_UTF16_LE;
 960 #endif
 961
 962                 append(c, length);
 963         }
 964
 965
 966         //! Constructor for copying a UTF-16 string from a pointer.
 967         ustring16(const uchar16_t* const c)
 968         : array(0), allocated(0), used(0)
 969         {
 970 #if __BYTE_ORDER == __BIG_ENDIAN
 971                 encoding = unicode::EUTFE_UTF16_BE;
 972 #else
 973                 encoding = unicode::EUTFE_UTF16_LE;
 974 #endif
 975
 976                 append(c);
 977         }
 978
 979
 980         //! Constructor for copying a UTF-16 string from a pointer with a given length
 981         ustring16(const uchar16_t* const c, u32 length)
 982         : array(0), allocated(0), used(0)
 983         {
 984 #if __BYTE_ORDER == __BIG_ENDIAN
 985                 encoding = unicode::EUTFE_UTF16_BE;
 986 #else
 987                 encoding = unicode::EUTFE_UTF16_LE;
 988 #endif
 989
 990                 append(c, length);
 991         }
 992
 993
 994         //! Constructor for copying a UTF-32 string from a pointer.
 995         ustring16(const uchar32_t* const c)
 996         : array(0), allocated(0), used(0)
 997         {
 998 #if __BYTE_ORDER == __BIG_ENDIAN
 999                 encoding = unicode::EUTFE_UTF16_BE;
1000 #else
1001                 encoding = unicode::EUTFE_UTF16_LE;
1002 #endif
1003
1004                 append(c);
1005         }
1006
1007
1008         //! Constructor for copying a UTF-32 from a pointer with a given length.
1009         ustring16(const uchar32_t* const c, u32 length)
1010         : array(0), allocated(0), used(0)
1011         {
1012 #if __BYTE_ORDER == __BIG_ENDIAN
1013                 encoding = unicode::EUTFE_UTF16_BE;
1014 #else
1015                 encoding = unicode::EUTFE_UTF16_LE;
1016 #endif
1017
1018                 append(c, length);
1019         }
1020
1021
1022         //! Constructor for copying a wchar_t string from a pointer.
1023         ustring16(const wchar_t* const c)
1024         : array(0), allocated(0), used(0)
1025         {
1026 #if __BYTE_ORDER == __BIG_ENDIAN
1027                 encoding = unicode::EUTFE_UTF16_BE;
1028 #else
1029                 encoding = unicode::EUTFE_UTF16_LE;
1030 #endif
1031
1032                 if (sizeof(wchar_t) == 4)
1033                         append(reinterpret_cast<const uchar32_t* const>(c));
1034                 else if (sizeof(wchar_t) == 2)
1035                         append(reinterpret_cast<const uchar16_t* const>(c));
1036                 else if (sizeof(wchar_t) == 1)
1037                         append(reinterpret_cast<const uchar8_t* const>(c));
1038         }
1039
1040
1041         //! Constructor for copying a wchar_t string from a pointer with a given length.
1042         ustring16(const wchar_t* const c, u32 length)
1043         : array(0), allocated(0), used(0)
1044         {
1045 #if __BYTE_ORDER == __BIG_ENDIAN
1046                 encoding = unicode::EUTFE_UTF16_BE;
1047 #else
1048                 encoding = unicode::EUTFE_UTF16_LE;
1049 #endif
1050
1051                 if (sizeof(wchar_t) == 4)
1052                         append(reinterpret_cast<const uchar32_t* const>(c), length);
1053                 else if (sizeof(wchar_t) == 2)
1054                         append(reinterpret_cast<const uchar16_t* const>(c), length);
1055                 else if (sizeof(wchar_t) == 1)
1056                         append(reinterpret_cast<const uchar8_t* const>(c), length);
1057         }
1058
1059
1060 #ifdef USTRING_CPP0X
1061         //! Constructor for moving a ustring16
1062         ustring16(ustring16<TAlloc>&& other)
1063         : array(other.array), encoding(other.encoding), allocated(other.allocated), used(other.used)
1064         {
1065                 //std::cout << "MOVE constructor" << std::endl;
1066                 other.array = 0;
1067                 other.allocated = 0;
1068                 other.used = 0;
1069         }
1070 #endif
1071
1072
1073         //! Destructor
1074         ~ustring16()
1075         {
1076                 allocator.deallocate(array); // delete [] array;
1077         }
1078
1079
1080         //! Assignment operator
1081         ustring16& operator=(const ustring16<TAlloc>& other)
1082         {
1083                 if (this == &other)
1084                         return *this;
1085
1086                 used = other.size_raw();
1087                 if (used >= allocated)
1088                 {
1089                         allocator.deallocate(array); // delete [] array;
1090                         allocated = used + 1;
1091                         array = allocator.allocate(used + 1); //new u16[used];
1092                 }
1093
1094                 const uchar16_t* p = other.c_str();
1095                 for (u32 i=0; i<=used; ++i, ++p)
1096                         array[i] = *p;
1097
1098                 array[used] = 0;
1099
1100                 // Validate our new UTF-16 string.
1101                 validate();
1102
1103                 return *this;
1104         }
1105
1106
1107 #ifdef USTRING_CPP0X
1108         //! Move assignment operator
1109         ustring16& operator=(ustring16<TAlloc>&& other)
1110         {
1111                 if (this != &other)
1112                 {
1113                         //std::cout << "MOVE operator=" << std::endl;
1114                         allocator.deallocate(array);
1115
1116                         array = other.array;
1117                         allocated = other.allocated;
1118                         encoding = other.encoding;
1119                         used = other.used;
1120                         other.array = 0;
1121                         other.used = 0;
1122                 }
1123                 return *this;
1124         }
1125 #endif
1126
1127
1128         //! Assignment operator for other string types
1129         template <class B, class A>
1130         ustring16<TAlloc>& operator=(const string<B, A>& other)
1131         {
1132                 *this = other.c_str();
1133                 return *this;
1134         }
1135
1136
1137         //! Assignment operator for UTF-8 strings
1138         ustring16<TAlloc>& operator=(const uchar8_t* const c)
1139         {
1140                 if (!array)
1141                 {
1142                         array = allocator.allocate(1); //new u16[1];
1143                         allocated = 1;
1144                 }
1145                 used = 0;
1146                 array[used] = 0x0;
1147                 if (!c) return *this;
1148
1149                 //! Append our string now.
1150                 append(c);
1151                 return *this;
1152         }
1153
1154
1155         //! Assignment operator for UTF-16 strings
1156         ustring16<TAlloc>& operator=(const uchar16_t* const c)
1157         {
1158                 if (!array)
1159                 {
1160                         array = allocator.allocate(1); //new u16[1];
1161                         allocated = 1;
1162                 }
1163                 used = 0;
1164                 array[used] = 0x0;
1165                 if (!c) return *this;
1166
1167                 //! Append our string now.
1168                 append(c);
1169                 return *this;
1170         }
1171
1172
1173         //! Assignment operator for UTF-32 strings
1174         ustring16<TAlloc>& operator=(const uchar32_t* const c)
1175         {
1176                 if (!array)
1177                 {
1178                         array = allocator.allocate(1); //new u16[1];
1179                         allocated = 1;
1180                 }
1181                 used = 0;
1182                 array[used] = 0x0;
1183                 if (!c) return *this;
1184
1185                 //! Append our string now.
1186                 append(c);
1187                 return *this;
1188         }
1189
1190
1191         //! Assignment operator for wchar_t strings.
1192         /** Note that this assumes that a correct unicode string is stored in the wchar_t string.
1193                 Since wchar_t changes depending on its platform, it could either be a UTF-8, -16, or -32 string.
1194                 This function assumes you are storing the correct unicode encoding inside the wchar_t string. **/
1195         ustring16<TAlloc>& operator=(const wchar_t* const c)
1196         {
1197                 if (sizeof(wchar_t) == 4)
1198                         *this = reinterpret_cast<const uchar32_t* const>(c);
1199                 else if (sizeof(wchar_t) == 2)
1200                         *this = reinterpret_cast<const uchar16_t* const>(c);
1201                 else if (sizeof(wchar_t) == 1)
1202                         *this = reinterpret_cast<const uchar8_t* const>(c);
1203
1204                 return *this;
1205         }
1206
1207
1208         //! Assignment operator for other strings.
1209         /** Note that this assumes that a correct unicode string is stored in the string. **/
1210         template <class B>
1211         ustring16<TAlloc>& operator=(const B* const c)
1212         {
1213                 if (sizeof(B) == 4)
1214                         *this = reinterpret_cast<const uchar32_t* const>(c);
1215                 else if (sizeof(B) == 2)
1216                         *this = reinterpret_cast<const uchar16_t* const>(c);
1217                 else if (sizeof(B) == 1)
1218                         *this = reinterpret_cast<const uchar8_t* const>(c);
1219
1220                 return *this;
1221         }
1222
1223
1224         //! Direct access operator
1225         access operator [](const u32 index)
1226         {
1227                 _IRR_DEBUG_BREAK_IF(index>=size()) // bad index
1228                 iterator iter(*this, index);
1229                 return iter.operator*();
1230         }
1231
1232
1233         //! Direct access operator
1234         const access operator [](const u32 index) const
1235         {
1236                 _IRR_DEBUG_BREAK_IF(index>=size()) // bad index
1237                 const_iterator iter(*this, index);
1238                 return iter.operator*();
1239         }
1240
1241
1242         //! Equality operator
1243         bool operator ==(const uchar16_t* const str) const
1244         {
1245                 if (!str)
1246                         return false;
1247
1248                 u32 i;
1249                 for(i=0; array[i] && str[i]; ++i)
1250                         if (array[i] != str[i])
1251                                 return false;
1252
1253                 return !array[i] && !str[i];
1254         }
1255
1256
1257         //! Equality operator
1258         bool operator ==(const ustring16<TAlloc>& other) const
1259         {
1260                 for(u32 i=0; array[i] && other.array[i]; ++i)
1261                         if (array[i] != other.array[i])
1262                                 return false;
1263
1264                 return used == other.used;
1265         }
1266
1267
1268         //! Is smaller comparator
1269         bool operator <(const ustring16<TAlloc>& other) const
1270         {
1271                 for(u32 i=0; array[i] && other.array[i]; ++i)
1272                 {
1273                         s32 diff = array[i] - other.array[i];
1274                         if ( diff )
1275                                 return diff < 0;
1276                 }
1277
1278                 return used < other.used;
1279         }
1280
1281
1282         //! Inequality operator
1283         bool operator !=(const uchar16_t* const str) const
1284         {
1285                 return !(*this == str);
1286         }
1287
1288
1289         //! Inequality operator
1290         bool operator !=(const ustring16<TAlloc>& other) const
1291         {
1292                 return !(*this == other);
1293         }
1294
1295
1296         //! Returns the length of a ustring16 in full characters.
1297         //! \return Length of a ustring16 in full characters.
1298         u32 size() const
1299         {
1300                 const_iterator i(*this, 0);
1301                 u32 pos = 0;
1302                 while (!i.atEnd())
1303                 {
1304                         ++i;
1305                         ++pos;
1306                 }
1307                 return pos;
1308         }
1309
1310
1311         //! Informs if the ustring is empty or not.
1312         //! \return True if the ustring is empty, false if not.
1313         bool empty() const
1314         {
1315                 return (size_raw() == 0);
1316         }
1317
1318
1319         //! Returns a pointer to the raw UTF-16 string data.
        //! \return Pointer to the C-style NUL-terminated array of UTF-16 code units.
1321         const uchar16_t* c_str() const
1322         {
1323                 return array;
1324         }
1325
1326
1327         //! Compares the first n characters of this string with another.
1328         //! \param other Other string to compare to.
1329         //! \param n Number of characters to compare.
1330         //! \return True if the n first characters of both strings are equal.
1331         bool equalsn(const ustring16<TAlloc>& other, u32 n) const
1332         {
1333                 u32 i;
1334                 const uchar16_t* oa = other.c_str();
1335                 for(i=0; array[i] && oa[i] && i < n; ++i)
1336                         if (array[i] != oa[i])
1337                                 return false;
1338
1339                 // if one (or both) of the strings was smaller then they
1340                 // are only equal if they have the same length
1341                 return (i == n) || (used == other.used);
1342         }
1343
1344
1345         //! Compares the first n characters of this string with another.
1346         //! \param str Other string to compare to.
1347         //! \param n Number of characters to compare.
1348         //! \return True if the n first characters of both strings are equal.
1349         bool equalsn(const uchar16_t* const str, u32 n) const
1350         {
1351                 if (!str)
1352                         return false;
1353                 u32 i;
1354                 for(i=0; array[i] && str[i] && i < n; ++i)
1355                         if (array[i] != str[i])
1356                                 return false;
1357
1358                 // if one (or both) of the strings was smaller then they
1359                 // are only equal if they have the same length
1360                 return (i == n) || (array[i] == 0 && str[i] == 0);
1361         }
1362
1363
1364         //! Appends a character to this ustring16
1365         //! \param character The character to append.
1366         //! \return A reference to our current string.
1367         ustring16<TAlloc>& append(uchar32_t character)
1368         {
1369                 if (used + 2 >= allocated)
1370                         reallocate(used + 2);
1371
1372                 if (character > 0xFFFF)
1373                 {
1374                         used += 2;
1375
1376                         // character will be multibyte, so split it up into a surrogate pair.
1377                         uchar16_t x = static_cast<uchar16_t>(character);
1378                         uchar16_t vh = UTF16_HI_SURROGATE | ((((character >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1379                         uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1380                         array[used-2] = vh;
1381                         array[used-1] = vl;
1382                 }
1383                 else
1384                 {
1385                         ++used;
1386                         array[used-1] = character;
1387                 }
1388                 array[used] = 0;
1389
1390                 return *this;
1391         }
1392
1393
1394         //! Appends a UTF-8 string to this ustring16
1395         //! \param other The UTF-8 string to append.
1396         //! \param length The length of the string to append.
1397         //! \return A reference to our current string.
1398         ustring16<TAlloc>& append(const uchar8_t* const other, u32 length=0xffffffff)
1399         {
1400                 if (!other)
1401                         return *this;
1402
1403                 // Determine if the string is long enough for a BOM.
1404                 u32 len = 0;
1405                 const uchar8_t* p = other;
1406                 do
1407                 {
1408                         ++len;
1409                 } while (*p++ && len < unicode::BOM_ENCODE_UTF8_LEN);
1410
1411                 // Check for BOM.
1412                 unicode::EUTF_ENCODE c_bom = unicode::EUTFE_NONE;
1413                 if (len == unicode::BOM_ENCODE_UTF8_LEN)
1414                 {
1415                         if (memcmp(other, unicode::BOM_ENCODE_UTF8, unicode::BOM_ENCODE_UTF8_LEN) == 0)
1416                                 c_bom = unicode::EUTFE_UTF8;
1417                 }
1418
1419                 // If a BOM was found, don't include it in the string.
1420                 const uchar8_t* c2 = other;
1421                 if (c_bom != unicode::EUTFE_NONE)
1422                 {
1423                         c2 = other + unicode::BOM_UTF8_LEN;
1424                         length -= unicode::BOM_UTF8_LEN;
1425                 }
1426
1427                 // Calculate the size of the string to read in.
1428                 len = 0;
1429                 p = c2;
1430                 do
1431                 {
1432                         ++len;
1433                 } while(*p++ && len < length);
1434                 if (len > length)
1435                         len = length;
1436
1437                 // If we need to grow the array, do it now.
1438                 if (used + len >= allocated)
1439                         reallocate(used + (len * 2));
1440                 u32 start = used;
1441
1442                 // Convert UTF-8 to UTF-16.
1443                 u32 pos = start;
1444                 for (u32 l = 0; l<len;)
1445                 {
1446                         ++used;
1447                         if (((c2[l] >> 6) & 0x03) == 0x02)
1448                         {       // Invalid continuation byte.
1449                                 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1450                                 ++l;
1451                         }
1452                         else if (c2[l] == 0xC0 || c2[l] == 0xC1)
1453                         {       // Invalid byte - overlong encoding.
1454                                 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1455                                 ++l;
1456                         }
1457                         else if ((c2[l] & 0xF8) == 0xF0)
1458                         {       // 4 bytes UTF-8, 2 bytes UTF-16.
1459                                 // Check for a full string.
1460                                 if ((l + 3) >= len)
1461                                 {
1462                                         array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1463                                         l += 3;
1464                                         break;
1465                                 }
1466
1467                                 // Validate.
1468                                 bool valid = true;
1469                                 u8 l2 = 0;
1470                                 if (valid && (((c2[l+1] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1471                                 if (valid && (((c2[l+2] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1472                                 if (valid && (((c2[l+3] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1473                                 if (!valid)
1474                                 {
1475                                         array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1476                                         l += l2;
1477                                         continue;
1478                                 }
1479
1480                                 // Decode.
1481                                 uchar8_t b1 = ((c2[l] & 0x7) << 2) | ((c2[l+1] >> 4) & 0x3);
1482                                 uchar8_t b2 = ((c2[l+1] & 0xF) << 4) | ((c2[l+2] >> 2) & 0xF);
1483                                 uchar8_t b3 = ((c2[l+2] & 0x3) << 6) | (c2[l+3] & 0x3F);
1484                                 uchar32_t v = b3 | ((uchar32_t)b2 << 8) | ((uchar32_t)b1 << 16);
1485
1486                                 // Split v up into a surrogate pair.
1487                                 uchar16_t x = static_cast<uchar16_t>(v);
1488                                 uchar16_t vh = UTF16_HI_SURROGATE | ((((v >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1489                                 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1490
1491                                 array[pos++] = vh;
1492                                 array[pos++] = vl;
1493                                 l += 4;
1494                                 ++used;         // Using two shorts this time, so increase used by 1.
1495                         }
1496                         else if ((c2[l] & 0xF0) == 0xE0)
1497                         {       // 3 bytes UTF-8, 1 byte UTF-16.
1498                                 // Check for a full string.
1499                                 if ((l + 2) >= len)
1500                                 {
1501                                         array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1502                                         l += 2;
1503                                         break;
1504                                 }
1505
1506                                 // Validate.
1507                                 bool valid = true;
1508                                 u8 l2 = 0;
1509                                 if (valid && (((c2[l+1] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1510                                 if (valid && (((c2[l+2] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1511                                 if (!valid)
1512                                 {
1513                                         array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1514                                         l += l2;
1515                                         continue;
1516                                 }
1517
1518                                 // Decode.
1519                                 uchar8_t b1 = ((c2[l] & 0xF) << 4) | ((c2[l+1] >> 2) & 0xF);
1520                                 uchar8_t b2 = ((c2[l+1] & 0x3) << 6) | (c2[l+2] & 0x3F);
1521                                 uchar16_t ch = b2 | ((uchar16_t)b1 << 8);
1522                                 array[pos++] = ch;
1523                                 l += 3;
1524                         }
1525                         else if ((c2[l] & 0xE0) == 0xC0)
1526                         {       // 2 bytes UTF-8, 1 byte UTF-16.
1527                                 // Check for a full string.
1528                                 if ((l + 1) >= len)
1529                                 {
1530                                         array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1531                                         l += 1;
1532                                         break;
1533                                 }
1534
1535                                 // Validate.
1536                                 if (((c2[l+1] >> 6) & 0x03) != 0x02)
1537                                 {
1538                                         array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1539                                         ++l;
1540                                         continue;
1541                                 }
1542
1543                                 // Decode.
1544                                 uchar8_t b1 = (c2[l] >> 2) & 0x7;
1545                                 uchar8_t b2 = ((c2[l] & 0x3) << 6) | (c2[l+1] & 0x3F);
1546                                 uchar16_t ch = b2 | ((uchar16_t)b1 << 8);
1547                                 array[pos++] = ch;
1548                                 l += 2;
1549                         }
1550                         else
1551                         {       // 1 byte UTF-8, 1 byte UTF-16.
1552                                 // Validate.
1553                                 if (c2[l] > 0x7F)
1554                                 {       // Values above 0xF4 are restricted and aren't used.  By now, anything above 0x7F is invalid.
1555                                         array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1556                                 }
1557                                 else array[pos++] = static_cast<uchar16_t>(c2[l]);
1558                                 ++l;
1559                         }
1560                 }
1561                 array[used] = 0;
1562
1563                 // Validate our new UTF-16 string.
1564                 validate();
1565
1566                 return *this;
1567         }
1568
1569
1570         //! Appends a UTF-16 string to this ustring16
1571         //! \param other The UTF-16 string to append.
1572         //! \param length The length of the string to append.
1573         //! \return A reference to our current string.
1574         ustring16<TAlloc>& append(const uchar16_t* const other, u32 length=0xffffffff)
1575         {
1576                 if (!other)
1577                         return *this;
1578
1579                 // Determine if the string is long enough for a BOM.
1580                 u32 len = 0;
1581                 const uchar16_t* p = other;
1582                 do
1583                 {
1584                         ++len;
1585                 } while (*p++ && len < unicode::BOM_ENCODE_UTF16_LEN);
1586
1587                 // Check for the BOM to determine the string's endianness.
1588                 unicode::EUTF_ENDIAN c_end = unicode::EUTFEE_NATIVE;
1589                 if (memcmp(other, unicode::BOM_ENCODE_UTF16_LE, unicode::BOM_ENCODE_UTF16_LEN) == 0)
1590                         c_end = unicode::EUTFEE_LITTLE;
1591                 else if (memcmp(other, unicode::BOM_ENCODE_UTF16_BE, unicode::BOM_ENCODE_UTF16_LEN) == 0)
1592                         c_end = unicode::EUTFEE_BIG;
1593
1594                 // If a BOM was found, don't include it in the string.
1595                 const uchar16_t* c2 = other;
1596                 if (c_end != unicode::EUTFEE_NATIVE)
1597                 {
1598                         c2 = other + unicode::BOM_UTF16_LEN;
1599                         length -= unicode::BOM_UTF16_LEN;
1600                 }
1601
1602                 // Calculate the size of the string to read in.
1603                 len = 0;
1604                 p = c2;
1605                 do
1606                 {
1607                         ++len;
1608                 } while(*p++ && len < length);
1609                 if (len > length)
1610                         len = length;
1611
1612                 // If we need to grow the size of the array, do it now.
1613                 if (used + len >= allocated)
1614                         reallocate(used + (len * 2));
1615                 u32 start = used;
1616                 used += len;
1617
1618                 // Copy the string now.
1619                 unicode::EUTF_ENDIAN m_end = getEndianness();
1620                 for (u32 l = start; l < start + len; ++l)
1621                 {
1622                         array[l] = (uchar16_t)c2[l];
1623                         if (c_end != unicode::EUTFEE_NATIVE && c_end != m_end)
1624                                 array[l] = unicode::swapEndian16(array[l]);
1625                 }
1626
1627                 array[used] = 0;
1628
1629                 // Validate our new UTF-16 string.
1630                 validate();
1631                 return *this;
1632         }
1633
1634
1635         //! Appends a UTF-32 string to this ustring16
1636         //! \param other The UTF-32 string to append.
1637         //! \param length The length of the string to append.
1638         //! \return A reference to our current string.
1639         ustring16<TAlloc>& append(const uchar32_t* const other, u32 length=0xffffffff)
1640         {
1641                 if (!other)
1642                         return *this;
1643
1644                 // Check for the BOM to determine the string's endianness.
1645                 unicode::EUTF_ENDIAN c_end = unicode::EUTFEE_NATIVE;
1646                 if (memcmp(other, unicode::BOM_ENCODE_UTF32_LE, unicode::BOM_ENCODE_UTF32_LEN) == 0)
1647                         c_end = unicode::EUTFEE_LITTLE;
1648                 else if (memcmp(other, unicode::BOM_ENCODE_UTF32_BE, unicode::BOM_ENCODE_UTF32_LEN) == 0)
1649                         c_end = unicode::EUTFEE_BIG;
1650
1651                 // If a BOM was found, don't include it in the string.
1652                 const uchar32_t* c2 = other;
1653                 if (c_end != unicode::EUTFEE_NATIVE)
1654                 {
1655                         c2 = other + unicode::BOM_UTF32_LEN;
1656                         length -= unicode::BOM_UTF32_LEN;
1657                 }
1658
1659                 // Calculate the size of the string to read in.
1660                 u32 len = 0;
1661                 const uchar32_t* p = c2;
1662                 do
1663                 {
1664                         ++len;
1665                 } while(*p++ && len < length);
1666                 if (len > length)
1667                         len = length;
1668
1669                 // If we need to grow the size of the array, do it now.
1670                 // In case all of the UTF-32 string is split into surrogate pairs, do len * 2.
1671                 if (used + (len * 2) >= allocated)
1672                         reallocate(used + ((len * 2) * 2));
1673                 u32 start = used;
1674
1675                 // Convert UTF-32 to UTF-16.
1676                 unicode::EUTF_ENDIAN m_end = getEndianness();
1677                 u32 pos = start;
1678                 for (u32 l = 0; l<len; ++l)
1679                 {
1680                         ++used;
1681
1682                         uchar32_t ch = c2[l];
1683                         if (c_end != unicode::EUTFEE_NATIVE && c_end != m_end)
1684                                 ch = unicode::swapEndian32(ch);
1685
1686                         if (ch > 0xFFFF)
1687                         {
1688                                 // Split ch up into a surrogate pair as it is over 16 bits long.
1689                                 uchar16_t x = static_cast<uchar16_t>(ch);
1690                                 uchar16_t vh = UTF16_HI_SURROGATE | ((((ch >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1691                                 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1692                                 array[pos++] = vh;
1693                                 array[pos++] = vl;
1694                                 ++used;         // Using two shorts, so increased used again.
1695                         }
1696                         else if (ch >= 0xD800 && ch <= 0xDFFF)
1697                         {
1698                                 // Between possible UTF-16 surrogates (invalid!)
1699                                 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1700                         }
1701                         else array[pos++] = static_cast<uchar16_t>(ch);
1702                 }
1703                 array[used] = 0;
1704
1705                 // Validate our new UTF-16 string.
1706                 validate();
1707
1708                 return *this;
1709         }
1710
1711
1712         //! Appends a ustring16 to this ustring16
1713         //! \param other The string to append to this one.
1714         //! \return A reference to our current string.
1715         ustring16<TAlloc>& append(const ustring16<TAlloc>& other)
1716         {
1717                 const uchar16_t* oa = other.c_str();
1718
1719                 u32 len = other.size_raw();
1720
1721                 if (used + len >= allocated)
1722                         reallocate(used + len);
1723
1724                 for (u32 l=0; l<len; ++l)
1725                         array[used+l] = oa[l];
1726
1727                 used += len;
1728                 array[used] = 0;
1729
1730                 return *this;
1731         }
1732
1733
1734         //! Appends a certain amount of characters of a ustring16 to this ustring16.
1735         //! \param other The string to append to this one.
1736         //! \param length How many characters of the other string to add to this one.
1737         //! \return A reference to our current string.
1738         ustring16<TAlloc>& append(const ustring16<TAlloc>& other, u32 length)
1739         {
1740                 if (other.size() == 0)
1741                         return *this;
1742
1743                 if (other.size() < length)
1744                 {
1745                         append(other);
1746                         return *this;
1747                 }
1748
1749                 if (used + length * 2 >= allocated)
1750                         reallocate(used + length * 2);
1751
1752                 const_iterator iter(other, 0);
1753                 u32 l = length;
1754                 while (!iter.atEnd() && l)
1755                 {
1756                         uchar32_t c = *iter;
1757                         append(c);
1758                         ++iter;
1759                         --l;
1760                 }
1761
1762                 return *this;
1763         }
1764
1765
1766         //! Reserves some memory.
1767         //! \param count The amount of characters to reserve.
1768         void reserve(u32 count)
1769         {
1770                 if (count < allocated)
1771                         return;
1772
1773                 reallocate(count);
1774         }
1775
1776
1777         //! Finds first occurrence of character.
1778         //! \param c The character to search for.
1779         //! \return Position where the character has been found, or -1 if not found.
1780         s32 findFirst(uchar32_t c) const
1781         {
1782                 const_iterator i(*this, 0);
1783
1784                 s32 pos = 0;
1785                 while (!i.atEnd())
1786                 {
1787                         uchar32_t t = *i;
1788                         if (c == t)
1789                                 return pos;
1790                         ++pos;
1791                         ++i;
1792                 }
1793
1794                 return -1;
1795         }
1796
1797         //! Finds first occurrence of a character of a list.
1798         //! \param c A list of characters to find. For example if the method should find the first occurrence of 'a' or 'b', this parameter should be "ab".
1799         //! \param count The amount of characters in the list. Usually, this should be strlen(c).
1800         //! \return Position where one of the characters has been found, or -1 if not found.
1801         s32 findFirstChar(const uchar32_t* const c, u32 count=1) const
1802         {
1803                 if (!c || !count)
1804                         return -1;
1805
1806                 const_iterator i(*this, 0);
1807
1808                 s32 pos = 0;
1809                 while (!i.atEnd())
1810                 {
1811                         uchar32_t t = *i;
1812                         for (u32 j=0; j<count; ++j)
1813                                 if (t == c[j])
1814                                         return pos;
1815                         ++pos;
1816                         ++i;
1817                 }
1818
1819                 return -1;
1820         }
1821
1822
1823         //! Finds first position of a character not in a given list.
1824         //! \param c A list of characters to NOT find. For example if the method should find the first occurrence of a character not 'a' or 'b', this parameter should be "ab".
1825         //! \param count The amount of characters in the list. Usually, this should be strlen(c).
1826         //! \return Position where the character has been found, or -1 if not found.
1827         s32 findFirstCharNotInList(const uchar32_t* const c, u32 count=1) const
1828         {
1829                 if (!c || !count)
1830                         return -1;
1831
1832                 const_iterator i(*this, 0);
1833
1834                 s32 pos = 0;
1835                 while (!i.atEnd())
1836                 {
1837                         uchar32_t t = *i;
1838                         u32 j;
1839                         for (j=0; j<count; ++j)
1840                                 if (t == c[j])
1841                                         break;
1842
1843                         if (j==count)
1844                                 return pos;
1845                         ++pos;
1846                         ++i;
1847                 }
1848
1849                 return -1;
1850         }
1851
1852         //! Finds last position of a character not in a given list.
1853         //! \param c A list of characters to NOT find. For example if the method should find the first occurrence of a character not 'a' or 'b', this parameter should be "ab".
1854         //! \param count The amount of characters in the list. Usually, this should be strlen(c).
1855         //! \return Position where the character has been found, or -1 if not found.
1856         s32 findLastCharNotInList(const uchar32_t* const c, u32 count=1) const
1857         {
1858                 if (!c || !count)
1859                         return -1;
1860
1861                 const_iterator i(end());
1862                 --i;
1863
1864                 s32 pos = size() - 1;
1865                 while (!i.atStart())
1866                 {
1867                         uchar32_t t = *i;
1868                         u32 j;
1869                         for (j=0; j<count; ++j)
1870                                 if (t == c[j])
1871                                         break;
1872
1873                         if (j==count)
1874                                 return pos;
1875                         --pos;
1876                         --i;
1877                 }
1878
1879                 return -1;
1880         }
1881
1882         //! Finds next occurrence of character.
1883         //! \param c The character to search for.
1884         //! \param startPos The position in the string to start searching.
1885         //! \return Position where the character has been found, or -1 if not found.
1886         s32 findNext(uchar32_t c, u32 startPos) const
1887         {
1888                 const_iterator i(*this, startPos);
1889
1890                 s32 pos = startPos;
1891                 while (!i.atEnd())
1892                 {
1893                         uchar32_t t = *i;
1894                         if (t == c)
1895                                 return pos;
1896                         ++pos;
1897                         ++i;
1898                 }
1899
1900                 return -1;
1901         }
1902
1903
1904         //! Finds last occurrence of character.
1905         //! \param c The character to search for.
1906         //! \param start The start position of the reverse search ( default = -1, on end ).
1907         //! \return Position where the character has been found, or -1 if not found.
1908         s32 findLast(uchar32_t c, s32 start = -1) const
1909         {
1910                 u32 s = size();
1911                 start = core::clamp ( start < 0 ? (s32)s : start, 0, (s32)s ) - 1;
1912
1913                 const_iterator i(*this, start);
1914                 u32 pos = start;
1915                 while (!i.atStart())
1916                 {
1917                         uchar32_t t = *i;
1918                         if (t == c)
1919                                 return pos;
1920                         --pos;
1921                         --i;
1922                 }
1923
1924                 return -1;
1925         }
1926
1927         //! Finds last occurrence of a character in a list.
1928         //! \param c A list of strings to find. For example if the method should find the last occurrence of 'a' or 'b', this parameter should be "ab".
1929         //! \param count The amount of characters in the list. Usually, this should be strlen(c).
1930         //! \return Position where one of the characters has been found, or -1 if not found.
1931         s32 findLastChar(const uchar32_t* const c, u32 count=1) const
1932         {
1933                 if (!c || !count)
1934                         return -1;
1935
1936                 const_iterator i(end());
1937                 --i;
1938
1939                 s32 pos = size();
1940                 while (!i.atStart())
1941                 {
1942                         uchar32_t t = *i;
1943                         for (u32 j=0; j<count; ++j)
1944                                 if (t == c[j])
1945                                         return pos;
1946                         --pos;
1947                         --i;
1948                 }
1949
1950                 return -1;
1951         }
1952
1953
1954         //! Finds another ustring16 in this ustring16.
1955         //! \param str The string to find.
1956         //! \param start The start position of the search.
1957         //! \return Positions where the ustring16 has been found, or -1 if not found.
1958         s32 find(const ustring16<TAlloc>& str, const u32 start = 0) const
1959         {
1960                 u32 my_size = size();
1961                 u32 their_size = str.size();
1962
1963                 if (their_size == 0 || my_size - start < their_size)
1964                         return -1;
1965
1966                 const_iterator i(*this, start);
1967
1968                 s32 pos = start;
1969                 while (!i.atEnd())
1970                 {
1971                         const_iterator i2(i);
1972                         const_iterator j(str, 0);
1973                         uchar32_t t1 = (uchar32_t)*i2;
1974                         uchar32_t t2 = (uchar32_t)*j;
1975                         while (t1 == t2)
1976                         {
1977                                 ++i2;
1978                                 ++j;
1979                                 if (j.atEnd())
1980                                         return pos;
1981                                 t1 = (uchar32_t)*i2;
1982                                 t2 = (uchar32_t)*j;
1983                         }
1984                         ++i;
1985                         ++pos;
1986                 }
1987
1988                 return -1;
1989         }
1990
1991
1992         //! Finds another ustring16 in this ustring16.
1993         //! \param str The string to find.
1994         //! \param start The start position of the search.
1995         //! \return Positions where the string has been found, or -1 if not found.
1996         s32 find_raw(const ustring16<TAlloc>& str, const u32 start = 0) const
1997         {
1998                 const uchar16_t* data = str.c_str();
1999                 if (data && *data)
2000                 {
2001                         u32 len = 0;
2002
2003                         while (data[len])
2004                                 ++len;
2005
2006                         if (len > used)
2007                                 return -1;
2008
2009                         for (u32 i=start; i<=used-len; ++i)
2010                         {
2011                                 u32 j=0;
2012
2013                                 while(data[j] && array[i+j] == data[j])
2014                                         ++j;
2015
2016                                 if (!data[j])
2017                                         return i;
2018                         }
2019                 }
2020
2021                 return -1;
2022         }
2023
2024
2025         //! Returns a substring.
2026         //! \param begin: Start of substring.
2027         //! \param length: Length of substring.
2028         //! \return A reference to our current string.
2029         ustring16<TAlloc> subString(u32 begin, s32 length) const
2030         {
2031                 u32 len = size();
2032                 // if start after ustring16
2033                 // or no proper substring length
2034                 if ((length <= 0) || (begin>=len))
2035                         return ustring16<TAlloc>("");
2036                 // clamp length to maximal value
2037                 if ((length+begin) > len)
2038                         length = len-begin;
2039
2040                 ustring16<TAlloc> o;
2041                 o.reserve((length+1) * 2);
2042
2043                 const_iterator i(*this, begin);
2044                 while (!i.atEnd() && length)
2045                 {
2046                         o.append(*i);
2047                         ++i;
2048                         --length;
2049                 }
2050
2051                 return o;
2052         }
2053
2054
2055         //! Appends a character to this ustring16.
2056         //! \param c Character to append.
2057         //! \return A reference to our current string.
2058         ustring16<TAlloc>& operator += (char c)
2059         {
2060                 append((uchar32_t)c);
2061                 return *this;
2062         }
2063
2064
2065         //! Appends a character to this ustring16.
2066         //! \param c Character to append.
2067         //! \return A reference to our current string.
2068         ustring16<TAlloc>& operator += (uchar32_t c)
2069         {
2070                 append(c);
2071                 return *this;
2072         }
2073
2074
2075         //! Appends a number to this ustring16.
2076         //! \param c Number to append.
2077         //! \return A reference to our current string.
2078         ustring16<TAlloc>& operator += (short c)
2079         {
2080                 append(core::stringc(c));
2081                 return *this;
2082         }
2083
2084
2085         //! Appends a number to this ustring16.
2086         //! \param c Number to append.
2087         //! \return A reference to our current string.
2088         ustring16<TAlloc>& operator += (unsigned short c)
2089         {
2090                 append(core::stringc(c));
2091                 return *this;
2092         }
2093
2094
2095 #ifdef USTRING_CPP0X_NEWLITERALS
2096         //! Appends a number to this ustring16.
2097         //! \param c Number to append.
2098         //! \return A reference to our current string.
2099         ustring16<TAlloc>& operator += (int c)
2100         {
2101                 append(core::stringc(c));
2102                 return *this;
2103         }
2104
2105
2106         //! Appends a number to this ustring16.
2107         //! \param c Number to append.
2108         //! \return A reference to our current string.
2109         ustring16<TAlloc>& operator += (unsigned int c)
2110         {
2111                 append(core::stringc(c));
2112                 return *this;
2113         }
2114 #endif
2115
2116
2117         //! Appends a number to this ustring16.
2118         //! \param c Number to append.
2119         //! \return A reference to our current string.
2120         ustring16<TAlloc>& operator += (long c)
2121         {
2122                 append(core::stringc(c));
2123                 return *this;
2124         }
2125
2126
2127         //! Appends a number to this ustring16.
2128         //! \param c Number to append.
2129         //! \return A reference to our current string.
2130         ustring16<TAlloc>& operator += (unsigned long c)
2131         {
2132                 append(core::stringc(c));
2133                 return *this;
2134         }
2135
2136
2137         //! Appends a number to this ustring16.
2138         //! \param c Number to append.
2139         //! \return A reference to our current string.
2140         ustring16<TAlloc>& operator += (double c)
2141         {
2142                 append(core::stringc(c));
2143                 return *this;
2144         }
2145
2146
2147         //! Appends a char ustring16 to this ustring16.
2148         //! \param c Char ustring16 to append.
2149         //! \return A reference to our current string.
2150         ustring16<TAlloc>& operator += (const uchar16_t* const c)
2151         {
2152                 append(c);
2153                 return *this;
2154         }
2155
2156
2157         //! Appends a ustring16 to this ustring16.
2158         //! \param other ustring16 to append.
2159         //! \return A reference to our current string.
2160         ustring16<TAlloc>& operator += (const ustring16<TAlloc>& other)
2161         {
2162                 append(other);
2163                 return *this;
2164         }
2165
2166
2167         //! Replaces all characters of a given type with another one.
2168         //! \param toReplace Character to replace.
2169         //! \param replaceWith Character replacing the old one.
2170         //! \return A reference to our current string.
2171         ustring16<TAlloc>& replace(uchar32_t toReplace, uchar32_t replaceWith)
2172         {
2173                 iterator i(*this, 0);
2174                 while (!i.atEnd())
2175                 {
2176                         typename ustring16<TAlloc>::access a = *i;
2177                         if ((uchar32_t)a == toReplace)
2178                                 a = replaceWith;
2179                         ++i;
2180                 }
2181                 return *this;
2182         }
2183
2184
2185         //! Replaces all instances of a string with another one.
2186         //! \param toReplace The string to replace.
2187         //! \param replaceWith The string replacing the old one.
2188         //! \return A reference to our current string.
2189         ustring16<TAlloc>& replace(const ustring16<TAlloc>& toReplace, const ustring16<TAlloc>& replaceWith)
2190         {
2191                 if (toReplace.size() == 0)
2192                         return *this;
2193
2194                 const uchar16_t* other = toReplace.c_str();
2195                 const uchar16_t* replace = replaceWith.c_str();
2196                 const u32 other_size = toReplace.size_raw();
2197                 const u32 replace_size = replaceWith.size_raw();
2198
2199                 // Determine the delta.  The algorithm will change depending on the delta.
2200                 s32 delta = replace_size - other_size;
2201
2202                 // A character for character replace.  The string will not shrink or grow.
2203                 if (delta == 0)
2204                 {
2205                         s32 pos = 0;
2206                         while ((pos = find_raw(other, pos)) != -1)
2207                         {
2208                                 for (u32 i = 0; i < replace_size; ++i)
2209                                         array[pos + i] = replace[i];
2210                                 ++pos;
2211                         }
2212                         return *this;
2213                 }
2214
2215                 // We are going to be removing some characters.  The string will shrink.
2216                 if (delta < 0)
2217                 {
2218                         u32 i = 0;
2219                         for (u32 pos = 0; pos <= used; ++i, ++pos)
2220                         {
2221                                 // Is this potentially a match?
2222                                 if (array[pos] == *other)
2223                                 {
2224                                         // Check to see if we have a match.
2225                                         u32 j;
2226                                         for (j = 0; j < other_size; ++j)
2227                                         {
2228                                                 if (array[pos + j] != other[j])
2229                                                         break;
2230                                         }
2231
2232                                         // If we have a match, replace characters.
2233                                         if (j == other_size)
2234                                         {
2235                                                 for (j = 0; j < replace_size; ++j)
2236                                                         array[i + j] = replace[j];
2237                                                 i += replace_size - 1;
2238                                                 pos += other_size - 1;
2239                                                 continue;
2240                                         }
2241                                 }
2242
2243                                 // No match found, just copy characters.
2244                                 array[i - 1] = array[pos];
2245                         }
2246                         array[i] = 0;
2247                         used = i;
2248
2249                         return *this;
2250                 }
2251
2252                 // We are going to be adding characters, so the string size will increase.
2253                 // Count the number of times toReplace exists in the string so we can allocate the new size.
2254                 u32 find_count = 0;
2255                 s32 pos = 0;
2256                 while ((pos = find_raw(other, pos)) != -1)
2257                 {
2258                         ++find_count;
2259                         ++pos;
2260                 }
2261
2262                 // Re-allocate the string now, if needed.
2263                 u32 len = delta * find_count;
2264                 if (used + len >= allocated)
2265                         reallocate(used + len);
2266
2267                 // Start replacing.
2268                 pos = 0;
2269                 while ((pos = find_raw(other, pos)) != -1)
2270                 {
2271                         uchar16_t* start = array + pos + other_size - 1;
2272                         uchar16_t* ptr   = array + used;
2273                         uchar16_t* end   = array + used + delta;
2274
2275                         // Shift characters to make room for the string.
2276                         while (ptr != start)
2277                         {
2278                                 *end = *ptr;
2279                                 --ptr;
2280                                 --end;
2281                         }
2282
2283                         // Add the new string now.
2284                         for (u32 i = 0; i < replace_size; ++i)
2285                                 array[pos + i] = replace[i];
2286
2287                         pos += replace_size;
2288                         used += delta;
2289                 }
2290
2291                 // Terminate the string and return ourself.
2292                 array[used] = 0;
2293                 return *this;
2294         }
2295
2296
2297         //! Removes every occurrence of a character from the ustring16.
2298         //! \param c The character to remove.
2299         //! \return A reference to our current string.
2300         ustring16<TAlloc>& remove(uchar32_t c)
2301         {
2302                 u32 pos = 0;
2303                 u32 found = 0;
2304                 u32 len = (c > 0xFFFF ? 2 : 1);         // Remove characters equal to the size of c as a UTF-16 character.
2305                 for (u32 i=0; i<=used; ++i)
2306                 {
2307                         uchar32_t uc32 = 0;
2308                         if (!UTF16_IS_SURROGATE_HI(array[i]))
2309                                 uc32 |= array[i];
2310                         else if (i + 1 <= used)
2311                         {
2312                                 // Convert the surrogate pair into a single UTF-32 character.
2313                                 uc32 = unicode::toUTF32(array[i], array[i + 1]);
2314                         }
2315                         u32 len2 = (uc32 > 0xFFFF ? 2 : 1);
2316
2317                         if (uc32 == c)
2318                         {
2319                                 found += len;
2320                                 continue;
2321                         }
2322
2323                         array[pos++] = array[i];
2324                         if (len2 == 2)
2325                                 array[pos++] = array[++i];
2326                 }
2327                 used -= found;
2328                 array[used] = 0;
2329                 return *this;
2330         }
2331
2332
2333         //! Removes a ustring16 from the ustring16.
2334         //! \param toRemove The string to remove.
2335         //! \return A reference to our current string.
2336         ustring16<TAlloc>& remove(const ustring16<TAlloc>& toRemove)
2337         {
2338                 u32 size = toRemove.size_raw();
2339                 if (size == 0) return *this;
2340
2341                 const uchar16_t* tra = toRemove.c_str();
2342                 u32 pos = 0;
2343                 u32 found = 0;
2344                 for (u32 i=0; i<=used; ++i)
2345                 {
2346                         u32 j = 0;
2347                         while (j < size)
2348                         {
2349                                 if (array[i + j] != tra[j])
2350                                         break;
2351                                 ++j;
2352                         }
2353                         if (j == size)
2354                         {
2355                                 found += size;
2356                                 i += size - 1;
2357                                 continue;
2358                         }
2359
2360                         array[pos++] = array[i];
2361                 }
2362                 used -= found;
2363                 array[used] = 0;
2364                 return *this;
2365         }
2366
2367
2368         //! Removes characters from the ustring16.
2369         //! \param characters The characters to remove.
2370         //! \return A reference to our current string.
2371         ustring16<TAlloc>& removeChars(const ustring16<TAlloc>& characters)
2372         {
2373                 if (characters.size_raw() == 0)
2374                         return *this;
2375
2376                 u32 pos = 0;
2377                 u32 found = 0;
2378                 const_iterator iter(characters);
2379                 for (u32 i=0; i<=used; ++i)
2380                 {
2381                         uchar32_t uc32 = 0;
2382                         if (!UTF16_IS_SURROGATE_HI(array[i]))
2383                                 uc32 |= array[i];
2384                         else if (i + 1 <= used)
2385                         {
2386                                 // Convert the surrogate pair into a single UTF-32 character.
2387                                 uc32 = unicode::toUTF32(array[i], array[i+1]);
2388                         }
2389                         u32 len2 = (uc32 > 0xFFFF ? 2 : 1);
2390
2391                         bool cont = false;
2392                         iter.toStart();
2393                         while (!iter.atEnd())
2394                         {
2395                                 uchar32_t c = *iter;
2396                                 if (uc32 == c)
2397                                 {
2398                                         found += (c > 0xFFFF ? 2 : 1);          // Remove characters equal to the size of c as a UTF-16 character.
2399                                         ++i;
2400                                         cont = true;
2401                                         break;
2402                                 }
2403                                 ++iter;
2404                         }
2405                         if (cont) continue;
2406
2407                         array[pos++] = array[i];
2408                         if (len2 == 2)
2409                                 array[pos++] = array[++i];
2410                 }
2411                 used -= found;
2412                 array[used] = 0;
2413                 return *this;
2414         }
2415
2416
2417         //! Trims the ustring16.
2418         //! Removes the specified characters (by default, Latin-1 whitespace) from the beginning and the end of the ustring16.
2419         //! \param whitespace The characters that are to be considered as whitespace.
2420         //! \return A reference to our current string.
2421         ustring16<TAlloc>& trim(const ustring16<TAlloc>& whitespace = " \t\n\r")
2422         {
2423                 core::array<uchar32_t> utf32white = whitespace.toUTF32();
2424
2425                 // find start and end of the substring without the specified characters
2426                 const s32 begin = findFirstCharNotInList(utf32white.const_pointer(), whitespace.used + 1);
2427                 if (begin == -1)
2428                         return (*this="");
2429
2430                 const s32 end = findLastCharNotInList(utf32white.const_pointer(), whitespace.used + 1);
2431
2432                 return (*this = subString(begin, (end +1) - begin));
2433         }
2434
2435
2436         //! Erases a character from the ustring16.
2437         //! May be slow, because all elements following after the erased element have to be copied.
2438         //! \param index Index of element to be erased.
2439         //! \return A reference to our current string.
2440         ustring16<TAlloc>& erase(u32 index)
2441         {
2442                 _IRR_DEBUG_BREAK_IF(index>used) // access violation
2443
2444                 iterator i(*this, index);
2445
2446                 uchar32_t t = *i;
2447                 u32 len = (t > 0xFFFF ? 2 : 1);
2448
2449                 for (u32 j = static_cast<u32>(i.getPos()) + len; j <= used; ++j)
2450                         array[j - len] = array[j];
2451
2452                 used -= len;
2453                 array[used] = 0;
2454
2455                 return *this;
2456         }
2457
2458
2459         //! Validate the existing ustring16, checking for valid surrogate pairs and checking for proper termination.
2460         //! \return A reference to our current string.
2461         ustring16<TAlloc>& validate()
2462         {
2463                 // Validate all unicode characters.
2464                 for (u32 i=0; i<allocated; ++i)
2465                 {
2466                         // Terminate on existing null.
2467                         if (array[i] == 0)
2468                         {
2469                                 used = i;
2470                                 return *this;
2471                         }
2472                         if (UTF16_IS_SURROGATE(array[i]))
2473                         {
2474                                 if (((i+1) >= allocated) || UTF16_IS_SURROGATE_LO(array[i]))
2475                                         array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
2476                                 else if (UTF16_IS_SURROGATE_HI(array[i]) && !UTF16_IS_SURROGATE_LO(array[i+1]))
2477                                         array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
2478                                 ++i;
2479                         }
2480                         if (array[i] >= 0xFDD0 && array[i] <= 0xFDEF)
2481                                 array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
2482                 }
2483
2484                 // terminate
2485                 used = 0;
2486                 if (allocated > 0)
2487                 {
2488                         used = allocated - 1;
2489                         array[used] = 0;
2490                 }
2491                 return *this;
2492         }
2493
2494
2495         //! Gets the last char of the ustring16, or 0.
2496         //! \return The last char of the ustring16, or 0.
2497         uchar32_t lastChar() const
2498         {
2499                 if (used < 1)
2500                         return 0;
2501
2502                 if (UTF16_IS_SURROGATE_LO(array[used-1]))
2503                 {
2504                         // Make sure we have a paired surrogate.
2505                         if (used < 2)
2506                                 return 0;
2507
2508                         // Check for an invalid surrogate.
2509                         if (!UTF16_IS_SURROGATE_HI(array[used-2]))
2510                                 return 0;
2511
2512                         // Convert the surrogate pair into a single UTF-32 character.
2513                         return unicode::toUTF32(array[used-2], array[used-1]);
2514                 }
2515                 else
2516                 {
2517                         return array[used-1];
2518                 }
2519         }
2520
2521
2522         //! Split the ustring16 into parts.
2523         /** This method will split a ustring16 at certain delimiter characters
2524         into the container passed in as reference. The type of the container
2525         has to be given as template parameter. It must provide a push_back and
2526         a size method.
2527         \param ret The result container
2528         \param c C-style ustring16 of delimiter characters
2529         \param count Number of delimiter characters
2530         \param ignoreEmptyTokens Flag to avoid empty substrings in the result
2531         container. If two delimiters occur without a character in between, an
2532         empty substring would be placed in the result. If this flag is set,
2533         only non-empty strings are stored.
2534         \param keepSeparators Flag which allows to add the separator to the
2535         result ustring16. If this flag is true, the concatenation of the
2536         substrings results in the original ustring16. Otherwise, only the
2537         characters between the delimiters are returned.
2538         \return The number of resulting substrings
2539         */
2540         template<class container>
2541         u32 split(container& ret, const uchar32_t* const c, u32 count=1, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
2542         {
2543                 if (!c)
2544                         return 0;
2545
2546                 const_iterator i(*this);
2547                 const u32 oldSize=ret.size();
2548                 u32 pos = 0;
2549                 u32 lastpos = 0;
2550                 u32 lastpospos = 0;
2551                 bool lastWasSeparator = false;
2552                 while (!i.atEnd())
2553                 {
2554                         uchar32_t ch = *i;
2555                         bool foundSeparator = false;
2556                         for (u32 j=0; j<count; ++j)
2557                         {
2558                                 if (ch == c[j])
2559                                 {
2560                                         if ((!ignoreEmptyTokens || pos - lastpos != 0) &&
2561                                                         !lastWasSeparator)
2562                                         ret.push_back(ustring16<TAlloc>(&array[lastpospos], pos - lastpos));
2563                                         foundSeparator = true;
2564                                         lastpos = (keepSeparators ? pos : pos + 1);
2565                                         lastpospos = (keepSeparators ? i.getPos() : i.getPos() + 1);
2566                                         break;
2567                                 }
2568                         }
2569                         lastWasSeparator = foundSeparator;
2570                         ++pos;
2571                         ++i;
2572                 }
2573                 u32 s = size() + 1;
2574                 if (s > lastpos)
2575                         ret.push_back(ustring16<TAlloc>(&array[lastpospos], s - lastpos));
2576                 return ret.size()-oldSize;
2577         }
2578
2579
2580         //! Split the ustring16 into parts.
2581         /** This method will split a ustring16 at certain delimiter characters
2582         into the container passed in as reference. The type of the container
2583         has to be given as template parameter. It must provide a push_back and
2584         a size method.
2585         \param ret The result container
2586         \param c A unicode string of delimiter characters
2587         \param ignoreEmptyTokens Flag to avoid empty substrings in the result
2588         container. If two delimiters occur without a character in between, an
2589         empty substring would be placed in the result. If this flag is set,
2590         only non-empty strings are stored.
2591         \param keepSeparators Flag which allows to add the separator to the
2592         result ustring16. If this flag is true, the concatenation of the
2593         substrings results in the original ustring16. Otherwise, only the
2594         characters between the delimiters are returned.
2595         \return The number of resulting substrings
2596         */
2597         template<class container>
2598         u32 split(container& ret, const ustring16<TAlloc>& c, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
2599         {
2600                 core::array<uchar32_t> v = c.toUTF32();
2601                 return split(ret, v.pointer(), v.size(), ignoreEmptyTokens, keepSeparators);
2602         }
2603
2604
2605         //! Gets the size of the allocated memory buffer for the string.
2606         //! \return The size of the allocated memory buffer.
2607         u32 capacity() const
2608         {
2609                 return allocated;
2610         }
2611
2612
2613         //! Returns the raw number of UTF-16 code units in the string; each half of a surrogate pair is counted individually.
2614         //! \return The raw number of UTF-16 code units, excluding the trailing NUL.
2615         u32 size_raw() const
2616         {
2617                 return used;
2618         }
2619
2620
2621         //! Inserts a character into the string.
2622         //! \param c The character to insert.
2623         //! \param pos The position to insert the character.
2624         //! \return A reference to our current string.
2625         ustring16<TAlloc>& insert(uchar32_t c, u32 pos)
2626         {
2627                 u8 len = (c > 0xFFFF ? 2 : 1);
2628
2629                 if (used + len >= allocated)
2630                         reallocate(used + len);
2631
2632                 used += len;
2633
2634                 iterator iter(*this, pos);
2635                 for (u32 i = used - 2; i > iter.getPos(); --i)
2636                         array[i] = array[i - len];
2637
2638                 if (c > 0xFFFF)
2639                 {
2640                         // c will be multibyte, so split it up into a surrogate pair.
2641                         uchar16_t x = static_cast<uchar16_t>(c);
2642                         uchar16_t vh = UTF16_HI_SURROGATE | ((((c >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
2643                         uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
2644                         array[iter.getPos()] = vh;
2645                         array[iter.getPos()+1] = vl;
2646                 }
2647                 else
2648                 {
2649                         array[iter.getPos()] = static_cast<uchar16_t>(c);
2650                 }
2651                 array[used] = 0;
2652                 return *this;
2653         }
2654
2655
2656         //! Inserts a string into the string.
2657         //! \param c The string to insert.
2658         //! \param pos The position to insert the string.
2659         //! \return A reference to our current string.
2660         ustring16<TAlloc>& insert(const ustring16<TAlloc>& c, u32 pos)
2661         {
2662                 u32 len = c.size_raw();
2663                 if (len == 0) return *this;
2664
2665                 if (used + len >= allocated)
2666                         reallocate(used + len);
2667
2668                 used += len;
2669
2670                 iterator iter(*this, pos);
2671                 for (u32 i = used - 2; i > iter.getPos() + len; --i)
2672                         array[i] = array[i - len];
2673
2674                 const uchar16_t* s = c.c_str();
2675                 for (u32 i = 0; i < len; ++i)
2676                 {
2677                         array[pos++] = *s;
2678                         ++s;
2679                 }
2680
2681                 array[used] = 0;
2682                 return *this;
2683         }
2684
2685
2686         //! Inserts a character into the string.
2687         //! \param c The character to insert.
2688         //! \param pos The position to insert the character.
2689         //! \return A reference to our current string.
2690         ustring16<TAlloc>& insert_raw(uchar16_t c, u32 pos)
2691         {
2692                 if (used + 1 >= allocated)
2693                         reallocate(used + 1);
2694
2695                 ++used;
2696
2697                 for (u32 i = used - 1; i > pos; --i)
2698                         array[i] = array[i - 1];
2699
2700                 array[pos] = c;
2701                 array[used] = 0;
2702                 return *this;
2703         }
2704
2705
2706         //! Removes a character from string.
2707         //! \param pos Position of the character to remove.
2708         //! \return A reference to our current string.
2709         ustring16<TAlloc>& erase_raw(u32 pos)
2710         {
2711                 for (u32 i=pos; i<=used; ++i)
2712                 {
2713                         array[i] = array[i + 1];
2714                 }
2715                 --used;
2716                 array[used] = 0;
2717                 return *this;
2718         }
2719
2720
2721         //! Replaces a character in the string.
2722         //! \param c The new character.
2723         //! \param pos The position of the character to replace.
2724         //! \return A reference to our current string.
2725         ustring16<TAlloc>& replace_raw(uchar16_t c, u32 pos)
2726         {
2727                 array[pos] = c;
2728                 return *this;
2729         }
2730
2731
2732         //! Returns an iterator to the beginning of the string.
2733         //! \return An iterator to the beginning of the string.
2734         iterator begin()
2735         {
2736                 iterator i(*this, 0);
2737                 return i;
2738         }
2739
2740
2741         //! Returns an iterator to the beginning of the string.
2742         //! \return An iterator to the beginning of the string.
2743         const_iterator begin() const
2744         {
2745                 const_iterator i(*this, 0);
2746                 return i;
2747         }
2748
2749
2750         //! Returns an iterator to the beginning of the string.
2751         //! \return An iterator to the beginning of the string.
2752         const_iterator cbegin() const
2753         {
2754                 const_iterator i(*this, 0);
2755                 return i;
2756         }
2757
2758
2759         //! Returns an iterator to the end of the string.
2760         //! \return An iterator to the end of the string.
2761         iterator end()
2762         {
2763                 iterator i(*this, 0);
2764                 i.toEnd();
2765                 return i;
2766         }
2767
2768
2769         //! Returns an iterator to the end of the string.
2770         //! \return An iterator to the end of the string.
2771         const_iterator end() const
2772         {
2773                 const_iterator i(*this, 0);
2774                 i.toEnd();
2775                 return i;
2776         }
2777
2778
2779         //! Returns an iterator to the end of the string.
2780         //! \return An iterator to the end of the string.
2781         const_iterator cend() const
2782         {
2783                 const_iterator i(*this, 0);
2784                 i.toEnd();
2785                 return i;
2786         }
2787
2788
2789         //! Converts the string to a UTF-8 encoded string.
2790         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2791         //! \return A string containing the UTF-8 encoded string.
2792         core::string<uchar8_t> toUTF8_s(const bool addBOM = false) const
2793         {
2794                 core::string<uchar8_t> ret;
2795                 ret.reserve(used * 4 + (addBOM ? unicode::BOM_UTF8_LEN : 0) + 1);
2796                 const_iterator iter(*this, 0);
2797
2798                 // Add the byte order mark if the user wants it.
2799                 if (addBOM)
2800                 {
2801                         ret.append(unicode::BOM_ENCODE_UTF8[0]);
2802                         ret.append(unicode::BOM_ENCODE_UTF8[1]);
2803                         ret.append(unicode::BOM_ENCODE_UTF8[2]);
2804                 }
2805
2806                 while (!iter.atEnd())
2807                 {
2808                         uchar32_t c = *iter;
2809                         if (c > 0xFFFF)
2810                         {       // 4 bytes
2811                                 uchar8_t b1 = (0x1E << 3) | ((c >> 18) & 0x7);
2812                                 uchar8_t b2 = (0x2 << 6) | ((c >> 12) & 0x3F);
2813                                 uchar8_t b3 = (0x2 << 6) | ((c >> 6) & 0x3F);
2814                                 uchar8_t b4 = (0x2 << 6) | (c & 0x3F);
2815                                 ret.append(b1);
2816                                 ret.append(b2);
2817                                 ret.append(b3);
2818                                 ret.append(b4);
2819                         }
2820                         else if (c > 0x7FF)
2821                         {       // 3 bytes
2822                                 uchar8_t b1 = (0xE << 4) | ((c >> 12) & 0xF);
2823                                 uchar8_t b2 = (0x2 << 6) | ((c >> 6) & 0x3F);
2824                                 uchar8_t b3 = (0x2 << 6) | (c & 0x3F);
2825                                 ret.append(b1);
2826                                 ret.append(b2);
2827                                 ret.append(b3);
2828                         }
2829                         else if (c > 0x7F)
2830                         {       // 2 bytes
2831                                 uchar8_t b1 = (0x6 << 5) | ((c >> 6) & 0x1F);
2832                                 uchar8_t b2 = (0x2 << 6) | (c & 0x3F);
2833                                 ret.append(b1);
2834                                 ret.append(b2);
2835                         }
2836                         else
2837                         {       // 1 byte
2838                                 ret.append(static_cast<uchar8_t>(c));
2839                         }
2840                         ++iter;
2841                 }
2842                 return ret;
2843         }
2844
2845
2846         //! Converts the string to a UTF-8 encoded string array.
2847         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2848         //! \return An array containing the UTF-8 encoded string.
2849         core::array<uchar8_t> toUTF8(const bool addBOM = false) const
2850         {
2851                 core::array<uchar8_t> ret(used * 4 + (addBOM ? unicode::BOM_UTF8_LEN : 0) + 1);
2852                 const_iterator iter(*this, 0);
2853
2854                 // Add the byte order mark if the user wants it.
2855                 if (addBOM)
2856                 {
2857                         ret.push_back(unicode::BOM_ENCODE_UTF8[0]);
2858                         ret.push_back(unicode::BOM_ENCODE_UTF8[1]);
2859                         ret.push_back(unicode::BOM_ENCODE_UTF8[2]);
2860                 }
2861
2862                 while (!iter.atEnd())
2863                 {
2864                         uchar32_t c = *iter;
2865                         if (c > 0xFFFF)
2866                         {       // 4 bytes
2867                                 uchar8_t b1 = (0x1E << 3) | ((c >> 18) & 0x7);
2868                                 uchar8_t b2 = (0x2 << 6) | ((c >> 12) & 0x3F);
2869                                 uchar8_t b3 = (0x2 << 6) | ((c >> 6) & 0x3F);
2870                                 uchar8_t b4 = (0x2 << 6) | (c & 0x3F);
2871                                 ret.push_back(b1);
2872                                 ret.push_back(b2);
2873                                 ret.push_back(b3);
2874                                 ret.push_back(b4);
2875                         }
2876                         else if (c > 0x7FF)
2877                         {       // 3 bytes
2878                                 uchar8_t b1 = (0xE << 4) | ((c >> 12) & 0xF);
2879                                 uchar8_t b2 = (0x2 << 6) | ((c >> 6) & 0x3F);
2880                                 uchar8_t b3 = (0x2 << 6) | (c & 0x3F);
2881                                 ret.push_back(b1);
2882                                 ret.push_back(b2);
2883                                 ret.push_back(b3);
2884                         }
2885                         else if (c > 0x7F)
2886                         {       // 2 bytes
2887                                 uchar8_t b1 = (0x6 << 5) | ((c >> 6) & 0x1F);
2888                                 uchar8_t b2 = (0x2 << 6) | (c & 0x3F);
2889                                 ret.push_back(b1);
2890                                 ret.push_back(b2);
2891                         }
2892                         else
2893                         {       // 1 byte
2894                                 ret.push_back(static_cast<uchar8_t>(c));
2895                         }
2896                         ++iter;
2897                 }
2898                 ret.push_back(0);
2899                 return ret;
2900         }
2901
2902
2903 #ifdef USTRING_CPP0X_NEWLITERALS        // C++0x
2904         //! Converts the string to a UTF-16 encoded string.
2905         //! \param endian The desired endianness of the string.
2906         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2907         //! \return A string containing the UTF-16 encoded string.
2908         core::string<char16_t> toUTF16_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2909         {
2910                 core::string<char16_t> ret;
2911                 ret.reserve(used + (addBOM ? unicode::BOM_UTF16_LEN : 0) + 1);
2912
2913                 // Add the BOM if specified.
2914                 if (addBOM)
2915                 {
2916                         if (endian == unicode::EUTFEE_NATIVE)
2917                                 ret[0] = unicode::BOM;
2918                         else if (endian == unicode::EUTFEE_LITTLE)
2919                         {
2920                                 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ret.c_str());
2921                                 *ptr8++ = unicode::BOM_ENCODE_UTF16_LE[0];
2922                                 *ptr8 = unicode::BOM_ENCODE_UTF16_LE[1];
2923                         }
2924                         else
2925                         {
2926                                 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ret.c_str());
2927                                 *ptr8++ = unicode::BOM_ENCODE_UTF16_BE[0];
2928                                 *ptr8 = unicode::BOM_ENCODE_UTF16_BE[1];
2929                         }
2930                 }
2931
2932                 ret.append(array);
2933                 if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
2934                 {
2935                         char16_t* ptr = ret.c_str();
2936                         for (u32 i = 0; i < ret.size(); ++i)
2937                                 *ptr++ = unicode::swapEndian16(*ptr);
2938                 }
2939                 return ret;
2940         }
2941 #endif
2942
2943
2944         //! Converts the string to a UTF-16 encoded string array.
2945         //! Unfortunately, no toUTF16_s() version exists due to limitations with Irrlicht's string class.
2946         //! \param endian The desired endianness of the string.
2947         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2948         //! \return An array containing the UTF-16 encoded string.
2949         core::array<uchar16_t> toUTF16(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2950         {
2951                 core::array<uchar16_t> ret(used + (addBOM ? unicode::BOM_UTF16_LEN : 0) + 1);
2952                 uchar16_t* ptr = ret.pointer();
2953
2954                 // Add the BOM if specified.
2955                 if (addBOM)
2956                 {
2957                         if (endian == unicode::EUTFEE_NATIVE)
2958                                 *ptr = unicode::BOM;
2959                         else if (endian == unicode::EUTFEE_LITTLE)
2960                         {
2961                                 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ptr);
2962                                 *ptr8++ = unicode::BOM_ENCODE_UTF16_LE[0];
2963                                 *ptr8 = unicode::BOM_ENCODE_UTF16_LE[1];
2964                         }
2965                         else
2966                         {
2967                                 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ptr);
2968                                 *ptr8++ = unicode::BOM_ENCODE_UTF16_BE[0];
2969                                 *ptr8 = unicode::BOM_ENCODE_UTF16_BE[1];
2970                         }
2971                         ++ptr;
2972                 }
2973
2974                 memcpy((void*)ptr, (void*)array, used * sizeof(uchar16_t));
2975                 if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
2976                 {
2977                         for (u32 i = 0; i <= used; ++i)
2978                                 ptr[i] = unicode::swapEndian16(ptr[i]);
2979                 }
2980                 ret.set_used(used + (addBOM ? unicode::BOM_UTF16_LEN : 0));
2981                 ret.push_back(0);
2982                 return ret;
2983         }
2984
2985
2986 #ifdef USTRING_CPP0X_NEWLITERALS        // C++0x
2987         //! Converts the string to a UTF-32 encoded string.
2988         //! \param endian The desired endianness of the string.
2989         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2990         //! \return A string containing the UTF-32 encoded string.
2991         core::string<char32_t> toUTF32_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2992         {
2993                 core::string<char32_t> ret;
2994                 ret.reserve(size() + 1 + (addBOM ? unicode::BOM_UTF32_LEN : 0));
2995                 const_iterator iter(*this, 0);
2996
2997                 // Add the BOM if specified.
2998                 if (addBOM)
2999                 {
3000                         if (endian == unicode::EUTFEE_NATIVE)
3001                                 ret.append(unicode::BOM);
3002                         else
3003                         {
3004                                 union
3005                                 {
3006                                         uchar32_t full;
3007                                         u8 chunk[4];
3008                                 } t;
3009
3010                                 if (endian == unicode::EUTFEE_LITTLE)
3011                                 {
3012                                         t.chunk[0] = unicode::BOM_ENCODE_UTF32_LE[0];
3013                                         t.chunk[1] = unicode::BOM_ENCODE_UTF32_LE[1];
3014                                         t.chunk[2] = unicode::BOM_ENCODE_UTF32_LE[2];
3015                                         t.chunk[3] = unicode::BOM_ENCODE_UTF32_LE[3];
3016                                 }
3017                                 else
3018                                 {
3019                                         t.chunk[0] = unicode::BOM_ENCODE_UTF32_BE[0];
3020                                         t.chunk[1] = unicode::BOM_ENCODE_UTF32_BE[1];
3021                                         t.chunk[2] = unicode::BOM_ENCODE_UTF32_BE[2];
3022                                         t.chunk[3] = unicode::BOM_ENCODE_UTF32_BE[3];
3023                                 }
3024                                 ret.append(t.full);
3025                         }
3026                 }
3027
3028                 while (!iter.atEnd())
3029                 {
3030                         uchar32_t c = *iter;
3031                         if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
3032                                 c = unicode::swapEndian32(c);
3033                         ret.append(c);
3034                         ++iter;
3035                 }
3036                 return ret;
3037         }
3038 #endif
3039
3040
3041         //! Converts the string to a UTF-32 encoded string array.
3042         //! Unfortunately, no toUTF32_s() version exists due to limitations with Irrlicht's string class.
3043         //! \param endian The desired endianness of the string.
3044         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3045         //! \return An array containing the UTF-32 encoded string.
3046         core::array<uchar32_t> toUTF32(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3047         {
3048                 core::array<uchar32_t> ret(size() + (addBOM ? unicode::BOM_UTF32_LEN : 0) + 1);
3049                 const_iterator iter(*this, 0);
3050
3051                 // Add the BOM if specified.
3052                 if (addBOM)
3053                 {
3054                         if (endian == unicode::EUTFEE_NATIVE)
3055                                 ret.push_back(unicode::BOM);
3056                         else
3057                         {
3058                                 union
3059                                 {
3060                                         uchar32_t full;
3061                                         u8 chunk[4];
3062                                 } t;
3063
3064                                 if (endian == unicode::EUTFEE_LITTLE)
3065                                 {
3066                                         t.chunk[0] = unicode::BOM_ENCODE_UTF32_LE[0];
3067                                         t.chunk[1] = unicode::BOM_ENCODE_UTF32_LE[1];
3068                                         t.chunk[2] = unicode::BOM_ENCODE_UTF32_LE[2];
3069                                         t.chunk[3] = unicode::BOM_ENCODE_UTF32_LE[3];
3070                                 }
3071                                 else
3072                                 {
3073                                         t.chunk[0] = unicode::BOM_ENCODE_UTF32_BE[0];
3074                                         t.chunk[1] = unicode::BOM_ENCODE_UTF32_BE[1];
3075                                         t.chunk[2] = unicode::BOM_ENCODE_UTF32_BE[2];
3076                                         t.chunk[3] = unicode::BOM_ENCODE_UTF32_BE[3];
3077                                 }
3078                                 ret.push_back(t.full);
3079                         }
3080                 }
3081                 ret.push_back(0);
3082
3083                 while (!iter.atEnd())
3084                 {
3085                         uchar32_t c = *iter;
3086                         if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
3087                                 c = unicode::swapEndian32(c);
3088                         ret.push_back(c);
3089                         ++iter;
3090                 }
3091                 return ret;
3092         }
3093
3094
3095         //! Converts the string to a wchar_t encoded string.
3096         /** The size of a wchar_t changes depending on the platform.  This function will store a
3097         correct UTF-8, -16, or -32 encoded string depending on the size of a wchar_t. **/
3098         //! \param endian The desired endianness of the string.
3099         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3100         //! \return A string containing the wchar_t encoded string.
3101         core::string<wchar_t> toWCHAR_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3102         {
3103                 if (sizeof(wchar_t) == 4)
3104                 {
3105                         core::array<uchar32_t> a(toUTF32(endian, addBOM));
3106                         core::stringw ret(a.pointer());
3107                         return ret;
3108                 }
3109                 else if (sizeof(wchar_t) == 2)
3110                 {
3111                         if (endian == unicode::EUTFEE_NATIVE && addBOM == false)
3112                         {
3113                                 core::stringw ret(array);
3114                                 return ret;
3115                         }
3116                         else
3117                         {
3118                                 core::array<uchar16_t> a(toUTF16(endian, addBOM));
3119                                 core::stringw ret(a.pointer());
3120                                 return ret;
3121                         }
3122                 }
3123                 else if (sizeof(wchar_t) == 1)
3124                 {
3125                         core::array<uchar8_t> a(toUTF8(addBOM));
3126                         core::stringw ret(a.pointer());
3127                         return ret;
3128                 }
3129
3130                 // Shouldn't happen.
3131                 return core::stringw();
3132         }
3133
3134
3135         //! Converts the string to a wchar_t encoded string array.
3136         /** The size of a wchar_t changes depending on the platform.  This function will store a
3137         correct UTF-8, -16, or -32 encoded string depending on the size of a wchar_t. **/
3138         //! \param endian The desired endianness of the string.
3139         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3140         //! \return An array containing the wchar_t encoded string.
3141         core::array<wchar_t> toWCHAR(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3142         {
3143                 if (sizeof(wchar_t) == 4)
3144                 {
3145                         core::array<uchar32_t> a(toUTF32(endian, addBOM));
3146                         core::array<wchar_t> ret(a.size());
3147                         ret.set_used(a.size());
3148                         memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar32_t));
3149                         return ret;
3150                 }
3151                 if (sizeof(wchar_t) == 2)
3152                 {
3153                         if (endian == unicode::EUTFEE_NATIVE && addBOM == false)
3154                         {
3155                                 core::array<wchar_t> ret(used);
3156                                 ret.set_used(used);
3157                                 memcpy((void*)ret.pointer(), (void*)array, used * sizeof(uchar16_t));
3158                                 return ret;
3159                         }
3160                         else
3161                         {
3162                                 core::array<uchar16_t> a(toUTF16(endian, addBOM));
3163                                 core::array<wchar_t> ret(a.size());
3164                                 ret.set_used(a.size());
3165                                 memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar16_t));
3166                                 return ret;
3167                         }
3168                 }
3169                 if (sizeof(wchar_t) == 1)
3170                 {
3171                         core::array<uchar8_t> a(toUTF8(addBOM));
3172                         core::array<wchar_t> ret(a.size());
3173                         ret.set_used(a.size());
3174                         memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar8_t));
3175                         return ret;
3176                 }
3177
3178                 // Shouldn't happen.
3179                 return core::array<wchar_t>();
3180         }
3181
3182         //! Converts the string to a properly encoded io::path string.
3183         //! \param endian The desired endianness of the string.
3184         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3185         //! \return An io::path string containing the properly encoded string.
3186         io::path toPATH_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3187         {
3188 #if defined(_IRR_WCHAR_FILESYSTEM)
3189                 return toWCHAR_s(endian, addBOM);
3190 #else
3191                 return toUTF8_s(addBOM);
3192 #endif
3193         }
3194
3195         //! Loads an unknown stream of data.
3196         //! Will attempt to determine if the stream is unicode data.  Useful for loading from files.
3197         //! \param data The data stream to load from.
3198         //! \param data_size The length of the data string.
3199         //! \return A reference to our current string.
3200         ustring16<TAlloc>& loadDataStream(const char* data, size_t data_size)
3201         {
3202                 // Clear our string.
3203                 *this = "";
3204                 if (!data)
3205                         return *this;
3206
3207                 unicode::EUTF_ENCODE e = unicode::determineUnicodeBOM(data);
3208                 switch (e)
3209                 {
3210                         default:
3211                         case unicode::EUTFE_UTF8:
3212                                 append((uchar8_t*)data, data_size);
3213                                 break;
3214
3215                         case unicode::EUTFE_UTF16:
3216                         case unicode::EUTFE_UTF16_BE:
3217                         case unicode::EUTFE_UTF16_LE:
3218                                 append((uchar16_t*)data, data_size / 2);
3219                                 break;
3220
3221                         case unicode::EUTFE_UTF32:
3222                         case unicode::EUTFE_UTF32_BE:
3223                         case unicode::EUTFE_UTF32_LE:
3224                                 append((uchar32_t*)data, data_size / 4);
3225                                 break;
3226                 }
3227
3228                 return *this;
3229         }
3230
3231         //! Gets the encoding of the Unicode string this class contains.
3232         //! \return An enum describing the current encoding of this string.
3233         const unicode::EUTF_ENCODE getEncoding() const
3234         {
3235                 return encoding;
3236         }
3237
3238         //! Gets the endianness of the Unicode string this class contains.
3239         //! \return An enum describing the endianness of this string.
3240         const unicode::EUTF_ENDIAN getEndianness() const
3241         {
3242                 if (encoding == unicode::EUTFE_UTF16_LE ||
3243                         encoding == unicode::EUTFE_UTF32_LE)
3244                         return unicode::EUTFEE_LITTLE;
3245                 else return unicode::EUTFEE_BIG;
3246         }
3247
3248 private:
3249
3250         //! Reallocate the string, making it bigger or smaller.
3251         //! \param new_size The new size of the string.
3252         void reallocate(u32 new_size)
3253         {
3254                 uchar16_t* old_array = array;
3255
3256                 array = allocator.allocate(new_size + 1); //new u16[new_size];
3257                 allocated = new_size + 1;
3258                 if (old_array == 0) return;
3259
3260                 u32 amount = used < new_size ? used : new_size;
3261                 for (u32 i=0; i<=amount; ++i)
3262                         array[i] = old_array[i];
3263
3264                 if (allocated <= used)
3265                         used = allocated - 1;
3266
3267                 array[used] = 0;
3268
3269                 allocator.deallocate(old_array); // delete [] old_array;
3270         }
3271
        //--- member variables

        //! Buffer of UTF-16 units obtained from 'allocator'; array[used] holds the terminating zero.
        uchar16_t* array;
        //! Encoding tag returned by getEncoding() and consulted by getEndianness().
        unicode::EUTF_ENCODE encoding;
        //! Number of units in 'array', including the slot for the terminator.
        u32 allocated;
        //! Number of units currently in use, not counting the terminator.
        u32 used;
        //! Allocator used to obtain and release 'array'.
        TAlloc allocator;
        //irrAllocator<uchar16_t> allocator;
3280 };
3281
//! Convenience name for ustring16 instantiated with irrAllocator<uchar16_t>.
typedef ustring16<irrAllocator<uchar16_t> > ustring;
3283
3284
3285 //! Appends two ustring16s.
3286 template <typename TAlloc>
3287 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const ustring16<TAlloc>& right)
3288 {
3289         ustring16<TAlloc> ret(left);
3290         ret += right;
3291         return ret;
3292 }
3293
3294
3295 //! Appends a ustring16 and a null-terminated unicode string.
3296 template <typename TAlloc, class B>
3297 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const B* const right)
3298 {
3299         ustring16<TAlloc> ret(left);
3300         ret += right;
3301         return ret;
3302 }
3303
3304
3305 //! Appends a ustring16 and a null-terminated unicode string.
3306 template <class B, typename TAlloc>
3307 inline ustring16<TAlloc> operator+(const B* const left, const ustring16<TAlloc>& right)
3308 {
3309         ustring16<TAlloc> ret(left);
3310         ret += right;
3311         return ret;
3312 }
3313
3314
3315 //! Appends a ustring16 and an Irrlicht string.
3316 template <typename TAlloc, typename B, typename BAlloc>
3317 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const string<B, BAlloc>& right)
3318 {
3319         ustring16<TAlloc> ret(left);
3320         ret += right;
3321         return ret;
3322 }
3323
3324
3325 //! Appends a ustring16 and an Irrlicht string.
3326 template <typename TAlloc, typename B, typename BAlloc>
3327 inline ustring16<TAlloc> operator+(const string<B, BAlloc>& left, const ustring16<TAlloc>& right)
3328 {
3329         ustring16<TAlloc> ret(left);
3330         ret += right;
3331         return ret;
3332 }
3333
3334
3335 //! Appends a ustring16 and a std::basic_string.
3336 template <typename TAlloc, typename B, typename A, typename BAlloc>
3337 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const std::basic_string<B, A, BAlloc>& right)
3338 {
3339         ustring16<TAlloc> ret(left);
3340         ret += right;
3341         return ret;
3342 }
3343
3344
3345 //! Appends a ustring16 and a std::basic_string.
3346 template <typename TAlloc, typename B, typename A, typename BAlloc>
3347 inline ustring16<TAlloc> operator+(const std::basic_string<B, A, BAlloc>& left, const ustring16<TAlloc>& right)
3348 {
3349         ustring16<TAlloc> ret(left);
3350         ret += right;
3351         return ret;
3352 }
3353
3354
3355 //! Appends a ustring16 and a char.
3356 template <typename TAlloc>
3357 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const char right)
3358 {
3359         ustring16<TAlloc> ret(left);
3360         ret += right;
3361         return ret;
3362 }
3363
3364
3365 //! Appends a ustring16 and a char.
3366 template <typename TAlloc>
3367 inline ustring16<TAlloc> operator+(const char left, const ustring16<TAlloc>& right)
3368 {
3369         ustring16<TAlloc> ret(left);
3370         ret += right;
3371         return ret;
3372 }
3373
3374
3375 #ifdef USTRING_CPP0X_NEWLITERALS
3376 //! Appends a ustring16 and a uchar32_t.
3377 template <typename TAlloc>
3378 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const uchar32_t right)
3379 {
3380         ustring16<TAlloc> ret(left);
3381         ret += right;
3382         return ret;
3383 }
3384
3385
3386 //! Appends a ustring16 and a uchar32_t.
3387 template <typename TAlloc>
3388 inline ustring16<TAlloc> operator+(const uchar32_t left, const ustring16<TAlloc>& right)
3389 {
3390         ustring16<TAlloc> ret(left);
3391         ret += right;
3392         return ret;
3393 }
3394 #endif
3395
3396
3397 //! Appends a ustring16 and a short.
3398 template <typename TAlloc>
3399 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const short right)
3400 {
3401         ustring16<TAlloc> ret(left);
3402         ret += core::stringc(right);
3403         return ret;
3404 }
3405
3406
3407 //! Appends a ustring16 and a short.
3408 template <typename TAlloc>
3409 inline ustring16<TAlloc> operator+(const short left, const ustring16<TAlloc>& right)
3410 {
3411         ustring16<TAlloc> ret((core::stringc(left)));
3412         ret += right;
3413         return ret;
3414 }
3415
3416
3417 //! Appends a ustring16 and an unsigned short.
3418 template <typename TAlloc>
3419 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned short right)
3420 {
3421         ustring16<TAlloc> ret(left);
3422         ret += core::stringc(right);
3423         return ret;
3424 }
3425
3426
3427 //! Appends a ustring16 and an unsigned short.
3428 template <typename TAlloc>
3429 inline ustring16<TAlloc> operator+(const unsigned short left, const ustring16<TAlloc>& right)
3430 {
3431         ustring16<TAlloc> ret((core::stringc(left)));
3432         ret += right;
3433         return ret;
3434 }
3435
3436
3437 //! Appends a ustring16 and an int.
3438 template <typename TAlloc>
3439 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const int right)
3440 {
3441         ustring16<TAlloc> ret(left);
3442         ret += core::stringc(right);
3443         return ret;
3444 }
3445
3446
3447 //! Appends a ustring16 and an int.
3448 template <typename TAlloc>
3449 inline ustring16<TAlloc> operator+(const int left, const ustring16<TAlloc>& right)
3450 {
3451         ustring16<TAlloc> ret((core::stringc(left)));
3452         ret += right;
3453         return ret;
3454 }
3455
3456
3457 //! Appends a ustring16 and an unsigned int.
3458 template <typename TAlloc>
3459 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned int right)
3460 {
3461         ustring16<TAlloc> ret(left);
3462         ret += core::stringc(right);
3463         return ret;
3464 }
3465
3466
3467 //! Appends a ustring16 and an unsigned int.
3468 template <typename TAlloc>
3469 inline ustring16<TAlloc> operator+(const unsigned int left, const ustring16<TAlloc>& right)
3470 {
3471         ustring16<TAlloc> ret((core::stringc(left)));
3472         ret += right;
3473         return ret;
3474 }
3475
3476
3477 //! Appends a ustring16 and a long.
3478 template <typename TAlloc>
3479 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const long right)
3480 {
3481         ustring16<TAlloc> ret(left);
3482         ret += core::stringc(right);
3483         return ret;
3484 }
3485
3486
3487 //! Appends a ustring16 and a long.
3488 template <typename TAlloc>
3489 inline ustring16<TAlloc> operator+(const long left, const ustring16<TAlloc>& right)
3490 {
3491         ustring16<TAlloc> ret((core::stringc(left)));
3492         ret += right;
3493         return ret;
3494 }
3495
3496
3497 //! Appends a ustring16 and an unsigned long.
3498 template <typename TAlloc>
3499 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned long right)
3500 {
3501         ustring16<TAlloc> ret(left);
3502         ret += core::stringc(right);
3503         return ret;
3504 }
3505
3506
3507 //! Appends a ustring16 and an unsigned long.
3508 template <typename TAlloc>
3509 inline ustring16<TAlloc> operator+(const unsigned long left, const ustring16<TAlloc>& right)
3510 {
3511         ustring16<TAlloc> ret((core::stringc(left)));
3512         ret += right;
3513         return ret;
3514 }
3515
3516
3517 //! Appends a ustring16 and a float.
3518 template <typename TAlloc>
3519 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const float right)
3520 {
3521         ustring16<TAlloc> ret(left);
3522         ret += core::stringc(right);
3523         return ret;
3524 }
3525
3526
3527 //! Appends a ustring16 and a float.
3528 template <typename TAlloc>
3529 inline ustring16<TAlloc> operator+(const float left, const ustring16<TAlloc>& right)
3530 {
3531         ustring16<TAlloc> ret((core::stringc(left)));
3532         ret += right;
3533         return ret;
3534 }
3535
3536
3537 //! Appends a ustring16 and a double.
3538 template <typename TAlloc>
3539 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const double right)
3540 {
3541         ustring16<TAlloc> ret(left);
3542         ret += core::stringc(right);
3543         return ret;
3544 }
3545
3546
3547 //! Appends a ustring16 and a double.
3548 template <typename TAlloc>
3549 inline ustring16<TAlloc> operator+(const double left, const ustring16<TAlloc>& right)
3550 {
3551         ustring16<TAlloc> ret((core::stringc(left)));
3552         ret += right;
3553         return ret;
3554 }
3555
3556
3557 #ifdef USTRING_CPP0X
3558 //! Appends two ustring16s.
3559 template <typename TAlloc>
3560 inline ustring16<TAlloc>&& operator+(const ustring16<TAlloc>& left, ustring16<TAlloc>&& right)
3561 {
3562         //std::cout << "MOVE operator+(&, &&)" << std::endl;
3563         right.insert(left, 0);
3564         return std::move(right);
3565 }
3566
3567
3568 //! Appends two ustring16s.
3569 template <typename TAlloc>
3570 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const ustring16<TAlloc>& right)
3571 {
3572         //std::cout << "MOVE operator+(&&, &)" << std::endl;
3573         left.append(right);
3574         return std::move(left);
3575 }
3576
3577
3578 //! Appends two ustring16s.
3579 template <typename TAlloc>
3580 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, ustring16<TAlloc>&& right)
3581 {
3582         //std::cout << "MOVE operator+(&&, &&)" << std::endl;
3583         if ((right.size_raw() <= left.capacity() - left.size_raw()) ||
3584                 (right.capacity() - right.size_raw() < left.size_raw()))
3585         {
3586                 left.append(right);
3587                 return std::move(left);
3588         }
3589         else
3590         {
3591                 right.insert(left, 0);
3592                 return std::move(right);
3593         }
3594 }
3595
3596
3597 //! Appends a ustring16 and a null-terminated unicode string.
3598 template <typename TAlloc, class B>
3599 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const B* const right)
3600 {
3601         //std::cout << "MOVE operator+(&&, B*)" << std::endl;
3602         left.append(right);
3603         return std::move(left);
3604 }
3605
3606
3607 //! Appends a ustring16 and a null-terminated unicode string.
3608 template <class B, typename TAlloc>
3609 inline ustring16<TAlloc>&& operator+(const B* const left, ustring16<TAlloc>&& right)
3610 {
3611         //std::cout << "MOVE operator+(B*, &&)" << std::endl;
3612         right.insert(left, 0);
3613         return std::move(right);
3614 }
3615
3616
3617 //! Appends a ustring16 and an Irrlicht string.
3618 template <typename TAlloc, typename B, typename BAlloc>
3619 inline ustring16<TAlloc>&& operator+(const string<B, BAlloc>& left, ustring16<TAlloc>&& right)
3620 {
3621         //std::cout << "MOVE operator+(&, &&)" << std::endl;
3622         right.insert(left, 0);
3623         return std::move(right);
3624 }
3625
3626
3627 //! Appends a ustring16 and an Irrlicht string.
3628 template <typename TAlloc, typename B, typename BAlloc>
3629 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const string<B, BAlloc>& right)
3630 {
3631         //std::cout << "MOVE operator+(&&, &)" << std::endl;
3632         left.append(right);
3633         return std::move(left);
3634 }
3635
3636
3637 //! Appends a ustring16 and a std::basic_string.
3638 template <typename TAlloc, typename B, typename A, typename BAlloc>
3639 inline ustring16<TAlloc>&& operator+(const std::basic_string<B, A, BAlloc>& left, ustring16<TAlloc>&& right)
3640 {
3641         //std::cout << "MOVE operator+(&, &&)" << std::endl;
3642         right.insert(core::ustring16<TAlloc>(left), 0);
3643         return std::move(right);
3644 }
3645
3646
3647 //! Appends a ustring16 and a std::basic_string.
3648 template <typename TAlloc, typename B, typename A, typename BAlloc>
3649 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const std::basic_string<B, A, BAlloc>& right)
3650 {
3651         //std::cout << "MOVE operator+(&&, &)" << std::endl;
3652         left.append(right);
3653         return std::move(left);
3654 }
3655
3656
3657 //! Appends a ustring16 and a char.
3658 template <typename TAlloc>
3659 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const char right)
3660 {
3661         left.append((uchar32_t)right);
3662         return std::move(left);
3663 }
3664
3665
3666 //! Appends a ustring16 and a char.
3667 template <typename TAlloc>
3668 inline ustring16<TAlloc> operator+(const char left, ustring16<TAlloc>&& right)
3669 {
3670         right.insert((uchar32_t)left, 0);
3671         return std::move(right);
3672 }
3673
3674
3675 #ifdef USTRING_CPP0X_NEWLITERALS
3676 //! Appends a ustring16 and a uchar32_t.
3677 template <typename TAlloc>
3678 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const uchar32_t right)
3679 {
3680         left.append(right);
3681         return std::move(left);
3682 }
3683
3684
3685 //! Appends a ustring16 and a uchar32_t.
3686 template <typename TAlloc>
3687 inline ustring16<TAlloc> operator+(const uchar32_t left, ustring16<TAlloc>&& right)
3688 {
3689         right.insert(left, 0);
3690         return std::move(right);
3691 }
3692 #endif
3693
3694
3695 //! Appends a ustring16 and a short.
3696 template <typename TAlloc>
3697 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const short right)
3698 {
3699         left.append(core::stringc(right));
3700         return std::move(left);
3701 }
3702
3703
3704 //! Appends a ustring16 and a short.
3705 template <typename TAlloc>
3706 inline ustring16<TAlloc> operator+(const short left, ustring16<TAlloc>&& right)
3707 {
3708         right.insert(core::stringc(left), 0);
3709         return std::move(right);
3710 }
3711
3712
3713 //! Appends a ustring16 and an unsigned short.
3714 template <typename TAlloc>
3715 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned short right)
3716 {
3717         left.append(core::stringc(right));
3718         return std::move(left);
3719 }
3720
3721
3722 //! Appends a ustring16 and an unsigned short.
3723 template <typename TAlloc>
3724 inline ustring16<TAlloc> operator+(const unsigned short left, ustring16<TAlloc>&& right)
3725 {
3726         right.insert(core::stringc(left), 0);
3727         return std::move(right);
3728 }
3729
3730
3731 //! Appends a ustring16 and an int.
3732 template <typename TAlloc>
3733 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const int right)
3734 {
3735         left.append(core::stringc(right));
3736         return std::move(left);
3737 }
3738
3739
3740 //! Appends a ustring16 and an int.
3741 template <typename TAlloc>
3742 inline ustring16<TAlloc> operator+(const int left, ustring16<TAlloc>&& right)
3743 {
3744         right.insert(core::stringc(left), 0);
3745         return std::move(right);
3746 }
3747
3748
3749 //! Appends a ustring16 and an unsigned int.
3750 template <typename TAlloc>
3751 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned int right)
3752 {
3753         left.append(core::stringc(right));
3754         return std::move(left);
3755 }
3756
3757
3758 //! Appends a ustring16 and an unsigned int.
3759 template <typename TAlloc>
3760 inline ustring16<TAlloc> operator+(const unsigned int left, ustring16<TAlloc>&& right)
3761 {
3762         right.insert(core::stringc(left), 0);
3763         return std::move(right);
3764 }
3765
3766
3767 //! Appends a ustring16 and a long.
3768 template <typename TAlloc>
3769 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const long right)
3770 {
3771         left.append(core::stringc(right));
3772         return std::move(left);
3773 }
3774
3775
3776 //! Appends a ustring16 and a long.
3777 template <typename TAlloc>
3778 inline ustring16<TAlloc> operator+(const long left, ustring16<TAlloc>&& right)
3779 {
3780         right.insert(core::stringc(left), 0);
3781         return std::move(right);
3782 }
3783
3784
3785 //! Appends a ustring16 and an unsigned long.
3786 template <typename TAlloc>
3787 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned long right)
3788 {
3789         left.append(core::stringc(right));
3790         return std::move(left);
3791 }
3792
3793
3794 //! Appends a ustring16 and an unsigned long.
3795 template <typename TAlloc>
3796 inline ustring16<TAlloc> operator+(const unsigned long left, ustring16<TAlloc>&& right)
3797 {
3798         right.insert(core::stringc(left), 0);
3799         return std::move(right);
3800 }
3801
3802
3803 //! Appends a ustring16 and a float.
3804 template <typename TAlloc>
3805 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const float right)
3806 {
3807         left.append(core::stringc(right));
3808         return std::move(left);
3809 }
3810
3811
3812 //! Appends a ustring16 and a float.
3813 template <typename TAlloc>
3814 inline ustring16<TAlloc> operator+(const float left, ustring16<TAlloc>&& right)
3815 {
3816         right.insert(core::stringc(left), 0);
3817         return std::move(right);
3818 }
3819
3820
3821 //! Appends a ustring16 and a double.
3822 template <typename TAlloc>
3823 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const double right)
3824 {
3825         left.append(core::stringc(right));
3826         return std::move(left);
3827 }
3828
3829
3830 //! Appends a ustring16 and a double.
3831 template <typename TAlloc>
3832 inline ustring16<TAlloc> operator+(const double left, ustring16<TAlloc>&& right)
3833 {
3834         right.insert(core::stringc(left), 0);
3835         return std::move(right);
3836 }
3837 #endif
3838
3839
3840 #ifndef USTRING_NO_STL
3841 //! Writes a ustring16 to an ostream.
3842 template <typename TAlloc>
3843 inline std::ostream& operator<<(std::ostream& out, const ustring16<TAlloc>& in)
3844 {
3845         out << in.toUTF8_s().c_str();
3846         return out;
3847 }
3848
3849 //! Writes a ustring16 to a wostream.
3850 template <typename TAlloc>
3851 inline std::wostream& operator<<(std::wostream& out, const ustring16<TAlloc>& in)
3852 {
3853         out << in.toWCHAR_s().c_str();
3854         return out;
3855 }
3856 #endif
3857
3858
3859 #ifndef USTRING_NO_STL
3860
3861 namespace unicode
3862 {
3863
3864 //! Hashing algorithm for hashing a ustring.  Used for things like unordered_maps.
3865 //! Algorithm taken from std::hash<std::string>.
3866 class hash : public std::unary_function<core::ustring, size_t>
3867 {
3868         public:
3869                 size_t operator()(const core::ustring& s) const
3870                 {
3871                         size_t ret = 2166136261U;
3872                         size_t index = 0;
3873                         size_t stride = 1 + s.size_raw() / 10;
3874
3875                         core::ustring::const_iterator i = s.begin();
3876                         while (i != s.end())
3877                         {
3878                                 // TODO: Don't force u32 on an x64 OS.  Make it agnostic.
3879                                 ret = 16777619U * ret ^ (size_t)s[(u32)index];
3880                                 index += stride;
3881                                 i += stride;
3882                         }
3883                         return (ret);
3884                 }
3885 };
3886
3887 } // end namespace unicode
3888
3889 #endif
3890
3891 } // end namespace core
3892 } // end namespace irr
3893
3894 #endif