src/cguittfont/irrUString.h

   1 /*
   2    Basic Unicode string class for Irrlicht.
   3    Copyright (c) 2009-2011 John Norman
   4
   5    This software is provided 'as-is', without any express or implied
   6    warranty. In no event will the authors be held liable for any
   7    damages arising from the use of this software.
   8
   9    Permission is granted to anyone to use this software for any
  10    purpose, including commercial applications, and to alter it and
  11    redistribute it freely, subject to the following restrictions:
  12
  13    1. The origin of this software must not be misrepresented; you
  14       must not claim that you wrote the original software. If you use
  15       this software in a product, an acknowledgment in the product
  16       documentation would be appreciated but is not required.
  17
  18    2. Altered source versions must be plainly marked as such, and
  19       must not be misrepresented as being the original software.
  20
  21    3. This notice may not be removed or altered from any source
  22       distribution.
  23
  24    The original version of this class can be located at:
  25    http://irrlicht.suckerfreegames.com/
  26
  27    John Norman
  28    john@suckerfreegames.com
  29 */
  30
  31 #ifndef __IRR_USTRING_H_INCLUDED__
  32 #define __IRR_USTRING_H_INCLUDED__
  33
  34 #if (__cplusplus > 199711L) || (_MSC_VER >= 1600) || defined(__GXX_EXPERIMENTAL_CXX0X__)
  35 #       define USTRING_CPP0X
  36 #       if defined(__GXX_EXPERIMENTAL_CXX0X__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 5)))
  37 #               define USTRING_CPP0X_NEWLITERALS
  38 #       endif
  39 #endif
  40
  41 #include <stdio.h>
  42 #include <string.h>
  43 #include <stdlib.h>
  44
  45 #ifdef USTRING_CPP0X
  46 #       include <utility>
  47 #endif
  48
  49 #ifndef USTRING_NO_STL
  50 #       include <string>
  51 #       include <iterator>
  52 #       include <ostream>
  53 #endif
  54
  55 #include "irrTypes.h"
  56 #include "irrAllocator.h"
  57 #include "irrArray.h"
  58 #include "irrMath.h"
  59 #include "irrString.h"
  60 #include "path.h"
  61
  62 //! UTF-16 surrogate start values.
  63 static const irr::u16 UTF16_HI_SURROGATE = 0xD800;
  64 static const irr::u16 UTF16_LO_SURROGATE = 0xDC00;
  65
  66 //! Is a UTF-16 code point a surrogate?
  67 #define UTF16_IS_SURROGATE(c)           (((c) & 0xF800) == 0xD800)
  68 #define UTF16_IS_SURROGATE_HI(c)        (((c) & 0xFC00) == 0xD800)
  69 #define UTF16_IS_SURROGATE_LO(c)        (((c) & 0xFC00) == 0xDC00)
  70
  71
  72 namespace irr
  73 {
  74
  75         // Define our character types.
  76 #ifdef USTRING_CPP0X_NEWLITERALS        // C++0x
  77         typedef char32_t uchar32_t;
  78         typedef char16_t uchar16_t;
  79         typedef char uchar8_t;
  80 #else
  81         typedef u32 uchar32_t;
  82         typedef u16 uchar16_t;
  83         typedef u8 uchar8_t;
  84 #endif
  85
  86 namespace core
  87 {
  88
  89 namespace unicode
  90 {
  91
  92 //! The unicode replacement character.  Used to replace invalid characters.
  93 const irr::u16 UTF_REPLACEMENT_CHARACTER = 0xFFFD;
  94
  95 //! Convert a UTF-16 surrogate pair into a UTF-32 character.
  96 //! \param high The high value of the pair.
  97 //! \param low The low value of the pair.
  98 //! \return The UTF-32 character expressed by the surrogate pair.
  99 inline uchar32_t toUTF32(uchar16_t high, uchar16_t low)
 100 {
 101         // Convert the surrogate pair into a single UTF-32 character.
 102         uchar32_t x = ((high & ((1 << 6) -1)) << 10) | (low & ((1 << 10) -1));
 103         uchar32_t wu = ((high >> 6) & ((1 << 5) - 1)) + 1;
 104         return (wu << 16) | x;
 105 }
 106
 107 //! Swaps the endianness of a 16-bit value.
 108 //! \return The new value.
 109 inline uchar16_t swapEndian16(const uchar16_t& c)
 110 {
 111         return ((c >> 8) & 0x00FF) | ((c << 8) & 0xFF00);
 112 }
 113
 114 //! Swaps the endianness of a 32-bit value.
 115 //! \return The new value.
 116 inline uchar32_t swapEndian32(const uchar32_t& c)
 117 {
 118         return  ((c >> 24) & 0x000000FF) |
 119                         ((c >> 8)  & 0x0000FF00) |
 120                         ((c << 8)  & 0x00FF0000) |
 121                         ((c << 24) & 0xFF000000);
 122 }
 123
 124 //! The Unicode byte order mark.
 125 const u16 BOM = 0xFEFF;
 126
 127 //! The size of the Unicode byte order mark in terms of the Unicode character size.
 128 const u8 BOM_UTF8_LEN = 3;
 129 const u8 BOM_UTF16_LEN = 1;
 130 const u8 BOM_UTF32_LEN = 1;
 131
 132 //! Unicode byte order marks for file operations.
 133 const u8 BOM_ENCODE_UTF8[3] = { 0xEF, 0xBB, 0xBF };
 134 const u8 BOM_ENCODE_UTF16_BE[2] = { 0xFE, 0xFF };
 135 const u8 BOM_ENCODE_UTF16_LE[2] = { 0xFF, 0xFE };
 136 const u8 BOM_ENCODE_UTF32_BE[4] = { 0x00, 0x00, 0xFE, 0xFF };
 137 const u8 BOM_ENCODE_UTF32_LE[4] = { 0xFF, 0xFE, 0x00, 0x00 };
 138
 139 //! The size in bytes of the Unicode byte marks for file operations.
 140 const u8 BOM_ENCODE_UTF8_LEN = 3;
 141 const u8 BOM_ENCODE_UTF16_LEN = 2;
 142 const u8 BOM_ENCODE_UTF32_LEN = 4;
 143
 144 //! Unicode encoding type.
 145 enum EUTF_ENCODE
 146 {
 147         EUTFE_NONE              = 0,
 148         EUTFE_UTF8,
 149         EUTFE_UTF16,
 150         EUTFE_UTF16_LE,
 151         EUTFE_UTF16_BE,
 152         EUTFE_UTF32,
 153         EUTFE_UTF32_LE,
 154         EUTFE_UTF32_BE
 155 };
 156
 157 //! Unicode endianness.
 158 enum EUTF_ENDIAN
 159 {
 160         EUTFEE_NATIVE   = 0,
 161         EUTFEE_LITTLE,
 162         EUTFEE_BIG
 163 };
 164
 165 //! Returns the specified unicode byte order mark in a byte array.
 166 //! The byte order mark is the first few bytes in a text file that signifies its encoding.
 167 /** \param mode The Unicode encoding method that we want to get the byte order mark for.
 168                 If EUTFE_UTF16 or EUTFE_UTF32 is passed, it uses the native system endianness. **/
 169 //! \return An array that contains a byte order mark.
 170 inline core::array<u8> getUnicodeBOM(EUTF_ENCODE mode)
 171 {
 172 #define COPY_ARRAY(source, size) \
 173         memcpy(ret.pointer(), source, size); \
 174         ret.set_used(size)
 175
 176         core::array<u8> ret(4);
 177         switch (mode)
 178         {
 179                 case EUTFE_UTF8:
 180                         COPY_ARRAY(BOM_ENCODE_UTF8, BOM_ENCODE_UTF8_LEN);
 181                         break;
 182                 case EUTFE_UTF16:
 183                         #ifdef __BIG_ENDIAN__
 184                                 COPY_ARRAY(BOM_ENCODE_UTF16_BE, BOM_ENCODE_UTF16_LEN);
 185                         #else
 186                                 COPY_ARRAY(BOM_ENCODE_UTF16_LE, BOM_ENCODE_UTF16_LEN);
 187                         #endif
 188                         break;
 189                 case EUTFE_UTF16_BE:
 190                         COPY_ARRAY(BOM_ENCODE_UTF16_BE, BOM_ENCODE_UTF16_LEN);
 191                         break;
 192                 case EUTFE_UTF16_LE:
 193                         COPY_ARRAY(BOM_ENCODE_UTF16_LE, BOM_ENCODE_UTF16_LEN);
 194                         break;
 195                 case EUTFE_UTF32:
 196                         #ifdef __BIG_ENDIAN__
 197                                 COPY_ARRAY(BOM_ENCODE_UTF32_BE, BOM_ENCODE_UTF32_LEN);
 198                         #else
 199                                 COPY_ARRAY(BOM_ENCODE_UTF32_LE, BOM_ENCODE_UTF32_LEN);
 200                         #endif
 201                         break;
 202                 case EUTFE_UTF32_BE:
 203                         COPY_ARRAY(BOM_ENCODE_UTF32_BE, BOM_ENCODE_UTF32_LEN);
 204                         break;
 205                 case EUTFE_UTF32_LE:
 206                         COPY_ARRAY(BOM_ENCODE_UTF32_LE, BOM_ENCODE_UTF32_LEN);
 207                         break;
 208                 case EUTFE_NONE:
 209                         // TODO sapier: fixed warning only,
 210                         // don't know if something needs to be done here
 211                         break;
 212         }
 213         return ret;
 214
 215 #undef COPY_ARRAY
 216 }
 217
 218 //! Detects if the given data stream starts with a unicode BOM.
 219 //! \param data The data stream to check.
 220 //! \return The unicode BOM associated with the data stream, or EUTFE_NONE if none was found.
 221 inline EUTF_ENCODE determineUnicodeBOM(const char* data)
 222 {
 223         if (memcmp(data, BOM_ENCODE_UTF8, 3) == 0) return EUTFE_UTF8;
 224         if (memcmp(data, BOM_ENCODE_UTF16_BE, 2) == 0) return EUTFE_UTF16_BE;
 225         if (memcmp(data, BOM_ENCODE_UTF16_LE, 2) == 0) return EUTFE_UTF16_LE;
 226         if (memcmp(data, BOM_ENCODE_UTF32_BE, 4) == 0) return EUTFE_UTF32_BE;
 227         if (memcmp(data, BOM_ENCODE_UTF32_LE, 4) == 0) return EUTFE_UTF32_LE;
 228         return EUTFE_NONE;
 229 }
 230
 231 } // end namespace unicode
 232
 233
 234 //! UTF-16 string class.
 235 template <typename TAlloc = irrAllocator<uchar16_t> >
 236 class ustring16
 237 {
 238 public:
 239
 240         ///------------------///
 241         /// iterator classes ///
 242         ///------------------///
 243
 244         //! Access an element in a unicode string, allowing one to change it.
 245         class _ustring16_iterator_access
 246         {
 247                 public:
 248                         _ustring16_iterator_access(const ustring16<TAlloc>* s, u32 p) : ref(s), pos(p) {}
 249
 250                         //! Allow the class to be interpreted as a single UTF-32 character.
 251                         operator uchar32_t() const
 252                         {
 253                                 return _get();
 254                         }
 255
 256                         //! Allow one to change the character in the unicode string.
 257                         //! \param c The new character to use.
 258                         //! \return Myself.
 259                         _ustring16_iterator_access& operator=(const uchar32_t c)
 260                         {
 261                                 _set(c);
 262                                 return *this;
 263                         }
 264
 265                         //! Increments the value by 1.
 266                         //! \return Myself.
 267                         _ustring16_iterator_access& operator++()
 268                         {
 269                                 _set(_get() + 1);
 270                                 return *this;
 271                         }
 272
 273                         //! Increments the value by 1, returning the old value.
 274                         //! \return A unicode character.
 275                         uchar32_t operator++(int)
 276                         {
 277                                 uchar32_t old = _get();
 278                                 _set(old + 1);
 279                                 return old;
 280                         }
 281
 282                         //! Decrements the value by 1.
 283                         //! \return Myself.
 284                         _ustring16_iterator_access& operator--()
 285                         {
 286                                 _set(_get() - 1);
 287                                 return *this;
 288                         }
 289
 290                         //! Decrements the value by 1, returning the old value.
 291                         //! \return A unicode character.
 292                         uchar32_t operator--(int)
 293                         {
 294                                 uchar32_t old = _get();
 295                                 _set(old - 1);
 296                                 return old;
 297                         }
 298
 299                         //! Adds to the value by a specified amount.
 300                         //! \param val The amount to add to this character.
 301                         //! \return Myself.
 302                         _ustring16_iterator_access& operator+=(int val)
 303                         {
 304                                 _set(_get() + val);
 305                                 return *this;
 306                         }
 307
 308                         //! Subtracts from the value by a specified amount.
 309                         //! \param val The amount to subtract from this character.
 310                         //! \return Myself.
 311                         _ustring16_iterator_access& operator-=(int val)
 312                         {
 313                                 _set(_get() - val);
 314                                 return *this;
 315                         }
 316
 317                         //! Multiples the value by a specified amount.
 318                         //! \param val The amount to multiply this character by.
 319                         //! \return Myself.
 320                         _ustring16_iterator_access& operator*=(int val)
 321                         {
 322                                 _set(_get() * val);
 323                                 return *this;
 324                         }
 325
 326                         //! Divides the value by a specified amount.
 327                         //! \param val The amount to divide this character by.
 328                         //! \return Myself.
 329                         _ustring16_iterator_access& operator/=(int val)
 330                         {
 331                                 _set(_get() / val);
 332                                 return *this;
 333                         }
 334
 335                         //! Modulos the value by a specified amount.
 336                         //! \param val The amount to modulo this character by.
 337                         //! \return Myself.
 338                         _ustring16_iterator_access& operator%=(int val)
 339                         {
 340                                 _set(_get() % val);
 341                                 return *this;
 342                         }
 343
 344                         //! Adds to the value by a specified amount.
 345                         //! \param val The amount to add to this character.
 346                         //! \return A unicode character.
 347                         uchar32_t operator+(int val) const
 348                         {
 349                                 return _get() + val;
 350                         }
 351
 352                         //! Subtracts from the value by a specified amount.
 353                         //! \param val The amount to subtract from this character.
 354                         //! \return A unicode character.
 355                         uchar32_t operator-(int val) const
 356                         {
 357                                 return _get() - val;
 358                         }
 359
 360                         //! Multiplies the value by a specified amount.
 361                         //! \param val The amount to multiply this character by.
 362                         //! \return A unicode character.
 363                         uchar32_t operator*(int val) const
 364                         {
 365                                 return _get() * val;
 366                         }
 367
 368                         //! Divides the value by a specified amount.
 369                         //! \param val The amount to divide this character by.
 370                         //! \return A unicode character.
 371                         uchar32_t operator/(int val) const
 372                         {
 373                                 return _get() / val;
 374                         }
 375
 376                         //! Modulos the value by a specified amount.
 377                         //! \param val The amount to modulo this character by.
 378                         //! \return A unicode character.
 379                         uchar32_t operator%(int val) const
 380                         {
 381                                 return _get() % val;
 382                         }
 383
 384                 private:
 385                         //! Gets a uchar32_t from our current position.
 386                         uchar32_t _get() const
 387                         {
 388                                 const uchar16_t* a = ref->c_str();
 389                                 if (!UTF16_IS_SURROGATE(a[pos]))
 390                                         return static_cast<uchar32_t>(a[pos]);
 391                                 else
 392                                 {
 393                                         if (pos + 1 >= ref->size_raw())
 394                                                 return 0;
 395
 396                                         return unicode::toUTF32(a[pos], a[pos + 1]);
 397                                 }
 398                         }
 399
 400                         //! Sets a uchar32_t at our current position.
 401                         void _set(uchar32_t c)
 402                         {
 403                                 ustring16<TAlloc>* ref2 = const_cast<ustring16<TAlloc>*>(ref);
 404                                 const uchar16_t* a = ref2->c_str();
 405                                 if (c > 0xFFFF)
 406                                 {
 407                                         // c will be multibyte, so split it up into the high and low surrogate pairs.
 408                                         uchar16_t x = static_cast<uchar16_t>(c);
 409                                         uchar16_t vh = UTF16_HI_SURROGATE | ((((c >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
 410                                         uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
 411
 412                                         // If the previous position was a surrogate pair, just replace them.  Else, insert the low pair.
 413                                         if (UTF16_IS_SURROGATE_HI(a[pos]) && pos + 1 != ref2->size_raw())
 414                                                 ref2->replace_raw(vl, static_cast<u32>(pos) + 1);
 415                                         else ref2->insert_raw(vl, static_cast<u32>(pos) + 1);
 416
 417                                         ref2->replace_raw(vh, static_cast<u32>(pos));
 418                                 }
 419                                 else
 420                                 {
 421                                         // c will be a single byte.
 422                                         uchar16_t vh = static_cast<uchar16_t>(c);
 423
 424                                         // If the previous position was a surrogate pair, remove the extra byte.
 425                                         if (UTF16_IS_SURROGATE_HI(a[pos]))
 426                                                 ref2->erase_raw(static_cast<u32>(pos) + 1);
 427
 428                                         ref2->replace_raw(vh, static_cast<u32>(pos));
 429                                 }
 430                         }
 431
 432                         const ustring16<TAlloc>* ref;
 433                         u32 pos;
 434         };
 435         typedef typename ustring16<TAlloc>::_ustring16_iterator_access access;
 436
 437
 438         //! Iterator to iterate through a UTF-16 string.
 439 #ifndef USTRING_NO_STL
 440         class _ustring16_const_iterator : public std::iterator<
 441                 std::bidirectional_iterator_tag,        // iterator_category
 442                 access,                                                         // value_type
 443                 ptrdiff_t,                                                      // difference_type
 444                 const access,                                           // pointer
 445                 const access                                            // reference
 446         >
 447 #else
 448         class _ustring16_const_iterator
 449 #endif
 450         {
 451                 public:
 452                         typedef _ustring16_const_iterator _Iter;
 453                         typedef std::iterator<std::bidirectional_iterator_tag, access, ptrdiff_t, const access, const access> _Base;
 454                         typedef const access const_pointer;
 455                         typedef const access const_reference;
 456
 457 #ifndef USTRING_NO_STL
 458                         typedef typename _Base::value_type value_type;
 459                         typedef typename _Base::difference_type difference_type;
 460                         typedef typename _Base::difference_type distance_type;
 461                         typedef typename _Base::pointer pointer;
 462                         typedef const_reference reference;
 463 #else
 464                         typedef access value_type;
 465                         typedef u32 difference_type;
 466                         typedef u32 distance_type;
 467                         typedef const_pointer pointer;
 468                         typedef const_reference reference;
 469 #endif
 470
 471                         //! Constructors.
 472                         _ustring16_const_iterator(const _Iter& i) : ref(i.ref), pos(i.pos) {}
 473                         _ustring16_const_iterator(const ustring16<TAlloc>& s) : ref(&s), pos(0) {}
 474                         _ustring16_const_iterator(const ustring16<TAlloc>& s, const u32 p) : ref(&s), pos(0)
 475                         {
 476                                 if (ref->size_raw() == 0 || p == 0)
 477                                         return;
 478
 479                                 // Go to the appropriate position.
 480                                 u32 i = p;
 481                                 u32 sr = ref->size_raw();
 482                                 const uchar16_t* a = ref->c_str();
 483                                 while (i != 0 && pos < sr)
 484                                 {
 485                                         if (UTF16_IS_SURROGATE_HI(a[pos]))
 486                                                 pos += 2;
 487                                         else ++pos;
 488                                         --i;
 489                                 }
 490                         }
 491
 492                         //! Test for equalness.
 493                         bool operator==(const _Iter& iter) const
 494                         {
 495                                 if (ref == iter.ref && pos == iter.pos)
 496                                         return true;
 497                                 return false;
 498                         }
 499
 500                         //! Test for unequalness.
 501                         bool operator!=(const _Iter& iter) const
 502                         {
 503                                 if (ref != iter.ref || pos != iter.pos)
 504                                         return true;
 505                                 return false;
 506                         }
 507
 508                         //! Switch to the next full character in the string.
 509                         _Iter& operator++()
 510                         {       // ++iterator
 511                                 if (pos == ref->size_raw()) return *this;
 512                                 const uchar16_t* a = ref->c_str();
 513                                 if (UTF16_IS_SURROGATE_HI(a[pos]))
 514                                         pos += 2;                       // TODO: check for valid low surrogate?
 515                                 else ++pos;
 516                                 if (pos > ref->size_raw()) pos = ref->size_raw();
 517                                 return *this;
 518                         }
 519
 520                         //! Switch to the next full character in the string, returning the previous position.
 521                         _Iter operator++(int)
 522                         {       // iterator++
 523                                 _Iter _tmp(*this);
 524                                 ++*this;
 525                                 return _tmp;
 526                         }
 527
 528                         //! Switch to the previous full character in the string.
 529                         _Iter& operator--()
 530                         {       // --iterator
 531                                 if (pos == 0) return *this;
 532                                 const uchar16_t* a = ref->c_str();
 533                                 --pos;
 534                                 if (UTF16_IS_SURROGATE_LO(a[pos]) && pos != 0)  // low surrogate, go back one more.
 535                                         --pos;
 536                                 return *this;
 537                         }
 538
 539                         //! Switch to the previous full character in the string, returning the previous position.
 540                         _Iter operator--(int)
 541                         {       // iterator--
 542                                 _Iter _tmp(*this);
 543                                 --*this;
 544                                 return _tmp;
 545                         }
 546
 547                         //! Advance a specified number of full characters in the string.
 548                         //! \return Myself.
 549                         _Iter& operator+=(const difference_type v)
 550                         {
 551                                 if (v == 0) return *this;
 552                                 if (v < 0) return operator-=(v * -1);
 553
 554                                 if (pos >= ref->size_raw())
 555                                         return *this;
 556
 557                                 // Go to the appropriate position.
 558                                 // TODO: Don't force u32 on an x64 OS.  Make it agnostic.
 559                                 u32 i = (u32)v;
 560                                 u32 sr = ref->size_raw();
 561                                 const uchar16_t* a = ref->c_str();
 562                                 while (i != 0 && pos < sr)
 563                                 {
 564                                         if (UTF16_IS_SURROGATE_HI(a[pos]))
 565                                                 pos += 2;
 566                                         else ++pos;
 567                                         --i;
 568                                 }
 569                                 if (pos > sr)
 570                                         pos = sr;
 571
 572                                 return *this;
 573                         }
 574
 575                         //! Go back a specified number of full characters in the string.
 576                         //! \return Myself.
 577                         _Iter& operator-=(const difference_type v)
 578                         {
 579                                 if (v == 0) return *this;
 580                                 if (v > 0) return operator+=(v * -1);
 581
 582                                 if (pos == 0)
 583                                         return *this;
 584
 585                                 // Go to the appropriate position.
 586                                 // TODO: Don't force u32 on an x64 OS.  Make it agnostic.
 587                                 u32 i = (u32)v;
 588                                 const uchar16_t* a = ref->c_str();
 589                                 while (i != 0 && pos != 0)
 590                                 {
 591                                         --pos;
 592                                         if (UTF16_IS_SURROGATE_LO(a[pos]) != 0 && pos != 0)
 593                                                 --pos;
 594                                         --i;
 595                                 }
 596
 597                                 return *this;
 598                         }
 599
 600                         //! Return a new iterator that is a variable number of full characters forward from the current position.
 601                         _Iter operator+(const difference_type v) const
 602                         {
 603                                 _Iter ret(*this);
 604                                 ret += v;
 605                                 return ret;
 606                         }
 607
 608                         //! Return a new iterator that is a variable number of full characters backward from the current position.
 609                         _Iter operator-(const difference_type v) const
 610                         {
 611                                 _Iter ret(*this);
 612                                 ret -= v;
 613                                 return ret;
 614                         }
 615
 616                         //! Returns the distance between two iterators.
 617                         difference_type operator-(const _Iter& iter) const
 618                         {
 619                                 // Make sure we reference the same object!
 620                                 if (ref != iter.ref)
 621                                         return difference_type();
 622
 623                                 _Iter i = iter;
 624                                 difference_type ret;
 625
 626                                 // Walk up.
 627                                 if (pos > i.pos)
 628                                 {
 629                                         while (pos > i.pos)
 630                                         {
 631                                                 ++i;
 632                                                 ++ret;
 633                                         }
 634                                         return ret;
 635                                 }
 636
 637                                 // Walk down.
 638                                 while (pos < i.pos)
 639                                 {
 640                                         --i;
 641                                         --ret;
 642                                 }
 643                                 return ret;
 644                         }
 645
 646                         //! Accesses the full character at the iterator's position.
 647                         const_reference operator*() const
 648                         {
 649                                 if (pos >= ref->size_raw())
 650                                 {
 651                                         const uchar16_t* a = ref->c_str();
 652                                         u32 p = ref->size_raw();
 653                                         if (UTF16_IS_SURROGATE_LO(a[p]))
 654                                                 --p;
 655                                         reference ret(ref, p);
 656                                         return ret;
 657                                 }
 658                                 const_reference ret(ref, pos);
 659                                 return ret;
 660                         }
 661
 662                         //! Accesses the full character at the iterator's position.
 663                         reference operator*()
 664                         {
 665                                 if (pos >= ref->size_raw())
 666                                 {
 667                                         const uchar16_t* a = ref->c_str();
 668                                         u32 p = ref->size_raw();
 669                                         if (UTF16_IS_SURROGATE_LO(a[p]))
 670                                                 --p;
 671                                         reference ret(ref, p);
 672                                         return ret;
 673                                 }
 674                                 reference ret(ref, pos);
 675                                 return ret;
 676                         }
 677
 678                         //! Accesses the full character at the iterator's position.
 679                         const_pointer operator->() const
 680                         {
 681                                 return operator*();
 682                         }
 683
 684                         //! Accesses the full character at the iterator's position.
 685                         pointer operator->()
 686                         {
 687                                 return operator*();
 688                         }
 689
 690                         //! Is the iterator at the start of the string?
 691                         bool atStart() const
 692                         {
 693                                 return pos == 0;
 694                         }
 695
 696                         //! Is the iterator at the end of the string?
 697                         bool atEnd() const
 698                         {
 699                                 const uchar16_t* a = ref->c_str();
 700                                 if (UTF16_IS_SURROGATE(a[pos]))
 701                                         return (pos + 1) >= ref->size_raw();
 702                                 else return pos >= ref->size_raw();
 703                         }
 704
 705                         //! Moves the iterator to the start of the string.
 706                         void toStart()
 707                         {
 708                                 pos = 0;
 709                         }
 710
 711                         //! Moves the iterator to the end of the string.
 712                         void toEnd()
 713                         {
 714                                 pos = ref->size_raw();
 715                         }
 716
 717                         //! Returns the iterator's position.
 718                         //! \return The iterator's position.
 719                         u32 getPos() const
 720                         {
 721                                 return pos;
 722                         }
 723
 724                 protected:
                             //! The string being iterated; queried through size_raw() and c_str().
 725                         const ustring16<TAlloc>* ref;
                             //! Current position, used as a raw index into the string's UTF-16 array.
 726                         u32 pos;
 727         };
 728
 729         //! Iterator to iterate through a UTF-16 string.
 730         class _ustring16_iterator : public _ustring16_const_iterator
 731         {
 732                 public:
 733                         typedef _ustring16_iterator _Iter;
 734                         typedef _ustring16_const_iterator _Base;
 735                         typedef typename _Base::const_pointer const_pointer;
 736                         typedef typename _Base::const_reference const_reference;
 737
 738
 739                         typedef typename _Base::value_type value_type;
 740                         typedef typename _Base::difference_type difference_type;
 741                         typedef typename _Base::distance_type distance_type;
 742                         typedef access pointer;
 743                         typedef access reference;
 744
 745                         using _Base::pos;
 746                         using _Base::ref;
 747
 748                         //! Constructors.
 749                         _ustring16_iterator(const _Iter& i) : _ustring16_const_iterator(i) {}
 750                         _ustring16_iterator(const ustring16<TAlloc>& s) : _ustring16_const_iterator(s) {}
 751                         _ustring16_iterator(const ustring16<TAlloc>& s, const u32 p) : _ustring16_const_iterator(s, p) {}
 752
 753                         //! Accesses the full character at the iterator's position.
 754                         reference operator*() const
 755                         {
 756                                 if (pos >= ref->size_raw())
 757                                 {
 758                                         const uchar16_t* a = ref->c_str();
 759                                         u32 p = ref->size_raw();
 760                                         if (UTF16_IS_SURROGATE_LO(a[p]))
 761                                                 --p;
 762                                         reference ret(ref, p);
 763                                         return ret;
 764                                 }
 765                                 reference ret(ref, pos);
 766                                 return ret;
 767                         }
 768
 769                         //! Accesses the full character at the iterator's position.
 770                         reference operator*()
 771                         {
 772                                 if (pos >= ref->size_raw())
 773                                 {
 774                                         const uchar16_t* a = ref->c_str();
 775                                         u32 p = ref->size_raw();
 776                                         if (UTF16_IS_SURROGATE_LO(a[p]))
 777                                                 --p;
 778                                         reference ret(ref, p);
 779                                         return ret;
 780                                 }
 781                                 reference ret(ref, pos);
 782                                 return ret;
 783                         }
 784
 785                         //! Accesses the full character at the iterator's position.
 786                         pointer operator->() const
 787                         {
 788                                 return operator*();
 789                         }
 790
 791                         //! Accesses the full character at the iterator's position.
 792                         pointer operator->()
 793                         {
 794                                 return operator*();
 795                         }
 796         };
 797
             //! Public alias for the mutable iterator class defined above.
 798         typedef typename ustring16<TAlloc>::_ustring16_iterator iterator;
             //! Public alias for the read-only iterator class.
 799         typedef typename ustring16<TAlloc>::_ustring16_const_iterator const_iterator;
 800
 801         ///----------------------///
 802         /// end iterator classes ///
 803         ///----------------------///
 804
 805         //! Default constructor
 806         ustring16()
 807         : array(0), allocated(1), used(0)
 808         {
 809 #if __BIG_ENDIAN__
 810                 encoding = unicode::EUTFE_UTF16_BE;
 811 #else
 812                 encoding = unicode::EUTFE_UTF16_LE;
 813 #endif
 814                 array = allocator.allocate(1); // new u16[1];
 815                 array[0] = 0x0;
 816         }
 817
 818
 819         //! Constructor
 820         ustring16(const ustring16<TAlloc>& other)
 821         : array(0), allocated(0), used(0)
 822         {
 823 #if __BIG_ENDIAN__
 824                 encoding = unicode::EUTFE_UTF16_BE;
 825 #else
 826                 encoding = unicode::EUTFE_UTF16_LE;
 827 #endif
 828                 *this = other;
 829         }
 830
 831
 832         //! Constructor from other string types
 833         template <class B, class A>
 834         ustring16(const string<B, A>& other)
 835         : array(0), allocated(0), used(0)
 836         {
 837 #if __BIG_ENDIAN__
 838                 encoding = unicode::EUTFE_UTF16_BE;
 839 #else
 840                 encoding = unicode::EUTFE_UTF16_LE;
 841 #endif
 842                 *this = other;
 843         }
 844
 845
 846 #ifndef USTRING_NO_STL
 847         //! Constructor from std::string
 848         template <class B, class A, typename Alloc>
 849         ustring16(const std::basic_string<B, A, Alloc>& other)
 850         : array(0), allocated(0), used(0)
 851         {
 852 #if __BIG_ENDIAN__
 853                 encoding = unicode::EUTFE_UTF16_BE;
 854 #else
 855                 encoding = unicode::EUTFE_UTF16_LE;
 856 #endif
 857                 *this = other.c_str();
 858         }
 859
 860
 861         //! Constructor from iterator.
 862         template <typename Itr>
 863         ustring16(Itr first, Itr last)
 864         : array(0), allocated(0), used(0)
 865         {
 866 #if __BIG_ENDIAN__
 867                 encoding = unicode::EUTFE_UTF16_BE;
 868 #else
 869                 encoding = unicode::EUTFE_UTF16_LE;
 870 #endif
 871                 reserve(std::distance(first, last));
 872                 array[used] = 0;
 873
 874                 for (; first != last; ++first)
 875                         append((uchar32_t)*first);
 876         }
 877 #endif
 878
 879
 880 #ifndef USTRING_CPP0X_NEWLITERALS
 881         //! Constructor for copying a character string from a pointer.
 882         ustring16(const char* const c)
 883         : array(0), allocated(0), used(0)
 884         {
 885 #if __BIG_ENDIAN__
 886                 encoding = unicode::EUTFE_UTF16_BE;
 887 #else
 888                 encoding = unicode::EUTFE_UTF16_LE;
 889 #endif
 890
 891                 loadDataStream(c, strlen(c));
 892                 //append((uchar8_t*)c);
 893         }
 894
 895
 896         //! Constructor for copying a character string from a pointer with a given length.
 897         ustring16(const char* const c, u32 length)
 898         : array(0), allocated(0), used(0)
 899         {
 900 #if __BIG_ENDIAN__
 901                 encoding = unicode::EUTFE_UTF16_BE;
 902 #else
 903                 encoding = unicode::EUTFE_UTF16_LE;
 904 #endif
 905
 906                 loadDataStream(c, length);
 907         }
 908 #endif
 909
 910
 911         //! Constructor for copying a UTF-8 string from a pointer.
 912         ustring16(const uchar8_t* const c)
 913         : array(0), allocated(0), used(0)
 914         {
 915 #if __BIG_ENDIAN__
 916                 encoding = unicode::EUTFE_UTF16_BE;
 917 #else
 918                 encoding = unicode::EUTFE_UTF16_LE;
 919 #endif
 920
 921                 append(c);
 922         }
 923
 924
 925         //! Constructor for copying a UTF-8 string from a single char.
 926         ustring16(const char c)
 927         : array(0), allocated(0), used(0)
 928         {
 929 #if __BIG_ENDIAN__
 930                 encoding = unicode::EUTFE_UTF16_BE;
 931 #else
 932                 encoding = unicode::EUTFE_UTF16_LE;
 933 #endif
 934
 935                 append((uchar32_t)c);
 936         }
 937
 938
 939         //! Constructor for copying a UTF-8 string from a pointer with a given length.
 940         ustring16(const uchar8_t* const c, u32 length)
 941         : array(0), allocated(0), used(0)
 942         {
 943 #if __BIG_ENDIAN__
 944                 encoding = unicode::EUTFE_UTF16_BE;
 945 #else
 946                 encoding = unicode::EUTFE_UTF16_LE;
 947 #endif
 948
 949                 append(c, length);
 950         }
 951
 952
 953         //! Constructor for copying a UTF-16 string from a pointer.
 954         ustring16(const uchar16_t* const c)
 955         : array(0), allocated(0), used(0)
 956         {
 957 #if __BIG_ENDIAN__
 958                 encoding = unicode::EUTFE_UTF16_BE;
 959 #else
 960                 encoding = unicode::EUTFE_UTF16_LE;
 961 #endif
 962
 963                 append(c);
 964         }
 965
 966
 967         //! Constructor for copying a UTF-16 string from a pointer with a given length
 968         ustring16(const uchar16_t* const c, u32 length)
 969         : array(0), allocated(0), used(0)
 970         {
 971 #if __BIG_ENDIAN__
 972                 encoding = unicode::EUTFE_UTF16_BE;
 973 #else
 974                 encoding = unicode::EUTFE_UTF16_LE;
 975 #endif
 976
 977                 append(c, length);
 978         }
 979
 980
 981         //! Constructor for copying a UTF-32 string from a pointer.
 982         ustring16(const uchar32_t* const c)
 983         : array(0), allocated(0), used(0)
 984         {
 985 #if __BIG_ENDIAN__
 986                 encoding = unicode::EUTFE_UTF16_BE;
 987 #else
 988                 encoding = unicode::EUTFE_UTF16_LE;
 989 #endif
 990
 991                 append(c);
 992         }
 993
 994
 995         //! Constructor for copying a UTF-32 from a pointer with a given length.
 996         ustring16(const uchar32_t* const c, u32 length)
 997         : array(0), allocated(0), used(0)
 998         {
 999 #if __BIG_ENDIAN__
1000                 encoding = unicode::EUTFE_UTF16_BE;
1001 #else
1002                 encoding = unicode::EUTFE_UTF16_LE;
1003 #endif
1004
1005                 append(c, length);
1006         }
1007
1008
1009         //! Constructor for copying a wchar_t string from a pointer.
1010         ustring16(const wchar_t* const c)
1011         : array(0), allocated(0), used(0)
1012         {
1013 #if __BIG_ENDIAN__
1014                 encoding = unicode::EUTFE_UTF16_BE;
1015 #else
1016                 encoding = unicode::EUTFE_UTF16_LE;
1017 #endif
1018
1019                 if (sizeof(wchar_t) == 4)
1020                         append(reinterpret_cast<const uchar32_t* const>(c));
1021                 else if (sizeof(wchar_t) == 2)
1022                         append(reinterpret_cast<const uchar16_t* const>(c));
1023                 else if (sizeof(wchar_t) == 1)
1024                         append(reinterpret_cast<const uchar8_t* const>(c));
1025         }
1026
1027
1028         //! Constructor for copying a wchar_t string from a pointer with a given length.
1029         ustring16(const wchar_t* const c, u32 length)
1030         : array(0), allocated(0), used(0)
1031         {
1032 #if __BIG_ENDIAN__
1033                 encoding = unicode::EUTFE_UTF16_BE;
1034 #else
1035                 encoding = unicode::EUTFE_UTF16_LE;
1036 #endif
1037
1038                 if (sizeof(wchar_t) == 4)
1039                         append(reinterpret_cast<const uchar32_t* const>(c), length);
1040                 else if (sizeof(wchar_t) == 2)
1041                         append(reinterpret_cast<const uchar16_t* const>(c), length);
1042                 else if (sizeof(wchar_t) == 1)
1043                         append(reinterpret_cast<const uchar8_t* const>(c), length);
1044         }
1045
1046
1047 #ifdef USTRING_CPP0X
1048         //! Constructor for moving a ustring16
1049         ustring16(ustring16<TAlloc>&& other)
1050         : array(other.array), encoding(other.encoding), allocated(other.allocated), used(other.used)
1051         {
1052                 //std::cout << "MOVE constructor" << std::endl;
1053                 other.array = 0;
1054                 other.allocated = 0;
1055                 other.used = 0;
1056         }
1057 #endif
1058
1059
1060         //! Destructor
1061         ~ustring16()
1062         {
1063                 allocator.deallocate(array); // delete [] array;
1064         }
1065
1066
1067         //! Assignment operator
1068         ustring16& operator=(const ustring16<TAlloc>& other)
1069         {
1070                 if (this == &other)
1071                         return *this;
1072
1073                 used = other.size_raw();
1074                 if (used >= allocated)
1075                 {
1076                         allocator.deallocate(array); // delete [] array;
1077                         allocated = used + 1;
1078                         array = allocator.allocate(used + 1); //new u16[used];
1079                 }
1080
1081                 const uchar16_t* p = other.c_str();
1082                 for (u32 i=0; i<=used; ++i, ++p)
1083                         array[i] = *p;
1084
1085                 array[used] = 0;
1086
1087                 // Validate our new UTF-16 string.
1088                 validate();
1089
1090                 return *this;
1091         }
1092
1093
1094 #ifdef USTRING_CPP0X
1095         //! Move assignment operator
1096         ustring16& operator=(ustring16<TAlloc>&& other)
1097         {
1098                 if (this != &other)
1099                 {
1100                         //std::cout << "MOVE operator=" << std::endl;
1101                         allocator.deallocate(array);
1102
1103                         array = other.array;
1104                         allocated = other.allocated;
1105                         encoding = other.encoding;
1106                         used = other.used;
1107                         other.array = 0;
1108                         other.used = 0;
1109                 }
1110                 return *this;
1111         }
1112 #endif
1113
1114
1115         //! Assignment operator for other string types
1116         template <class B, class A>
1117         ustring16<TAlloc>& operator=(const string<B, A>& other)
1118         {
1119                 *this = other.c_str();
1120                 return *this;
1121         }
1122
1123
1124         //! Assignment operator for UTF-8 strings
1125         ustring16<TAlloc>& operator=(const uchar8_t* const c)
1126         {
1127                 if (!array)
1128                 {
1129                         array = allocator.allocate(1); //new u16[1];
1130                         allocated = 1;
1131                 }
1132                 used = 0;
1133                 array[used] = 0x0;
1134                 if (!c) return *this;
1135
1136                 //! Append our string now.
1137                 append(c);
1138                 return *this;
1139         }
1140
1141
1142         //! Assignment operator for UTF-16 strings
1143         ustring16<TAlloc>& operator=(const uchar16_t* const c)
1144         {
1145                 if (!array)
1146                 {
1147                         array = allocator.allocate(1); //new u16[1];
1148                         allocated = 1;
1149                 }
1150                 used = 0;
1151                 array[used] = 0x0;
1152                 if (!c) return *this;
1153
1154                 //! Append our string now.
1155                 append(c);
1156                 return *this;
1157         }
1158
1159
1160         //! Assignment operator for UTF-32 strings
1161         ustring16<TAlloc>& operator=(const uchar32_t* const c)
1162         {
1163                 if (!array)
1164                 {
1165                         array = allocator.allocate(1); //new u16[1];
1166                         allocated = 1;
1167                 }
1168                 used = 0;
1169                 array[used] = 0x0;
1170                 if (!c) return *this;
1171
1172                 //! Append our string now.
1173                 append(c);
1174                 return *this;
1175         }
1176
1177
1178         //! Assignment operator for wchar_t strings.
1179         /** Note that this assumes that a correct unicode string is stored in the wchar_t string.
1180                 Since wchar_t changes depending on its platform, it could either be a UTF-8, -16, or -32 string.
1181                 This function assumes you are storing the correct unicode encoding inside the wchar_t string. **/
1182         ustring16<TAlloc>& operator=(const wchar_t* const c)
1183         {
1184                 if (sizeof(wchar_t) == 4)
1185                         *this = reinterpret_cast<const uchar32_t* const>(c);
1186                 else if (sizeof(wchar_t) == 2)
1187                         *this = reinterpret_cast<const uchar16_t* const>(c);
1188                 else if (sizeof(wchar_t) == 1)
1189                         *this = reinterpret_cast<const uchar8_t* const>(c);
1190
1191                 return *this;
1192         }
1193
1194
1195         //! Assignment operator for other strings.
1196         /** Note that this assumes that a correct unicode string is stored in the string. **/
1197         template <class B>
1198         ustring16<TAlloc>& operator=(const B* const c)
1199         {
1200                 if (sizeof(B) == 4)
1201                         *this = reinterpret_cast<const uchar32_t* const>(c);
1202                 else if (sizeof(B) == 2)
1203                         *this = reinterpret_cast<const uchar16_t* const>(c);
1204                 else if (sizeof(B) == 1)
1205                         *this = reinterpret_cast<const uchar8_t* const>(c);
1206
1207                 return *this;
1208         }
1209
1210
1211         //! Direct access operator
1212         access operator [](const u32 index)
1213         {
1214                 _IRR_DEBUG_BREAK_IF(index>=size()) // bad index
1215                 iterator iter(*this, index);
1216                 return iter.operator*();
1217         }
1218
1219
1220         //! Direct access operator
1221         const access operator [](const u32 index) const
1222         {
1223                 _IRR_DEBUG_BREAK_IF(index>=size()) // bad index
1224                 const_iterator iter(*this, index);
1225                 return iter.operator*();
1226         }
1227
1228
1229         //! Equality operator
1230         bool operator ==(const uchar16_t* const str) const
1231         {
1232                 if (!str)
1233                         return false;
1234
1235                 u32 i;
1236                 for(i=0; array[i] && str[i]; ++i)
1237                         if (array[i] != str[i])
1238                                 return false;
1239
1240                 return !array[i] && !str[i];
1241         }
1242
1243
1244         //! Equality operator
1245         bool operator ==(const ustring16<TAlloc>& other) const
1246         {
1247                 for(u32 i=0; array[i] && other.array[i]; ++i)
1248                         if (array[i] != other.array[i])
1249                                 return false;
1250
1251                 return used == other.used;
1252         }
1253
1254
1255         //! Is smaller comparator
1256         bool operator <(const ustring16<TAlloc>& other) const
1257         {
1258                 for(u32 i=0; array[i] && other.array[i]; ++i)
1259                 {
1260                         s32 diff = array[i] - other.array[i];
1261                         if ( diff )
1262                                 return diff < 0;
1263                 }
1264
1265                 return used < other.used;
1266         }
1267
1268
1269         //! Inequality operator
1270         bool operator !=(const uchar16_t* const str) const
1271         {
1272                 return !(*this == str);
1273         }
1274
1275
1276         //! Inequality operator
1277         bool operator !=(const ustring16<TAlloc>& other) const
1278         {
1279                 return !(*this == other);
1280         }
1281
1282
1283         //! Returns the length of a ustring16 in full characters.
1284         //! \return Length of a ustring16 in full characters.
1285         u32 size() const
1286         {
1287                 const_iterator i(*this, 0);
1288                 u32 pos = 0;
1289                 while (!i.atEnd())
1290                 {
1291                         ++i;
1292                         ++pos;
1293                 }
1294                 return pos;
1295         }
1296
1297
1298         //! Informs if the ustring is empty or not.
1299         //! \return True if the ustring is empty, false if not.
1300         bool empty() const
1301         {
1302                 return (size_raw() == 0);
1303         }
1304
1305
1306         //! Returns a pointer to the raw UTF-16 string data.
1307         //! \return pointer to C-style NUL terminated array of UTF-16 code points.
1308         const uchar16_t* c_str() const
1309         {
1310                 return array;
1311         }
1312
1313
1314         //! Compares the first n characters of this string with another.
1315         //! \param other Other string to compare to.
1316         //! \param n Number of characters to compare.
1317         //! \return True if the n first characters of both strings are equal.
1318         bool equalsn(const ustring16<TAlloc>& other, u32 n) const
1319         {
1320                 u32 i;
1321                 const uchar16_t* oa = other.c_str();
1322                 for(i=0; array[i] && oa[i] && i < n; ++i)
1323                         if (array[i] != oa[i])
1324                                 return false;
1325
1326                 // if one (or both) of the strings was smaller then they
1327                 // are only equal if they have the same length
1328                 return (i == n) || (used == other.used);
1329         }
1330
1331
1332         //! Compares the first n characters of this string with another.
1333         //! \param str Other string to compare to.
1334         //! \param n Number of characters to compare.
1335         //! \return True if the n first characters of both strings are equal.
1336         bool equalsn(const uchar16_t* const str, u32 n) const
1337         {
1338                 if (!str)
1339                         return false;
1340                 u32 i;
1341                 for(i=0; array[i] && str[i] && i < n; ++i)
1342                         if (array[i] != str[i])
1343                                 return false;
1344
1345                 // if one (or both) of the strings was smaller then they
1346                 // are only equal if they have the same length
1347                 return (i == n) || (array[i] == 0 && str[i] == 0);
1348         }
1349
1350
1351         //! Appends a character to this ustring16
1352         //! \param character The character to append.
1353         //! \return A reference to our current string.
1354         ustring16<TAlloc>& append(uchar32_t character)
1355         {
1356                 if (used + 2 >= allocated)
1357                         reallocate(used + 2);
1358
1359                 if (character > 0xFFFF)
1360                 {
1361                         used += 2;
1362
1363                         // character will be multibyte, so split it up into a surrogate pair.
1364                         uchar16_t x = static_cast<uchar16_t>(character);
1365                         uchar16_t vh = UTF16_HI_SURROGATE | ((((character >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1366                         uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1367                         array[used-2] = vh;
1368                         array[used-1] = vl;
1369                 }
1370                 else
1371                 {
1372                         ++used;
1373                         array[used-1] = character;
1374                 }
1375                 array[used] = 0;
1376
1377                 return *this;
1378         }
1379
1380
1381         //! Appends a UTF-8 string to this ustring16
1382         //! \param other The UTF-8 string to append.
1383         //! \param length The length of the string to append.
1384         //! \return A reference to our current string.
1385         ustring16<TAlloc>& append(const uchar8_t* const other, u32 length=0xffffffff)
1386         {
1387                 if (!other)
1388                         return *this;
1389
1390                 // Determine if the string is long enough for a BOM.
1391                 u32 len = 0;
1392                 const uchar8_t* p = other;
1393                 do
1394                 {
1395                         ++len;
1396                 } while (*p++ && len < unicode::BOM_ENCODE_UTF8_LEN);
1397
1398                 // Check for BOM.
1399                 unicode::EUTF_ENCODE c_bom = unicode::EUTFE_NONE;
1400                 if (len == unicode::BOM_ENCODE_UTF8_LEN)
1401                 {
1402                         if (memcmp(other, unicode::BOM_ENCODE_UTF8, unicode::BOM_ENCODE_UTF8_LEN) == 0)
1403                                 c_bom = unicode::EUTFE_UTF8;
1404                 }
1405
1406                 // If a BOM was found, don't include it in the string.
1407                 const uchar8_t* c2 = other;
1408                 if (c_bom != unicode::EUTFE_NONE)
1409                 {
1410                         c2 = other + unicode::BOM_UTF8_LEN;
1411                         length -= unicode::BOM_UTF8_LEN;
1412                 }
1413
1414                 // Calculate the size of the string to read in.
1415                 len = 0;
1416                 p = c2;
1417                 do
1418                 {
1419                         ++len;
1420                 } while(*p++ && len < length);
1421                 if (len > length)
1422                         len = length;
1423
1424                 // If we need to grow the array, do it now.
1425                 if (used + len >= allocated)
1426                         reallocate(used + (len * 2));
1427                 u32 start = used;
1428
1429                 // Convert UTF-8 to UTF-16.
1430                 u32 pos = start;
1431                 for (u32 l = 0; l<len;)
1432                 {
1433                         ++used;
1434                         if (((c2[l] >> 6) & 0x03) == 0x02)
1435                         {       // Invalid continuation byte.
1436                                 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1437                                 ++l;
1438                         }
1439                         else if (c2[l] == 0xC0 || c2[l] == 0xC1)
1440                         {       // Invalid byte - overlong encoding.
1441                                 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1442                                 ++l;
1443                         }
1444                         else if ((c2[l] & 0xF8) == 0xF0)
1445                         {       // 4 bytes UTF-8, 2 bytes UTF-16.
1446                                 // Check for a full string.
1447                                 if ((l + 3) >= len)
1448                                 {
1449                                         array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1450                                         l += 3;
1451                                         break;
1452                                 }
1453
1454                                 // Validate.
1455                                 bool valid = true;
1456                                 u8 l2 = 0;
1457                                 if (valid && (((c2[l+1] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1458                                 if (valid && (((c2[l+2] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1459                                 if (valid && (((c2[l+3] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1460                                 if (!valid)
1461                                 {
1462                                         array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1463                                         l += l2;
1464                                         continue;
1465                                 }
1466
1467                                 // Decode.
1468                                 uchar8_t b1 = ((c2[l] & 0x7) << 2) | ((c2[l+1] >> 4) & 0x3);
1469                                 uchar8_t b2 = ((c2[l+1] & 0xF) << 4) | ((c2[l+2] >> 2) & 0xF);
1470                                 uchar8_t b3 = ((c2[l+2] & 0x3) << 6) | (c2[l+3] & 0x3F);
1471                                 uchar32_t v = b3 | ((uchar32_t)b2 << 8) | ((uchar32_t)b1 << 16);
1472
1473                                 // Split v up into a surrogate pair.
1474                                 uchar16_t x = static_cast<uchar16_t>(v);
1475                                 uchar16_t vh = UTF16_HI_SURROGATE | ((((v >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1476                                 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1477
1478                                 array[pos++] = vh;
1479                                 array[pos++] = vl;
1480                                 l += 4;
1481                                 ++used;         // Using two shorts this time, so increase used by 1.
1482                         }
1483                         else if ((c2[l] & 0xF0) == 0xE0)
1484                         {       // 3 bytes UTF-8, 1 byte UTF-16.
1485                                 // Check for a full string.
1486                                 if ((l + 2) >= len)
1487                                 {
1488                                         array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1489                                         l += 2;
1490                                         break;
1491                                 }
1492
1493                                 // Validate.
1494                                 bool valid = true;
1495                                 u8 l2 = 0;
1496                                 if (valid && (((c2[l+1] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1497                                 if (valid && (((c2[l+2] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1498                                 if (!valid)
1499                                 {
1500                                         array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1501                                         l += l2;
1502                                         continue;
1503                                 }
1504
1505                                 // Decode.
1506                                 uchar8_t b1 = ((c2[l] & 0xF) << 4) | ((c2[l+1] >> 2) & 0xF);
1507                                 uchar8_t b2 = ((c2[l+1] & 0x3) << 6) | (c2[l+2] & 0x3F);
1508                                 uchar16_t ch = b2 | ((uchar16_t)b1 << 8);
1509                                 array[pos++] = ch;
1510                                 l += 3;
1511                         }
1512                         else if ((c2[l] & 0xE0) == 0xC0)
1513                         {       // 2 bytes UTF-8, 1 byte UTF-16.
1514                                 // Check for a full string.
1515                                 if ((l + 1) >= len)
1516                                 {
1517                                         array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1518                                         l += 1;
1519                                         break;
1520                                 }
1521
1522                                 // Validate.
1523                                 if (((c2[l+1] >> 6) & 0x03) != 0x02)
1524                                 {
1525                                         array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1526                                         ++l;
1527                                         continue;
1528                                 }
1529
1530                                 // Decode.
1531                                 uchar8_t b1 = (c2[l] >> 2) & 0x7;
1532                                 uchar8_t b2 = ((c2[l] & 0x3) << 6) | (c2[l+1] & 0x3F);
1533                                 uchar16_t ch = b2 | ((uchar16_t)b1 << 8);
1534                                 array[pos++] = ch;
1535                                 l += 2;
1536                         }
1537                         else
1538                         {       // 1 byte UTF-8, 1 byte UTF-16.
1539                                 // Validate.
1540                                 if (c2[l] > 0x7F)
1541                                 {       // Values above 0xF4 are restricted and aren't used.  By now, anything above 0x7F is invalid.
1542                                         array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1543                                 }
1544                                 else array[pos++] = static_cast<uchar16_t>(c2[l]);
1545                                 ++l;
1546                         }
1547                 }
1548                 array[used] = 0;
1549
1550                 // Validate our new UTF-16 string.
1551                 validate();
1552
1553                 return *this;
1554         }
1555
1556
1557         //! Appends a UTF-16 string to this ustring16
1558         //! \param other The UTF-16 string to append.
1559         //! \param length The length of the string to append.
1560         //! \return A reference to our current string.
1561         ustring16<TAlloc>& append(const uchar16_t* const other, u32 length=0xffffffff)
1562         {
1563                 if (!other)
1564                         return *this;
1565
1566                 // Determine if the string is long enough for a BOM.
1567                 u32 len = 0;
1568                 const uchar16_t* p = other;
1569                 do
1570                 {
1571                         ++len;
1572                 } while (*p++ && len < unicode::BOM_ENCODE_UTF16_LEN);
1573
1574                 // Check for the BOM to determine the string's endianness.
1575                 unicode::EUTF_ENDIAN c_end = unicode::EUTFEE_NATIVE;
1576                 if (memcmp(other, unicode::BOM_ENCODE_UTF16_LE, unicode::BOM_ENCODE_UTF16_LEN) == 0)
1577                         c_end = unicode::EUTFEE_LITTLE;
1578                 else if (memcmp(other, unicode::BOM_ENCODE_UTF16_BE, unicode::BOM_ENCODE_UTF16_LEN) == 0)
1579                         c_end = unicode::EUTFEE_BIG;
1580
1581                 // If a BOM was found, don't include it in the string.
1582                 const uchar16_t* c2 = other;
1583                 if (c_end != unicode::EUTFEE_NATIVE)
1584                 {
1585                         c2 = other + unicode::BOM_UTF16_LEN;
1586                         length -= unicode::BOM_UTF16_LEN;
1587                 }
1588
1589                 // Calculate the size of the string to read in.
1590                 len = 0;
1591                 p = c2;
1592                 do
1593                 {
1594                         ++len;
1595                 } while(*p++ && len < length);
1596                 if (len > length)
1597                         len = length;
1598
1599                 // If we need to grow the size of the array, do it now.
1600                 if (used + len >= allocated)
1601                         reallocate(used + (len * 2));
1602                 u32 start = used;
1603                 used += len;
1604
1605                 // Copy the string now.
1606                 unicode::EUTF_ENDIAN m_end = getEndianness();
1607                 for (u32 l = start; l < start + len; ++l)
1608                 {
1609                         array[l] = (uchar16_t)c2[l];
1610                         if (c_end != unicode::EUTFEE_NATIVE && c_end != m_end)
1611                                 array[l] = unicode::swapEndian16(array[l]);
1612                 }
1613
1614                 array[used] = 0;
1615
1616                 // Validate our new UTF-16 string.
1617                 validate();
1618                 return *this;
1619         }
1620
1621
1622         //! Appends a UTF-32 string to this ustring16
1623         //! \param other The UTF-32 string to append.
1624         //! \param length The length of the string to append.
1625         //! \return A reference to our current string.
1626         ustring16<TAlloc>& append(const uchar32_t* const other, u32 length=0xffffffff)
1627         {
1628                 if (!other)
1629                         return *this;
1630
1631                 // Check for the BOM to determine the string's endianness.
1632                 unicode::EUTF_ENDIAN c_end = unicode::EUTFEE_NATIVE;
1633                 if (memcmp(other, unicode::BOM_ENCODE_UTF32_LE, unicode::BOM_ENCODE_UTF32_LEN) == 0)
1634                         c_end = unicode::EUTFEE_LITTLE;
1635                 else if (memcmp(other, unicode::BOM_ENCODE_UTF32_BE, unicode::BOM_ENCODE_UTF32_LEN) == 0)
1636                         c_end = unicode::EUTFEE_BIG;
1637
1638                 // If a BOM was found, don't include it in the string.
1639                 const uchar32_t* c2 = other;
1640                 if (c_end != unicode::EUTFEE_NATIVE)
1641                 {
1642                         c2 = other + unicode::BOM_UTF32_LEN;
1643                         length -= unicode::BOM_UTF32_LEN;
1644                 }
1645
1646                 // Calculate the size of the string to read in.
1647                 u32 len = 0;
1648                 const uchar32_t* p = c2;
1649                 do
1650                 {
1651                         ++len;
1652                 } while(*p++ && len < length);
1653                 if (len > length)
1654                         len = length;
1655
1656                 // If we need to grow the size of the array, do it now.
1657                 // In case all of the UTF-32 string is split into surrogate pairs, do len * 2.
1658                 if (used + (len * 2) >= allocated)
1659                         reallocate(used + ((len * 2) * 2));
1660                 u32 start = used;
1661
1662                 // Convert UTF-32 to UTF-16.
1663                 unicode::EUTF_ENDIAN m_end = getEndianness();
1664                 u32 pos = start;
1665                 for (u32 l = 0; l<len; ++l)
1666                 {
1667                         ++used;
1668
1669                         uchar32_t ch = c2[l];
1670                         if (c_end != unicode::EUTFEE_NATIVE && c_end != m_end)
1671                                 ch = unicode::swapEndian32(ch);
1672
1673                         if (ch > 0xFFFF)
1674                         {
1675                                 // Split ch up into a surrogate pair as it is over 16 bits long.
1676                                 uchar16_t x = static_cast<uchar16_t>(ch);
1677                                 uchar16_t vh = UTF16_HI_SURROGATE | ((((ch >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1678                                 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1679                                 array[pos++] = vh;
1680                                 array[pos++] = vl;
1681                                 ++used;         // Using two shorts, so increased used again.
1682                         }
1683                         else if (ch >= 0xD800 && ch <= 0xDFFF)
1684                         {
1685                                 // Between possible UTF-16 surrogates (invalid!)
1686                                 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1687                         }
1688                         else array[pos++] = static_cast<uchar16_t>(ch);
1689                 }
1690                 array[used] = 0;
1691
1692                 // Validate our new UTF-16 string.
1693                 validate();
1694
1695                 return *this;
1696         }
1697
1698
1699         //! Appends a ustring16 to this ustring16
1700         //! \param other The string to append to this one.
1701         //! \return A reference to our current string.
1702         ustring16<TAlloc>& append(const ustring16<TAlloc>& other)
1703         {
1704                 const uchar16_t* oa = other.c_str();
1705
1706                 u32 len = other.size_raw();
1707
1708                 if (used + len >= allocated)
1709                         reallocate(used + len);
1710
1711                 for (u32 l=0; l<len; ++l)
1712                         array[used+l] = oa[l];
1713
1714                 used += len;
1715                 array[used] = 0;
1716
1717                 return *this;
1718         }
1719
1720
1721         //! Appends a certain amount of characters of a ustring16 to this ustring16.
1722         //! \param other The string to append to this one.
1723         //! \param length How many characters of the other string to add to this one.
1724         //! \return A reference to our current string.
1725         ustring16<TAlloc>& append(const ustring16<TAlloc>& other, u32 length)
1726         {
1727                 if (other.size() == 0)
1728                         return *this;
1729
1730                 if (other.size() < length)
1731                 {
1732                         append(other);
1733                         return *this;
1734                 }
1735
1736                 if (used + length * 2 >= allocated)
1737                         reallocate(used + length * 2);
1738
1739                 const_iterator iter(other, 0);
1740                 u32 l = length;
1741                 while (!iter.atEnd() && l)
1742                 {
1743                         uchar32_t c = *iter;
1744                         append(c);
1745                         ++iter;
1746                         --l;
1747                 }
1748
1749                 return *this;
1750         }
1751
1752
1753         //! Reserves some memory.
1754         //! \param count The amount of characters to reserve.
1755         void reserve(u32 count)
1756         {
1757                 if (count < allocated)
1758                         return;
1759
1760                 reallocate(count);
1761         }
1762
1763
1764         //! Finds first occurrence of character.
1765         //! \param c The character to search for.
1766         //! \return Position where the character has been found, or -1 if not found.
1767         s32 findFirst(uchar32_t c) const
1768         {
1769                 const_iterator i(*this, 0);
1770
1771                 s32 pos = 0;
1772                 while (!i.atEnd())
1773                 {
1774                         uchar32_t t = *i;
1775                         if (c == t)
1776                                 return pos;
1777                         ++pos;
1778                         ++i;
1779                 }
1780
1781                 return -1;
1782         }
1783
1784         //! Finds first occurrence of a character of a list.
1785         //! \param c A list of characters to find. For example if the method should find the first occurrence of 'a' or 'b', this parameter should be "ab".
1786         //! \param count The amount of characters in the list. Usually, this should be strlen(c).
1787         //! \return Position where one of the characters has been found, or -1 if not found.
1788         s32 findFirstChar(const uchar32_t* const c, u32 count=1) const
1789         {
1790                 if (!c || !count)
1791                         return -1;
1792
1793                 const_iterator i(*this, 0);
1794
1795                 s32 pos = 0;
1796                 while (!i.atEnd())
1797                 {
1798                         uchar32_t t = *i;
1799                         for (u32 j=0; j<count; ++j)
1800                                 if (t == c[j])
1801                                         return pos;
1802                         ++pos;
1803                         ++i;
1804                 }
1805
1806                 return -1;
1807         }
1808
1809
1810         //! Finds first position of a character not in a given list.
1811         //! \param c A list of characters to NOT find. For example if the method should find the first occurrence of a character not 'a' or 'b', this parameter should be "ab".
1812         //! \param count The amount of characters in the list. Usually, this should be strlen(c).
1813         //! \return Position where the character has been found, or -1 if not found.
1814         s32 findFirstCharNotInList(const uchar32_t* const c, u32 count=1) const
1815         {
1816                 if (!c || !count)
1817                         return -1;
1818
1819                 const_iterator i(*this, 0);
1820
1821                 s32 pos = 0;
1822                 while (!i.atEnd())
1823                 {
1824                         uchar32_t t = *i;
1825                         u32 j;
1826                         for (j=0; j<count; ++j)
1827                                 if (t == c[j])
1828                                         break;
1829
1830                         if (j==count)
1831                                 return pos;
1832                         ++pos;
1833                         ++i;
1834                 }
1835
1836                 return -1;
1837         }
1838
1839         //! Finds last position of a character not in a given list.
1840         //! \param c A list of characters to NOT find. For example if the method should find the first occurrence of a character not 'a' or 'b', this parameter should be "ab".
1841         //! \param count The amount of characters in the list. Usually, this should be strlen(c).
1842         //! \return Position where the character has been found, or -1 if not found.
1843         s32 findLastCharNotInList(const uchar32_t* const c, u32 count=1) const
1844         {
1845                 if (!c || !count)
1846                         return -1;
1847
1848                 const_iterator i(end());
1849                 --i;
1850
1851                 s32 pos = size() - 1;
1852                 while (!i.atStart())
1853                 {
1854                         uchar32_t t = *i;
1855                         u32 j;
1856                         for (j=0; j<count; ++j)
1857                                 if (t == c[j])
1858                                         break;
1859
1860                         if (j==count)
1861                                 return pos;
1862                         --pos;
1863                         --i;
1864                 }
1865
1866                 return -1;
1867         }
1868
1869         //! Finds next occurrence of character.
1870         //! \param c The character to search for.
1871         //! \param startPos The position in the string to start searching.
1872         //! \return Position where the character has been found, or -1 if not found.
1873         s32 findNext(uchar32_t c, u32 startPos) const
1874         {
1875                 const_iterator i(*this, startPos);
1876
1877                 s32 pos = startPos;
1878                 while (!i.atEnd())
1879                 {
1880                         uchar32_t t = *i;
1881                         if (t == c)
1882                                 return pos;
1883                         ++pos;
1884                         ++i;
1885                 }
1886
1887                 return -1;
1888         }
1889
1890
1891         //! Finds last occurrence of character.
1892         //! \param c The character to search for.
1893         //! \param start The start position of the reverse search ( default = -1, on end ).
1894         //! \return Position where the character has been found, or -1 if not found.
1895         s32 findLast(uchar32_t c, s32 start = -1) const
1896         {
1897                 u32 s = size();
1898                 start = core::clamp ( start < 0 ? (s32)s : start, 0, (s32)s ) - 1;
1899
1900                 const_iterator i(*this, start);
1901                 u32 pos = start;
1902                 while (!i.atStart())
1903                 {
1904                         uchar32_t t = *i;
1905                         if (t == c)
1906                                 return pos;
1907                         --pos;
1908                         --i;
1909                 }
1910
1911                 return -1;
1912         }
1913
1914         //! Finds last occurrence of a character in a list.
1915         //! \param c A list of strings to find. For example if the method should find the last occurrence of 'a' or 'b', this parameter should be "ab".
1916         //! \param count The amount of characters in the list. Usually, this should be strlen(c).
1917         //! \return Position where one of the characters has been found, or -1 if not found.
1918         s32 findLastChar(const uchar32_t* const c, u32 count=1) const
1919         {
1920                 if (!c || !count)
1921                         return -1;
1922
1923                 const_iterator i(end());
1924                 --i;
1925
1926                 s32 pos = size();
1927                 while (!i.atStart())
1928                 {
1929                         uchar32_t t = *i;
1930                         for (u32 j=0; j<count; ++j)
1931                                 if (t == c[j])
1932                                         return pos;
1933                         --pos;
1934                         --i;
1935                 }
1936
1937                 return -1;
1938         }
1939
1940
1941         //! Finds another ustring16 in this ustring16.
1942         //! \param str The string to find.
1943         //! \param start The start position of the search.
1944         //! \return Positions where the ustring16 has been found, or -1 if not found.
1945         s32 find(const ustring16<TAlloc>& str, const u32 start = 0) const
1946         {
1947                 u32 my_size = size();
1948                 u32 their_size = str.size();
1949
1950                 if (their_size == 0 || my_size - start < their_size)
1951                         return -1;
1952
1953                 const_iterator i(*this, start);
1954
1955                 s32 pos = start;
1956                 while (!i.atEnd())
1957                 {
1958                         const_iterator i2(i);
1959                         const_iterator j(str, 0);
1960                         uchar32_t t1 = (uchar32_t)*i2;
1961                         uchar32_t t2 = (uchar32_t)*j;
1962                         while (t1 == t2)
1963                         {
1964                                 ++i2;
1965                                 ++j;
1966                                 if (j.atEnd())
1967                                         return pos;
1968                                 t1 = (uchar32_t)*i2;
1969                                 t2 = (uchar32_t)*j;
1970                         }
1971                         ++i;
1972                         ++pos;
1973                 }
1974
1975                 return -1;
1976         }
1977
1978
1979         //! Finds another ustring16 in this ustring16.
1980         //! \param str The string to find.
1981         //! \param start The start position of the search.
1982         //! \return Positions where the string has been found, or -1 if not found.
1983         s32 find_raw(const ustring16<TAlloc>& str, const u32 start = 0) const
1984         {
1985                 const uchar16_t* data = str.c_str();
1986                 if (data && *data)
1987                 {
1988                         u32 len = 0;
1989
1990                         while (data[len])
1991                                 ++len;
1992
1993                         if (len > used)
1994                                 return -1;
1995
1996                         for (u32 i=start; i<=used-len; ++i)
1997                         {
1998                                 u32 j=0;
1999
2000                                 while(data[j] && array[i+j] == data[j])
2001                                         ++j;
2002
2003                                 if (!data[j])
2004                                         return i;
2005                         }
2006                 }
2007
2008                 return -1;
2009         }
2010
2011
2012         //! Returns a substring.
2013         //! \param begin: Start of substring.
2014         //! \param length: Length of substring.
2015         //! \return A reference to our current string.
2016         ustring16<TAlloc> subString(u32 begin, s32 length) const
2017         {
2018                 u32 len = size();
2019                 // if start after ustring16
2020                 // or no proper substring length
2021                 if ((length <= 0) || (begin>=len))
2022                         return ustring16<TAlloc>("");
2023                 // clamp length to maximal value
2024                 if ((length+begin) > len)
2025                         length = len-begin;
2026
2027                 ustring16<TAlloc> o;
2028                 o.reserve((length+1) * 2);
2029
2030                 const_iterator i(*this, begin);
2031                 while (!i.atEnd() && length)
2032                 {
2033                         o.append(*i);
2034                         ++i;
2035                         --length;
2036                 }
2037
2038                 return o;
2039         }
2040
2041
2042         //! Appends a character to this ustring16.
2043         //! \param c Character to append.
2044         //! \return A reference to our current string.
2045         ustring16<TAlloc>& operator += (char c)
2046         {
2047                 append((uchar32_t)c);
2048                 return *this;
2049         }
2050
2051
2052         //! Appends a character to this ustring16.
2053         //! \param c Character to append.
2054         //! \return A reference to our current string.
2055         ustring16<TAlloc>& operator += (uchar32_t c)
2056         {
2057                 append(c);
2058                 return *this;
2059         }
2060
2061
2062         //! Appends a number to this ustring16.
2063         //! \param c Number to append.
2064         //! \return A reference to our current string.
2065         ustring16<TAlloc>& operator += (short c)
2066         {
2067                 append(core::stringc(c));
2068                 return *this;
2069         }
2070
2071
2072         //! Appends a number to this ustring16.
2073         //! \param c Number to append.
2074         //! \return A reference to our current string.
2075         ustring16<TAlloc>& operator += (unsigned short c)
2076         {
2077                 append(core::stringc(c));
2078                 return *this;
2079         }
2080
2081
2082 #ifdef USTRING_CPP0X_NEWLITERALS
2083         //! Appends a number to this ustring16.
2084         //! \param c Number to append.
2085         //! \return A reference to our current string.
2086         ustring16<TAlloc>& operator += (int c)
2087         {
2088                 append(core::stringc(c));
2089                 return *this;
2090         }
2091
2092
2093         //! Appends a number to this ustring16.
2094         //! \param c Number to append.
2095         //! \return A reference to our current string.
2096         ustring16<TAlloc>& operator += (unsigned int c)
2097         {
2098                 append(core::stringc(c));
2099                 return *this;
2100         }
2101 #endif
2102
2103
2104         //! Appends a number to this ustring16.
2105         //! \param c Number to append.
2106         //! \return A reference to our current string.
2107         ustring16<TAlloc>& operator += (long c)
2108         {
2109                 append(core::stringc(c));
2110                 return *this;
2111         }
2112
2113
2114         //! Appends a number to this ustring16.
2115         //! \param c Number to append.
2116         //! \return A reference to our current string.
2117         ustring16<TAlloc>& operator += (unsigned long c)
2118         {
2119                 append(core::stringc(c));
2120                 return *this;
2121         }
2122
2123
2124         //! Appends a number to this ustring16.
2125         //! \param c Number to append.
2126         //! \return A reference to our current string.
2127         ustring16<TAlloc>& operator += (double c)
2128         {
2129                 append(core::stringc(c));
2130                 return *this;
2131         }
2132
2133
2134         //! Appends a char ustring16 to this ustring16.
2135         //! \param c Char ustring16 to append.
2136         //! \return A reference to our current string.
2137         ustring16<TAlloc>& operator += (const uchar16_t* const c)
2138         {
2139                 append(c);
2140                 return *this;
2141         }
2142
2143
2144         //! Appends a ustring16 to this ustring16.
2145         //! \param other ustring16 to append.
2146         //! \return A reference to our current string.
2147         ustring16<TAlloc>& operator += (const ustring16<TAlloc>& other)
2148         {
2149                 append(other);
2150                 return *this;
2151         }
2152
2153
2154         //! Replaces all characters of a given type with another one.
2155         //! \param toReplace Character to replace.
2156         //! \param replaceWith Character replacing the old one.
2157         //! \return A reference to our current string.
2158         ustring16<TAlloc>& replace(uchar32_t toReplace, uchar32_t replaceWith)
2159         {
2160                 iterator i(*this, 0);
2161                 while (!i.atEnd())
2162                 {
2163                         typename ustring16<TAlloc>::access a = *i;
2164                         if ((uchar32_t)a == toReplace)
2165                                 a = replaceWith;
2166                         ++i;
2167                 }
2168                 return *this;
2169         }
2170
2171
2172         //! Replaces all instances of a string with another one.
2173         //! \param toReplace The string to replace.
2174         //! \param replaceWith The string replacing the old one.
2175         //! \return A reference to our current string.
2176         ustring16<TAlloc>& replace(const ustring16<TAlloc>& toReplace, const ustring16<TAlloc>& replaceWith)
2177         {
2178                 if (toReplace.size() == 0)
2179                         return *this;
2180
2181                 const uchar16_t* other = toReplace.c_str();
2182                 const uchar16_t* replace = replaceWith.c_str();
2183                 const u32 other_size = toReplace.size_raw();
2184                 const u32 replace_size = replaceWith.size_raw();
2185
2186                 // Determine the delta.  The algorithm will change depending on the delta.
2187                 s32 delta = replace_size - other_size;
2188
2189                 // A character for character replace.  The string will not shrink or grow.
2190                 if (delta == 0)
2191                 {
2192                         s32 pos = 0;
2193                         while ((pos = find_raw(other, pos)) != -1)
2194                         {
2195                                 for (u32 i = 0; i < replace_size; ++i)
2196                                         array[pos + i] = replace[i];
2197                                 ++pos;
2198                         }
2199                         return *this;
2200                 }
2201
2202                 // We are going to be removing some characters.  The string will shrink.
2203                 if (delta < 0)
2204                 {
2205                         u32 i = 0;
2206                         for (u32 pos = 0; pos <= used; ++i, ++pos)
2207                         {
2208                                 // Is this potentially a match?
2209                                 if (array[pos] == *other)
2210                                 {
2211                                         // Check to see if we have a match.
2212                                         u32 j;
2213                                         for (j = 0; j < other_size; ++j)
2214                                         {
2215                                                 if (array[pos + j] != other[j])
2216                                                         break;
2217                                         }
2218
2219                                         // If we have a match, replace characters.
2220                                         if (j == other_size)
2221                                         {
2222                                                 for (j = 0; j < replace_size; ++j)
2223                                                         array[i + j] = replace[j];
2224                                                 i += replace_size - 1;
2225                                                 pos += other_size - 1;
2226                                                 continue;
2227                                         }
2228                                 }
2229
2230                                 // No match found, just copy characters.
2231                                 array[i - 1] = array[pos];
2232                         }
2233                         array[i] = 0;
2234                         used = i;
2235
2236                         return *this;
2237                 }
2238
2239                 // We are going to be adding characters, so the string size will increase.
2240                 // Count the number of times toReplace exists in the string so we can allocate the new size.
2241                 u32 find_count = 0;
2242                 s32 pos = 0;
2243                 while ((pos = find_raw(other, pos)) != -1)
2244                 {
2245                         ++find_count;
2246                         ++pos;
2247                 }
2248
2249                 // Re-allocate the string now, if needed.
2250                 u32 len = delta * find_count;
2251                 if (used + len >= allocated)
2252                         reallocate(used + len);
2253
2254                 // Start replacing.
2255                 pos = 0;
2256                 while ((pos = find_raw(other, pos)) != -1)
2257                 {
2258                         uchar16_t* start = array + pos + other_size - 1;
2259                         uchar16_t* ptr   = array + used;
2260                         uchar16_t* end   = array + used + delta;
2261
2262                         // Shift characters to make room for the string.
2263                         while (ptr != start)
2264                         {
2265                                 *end = *ptr;
2266                                 --ptr;
2267                                 --end;
2268                         }
2269
2270                         // Add the new string now.
2271                         for (u32 i = 0; i < replace_size; ++i)
2272                                 array[pos + i] = replace[i];
2273
2274                         pos += replace_size;
2275                         used += delta;
2276                 }
2277
2278                 // Terminate the string and return ourself.
2279                 array[used] = 0;
2280                 return *this;
2281         }
2282
2283
2284         //! Removes characters from a ustring16..
2285         //! \param c The character to remove.
2286         //! \return A reference to our current string.
2287         ustring16<TAlloc>& remove(uchar32_t c)
2288         {
2289                 u32 pos = 0;
2290                 u32 found = 0;
2291                 u32 len = (c > 0xFFFF ? 2 : 1);         // Remove characters equal to the size of c as a UTF-16 character.
2292                 for (u32 i=0; i<=used; ++i)
2293                 {
2294                         uchar32_t uc32 = 0;
2295                         if (!UTF16_IS_SURROGATE_HI(array[i]))
2296                                 uc32 |= array[i];
2297                         else if (i + 1 <= used)
2298                         {
2299                                 // Convert the surrogate pair into a single UTF-32 character.
2300                                 uc32 = unicode::toUTF32(array[i], array[i + 1]);
2301                         }
2302                         u32 len2 = (uc32 > 0xFFFF ? 2 : 1);
2303
2304                         if (uc32 == c)
2305                         {
2306                                 found += len;
2307                                 continue;
2308                         }
2309
2310                         array[pos++] = array[i];
2311                         if (len2 == 2)
2312                                 array[pos++] = array[++i];
2313                 }
2314                 used -= found;
2315                 array[used] = 0;
2316                 return *this;
2317         }
2318
2319
2320         //! Removes a ustring16 from the ustring16.
2321         //! \param toRemove The string to remove.
2322         //! \return A reference to our current string.
2323         ustring16<TAlloc>& remove(const ustring16<TAlloc>& toRemove)
2324         {
2325                 u32 size = toRemove.size_raw();
2326                 if (size == 0) return *this;
2327
2328                 const uchar16_t* tra = toRemove.c_str();
2329                 u32 pos = 0;
2330                 u32 found = 0;
2331                 for (u32 i=0; i<=used; ++i)
2332                 {
2333                         u32 j = 0;
2334                         while (j < size)
2335                         {
2336                                 if (array[i + j] != tra[j])
2337                                         break;
2338                                 ++j;
2339                         }
2340                         if (j == size)
2341                         {
2342                                 found += size;
2343                                 i += size - 1;
2344                                 continue;
2345                         }
2346
2347                         array[pos++] = array[i];
2348                 }
2349                 used -= found;
2350                 array[used] = 0;
2351                 return *this;
2352         }
2353
2354
2355         //! Removes characters from the ustring16.
2356         //! \param characters The characters to remove.
2357         //! \return A reference to our current string.
2358         ustring16<TAlloc>& removeChars(const ustring16<TAlloc>& characters)
2359         {
2360                 if (characters.size_raw() == 0)
2361                         return *this;
2362
2363                 u32 pos = 0;
2364                 u32 found = 0;
2365                 const_iterator iter(characters);
2366                 for (u32 i=0; i<=used; ++i)
2367                 {
2368                         uchar32_t uc32 = 0;
2369                         if (!UTF16_IS_SURROGATE_HI(array[i]))
2370                                 uc32 |= array[i];
2371                         else if (i + 1 <= used)
2372                         {
2373                                 // Convert the surrogate pair into a single UTF-32 character.
2374                                 uc32 = unicode::toUTF32(array[i], array[i+1]);
2375                         }
2376                         u32 len2 = (uc32 > 0xFFFF ? 2 : 1);
2377
2378                         bool cont = false;
2379                         iter.toStart();
2380                         while (!iter.atEnd())
2381                         {
2382                                 uchar32_t c = *iter;
2383                                 if (uc32 == c)
2384                                 {
2385                                         found += (c > 0xFFFF ? 2 : 1);          // Remove characters equal to the size of c as a UTF-16 character.
2386                                         ++i;
2387                                         cont = true;
2388                                         break;
2389                                 }
2390                                 ++iter;
2391                         }
2392                         if (cont) continue;
2393
2394                         array[pos++] = array[i];
2395                         if (len2 == 2)
2396                                 array[pos++] = array[++i];
2397                 }
2398                 used -= found;
2399                 array[used] = 0;
2400                 return *this;
2401         }
2402
2403
2404         //! Trims the ustring16.
2405         //! Removes the specified characters (by default, Latin-1 whitespace) from the begining and the end of the ustring16.
2406         //! \param whitespace The characters that are to be considered as whitespace.
2407         //! \return A reference to our current string.
2408         ustring16<TAlloc>& trim(const ustring16<TAlloc>& whitespace = " \t\n\r")
2409         {
2410                 core::array<uchar32_t> utf32white = whitespace.toUTF32();
2411
2412                 // find start and end of the substring without the specified characters
2413                 const s32 begin = findFirstCharNotInList(utf32white.const_pointer(), whitespace.used + 1);
2414                 if (begin == -1)
2415                         return (*this="");
2416
2417                 const s32 end = findLastCharNotInList(utf32white.const_pointer(), whitespace.used + 1);
2418
2419                 return (*this = subString(begin, (end +1) - begin));
2420         }
2421
2422
2423         //! Erases a character from the ustring16.
2424         //! May be slow, because all elements following after the erased element have to be copied.
2425         //! \param index Index of element to be erased.
2426         //! \return A reference to our current string.
2427         ustring16<TAlloc>& erase(u32 index)
2428         {
2429                 _IRR_DEBUG_BREAK_IF(index>used) // access violation
2430
2431                 iterator i(*this, index);
2432
2433                 uchar32_t t = *i;
2434                 u32 len = (t > 0xFFFF ? 2 : 1);
2435
2436                 for (u32 j = static_cast<u32>(i.getPos()) + len; j <= used; ++j)
2437                         array[j - len] = array[j];
2438
2439                 used -= len;
2440                 array[used] = 0;
2441
2442                 return *this;
2443         }
2444
2445
2446         //! Validate the existing ustring16, checking for valid surrogate pairs and checking for proper termination.
2447         //! \return A reference to our current string.
2448         ustring16<TAlloc>& validate()
2449         {
2450                 // Validate all unicode characters.
2451                 for (u32 i=0; i<allocated; ++i)
2452                 {
2453                         // Terminate on existing null.
2454                         if (array[i] == 0)
2455                         {
2456                                 used = i;
2457                                 return *this;
2458                         }
2459                         if (UTF16_IS_SURROGATE(array[i]))
2460                         {
2461                                 if (((i+1) >= allocated) || UTF16_IS_SURROGATE_LO(array[i]))
2462                                         array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
2463                                 else if (UTF16_IS_SURROGATE_HI(array[i]) && !UTF16_IS_SURROGATE_LO(array[i+1]))
2464                                         array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
2465                                 ++i;
2466                         }
2467                         if (array[i] >= 0xFDD0 && array[i] <= 0xFDEF)
2468                                 array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
2469                 }
2470
2471                 // terminate
2472                 used = 0;
2473                 if (allocated > 0)
2474                 {
2475                         used = allocated - 1;
2476                         array[used] = 0;
2477                 }
2478                 return *this;
2479         }
2480
2481
2482         //! Gets the last char of the ustring16, or 0.
2483         //! \return The last char of the ustring16, or 0.
2484         uchar32_t lastChar() const
2485         {
2486                 if (used < 1)
2487                         return 0;
2488
2489                 if (UTF16_IS_SURROGATE_LO(array[used-1]))
2490                 {
2491                         // Make sure we have a paired surrogate.
2492                         if (used < 2)
2493                                 return 0;
2494
2495                         // Check for an invalid surrogate.
2496                         if (!UTF16_IS_SURROGATE_HI(array[used-2]))
2497                                 return 0;
2498
2499                         // Convert the surrogate pair into a single UTF-32 character.
2500                         return unicode::toUTF32(array[used-2], array[used-1]);
2501                 }
2502                 else
2503                 {
2504                         return array[used-1];
2505                 }
2506         }
2507
2508
2509         //! Split the ustring16 into parts.
2510         /** This method will split a ustring16 at certain delimiter characters
2511         into the container passed in as reference. The type of the container
2512         has to be given as template parameter. It must provide a push_back and
2513         a size method.
2514         \param ret The result container
2515         \param c C-style ustring16 of delimiter characters
2516         \param count Number of delimiter characters
2517         \param ignoreEmptyTokens Flag to avoid empty substrings in the result
2518         container. If two delimiters occur without a character in between, an
2519         empty substring would be placed in the result. If this flag is set,
2520         only non-empty strings are stored.
2521         \param keepSeparators Flag which allows to add the separator to the
2522         result ustring16. If this flag is true, the concatenation of the
2523         substrings results in the original ustring16. Otherwise, only the
2524         characters between the delimiters are returned.
2525         \return The number of resulting substrings
2526         */
2527         template<class container>
2528         u32 split(container& ret, const uchar32_t* const c, u32 count=1, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
2529         {
2530                 if (!c)
2531                         return 0;
2532
2533                 const_iterator i(*this);
2534                 const u32 oldSize=ret.size();
2535                 u32 pos = 0;
2536                 u32 lastpos = 0;
2537                 u32 lastpospos = 0;
2538                 bool lastWasSeparator = false;
2539                 while (!i.atEnd())
2540                 {
2541                         uchar32_t ch = *i;
2542                         bool foundSeparator = false;
2543                         for (u32 j=0; j<count; ++j)
2544                         {
2545                                 if (ch == c[j])
2546                                 {
2547                                         if ((!ignoreEmptyTokens || pos - lastpos != 0) &&
2548                                                         !lastWasSeparator)
2549                                         ret.push_back(ustring16<TAlloc>(&array[lastpospos], pos - lastpos));
2550                                         foundSeparator = true;
2551                                         lastpos = (keepSeparators ? pos : pos + 1);
2552                                         lastpospos = (keepSeparators ? i.getPos() : i.getPos() + 1);
2553                                         break;
2554                                 }
2555                         }
2556                         lastWasSeparator = foundSeparator;
2557                         ++pos;
2558                         ++i;
2559                 }
2560                 u32 s = size() + 1;
2561                 if (s > lastpos)
2562                         ret.push_back(ustring16<TAlloc>(&array[lastpospos], s - lastpos));
2563                 return ret.size()-oldSize;
2564         }
2565
2566
2567         //! Split the ustring16 into parts.
2568         /** This method will split a ustring16 at certain delimiter characters
2569         into the container passed in as reference. The type of the container
2570         has to be given as template parameter. It must provide a push_back and
2571         a size method.
2572         \param ret The result container
2573         \param c A unicode string of delimiter characters
2574         \param ignoreEmptyTokens Flag to avoid empty substrings in the result
2575         container. If two delimiters occur without a character in between, an
2576         empty substring would be placed in the result. If this flag is set,
2577         only non-empty strings are stored.
2578         \param keepSeparators Flag which allows to add the separator to the
2579         result ustring16. If this flag is true, the concatenation of the
2580         substrings results in the original ustring16. Otherwise, only the
2581         characters between the delimiters are returned.
2582         \return The number of resulting substrings
2583         */
2584         template<class container>
2585         u32 split(container& ret, const ustring16<TAlloc>& c, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
2586         {
2587                 core::array<uchar32_t> v = c.toUTF32();
2588                 return split(ret, v.pointer(), v.size(), ignoreEmptyTokens, keepSeparators);
2589         }
2590
2591
2592         //! Gets the size of the allocated memory buffer for the string.
2593         //! \return The size of the allocated memory buffer.
2594         u32 capacity() const
2595         {
2596                 return allocated;
2597         }
2598
2599
2600         //! Returns the raw number of UTF-16 code points in the string which includes the individual surrogates.
2601         //! \return The raw number of UTF-16 code points, excluding the trialing NUL.
2602         u32 size_raw() const
2603         {
2604                 return used;
2605         }
2606
2607
2608         //! Inserts a character into the string.
2609         //! \param c The character to insert.
2610         //! \param pos The position to insert the character.
2611         //! \return A reference to our current string.
2612         ustring16<TAlloc>& insert(uchar32_t c, u32 pos)
2613         {
2614                 u8 len = (c > 0xFFFF ? 2 : 1);
2615
2616                 if (used + len >= allocated)
2617                         reallocate(used + len);
2618
2619                 used += len;
2620
2621                 iterator iter(*this, pos);
2622                 for (u32 i = used - 2; i > iter.getPos(); --i)
2623                         array[i] = array[i - len];
2624
2625                 if (c > 0xFFFF)
2626                 {
2627                         // c will be multibyte, so split it up into a surrogate pair.
2628                         uchar16_t x = static_cast<uchar16_t>(c);
2629                         uchar16_t vh = UTF16_HI_SURROGATE | ((((c >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
2630                         uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
2631                         array[iter.getPos()] = vh;
2632                         array[iter.getPos()+1] = vl;
2633                 }
2634                 else
2635                 {
2636                         array[iter.getPos()] = static_cast<uchar16_t>(c);
2637                 }
2638                 array[used] = 0;
2639                 return *this;
2640         }
2641
2642
2643         //! Inserts a string into the string.
2644         //! \param c The string to insert.
2645         //! \param pos The position to insert the string.
2646         //! \return A reference to our current string.
2647         ustring16<TAlloc>& insert(const ustring16<TAlloc>& c, u32 pos)
2648         {
2649                 u32 len = c.size_raw();
2650                 if (len == 0) return *this;
2651
2652                 if (used + len >= allocated)
2653                         reallocate(used + len);
2654
2655                 used += len;
2656
2657                 iterator iter(*this, pos);
2658                 for (u32 i = used - 2; i > iter.getPos() + len; --i)
2659                         array[i] = array[i - len];
2660
2661                 const uchar16_t* s = c.c_str();
2662                 for (u32 i = 0; i < len; ++i)
2663                 {
2664                         array[pos++] = *s;
2665                         ++s;
2666                 }
2667
2668                 array[used] = 0;
2669                 return *this;
2670         }
2671
2672
2673         //! Inserts a character into the string.
2674         //! \param c The character to insert.
2675         //! \param pos The position to insert the character.
2676         //! \return A reference to our current string.
2677         ustring16<TAlloc>& insert_raw(uchar16_t c, u32 pos)
2678         {
2679                 if (used + 1 >= allocated)
2680                         reallocate(used + 1);
2681
2682                 ++used;
2683
2684                 for (u32 i = used - 1; i > pos; --i)
2685                         array[i] = array[i - 1];
2686
2687                 array[pos] = c;
2688                 array[used] = 0;
2689                 return *this;
2690         }
2691
2692
2693         //! Removes a character from string.
2694         //! \param pos Position of the character to remove.
2695         //! \return A reference to our current string.
2696         ustring16<TAlloc>& erase_raw(u32 pos)
2697         {
2698                 for (u32 i=pos; i<=used; ++i)
2699                 {
2700                         array[i] = array[i + 1];
2701                 }
2702                 --used;
2703                 array[used] = 0;
2704                 return *this;
2705         }
2706
2707
2708         //! Replaces a character in the string.
2709         //! \param c The new character.
2710         //! \param pos The position of the character to replace.
2711         //! \return A reference to our current string.
2712         ustring16<TAlloc>& replace_raw(uchar16_t c, u32 pos)
2713         {
2714                 array[pos] = c;
2715                 return *this;
2716         }
2717
2718
2719         //! Returns an iterator to the beginning of the string.
2720         //! \return An iterator to the beginning of the string.
2721         iterator begin()
2722         {
2723                 iterator i(*this, 0);
2724                 return i;
2725         }
2726
2727
2728         //! Returns an iterator to the beginning of the string.
2729         //! \return An iterator to the beginning of the string.
2730         const_iterator begin() const
2731         {
2732                 const_iterator i(*this, 0);
2733                 return i;
2734         }
2735
2736
2737         //! Returns an iterator to the beginning of the string.
2738         //! \return An iterator to the beginning of the string.
2739         const_iterator cbegin() const
2740         {
2741                 const_iterator i(*this, 0);
2742                 return i;
2743         }
2744
2745
2746         //! Returns an iterator to the end of the string.
2747         //! \return An iterator to the end of the string.
2748         iterator end()
2749         {
2750                 iterator i(*this, 0);
2751                 i.toEnd();
2752                 return i;
2753         }
2754
2755
2756         //! Returns an iterator to the end of the string.
2757         //! \return An iterator to the end of the string.
2758         const_iterator end() const
2759         {
2760                 const_iterator i(*this, 0);
2761                 i.toEnd();
2762                 return i;
2763         }
2764
2765
2766         //! Returns an iterator to the end of the string.
2767         //! \return An iterator to the end of the string.
2768         const_iterator cend() const
2769         {
2770                 const_iterator i(*this, 0);
2771                 i.toEnd();
2772                 return i;
2773         }
2774
2775
2776         //! Converts the string to a UTF-8 encoded string.
2777         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2778         //! \return A string containing the UTF-8 encoded string.
2779         core::string<uchar8_t> toUTF8_s(const bool addBOM = false) const
2780         {
2781                 core::string<uchar8_t> ret;
2782                 ret.reserve(used * 4 + (addBOM ? unicode::BOM_UTF8_LEN : 0) + 1);
2783                 const_iterator iter(*this, 0);
2784
2785                 // Add the byte order mark if the user wants it.
2786                 if (addBOM)
2787                 {
2788                         ret.append(unicode::BOM_ENCODE_UTF8[0]);
2789                         ret.append(unicode::BOM_ENCODE_UTF8[1]);
2790                         ret.append(unicode::BOM_ENCODE_UTF8[2]);
2791                 }
2792
2793                 while (!iter.atEnd())
2794                 {
2795                         uchar32_t c = *iter;
2796                         if (c > 0xFFFF)
2797                         {       // 4 bytes
2798                                 uchar8_t b1 = (0x1E << 3) | ((c >> 18) & 0x7);
2799                                 uchar8_t b2 = (0x2 << 6) | ((c >> 12) & 0x3F);
2800                                 uchar8_t b3 = (0x2 << 6) | ((c >> 6) & 0x3F);
2801                                 uchar8_t b4 = (0x2 << 6) | (c & 0x3F);
2802                                 ret.append(b1);
2803                                 ret.append(b2);
2804                                 ret.append(b3);
2805                                 ret.append(b4);
2806                         }
2807                         else if (c > 0x7FF)
2808                         {       // 3 bytes
2809                                 uchar8_t b1 = (0xE << 4) | ((c >> 12) & 0xF);
2810                                 uchar8_t b2 = (0x2 << 6) | ((c >> 6) & 0x3F);
2811                                 uchar8_t b3 = (0x2 << 6) | (c & 0x3F);
2812                                 ret.append(b1);
2813                                 ret.append(b2);
2814                                 ret.append(b3);
2815                         }
2816                         else if (c > 0x7F)
2817                         {       // 2 bytes
2818                                 uchar8_t b1 = (0x6 << 5) | ((c >> 6) & 0x1F);
2819                                 uchar8_t b2 = (0x2 << 6) | (c & 0x3F);
2820                                 ret.append(b1);
2821                                 ret.append(b2);
2822                         }
2823                         else
2824                         {       // 1 byte
2825                                 ret.append(static_cast<uchar8_t>(c));
2826                         }
2827                         ++iter;
2828                 }
2829                 return ret;
2830         }
2831
2832
2833         //! Converts the string to a UTF-8 encoded string array.
2834         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2835         //! \return An array containing the UTF-8 encoded string.
2836         core::array<uchar8_t> toUTF8(const bool addBOM = false) const
2837         {
2838                 core::array<uchar8_t> ret(used * 4 + (addBOM ? unicode::BOM_UTF8_LEN : 0) + 1);
2839                 const_iterator iter(*this, 0);
2840
2841                 // Add the byte order mark if the user wants it.
2842                 if (addBOM)
2843                 {
2844                         ret.push_back(unicode::BOM_ENCODE_UTF8[0]);
2845                         ret.push_back(unicode::BOM_ENCODE_UTF8[1]);
2846                         ret.push_back(unicode::BOM_ENCODE_UTF8[2]);
2847                 }
2848
2849                 while (!iter.atEnd())
2850                 {
2851                         uchar32_t c = *iter;
2852                         if (c > 0xFFFF)
2853                         {       // 4 bytes
2854                                 uchar8_t b1 = (0x1E << 3) | ((c >> 18) & 0x7);
2855                                 uchar8_t b2 = (0x2 << 6) | ((c >> 12) & 0x3F);
2856                                 uchar8_t b3 = (0x2 << 6) | ((c >> 6) & 0x3F);
2857                                 uchar8_t b4 = (0x2 << 6) | (c & 0x3F);
2858                                 ret.push_back(b1);
2859                                 ret.push_back(b2);
2860                                 ret.push_back(b3);
2861                                 ret.push_back(b4);
2862                         }
2863                         else if (c > 0x7FF)
2864                         {       // 3 bytes
2865                                 uchar8_t b1 = (0xE << 4) | ((c >> 12) & 0xF);
2866                                 uchar8_t b2 = (0x2 << 6) | ((c >> 6) & 0x3F);
2867                                 uchar8_t b3 = (0x2 << 6) | (c & 0x3F);
2868                                 ret.push_back(b1);
2869                                 ret.push_back(b2);
2870                                 ret.push_back(b3);
2871                         }
2872                         else if (c > 0x7F)
2873                         {       // 2 bytes
2874                                 uchar8_t b1 = (0x6 << 5) | ((c >> 6) & 0x1F);
2875                                 uchar8_t b2 = (0x2 << 6) | (c & 0x3F);
2876                                 ret.push_back(b1);
2877                                 ret.push_back(b2);
2878                         }
2879                         else
2880                         {       // 1 byte
2881                                 ret.push_back(static_cast<uchar8_t>(c));
2882                         }
2883                         ++iter;
2884                 }
2885                 ret.push_back(0);
2886                 return ret;
2887         }
2888
2889
#ifdef USTRING_CPP0X_NEWLITERALS        // C++0x
        //! Converts the string to a UTF-16 encoded string.
        //! \param endian The desired endianness of the string.
        //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
        //! \return A string containing the UTF-16 encoded string.
        core::string<char16_t> toUTF16_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
        {
                core::string<char16_t> ret;
                ret.reserve(used + (addBOM ? unicode::BOM_UTF16_LEN : 0) + 1);

                // Bytes only have to be swapped when an explicit byte order was asked
                // for and it differs from the one of this machine.
                const bool swapBytes = (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian);

                // Add the BOM if specified. It is appended like any other code unit
                // (instead of being poked into the still empty buffer, where the
                // following append would overwrite it), so a swapped BOM ends up with
                // its bytes in the requested order.
                // NOTE(review): assumes unicode::BOM is the BOM code unit in native order - confirm.
                if (addBOM)
                {
                        const uchar16_t bom = static_cast<uchar16_t>(unicode::BOM);
                        ret.append(static_cast<char16_t>(swapBytes ? unicode::swapEndian16(bom) : bom));
                }

                // Append the text unit by unit, swapping on the way. This avoids
                // writing through the const pointer returned by c_str().
                for (u32 i = 0; i < used; ++i)
                {
                        const uchar16_t unit = array[i];
                        ret.append(static_cast<char16_t>(swapBytes ? unicode::swapEndian16(unit) : unit));
                }
                return ret;
        }
#endif
2929
2930
2931         //! Converts the string to a UTF-16 encoded string array.
2932         //! Unfortunately, no toUTF16_s() version exists due to limitations with Irrlicht's string class.
2933         //! \param endian The desired endianness of the string.
2934         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2935         //! \return An array containing the UTF-16 encoded string.
2936         core::array<uchar16_t> toUTF16(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2937         {
2938                 core::array<uchar16_t> ret(used + (addBOM ? unicode::BOM_UTF16_LEN : 0) + 1);
2939                 uchar16_t* ptr = ret.pointer();
2940
2941                 // Add the BOM if specified.
2942                 if (addBOM)
2943                 {
2944                         if (endian == unicode::EUTFEE_NATIVE)
2945                                 *ptr = unicode::BOM;
2946                         else if (endian == unicode::EUTFEE_LITTLE)
2947                         {
2948                                 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ptr);
2949                                 *ptr8++ = unicode::BOM_ENCODE_UTF16_LE[0];
2950                                 *ptr8 = unicode::BOM_ENCODE_UTF16_LE[1];
2951                         }
2952                         else
2953                         {
2954                                 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ptr);
2955                                 *ptr8++ = unicode::BOM_ENCODE_UTF16_BE[0];
2956                                 *ptr8 = unicode::BOM_ENCODE_UTF16_BE[1];
2957                         }
2958                         ++ptr;
2959                 }
2960
2961                 memcpy((void*)ptr, (void*)array, used * sizeof(uchar16_t));
2962                 if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
2963                 {
2964                         for (u32 i = 0; i <= used; ++i)
2965                                 ptr[i] = unicode::swapEndian16(ptr[i]);
2966                 }
2967                 ret.set_used(used + (addBOM ? unicode::BOM_UTF16_LEN : 0));
2968                 ret.push_back(0);
2969                 return ret;
2970         }
2971
2972
2973 #ifdef USTRING_CPP0X_NEWLITERALS        // C++0x
2974         //! Converts the string to a UTF-32 encoded string.
2975         //! \param endian The desired endianness of the string.
2976         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2977         //! \return A string containing the UTF-32 encoded string.
2978         core::string<char32_t> toUTF32_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2979         {
2980                 core::string<char32_t> ret;
2981                 ret.reserve(size() + 1 + (addBOM ? unicode::BOM_UTF32_LEN : 0));
2982                 const_iterator iter(*this, 0);
2983
2984                 // Add the BOM if specified.
2985                 if (addBOM)
2986                 {
2987                         if (endian == unicode::EUTFEE_NATIVE)
2988                                 ret.append(unicode::BOM);
2989                         else
2990                         {
2991                                 union
2992                                 {
2993                                         uchar32_t full;
2994                                         u8 chunk[4];
2995                                 } t;
2996
2997                                 if (endian == unicode::EUTFEE_LITTLE)
2998                                 {
2999                                         t.chunk[0] = unicode::BOM_ENCODE_UTF32_LE[0];
3000                                         t.chunk[1] = unicode::BOM_ENCODE_UTF32_LE[1];
3001                                         t.chunk[2] = unicode::BOM_ENCODE_UTF32_LE[2];
3002                                         t.chunk[3] = unicode::BOM_ENCODE_UTF32_LE[3];
3003                                 }
3004                                 else
3005                                 {
3006                                         t.chunk[0] = unicode::BOM_ENCODE_UTF32_BE[0];
3007                                         t.chunk[1] = unicode::BOM_ENCODE_UTF32_BE[1];
3008                                         t.chunk[2] = unicode::BOM_ENCODE_UTF32_BE[2];
3009                                         t.chunk[3] = unicode::BOM_ENCODE_UTF32_BE[3];
3010                                 }
3011                                 ret.append(t.full);
3012                         }
3013                 }
3014
3015                 while (!iter.atEnd())
3016                 {
3017                         uchar32_t c = *iter;
3018                         if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
3019                                 c = unicode::swapEndian32(c);
3020                         ret.append(c);
3021                         ++iter;
3022                 }
3023                 return ret;
3024         }
3025 #endif
3026
3027
3028         //! Converts the string to a UTF-32 encoded string array.
3029         //! Unfortunately, no toUTF32_s() version exists due to limitations with Irrlicht's string class.
3030         //! \param endian The desired endianness of the string.
3031         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3032         //! \return An array containing the UTF-32 encoded string.
3033         core::array<uchar32_t> toUTF32(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3034         {
3035                 core::array<uchar32_t> ret(size() + (addBOM ? unicode::BOM_UTF32_LEN : 0) + 1);
3036                 const_iterator iter(*this, 0);
3037
3038                 // Add the BOM if specified.
3039                 if (addBOM)
3040                 {
3041                         if (endian == unicode::EUTFEE_NATIVE)
3042                                 ret.push_back(unicode::BOM);
3043                         else
3044                         {
3045                                 union
3046                                 {
3047                                         uchar32_t full;
3048                                         u8 chunk[4];
3049                                 } t;
3050
3051                                 if (endian == unicode::EUTFEE_LITTLE)
3052                                 {
3053                                         t.chunk[0] = unicode::BOM_ENCODE_UTF32_LE[0];
3054                                         t.chunk[1] = unicode::BOM_ENCODE_UTF32_LE[1];
3055                                         t.chunk[2] = unicode::BOM_ENCODE_UTF32_LE[2];
3056                                         t.chunk[3] = unicode::BOM_ENCODE_UTF32_LE[3];
3057                                 }
3058                                 else
3059                                 {
3060                                         t.chunk[0] = unicode::BOM_ENCODE_UTF32_BE[0];
3061                                         t.chunk[1] = unicode::BOM_ENCODE_UTF32_BE[1];
3062                                         t.chunk[2] = unicode::BOM_ENCODE_UTF32_BE[2];
3063                                         t.chunk[3] = unicode::BOM_ENCODE_UTF32_BE[3];
3064                                 }
3065                                 ret.push_back(t.full);
3066                         }
3067                 }
3068                 ret.push_back(0);
3069
3070                 while (!iter.atEnd())
3071                 {
3072                         uchar32_t c = *iter;
3073                         if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
3074                                 c = unicode::swapEndian32(c);
3075                         ret.push_back(c);
3076                         ++iter;
3077                 }
3078                 return ret;
3079         }
3080
3081
3082         //! Converts the string to a wchar_t encoded string.
3083         /** The size of a wchar_t changes depending on the platform.  This function will store a
3084         correct UTF-8, -16, or -32 encoded string depending on the size of a wchar_t. **/
3085         //! \param endian The desired endianness of the string.
3086         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3087         //! \return A string containing the wchar_t encoded string.
3088         core::string<wchar_t> toWCHAR_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3089         {
3090                 if (sizeof(wchar_t) == 4)
3091                 {
3092                         core::array<uchar32_t> a(toUTF32(endian, addBOM));
3093                         core::stringw ret(a.pointer());
3094                         return ret;
3095                 }
3096                 else if (sizeof(wchar_t) == 2)
3097                 {
3098                         if (endian == unicode::EUTFEE_NATIVE && addBOM == false)
3099                         {
3100                                 core::stringw ret(array);
3101                                 return ret;
3102                         }
3103                         else
3104                         {
3105                                 core::array<uchar16_t> a(toUTF16(endian, addBOM));
3106                                 core::stringw ret(a.pointer());
3107                                 return ret;
3108                         }
3109                 }
3110                 else if (sizeof(wchar_t) == 1)
3111                 {
3112                         core::array<uchar8_t> a(toUTF8(addBOM));
3113                         core::stringw ret(a.pointer());
3114                         return ret;
3115                 }
3116
3117                 // Shouldn't happen.
3118                 return core::stringw();
3119         }
3120
3121
3122         //! Converts the string to a wchar_t encoded string array.
3123         /** The size of a wchar_t changes depending on the platform.  This function will store a
3124         correct UTF-8, -16, or -32 encoded string depending on the size of a wchar_t. **/
3125         //! \param endian The desired endianness of the string.
3126         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3127         //! \return An array containing the wchar_t encoded string.
3128         core::array<wchar_t> toWCHAR(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3129         {
3130                 if (sizeof(wchar_t) == 4)
3131                 {
3132                         core::array<uchar32_t> a(toUTF32(endian, addBOM));
3133                         core::array<wchar_t> ret(a.size());
3134                         ret.set_used(a.size());
3135                         memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar32_t));
3136                         return ret;
3137                 }
3138                 if (sizeof(wchar_t) == 2)
3139                 {
3140                         if (endian == unicode::EUTFEE_NATIVE && addBOM == false)
3141                         {
3142                                 core::array<wchar_t> ret(used);
3143                                 ret.set_used(used);
3144                                 memcpy((void*)ret.pointer(), (void*)array, used * sizeof(uchar16_t));
3145                                 return ret;
3146                         }
3147                         else
3148                         {
3149                                 core::array<uchar16_t> a(toUTF16(endian, addBOM));
3150                                 core::array<wchar_t> ret(a.size());
3151                                 ret.set_used(a.size());
3152                                 memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar16_t));
3153                                 return ret;
3154                         }
3155                 }
3156                 if (sizeof(wchar_t) == 1)
3157                 {
3158                         core::array<uchar8_t> a(toUTF8(addBOM));
3159                         core::array<wchar_t> ret(a.size());
3160                         ret.set_used(a.size());
3161                         memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar8_t));
3162                         return ret;
3163                 }
3164
3165                 // Shouldn't happen.
3166                 return core::array<wchar_t>();
3167         }
3168
3169         //! Converts the string to a properly encoded io::path string.
3170         //! \param endian The desired endianness of the string.
3171         //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3172         //! \return An io::path string containing the properly encoded string.
3173         io::path toPATH_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3174         {
3175 #if defined(_IRR_WCHAR_FILESYSTEM)
3176                 return toWCHAR_s(endian, addBOM);
3177 #else
3178                 return toUTF8_s(addBOM);
3179 #endif
3180         }
3181
3182         //! Loads an unknown stream of data.
3183         //! Will attempt to determine if the stream is unicode data.  Useful for loading from files.
3184         //! \param data The data stream to load from.
3185         //! \param data_size The length of the data string.
3186         //! \return A reference to our current string.
3187         ustring16<TAlloc>& loadDataStream(const char* data, size_t data_size)
3188         {
3189                 // Clear our string.
3190                 *this = "";
3191                 if (!data)
3192                         return *this;
3193
3194                 unicode::EUTF_ENCODE e = unicode::determineUnicodeBOM(data);
3195                 switch (e)
3196                 {
3197                         default:
3198                         case unicode::EUTFE_UTF8:
3199                                 append((uchar8_t*)data, data_size);
3200                                 break;
3201
3202                         case unicode::EUTFE_UTF16:
3203                         case unicode::EUTFE_UTF16_BE:
3204                         case unicode::EUTFE_UTF16_LE:
3205                                 append((uchar16_t*)data, data_size / 2);
3206                                 break;
3207
3208                         case unicode::EUTFE_UTF32:
3209                         case unicode::EUTFE_UTF32_BE:
3210                         case unicode::EUTFE_UTF32_LE:
3211                                 append((uchar32_t*)data, data_size / 4);
3212                                 break;
3213                 }
3214
3215                 return *this;
3216         }
3217
3218         //! Gets the encoding of the Unicode string this class contains.
3219         //! \return An enum describing the current encoding of this string.
3220         const unicode::EUTF_ENCODE getEncoding() const
3221         {
3222                 return encoding;
3223         }
3224
3225         //! Gets the endianness of the Unicode string this class contains.
3226         //! \return An enum describing the endianness of this string.
3227         const unicode::EUTF_ENDIAN getEndianness() const
3228         {
3229                 if (encoding == unicode::EUTFE_UTF16_LE ||
3230                         encoding == unicode::EUTFE_UTF32_LE)
3231                         return unicode::EUTFEE_LITTLE;
3232                 else return unicode::EUTFEE_BIG;
3233         }
3234
3235 private:
3236
3237         //! Reallocate the string, making it bigger or smaller.
3238         //! \param new_size The new size of the string.
3239         void reallocate(u32 new_size)
3240         {
3241                 uchar16_t* old_array = array;
3242
3243                 array = allocator.allocate(new_size + 1); //new u16[new_size];
3244                 allocated = new_size + 1;
3245                 if (old_array == 0) return;
3246
3247                 u32 amount = used < new_size ? used : new_size;
3248                 for (u32 i=0; i<=amount; ++i)
3249                         array[i] = old_array[i];
3250
3251                 if (allocated <= used)
3252                         used = allocated - 1;
3253
3254                 array[used] = 0;
3255
3256                 allocator.deallocate(old_array); // delete [] old_array;
3257         }
3258
        //--- member variables

        //! Buffer of UTF-16 code units obtained from `allocator`; reallocate() keeps array[used] == 0.
        uchar16_t* array;
        //! Encoding tag returned by getEncoding() and consulted by getEndianness().
        unicode::EUTF_ENCODE encoding;
        //! Number of uchar16_t slots in `array`; reallocate() sets it to new_size + 1.
        u32 allocated;
        //! Number of code units currently stored, not counting the terminator at array[used].
        u32 used;
        //! Allocator instance used by reallocate() to obtain and release `array`.
        TAlloc allocator;
        //irrAllocator<uchar16_t> allocator;
3267 };
3268
//! Convenience name for ustring16 instantiated with irrAllocator<uchar16_t>.
typedef ustring16<irrAllocator<uchar16_t> > ustring;
3270
3271
3272 //! Appends two ustring16s.
3273 template <typename TAlloc>
3274 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const ustring16<TAlloc>& right)
3275 {
3276         ustring16<TAlloc> ret(left);
3277         ret += right;
3278         return ret;
3279 }
3280
3281
3282 //! Appends a ustring16 and a null-terminated unicode string.
3283 template <typename TAlloc, class B>
3284 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const B* const right)
3285 {
3286         ustring16<TAlloc> ret(left);
3287         ret += right;
3288         return ret;
3289 }
3290
3291
3292 //! Appends a ustring16 and a null-terminated unicode string.
3293 template <class B, typename TAlloc>
3294 inline ustring16<TAlloc> operator+(const B* const left, const ustring16<TAlloc>& right)
3295 {
3296         ustring16<TAlloc> ret(left);
3297         ret += right;
3298         return ret;
3299 }
3300
3301
3302 //! Appends a ustring16 and an Irrlicht string.
3303 template <typename TAlloc, typename B, typename BAlloc>
3304 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const string<B, BAlloc>& right)
3305 {
3306         ustring16<TAlloc> ret(left);
3307         ret += right;
3308         return ret;
3309 }
3310
3311
3312 //! Appends a ustring16 and an Irrlicht string.
3313 template <typename TAlloc, typename B, typename BAlloc>
3314 inline ustring16<TAlloc> operator+(const string<B, BAlloc>& left, const ustring16<TAlloc>& right)
3315 {
3316         ustring16<TAlloc> ret(left);
3317         ret += right;
3318         return ret;
3319 }
3320
3321
3322 //! Appends a ustring16 and a std::basic_string.
3323 template <typename TAlloc, typename B, typename A, typename BAlloc>
3324 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const std::basic_string<B, A, BAlloc>& right)
3325 {
3326         ustring16<TAlloc> ret(left);
3327         ret += right;
3328         return ret;
3329 }
3330
3331
3332 //! Appends a ustring16 and a std::basic_string.
3333 template <typename TAlloc, typename B, typename A, typename BAlloc>
3334 inline ustring16<TAlloc> operator+(const std::basic_string<B, A, BAlloc>& left, const ustring16<TAlloc>& right)
3335 {
3336         ustring16<TAlloc> ret(left);
3337         ret += right;
3338         return ret;
3339 }
3340
3341
3342 //! Appends a ustring16 and a char.
3343 template <typename TAlloc>
3344 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const char right)
3345 {
3346         ustring16<TAlloc> ret(left);
3347         ret += right;
3348         return ret;
3349 }
3350
3351
3352 //! Appends a ustring16 and a char.
3353 template <typename TAlloc>
3354 inline ustring16<TAlloc> operator+(const char left, const ustring16<TAlloc>& right)
3355 {
3356         ustring16<TAlloc> ret(left);
3357         ret += right;
3358         return ret;
3359 }
3360
3361
3362 #ifdef USTRING_CPP0X_NEWLITERALS
3363 //! Appends a ustring16 and a uchar32_t.
3364 template <typename TAlloc>
3365 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const uchar32_t right)
3366 {
3367         ustring16<TAlloc> ret(left);
3368         ret += right;
3369         return ret;
3370 }
3371
3372
3373 //! Appends a ustring16 and a uchar32_t.
3374 template <typename TAlloc>
3375 inline ustring16<TAlloc> operator+(const uchar32_t left, const ustring16<TAlloc>& right)
3376 {
3377         ustring16<TAlloc> ret(left);
3378         ret += right;
3379         return ret;
3380 }
3381 #endif
3382
3383
3384 //! Appends a ustring16 and a short.
3385 template <typename TAlloc>
3386 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const short right)
3387 {
3388         ustring16<TAlloc> ret(left);
3389         ret += core::stringc(right);
3390         return ret;
3391 }
3392
3393
3394 //! Appends a ustring16 and a short.
3395 template <typename TAlloc>
3396 inline ustring16<TAlloc> operator+(const short left, const ustring16<TAlloc>& right)
3397 {
3398         ustring16<TAlloc> ret(core::stringc(left));
3399         ret += right;
3400         return ret;
3401 }
3402
3403
3404 //! Appends a ustring16 and an unsigned short.
3405 template <typename TAlloc>
3406 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned short right)
3407 {
3408         ustring16<TAlloc> ret(left);
3409         ret += core::stringc(right);
3410         return ret;
3411 }
3412
3413
3414 //! Appends a ustring16 and an unsigned short.
3415 template <typename TAlloc>
3416 inline ustring16<TAlloc> operator+(const unsigned short left, const ustring16<TAlloc>& right)
3417 {
3418         ustring16<TAlloc> ret(core::stringc(left));
3419         ret += right;
3420         return ret;
3421 }
3422
3423
3424 //! Appends a ustring16 and an int.
3425 template <typename TAlloc>
3426 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const int right)
3427 {
3428         ustring16<TAlloc> ret(left);
3429         ret += core::stringc(right);
3430         return ret;
3431 }
3432
3433
3434 //! Appends a ustring16 and an int.
3435 template <typename TAlloc>
3436 inline ustring16<TAlloc> operator+(const int left, const ustring16<TAlloc>& right)
3437 {
3438         ustring16<TAlloc> ret(core::stringc(left));
3439         ret += right;
3440         return ret;
3441 }
3442
3443
3444 //! Appends a ustring16 and an unsigned int.
3445 template <typename TAlloc>
3446 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned int right)
3447 {
3448         ustring16<TAlloc> ret(left);
3449         ret += core::stringc(right);
3450         return ret;
3451 }
3452
3453
3454 //! Appends a ustring16 and an unsigned int.
3455 template <typename TAlloc>
3456 inline ustring16<TAlloc> operator+(const unsigned int left, const ustring16<TAlloc>& right)
3457 {
3458         ustring16<TAlloc> ret(core::stringc(left));
3459         ret += right;
3460         return ret;
3461 }
3462
3463
3464 //! Appends a ustring16 and a long.
3465 template <typename TAlloc>
3466 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const long right)
3467 {
3468         ustring16<TAlloc> ret(left);
3469         ret += core::stringc(right);
3470         return ret;
3471 }
3472
3473
3474 //! Appends a ustring16 and a long.
3475 template <typename TAlloc>
3476 inline ustring16<TAlloc> operator+(const long left, const ustring16<TAlloc>& right)
3477 {
3478         ustring16<TAlloc> ret(core::stringc(left));
3479         ret += right;
3480         return ret;
3481 }
3482
3483
3484 //! Appends a ustring16 and an unsigned long.
3485 template <typename TAlloc>
3486 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned long right)
3487 {
3488         ustring16<TAlloc> ret(left);
3489         ret += core::stringc(right);
3490         return ret;
3491 }
3492
3493
3494 //! Appends a ustring16 and an unsigned long.
3495 template <typename TAlloc>
3496 inline ustring16<TAlloc> operator+(const unsigned long left, const ustring16<TAlloc>& right)
3497 {
3498         ustring16<TAlloc> ret(core::stringc(left));
3499         ret += right;
3500         return ret;
3501 }
3502
3503
3504 //! Appends a ustring16 and a float.
3505 template <typename TAlloc>
3506 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const float right)
3507 {
3508         ustring16<TAlloc> ret(left);
3509         ret += core::stringc(right);
3510         return ret;
3511 }
3512
3513
3514 //! Appends a ustring16 and a float.
3515 template <typename TAlloc>
3516 inline ustring16<TAlloc> operator+(const float left, const ustring16<TAlloc>& right)
3517 {
3518         ustring16<TAlloc> ret(core::stringc(left));
3519         ret += right;
3520         return ret;
3521 }
3522
3523
3524 //! Appends a ustring16 and a double.
3525 template <typename TAlloc>
3526 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const double right)
3527 {
3528         ustring16<TAlloc> ret(left);
3529         ret += core::stringc(right);
3530         return ret;
3531 }
3532
3533
3534 //! Appends a ustring16 and a double.
3535 template <typename TAlloc>
3536 inline ustring16<TAlloc> operator+(const double left, const ustring16<TAlloc>& right)
3537 {
3538         ustring16<TAlloc> ret(core::stringc(left));
3539         ret += right;
3540         return ret;
3541 }
3542
3543
3544 #ifdef USTRING_CPP0X
3545 //! Appends two ustring16s.
3546 template <typename TAlloc>
3547 inline ustring16<TAlloc>&& operator+(const ustring16<TAlloc>& left, ustring16<TAlloc>&& right)
3548 {
3549         //std::cout << "MOVE operator+(&, &&)" << std::endl;
3550         right.insert(left, 0);
3551         return std::move(right);
3552 }
3553
3554
3555 //! Appends two ustring16s.
3556 template <typename TAlloc>
3557 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const ustring16<TAlloc>& right)
3558 {
3559         //std::cout << "MOVE operator+(&&, &)" << std::endl;
3560         left.append(right);
3561         return std::move(left);
3562 }
3563
3564
3565 //! Appends two ustring16s.
3566 template <typename TAlloc>
3567 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, ustring16<TAlloc>&& right)
3568 {
3569         //std::cout << "MOVE operator+(&&, &&)" << std::endl;
3570         if ((right.size_raw() <= left.capacity() - left.size_raw()) ||
3571                 (right.capacity() - right.size_raw() < left.size_raw()))
3572         {
3573                 left.append(right);
3574                 return std::move(left);
3575         }
3576         else
3577         {
3578                 right.insert(left, 0);
3579                 return std::move(right);
3580         }
3581 }
3582
3583
3584 //! Appends a ustring16 and a null-terminated unicode string.
3585 template <typename TAlloc, class B>
3586 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const B* const right)
3587 {
3588         //std::cout << "MOVE operator+(&&, B*)" << std::endl;
3589         left.append(right);
3590         return std::move(left);
3591 }
3592
3593
3594 //! Appends a ustring16 and a null-terminated unicode string.
3595 template <class B, typename TAlloc>
3596 inline ustring16<TAlloc>&& operator+(const B* const left, ustring16<TAlloc>&& right)
3597 {
3598         //std::cout << "MOVE operator+(B*, &&)" << std::endl;
3599         right.insert(left, 0);
3600         return std::move(right);
3601 }
3602
3603
3604 //! Appends a ustring16 and an Irrlicht string.
3605 template <typename TAlloc, typename B, typename BAlloc>
3606 inline ustring16<TAlloc>&& operator+(const string<B, BAlloc>& left, ustring16<TAlloc>&& right)
3607 {
3608         //std::cout << "MOVE operator+(&, &&)" << std::endl;
3609         right.insert(left, 0);
3610         return std::move(right);
3611 }
3612
3613
3614 //! Appends a ustring16 and an Irrlicht string.
3615 template <typename TAlloc, typename B, typename BAlloc>
3616 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const string<B, BAlloc>& right)
3617 {
3618         //std::cout << "MOVE operator+(&&, &)" << std::endl;
3619         left.append(right);
3620         return std::move(left);
3621 }
3622
3623
3624 //! Appends a ustring16 and a std::basic_string.
3625 template <typename TAlloc, typename B, typename A, typename BAlloc>
3626 inline ustring16<TAlloc>&& operator+(const std::basic_string<B, A, BAlloc>& left, ustring16<TAlloc>&& right)
3627 {
3628         //std::cout << "MOVE operator+(&, &&)" << std::endl;
3629         right.insert(core::ustring16<TAlloc>(left), 0);
3630         return std::move(right);
3631 }
3632
3633
3634 //! Appends a ustring16 and a std::basic_string.
3635 template <typename TAlloc, typename B, typename A, typename BAlloc>
3636 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const std::basic_string<B, A, BAlloc>& right)
3637 {
3638         //std::cout << "MOVE operator+(&&, &)" << std::endl;
3639         left.append(right);
3640         return std::move(left);
3641 }
3642
3643
3644 //! Appends a ustring16 and a char.
3645 template <typename TAlloc>
3646 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const char right)
3647 {
3648         left.append((uchar32_t)right);
3649         return std::move(left);
3650 }
3651
3652
3653 //! Appends a ustring16 and a char.
3654 template <typename TAlloc>
3655 inline ustring16<TAlloc> operator+(const char left, ustring16<TAlloc>&& right)
3656 {
3657         right.insert((uchar32_t)left, 0);
3658         return std::move(right);
3659 }
3660
3661
3662 #ifdef USTRING_CPP0X_NEWLITERALS
3663 //! Appends a ustring16 and a uchar32_t.
3664 template <typename TAlloc>
3665 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const uchar32_t right)
3666 {
3667         left.append(right);
3668         return std::move(left);
3669 }
3670
3671
3672 //! Appends a ustring16 and a uchar32_t.
3673 template <typename TAlloc>
3674 inline ustring16<TAlloc> operator+(const uchar32_t left, ustring16<TAlloc>&& right)
3675 {
3676         right.insert(left, 0);
3677         return std::move(right);
3678 }
3679 #endif
3680
3681
3682 //! Appends a ustring16 and a short.
3683 template <typename TAlloc>
3684 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const short right)
3685 {
3686         left.append(core::stringc(right));
3687         return std::move(left);
3688 }
3689
3690
3691 //! Appends a ustring16 and a short.
3692 template <typename TAlloc>
3693 inline ustring16<TAlloc> operator+(const short left, ustring16<TAlloc>&& right)
3694 {
3695         right.insert(core::stringc(left), 0);
3696         return std::move(right);
3697 }
3698
3699
3700 //! Appends a ustring16 and an unsigned short.
3701 template <typename TAlloc>
3702 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned short right)
3703 {
3704         left.append(core::stringc(right));
3705         return std::move(left);
3706 }
3707
3708
3709 //! Appends a ustring16 and an unsigned short.
3710 template <typename TAlloc>
3711 inline ustring16<TAlloc> operator+(const unsigned short left, ustring16<TAlloc>&& right)
3712 {
3713         right.insert(core::stringc(left), 0);
3714         return std::move(right);
3715 }
3716
3717
3718 //! Appends a ustring16 and an int.
3719 template <typename TAlloc>
3720 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const int right)
3721 {
3722         left.append(core::stringc(right));
3723         return std::move(left);
3724 }
3725
3726
3727 //! Appends a ustring16 and an int.
3728 template <typename TAlloc>
3729 inline ustring16<TAlloc> operator+(const int left, ustring16<TAlloc>&& right)
3730 {
3731         right.insert(core::stringc(left), 0);
3732         return std::move(right);
3733 }
3734
3735
3736 //! Appends a ustring16 and an unsigned int.
3737 template <typename TAlloc>
3738 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned int right)
3739 {
3740         left.append(core::stringc(right));
3741         return std::move(left);
3742 }
3743
3744
3745 //! Appends a ustring16 and an unsigned int.
3746 template <typename TAlloc>
3747 inline ustring16<TAlloc> operator+(const unsigned int left, ustring16<TAlloc>&& right)
3748 {
3749         right.insert(core::stringc(left), 0);
3750         return std::move(right);
3751 }
3752
3753
3754 //! Appends a ustring16 and a long.
3755 template <typename TAlloc>
3756 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const long right)
3757 {
3758         left.append(core::stringc(right));
3759         return std::move(left);
3760 }
3761
3762
3763 //! Appends a ustring16 and a long.
3764 template <typename TAlloc>
3765 inline ustring16<TAlloc> operator+(const long left, ustring16<TAlloc>&& right)
3766 {
3767         right.insert(core::stringc(left), 0);
3768         return std::move(right);
3769 }
3770
3771
3772 //! Appends a ustring16 and an unsigned long.
3773 template <typename TAlloc>
3774 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned long right)
3775 {
3776         left.append(core::stringc(right));
3777         return std::move(left);
3778 }
3779
3780
3781 //! Appends a ustring16 and an unsigned long.
3782 template <typename TAlloc>
3783 inline ustring16<TAlloc> operator+(const unsigned long left, ustring16<TAlloc>&& right)
3784 {
3785         right.insert(core::stringc(left), 0);
3786         return std::move(right);
3787 }
3788
3789
3790 //! Appends a ustring16 and a float.
3791 template <typename TAlloc>
3792 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const float right)
3793 {
3794         left.append(core::stringc(right));
3795         return std::move(left);
3796 }
3797
3798
3799 //! Appends a ustring16 and a float.
3800 template <typename TAlloc>
3801 inline ustring16<TAlloc> operator+(const float left, ustring16<TAlloc>&& right)
3802 {
3803         right.insert(core::stringc(left), 0);
3804         return std::move(right);
3805 }
3806
3807
3808 //! Appends a ustring16 and a double.
3809 template <typename TAlloc>
3810 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const double right)
3811 {
3812         left.append(core::stringc(right));
3813         return std::move(left);
3814 }
3815
3816
3817 //! Appends a ustring16 and a double.
3818 template <typename TAlloc>
3819 inline ustring16<TAlloc> operator+(const double left, ustring16<TAlloc>&& right)
3820 {
3821         right.insert(core::stringc(left), 0);
3822         return std::move(right);
3823 }
3824 #endif
3825
3826
3827 #ifndef USTRING_NO_STL
3828 //! Writes a ustring16 to an ostream.
3829 template <typename TAlloc>
3830 inline std::ostream& operator<<(std::ostream& out, const ustring16<TAlloc>& in)
3831 {
3832         out << in.toUTF8_s().c_str();
3833         return out;
3834 }
3835
3836 //! Writes a ustring16 to a wostream.
3837 template <typename TAlloc>
3838 inline std::wostream& operator<<(std::wostream& out, const ustring16<TAlloc>& in)
3839 {
3840         out << in.toWCHAR_s().c_str();
3841         return out;
3842 }
3843 #endif
3844
3845
3846 #ifndef USTRING_NO_STL
3847
3848 namespace unicode
3849 {
3850
3851 //! Hashing algorithm for hashing a ustring.  Used for things like unordered_maps.
3852 //! Algorithm taken from std::hash<std::string>.
3853 class hash : public std::unary_function<core::ustring, size_t>
3854 {
3855         public:
3856                 size_t operator()(const core::ustring& s) const
3857                 {
3858                         size_t ret = 2166136261U;
3859                         size_t index = 0;
3860                         size_t stride = 1 + s.size_raw() / 10;
3861
3862                         core::ustring::const_iterator i = s.begin();
3863                         while (i != s.end())
3864                         {
3865                                 // TODO: Don't force u32 on an x64 OS.  Make it agnostic.
3866                                 ret = 16777619U * ret ^ (size_t)s[(u32)index];
3867                                 index += stride;
3868                                 i += stride;
3869                         }
3870                         return (ret);
3871                 }
3872 };
3873
3874 } // end namespace unicode
3875
3876 #endif
3877
3878 } // end namespace core
3879 } // end namespace irr
3880
3881 #endif