2 Basic Unicode string class for Irrlicht.
3 Copyright (c) 2009-2011 John Norman
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any
7 damages arising from the use of this software.
9 Permission is granted to anyone to use this software for any
10 purpose, including commercial applications, and to alter it and
11 redistribute it freely, subject to the following restrictions:
13 1. The origin of this software must not be misrepresented; you
14 must not claim that you wrote the original software. If you use
15 this software in a product, an acknowledgment in the product
16 documentation would be appreciated but is not required.
18 2. Altered source versions must be plainly marked as such, and
19 must not be misrepresented as being the original software.
21 3. This notice may not be removed or altered from any source
24 The original version of this class can be located at:
25 http://irrlicht.suckerfreegames.com/
28 john@suckerfreegames.com
31 #ifndef __IRR_USTRING_H_INCLUDED__
32 #define __IRR_USTRING_H_INCLUDED__
34 #if (__cplusplus > 199711L) || (_MSC_VER >= 1600) || defined(__GXX_EXPERIMENTAL_CXX0X__)
35 # define USTRING_CPP0X
36 # if defined(__GXX_EXPERIMENTAL_CXX0X__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 5)))
37 # define USTRING_CPP0X_NEWLITERALS
47 #define __BYTE_ORDER 0
48 #define __LITTLE_ENDIAN 0
49 #define __BIG_ENDIAN 1
50 #elif defined(__MACH__) && defined(__APPLE__)
51 #include <machine/endian.h>
52 #elif defined(__FreeBSD__)
53 #include <sys/endian.h>
62 #ifndef USTRING_NO_STL
69 #include "irrAllocator.h"
72 #include "irrString.h"
75 //! UTF-16 surrogate start values: the first high (leading) and the first low (trailing) surrogate.
76 static const irr::u16 UTF16_HI_SURROGATE = 0xD800;
77 static const irr::u16 UTF16_LO_SURROGATE = 0xDC00;
79 //! Is a UTF-16 code unit a surrogate of either kind (0xD800-0xDFFF)?
80 #define UTF16_IS_SURROGATE(c) (((c) & 0xF800) == 0xD800)
//! Is a UTF-16 code unit a high (leading) surrogate (0xD800-0xDBFF)?
81 #define UTF16_IS_SURROGATE_HI(c) (((c) & 0xFC00) == 0xD800)
//! Is a UTF-16 code unit a low (trailing) surrogate (0xDC00-0xDFFF)?
82 #define UTF16_IS_SURROGATE_LO(c) (((c) & 0xFC00) == 0xDC00)
88 // Define our character types.
89 #ifdef USTRING_CPP0X_NEWLITERALS // C++0x
90 typedef char32_t uchar32_t;
91 typedef char16_t uchar16_t;
92 typedef char uchar8_t;
94 typedef u32 uchar32_t;
95 typedef u16 uchar16_t;
105 //! The unicode replacement character (U+FFFD). Used to replace invalid characters.
106 const irr::u16 UTF_REPLACEMENT_CHARACTER = 0xFFFD;
108 //! Convert a UTF-16 surrogate pair into a UTF-32 character.
109 //! \param high The high value of the pair.
110 //! \param low The low value of the pair.
111 //! \return The UTF-32 character expressed by the surrogate pair.
112 inline uchar32_t toUTF32(uchar16_t high, uchar16_t low)
114 // Convert the surrogate pair into a single UTF-32 character.
115 uchar32_t x = ((high & ((1 << 6) -1)) << 10) | (low & ((1 << 10) -1));
116 uchar32_t wu = ((high >> 6) & ((1 << 5) - 1)) + 1;
117 return (wu << 16) | x;
120 //! Swaps the endianness of a 16-bit value.
121 //! \return The new value.
122 inline uchar16_t swapEndian16(const uchar16_t& c)
124 return ((c >> 8) & 0x00FF) | ((c << 8) & 0xFF00);
127 //! Swaps the endianness of a 32-bit value.
128 //! \return The new value.
129 inline uchar32_t swapEndian32(const uchar32_t& c)
131 return ((c >> 24) & 0x000000FF) |
132 ((c >> 8) & 0x0000FF00) |
133 ((c << 8) & 0x00FF0000) |
134 ((c << 24) & 0xFF000000);
137 //! The Unicode byte order mark (U+FEFF) as a single 16-bit value.
138 const u16 BOM = 0xFEFF;
140 //! The size of the Unicode byte order mark in terms of the Unicode character size
//! (three 8-bit units for UTF-8, one 16-bit unit for UTF-16, one 32-bit unit for UTF-32).
141 const u8 BOM_UTF8_LEN = 3;
142 const u8 BOM_UTF16_LEN = 1;
143 const u8 BOM_UTF32_LEN = 1;
145 //! Unicode byte order marks for file operations, spelled out byte by byte for each encoding and byte order.
//! Note that the UTF-32 LE mark starts with the same two bytes as the UTF-16 LE mark.
146 const u8 BOM_ENCODE_UTF8[3] = { 0xEF, 0xBB, 0xBF };
147 const u8 BOM_ENCODE_UTF16_BE[2] = { 0xFE, 0xFF };
148 const u8 BOM_ENCODE_UTF16_LE[2] = { 0xFF, 0xFE };
149 const u8 BOM_ENCODE_UTF32_BE[4] = { 0x00, 0x00, 0xFE, 0xFF };
150 const u8 BOM_ENCODE_UTF32_LE[4] = { 0xFF, 0xFE, 0x00, 0x00 };
152 //! The size in bytes of the Unicode byte order marks for file operations (the lengths of the arrays above).
153 const u8 BOM_ENCODE_UTF8_LEN = 3;
154 const u8 BOM_ENCODE_UTF16_LEN = 2;
155 const u8 BOM_ENCODE_UTF32_LEN = 4;
157 //! Unicode encoding type.
170 //! Unicode endianness.
178 //! Returns the specified unicode byte order mark in a byte array.
179 //! The byte order mark is the first few bytes in a text file that signifies its encoding.
180 /** \param mode The Unicode encoding method that we want to get the byte order mark for.
181 If EUTFE_UTF16 or EUTFE_UTF32 is passed, it uses the native system endianness. **/
182 //! \return An array that contains a byte order mark.
183 inline core::array<u8> getUnicodeBOM(EUTF_ENCODE mode)
185 #define COPY_ARRAY(source, size) \
186 memcpy(ret.pointer(), source, size); \
189 core::array<u8> ret(4);
193 COPY_ARRAY(BOM_ENCODE_UTF8, BOM_ENCODE_UTF8_LEN);
196 #ifdef __BIG_ENDIAN__
197 COPY_ARRAY(BOM_ENCODE_UTF16_BE, BOM_ENCODE_UTF16_LEN);
199 COPY_ARRAY(BOM_ENCODE_UTF16_LE, BOM_ENCODE_UTF16_LEN);
203 COPY_ARRAY(BOM_ENCODE_UTF16_BE, BOM_ENCODE_UTF16_LEN);
206 COPY_ARRAY(BOM_ENCODE_UTF16_LE, BOM_ENCODE_UTF16_LEN);
209 #ifdef __BIG_ENDIAN__
210 COPY_ARRAY(BOM_ENCODE_UTF32_BE, BOM_ENCODE_UTF32_LEN);
212 COPY_ARRAY(BOM_ENCODE_UTF32_LE, BOM_ENCODE_UTF32_LEN);
216 COPY_ARRAY(BOM_ENCODE_UTF32_BE, BOM_ENCODE_UTF32_LEN);
219 COPY_ARRAY(BOM_ENCODE_UTF32_LE, BOM_ENCODE_UTF32_LEN);
222 // TODO sapier: fixed warning only,
223 // don't know if something needs to be done here
231 //! Detects if the given data stream starts with a unicode BOM.
232 //! \param data The data stream to check.
233 //! \return The unicode BOM associated with the data stream, or EUTFE_NONE if none was found.
234 inline EUTF_ENCODE determineUnicodeBOM(const char* data)
236 if (memcmp(data, BOM_ENCODE_UTF8, 3) == 0) return EUTFE_UTF8;
237 if (memcmp(data, BOM_ENCODE_UTF16_BE, 2) == 0) return EUTFE_UTF16_BE;
238 if (memcmp(data, BOM_ENCODE_UTF16_LE, 2) == 0) return EUTFE_UTF16_LE;
239 if (memcmp(data, BOM_ENCODE_UTF32_BE, 4) == 0) return EUTFE_UTF32_BE;
240 if (memcmp(data, BOM_ENCODE_UTF32_LE, 4) == 0) return EUTFE_UTF32_LE;
244 } // end namespace unicode
247 //! UTF-16 string class.
248 template <typename TAlloc = irrAllocator<uchar16_t> >
253 ///------------------///
254 /// iterator classes ///
255 ///------------------///
257 //! Access an element in a unicode string, allowing one to change it.
258 class _ustring16_iterator_access
261 _ustring16_iterator_access(const ustring16<TAlloc>* s, u32 p) : ref(s), pos(p) {}
263 //! Allow the class to be interpreted as a single UTF-32 character.
264 operator uchar32_t() const
269 //! Allow one to change the character in the unicode string.
270 //! \param c The new character to use.
272 _ustring16_iterator_access& operator=(const uchar32_t c)
278 //! Increments the value by 1.
280 _ustring16_iterator_access& operator++()
286 //! Increments the value by 1, returning the old value.
287 //! \return A unicode character.
288 uchar32_t operator++(int)
290 uchar32_t old = _get();
295 //! Decrements the value by 1.
297 _ustring16_iterator_access& operator--()
303 //! Decrements the value by 1, returning the old value.
304 //! \return A unicode character.
305 uchar32_t operator--(int)
307 uchar32_t old = _get();
312 //! Adds to the value by a specified amount.
313 //! \param val The amount to add to this character.
315 _ustring16_iterator_access& operator+=(int val)
321 //! Subtracts from the value by a specified amount.
322 //! \param val The amount to subtract from this character.
324 _ustring16_iterator_access& operator-=(int val)
330 //! Multiplies the value by a specified amount.
331 //! \param val The amount to multiply this character by.
333 _ustring16_iterator_access& operator*=(int val)
339 //! Divides the value by a specified amount.
340 //! \param val The amount to divide this character by.
342 _ustring16_iterator_access& operator/=(int val)
348 //! Modulos the value by a specified amount.
349 //! \param val The amount to modulo this character by.
351 _ustring16_iterator_access& operator%=(int val)
357 //! Adds to the value by a specified amount.
358 //! \param val The amount to add to this character.
359 //! \return A unicode character.
360 uchar32_t operator+(int val) const
365 //! Subtracts from the value by a specified amount.
366 //! \param val The amount to subtract from this character.
367 //! \return A unicode character.
368 uchar32_t operator-(int val) const
373 //! Multiplies the value by a specified amount.
374 //! \param val The amount to multiply this character by.
375 //! \return A unicode character.
376 uchar32_t operator*(int val) const
381 //! Divides the value by a specified amount.
382 //! \param val The amount to divide this character by.
383 //! \return A unicode character.
384 uchar32_t operator/(int val) const
389 //! Modulos the value by a specified amount.
390 //! \param val The amount to modulo this character by.
391 //! \return A unicode character.
392 uchar32_t operator%(int val) const
398 //! Gets a uchar32_t from our current position.
399 uchar32_t _get() const
401 const uchar16_t* a = ref->c_str();
402 if (!UTF16_IS_SURROGATE(a[pos]))
403 return static_cast<uchar32_t>(a[pos]);
406 if (pos + 1 >= ref->size_raw())
409 return unicode::toUTF32(a[pos], a[pos + 1]);
413 //! Sets a uchar32_t at our current position.
414 void _set(uchar32_t c)
416 ustring16<TAlloc>* ref2 = const_cast<ustring16<TAlloc>*>(ref);
417 const uchar16_t* a = ref2->c_str();
420 // c needs two UTF-16 code units, so split it up into a high and low surrogate pair.
421 uchar16_t x = static_cast<uchar16_t>(c);
422 uchar16_t vh = UTF16_HI_SURROGATE | ((((c >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
423 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
425 // If the previous position was a surrogate pair, just replace them. Else, insert the low pair.
426 if (UTF16_IS_SURROGATE_HI(a[pos]) && pos + 1 != ref2->size_raw())
427 ref2->replace_raw(vl, static_cast<u32>(pos) + 1);
428 else ref2->insert_raw(vl, static_cast<u32>(pos) + 1);
430 ref2->replace_raw(vh, static_cast<u32>(pos));
434 // c fits in a single UTF-16 code unit.
435 uchar16_t vh = static_cast<uchar16_t>(c);
437 // If the previous position was a surrogate pair, remove the extra code unit.
438 if (UTF16_IS_SURROGATE_HI(a[pos]))
439 ref2->erase_raw(static_cast<u32>(pos) + 1);
441 ref2->replace_raw(vh, static_cast<u32>(pos));
445 const ustring16<TAlloc>* ref;
448 typedef typename ustring16<TAlloc>::_ustring16_iterator_access access;
451 //! Iterator to iterate through a UTF-16 string.
452 #ifndef USTRING_NO_STL
453 class _ustring16_const_iterator : public std::iterator<
454 std::bidirectional_iterator_tag, // iterator_category
455 access, // value_type
456 ptrdiff_t, // difference_type
457 const access, // pointer
458 const access // reference
461 class _ustring16_const_iterator
465 typedef _ustring16_const_iterator _Iter;
466 typedef std::iterator<std::bidirectional_iterator_tag, access, ptrdiff_t, const access, const access> _Base;
467 typedef const access const_pointer;
468 typedef const access const_reference;
470 #ifndef USTRING_NO_STL
471 typedef typename _Base::value_type value_type;
472 typedef typename _Base::difference_type difference_type;
473 typedef typename _Base::difference_type distance_type;
474 typedef typename _Base::pointer pointer;
475 typedef const_reference reference;
477 typedef access value_type;
478 typedef u32 difference_type;
479 typedef u32 distance_type;
480 typedef const_pointer pointer;
481 typedef const_reference reference;
485 _ustring16_const_iterator(const _Iter& i) : ref(i.ref), pos(i.pos) {}
486 _ustring16_const_iterator(const ustring16<TAlloc>& s) : ref(&s), pos(0) {}
487 _ustring16_const_iterator(const ustring16<TAlloc>& s, const u32 p) : ref(&s), pos(0)
489 if (ref->size_raw() == 0 || p == 0)
492 // Go to the appropriate position.
494 u32 sr = ref->size_raw();
495 const uchar16_t* a = ref->c_str();
496 while (i != 0 && pos < sr)
498 if (UTF16_IS_SURROGATE_HI(a[pos]))
505 //! Test for equality.
506 bool operator==(const _Iter& iter) const
508 if (ref == iter.ref && pos == iter.pos)
513 //! Test for inequality.
514 bool operator!=(const _Iter& iter) const
516 if (ref != iter.ref || pos != iter.pos)
521 //! Switch to the next full character in the string.
524 if (pos == ref->size_raw()) return *this;
525 const uchar16_t* a = ref->c_str();
526 if (UTF16_IS_SURROGATE_HI(a[pos]))
527 pos += 2; // TODO: check for valid low surrogate?
529 if (pos > ref->size_raw()) pos = ref->size_raw();
533 //! Switch to the next full character in the string, returning the previous position.
534 _Iter operator++(int)
541 //! Switch to the previous full character in the string.
544 if (pos == 0) return *this;
545 const uchar16_t* a = ref->c_str();
547 if (UTF16_IS_SURROGATE_LO(a[pos]) && pos != 0) // low surrogate, go back one more.
552 //! Switch to the previous full character in the string, returning the previous position.
553 _Iter operator--(int)
560 //! Advance a specified number of full characters in the string.
562 _Iter& operator+=(const difference_type v)
564 if (v == 0) return *this;
565 if (v < 0) return operator-=(v * -1);
567 if (pos >= ref->size_raw())
570 // Go to the appropriate position.
571 // TODO: Don't force u32 on an x64 OS. Make it agnostic.
573 u32 sr = ref->size_raw();
574 const uchar16_t* a = ref->c_str();
575 while (i != 0 && pos < sr)
577 if (UTF16_IS_SURROGATE_HI(a[pos]))
588 //! Go back a specified number of full characters in the string.
590 _Iter& operator-=(const difference_type v)
592 if (v == 0) return *this;
593 if (v > 0) return operator+=(v * -1);
598 // Go to the appropriate position.
599 // TODO: Don't force u32 on an x64 OS. Make it agnostic.
601 const uchar16_t* a = ref->c_str();
602 while (i != 0 && pos != 0)
605 if (UTF16_IS_SURROGATE_LO(a[pos]) != 0 && pos != 0)
613 //! Return a new iterator that is a variable number of full characters forward from the current position.
614 _Iter operator+(const difference_type v) const
621 //! Return a new iterator that is a variable number of full characters backward from the current position.
622 _Iter operator-(const difference_type v) const
629 //! Returns the distance between two iterators.
630 difference_type operator-(const _Iter& iter) const
632 // Make sure we reference the same object!
634 return difference_type();
659 //! Accesses the full character at the iterator's position.
660 const_reference operator*() const
662 if (pos >= ref->size_raw())
664 const uchar16_t* a = ref->c_str();
665 u32 p = ref->size_raw();
666 if (UTF16_IS_SURROGATE_LO(a[p]))
668 reference ret(ref, p);
671 const_reference ret(ref, pos);
675 //! Accesses the full character at the iterator's position.
676 reference operator*()
678 if (pos >= ref->size_raw())
680 const uchar16_t* a = ref->c_str();
681 u32 p = ref->size_raw();
682 if (UTF16_IS_SURROGATE_LO(a[p]))
684 reference ret(ref, p);
687 reference ret(ref, pos);
691 //! Accesses the full character at the iterator's position.
692 const_pointer operator->() const
697 //! Accesses the full character at the iterator's position.
703 //! Is the iterator at the start of the string?
709 //! Is the iterator at the end of the string?
712 const uchar16_t* a = ref->c_str();
713 if (UTF16_IS_SURROGATE(a[pos]))
714 return (pos + 1) >= ref->size_raw();
715 else return pos >= ref->size_raw();
718 //! Moves the iterator to the start of the string.
724 //! Moves the iterator to the end of the string.
727 pos = ref->size_raw();
730 //! Returns the iterator's position.
731 //! \return The iterator's position.
738 const ustring16<TAlloc>* ref;
742 //! Iterator to iterate through a UTF-16 string.
743 class _ustring16_iterator : public _ustring16_const_iterator
746 typedef _ustring16_iterator _Iter;
747 typedef _ustring16_const_iterator _Base;
748 typedef typename _Base::const_pointer const_pointer;
749 typedef typename _Base::const_reference const_reference;
752 typedef typename _Base::value_type value_type;
753 typedef typename _Base::difference_type difference_type;
754 typedef typename _Base::distance_type distance_type;
755 typedef access pointer;
756 typedef access reference;
762 _ustring16_iterator(const _Iter& i) : _ustring16_const_iterator(i) {}
763 _ustring16_iterator(const ustring16<TAlloc>& s) : _ustring16_const_iterator(s) {}
764 _ustring16_iterator(const ustring16<TAlloc>& s, const u32 p) : _ustring16_const_iterator(s, p) {}
766 //! Accesses the full character at the iterator's position.
767 reference operator*() const
769 if (pos >= ref->size_raw())
771 const uchar16_t* a = ref->c_str();
772 u32 p = ref->size_raw();
773 if (UTF16_IS_SURROGATE_LO(a[p]))
775 reference ret(ref, p);
778 reference ret(ref, pos);
782 //! Accesses the full character at the iterator's position.
783 reference operator*()
785 if (pos >= ref->size_raw())
787 const uchar16_t* a = ref->c_str();
788 u32 p = ref->size_raw();
789 if (UTF16_IS_SURROGATE_LO(a[p]))
791 reference ret(ref, p);
794 reference ret(ref, pos);
798 //! Accesses the full character at the iterator's position.
799 pointer operator->() const
804 //! Accesses the full character at the iterator's position.
811 typedef typename ustring16<TAlloc>::_ustring16_iterator iterator;
812 typedef typename ustring16<TAlloc>::_ustring16_const_iterator const_iterator;
814 ///----------------------///
815 /// end iterator classes ///
816 ///----------------------///
818 //! Default constructor
820 : array(0), allocated(1), used(0)
822 #if __BYTE_ORDER == __BIG_ENDIAN
823 encoding = unicode::EUTFE_UTF16_BE;
825 encoding = unicode::EUTFE_UTF16_LE;
827 array = allocator.allocate(1); // new u16[1];
833 ustring16(const ustring16<TAlloc>& other)
834 : array(0), allocated(0), used(0)
836 #if __BYTE_ORDER == __BIG_ENDIAN
837 encoding = unicode::EUTFE_UTF16_BE;
839 encoding = unicode::EUTFE_UTF16_LE;
845 //! Constructor from other string types
846 template <class B, class A>
847 ustring16(const string<B, A>& other)
848 : array(0), allocated(0), used(0)
850 #if __BYTE_ORDER == __BIG_ENDIAN
851 encoding = unicode::EUTFE_UTF16_BE;
853 encoding = unicode::EUTFE_UTF16_LE;
859 #ifndef USTRING_NO_STL
860 //! Constructor from std::string
861 template <class B, class A, typename Alloc>
862 ustring16(const std::basic_string<B, A, Alloc>& other)
863 : array(0), allocated(0), used(0)
865 #if __BYTE_ORDER == __BIG_ENDIAN
866 encoding = unicode::EUTFE_UTF16_BE;
868 encoding = unicode::EUTFE_UTF16_LE;
870 *this = other.c_str();
874 //! Constructor from iterator.
875 template <typename Itr>
876 ustring16(Itr first, Itr last)
877 : array(0), allocated(0), used(0)
879 #if __BYTE_ORDER == __BIG_ENDIAN
880 encoding = unicode::EUTFE_UTF16_BE;
882 encoding = unicode::EUTFE_UTF16_LE;
884 reserve(std::distance(first, last));
887 for (; first != last; ++first)
888 append((uchar32_t)*first);
893 #ifndef USTRING_CPP0X_NEWLITERALS
894 //! Constructor for copying a character string from a pointer.
895 ustring16(const char* const c)
896 : array(0), allocated(0), used(0)
898 #if __BYTE_ORDER == __BIG_ENDIAN
899 encoding = unicode::EUTFE_UTF16_BE;
901 encoding = unicode::EUTFE_UTF16_LE;
904 loadDataStream(c, strlen(c));
905 //append((uchar8_t*)c);
909 //! Constructor for copying a character string from a pointer with a given length.
910 ustring16(const char* const c, u32 length)
911 : array(0), allocated(0), used(0)
913 #if __BYTE_ORDER == __BIG_ENDIAN
914 encoding = unicode::EUTFE_UTF16_BE;
916 encoding = unicode::EUTFE_UTF16_LE;
919 loadDataStream(c, length);
924 //! Constructor for copying a UTF-8 string from a pointer.
925 ustring16(const uchar8_t* const c)
926 : array(0), allocated(0), used(0)
928 #if __BYTE_ORDER == __BIG_ENDIAN
929 encoding = unicode::EUTFE_UTF16_BE;
931 encoding = unicode::EUTFE_UTF16_LE;
938 //! Constructor for copying a UTF-8 string from a single char.
939 ustring16(const char c)
940 : array(0), allocated(0), used(0)
942 #if __BYTE_ORDER == __BIG_ENDIAN
943 encoding = unicode::EUTFE_UTF16_BE;
945 encoding = unicode::EUTFE_UTF16_LE;
948 append((uchar32_t)c);
952 //! Constructor for copying a UTF-8 string from a pointer with a given length.
953 ustring16(const uchar8_t* const c, u32 length)
954 : array(0), allocated(0), used(0)
956 #if __BYTE_ORDER == __BIG_ENDIAN
957 encoding = unicode::EUTFE_UTF16_BE;
959 encoding = unicode::EUTFE_UTF16_LE;
966 //! Constructor for copying a UTF-16 string from a pointer.
967 ustring16(const uchar16_t* const c)
968 : array(0), allocated(0), used(0)
970 #if __BYTE_ORDER == __BIG_ENDIAN
971 encoding = unicode::EUTFE_UTF16_BE;
973 encoding = unicode::EUTFE_UTF16_LE;
980 //! Constructor for copying a UTF-16 string from a pointer with a given length
981 ustring16(const uchar16_t* const c, u32 length)
982 : array(0), allocated(0), used(0)
984 #if __BYTE_ORDER == __BIG_ENDIAN
985 encoding = unicode::EUTFE_UTF16_BE;
987 encoding = unicode::EUTFE_UTF16_LE;
994 //! Constructor for copying a UTF-32 string from a pointer.
995 ustring16(const uchar32_t* const c)
996 : array(0), allocated(0), used(0)
998 #if __BYTE_ORDER == __BIG_ENDIAN
999 encoding = unicode::EUTFE_UTF16_BE;
1001 encoding = unicode::EUTFE_UTF16_LE;
1008 //! Constructor for copying a UTF-32 from a pointer with a given length.
1009 ustring16(const uchar32_t* const c, u32 length)
1010 : array(0), allocated(0), used(0)
1012 #if __BYTE_ORDER == __BIG_ENDIAN
1013 encoding = unicode::EUTFE_UTF16_BE;
1015 encoding = unicode::EUTFE_UTF16_LE;
1022 //! Constructor for copying a wchar_t string from a pointer.
1023 ustring16(const wchar_t* const c)
1024 : array(0), allocated(0), used(0)
1026 #if __BYTE_ORDER == __BIG_ENDIAN
1027 encoding = unicode::EUTFE_UTF16_BE;
1029 encoding = unicode::EUTFE_UTF16_LE;
1032 if (sizeof(wchar_t) == 4)
1033 append(reinterpret_cast<const uchar32_t* const>(c));
1034 else if (sizeof(wchar_t) == 2)
1035 append(reinterpret_cast<const uchar16_t* const>(c));
1036 else if (sizeof(wchar_t) == 1)
1037 append(reinterpret_cast<const uchar8_t* const>(c));
1041 //! Constructor for copying a wchar_t string from a pointer with a given length.
1042 ustring16(const wchar_t* const c, u32 length)
1043 : array(0), allocated(0), used(0)
1045 #if __BYTE_ORDER == __BIG_ENDIAN
1046 encoding = unicode::EUTFE_UTF16_BE;
1048 encoding = unicode::EUTFE_UTF16_LE;
1051 if (sizeof(wchar_t) == 4)
1052 append(reinterpret_cast<const uchar32_t* const>(c), length);
1053 else if (sizeof(wchar_t) == 2)
1054 append(reinterpret_cast<const uchar16_t* const>(c), length);
1055 else if (sizeof(wchar_t) == 1)
1056 append(reinterpret_cast<const uchar8_t* const>(c), length);
1060 #ifdef USTRING_CPP0X
1061 //! Constructor for moving a ustring16
1062 ustring16(ustring16<TAlloc>&& other)
1063 : array(other.array), encoding(other.encoding), allocated(other.allocated), used(other.used)
1065 //std::cout << "MOVE constructor" << std::endl;
1067 other.allocated = 0;
1076 allocator.deallocate(array); // delete [] array;
1080 //! Assignment operator
1081 ustring16& operator=(const ustring16<TAlloc>& other)
1086 used = other.size_raw();
1087 if (used >= allocated)
1089 allocator.deallocate(array); // delete [] array;
1090 allocated = used + 1;
1091 array = allocator.allocate(used + 1); //new u16[used];
1094 const uchar16_t* p = other.c_str();
1095 for (u32 i=0; i<=used; ++i, ++p)
1100 // Validate our new UTF-16 string.
1107 #ifdef USTRING_CPP0X
1108 //! Move assignment operator
1109 ustring16& operator=(ustring16<TAlloc>&& other)
1113 //std::cout << "MOVE operator=" << std::endl;
1114 allocator.deallocate(array);
1116 array = other.array;
1117 allocated = other.allocated;
1118 encoding = other.encoding;
1128 //! Assignment operator for other string types
1129 template <class B, class A>
1130 ustring16<TAlloc>& operator=(const string<B, A>& other)
1132 *this = other.c_str();
1137 //! Assignment operator for UTF-8 strings
1138 ustring16<TAlloc>& operator=(const uchar8_t* const c)
1142 array = allocator.allocate(1); //new u16[1];
1147 if (!c) return *this;
1149 //! Append our string now.
1155 //! Assignment operator for UTF-16 strings
1156 ustring16<TAlloc>& operator=(const uchar16_t* const c)
1160 array = allocator.allocate(1); //new u16[1];
1165 if (!c) return *this;
1167 //! Append our string now.
1173 //! Assignment operator for UTF-32 strings
1174 ustring16<TAlloc>& operator=(const uchar32_t* const c)
1178 array = allocator.allocate(1); //new u16[1];
1183 if (!c) return *this;
1185 //! Append our string now.
1191 //! Assignment operator for wchar_t strings.
1192 /** Note that this assumes that a correct unicode string is stored in the wchar_t string.
1193 Since wchar_t changes depending on its platform, it could either be a UTF-8, -16, or -32 string.
1194 This function assumes you are storing the correct unicode encoding inside the wchar_t string. **/
1195 ustring16<TAlloc>& operator=(const wchar_t* const c)
1197 if (sizeof(wchar_t) == 4)
1198 *this = reinterpret_cast<const uchar32_t* const>(c);
1199 else if (sizeof(wchar_t) == 2)
1200 *this = reinterpret_cast<const uchar16_t* const>(c);
1201 else if (sizeof(wchar_t) == 1)
1202 *this = reinterpret_cast<const uchar8_t* const>(c);
1208 //! Assignment operator for other strings.
1209 /** Note that this assumes that a correct unicode string is stored in the string. **/
1211 ustring16<TAlloc>& operator=(const B* const c)
1214 *this = reinterpret_cast<const uchar32_t* const>(c);
1215 else if (sizeof(B) == 2)
1216 *this = reinterpret_cast<const uchar16_t* const>(c);
1217 else if (sizeof(B) == 1)
1218 *this = reinterpret_cast<const uchar8_t* const>(c);
1224 //! Direct access operator
1225 access operator [](const u32 index)
1227 _IRR_DEBUG_BREAK_IF(index>=size()) // bad index
1228 iterator iter(*this, index);
1229 return iter.operator*();
1233 //! Direct access operator
1234 const access operator [](const u32 index) const
1236 _IRR_DEBUG_BREAK_IF(index>=size()) // bad index
1237 const_iterator iter(*this, index);
1238 return iter.operator*();
1242 //! Equality operator
1243 bool operator ==(const uchar16_t* const str) const
1249 for(i=0; array[i] && str[i]; ++i)
1250 if (array[i] != str[i])
1253 return !array[i] && !str[i];
1257 //! Equality operator
1258 bool operator ==(const ustring16<TAlloc>& other) const
1260 for(u32 i=0; array[i] && other.array[i]; ++i)
1261 if (array[i] != other.array[i])
1264 return used == other.used;
1268 //! Is smaller comparator
1269 bool operator <(const ustring16<TAlloc>& other) const
1271 for(u32 i=0; array[i] && other.array[i]; ++i)
1273 s32 diff = array[i] - other.array[i];
1278 return used < other.used;
1282 //! Inequality operator
1283 bool operator !=(const uchar16_t* const str) const
1285 return !(*this == str);
1289 //! Inequality operator
1290 bool operator !=(const ustring16<TAlloc>& other) const
1292 return !(*this == other);
1296 //! Returns the length of a ustring16 in full characters.
1297 //! \return Length of a ustring16 in full characters.
1300 const_iterator i(*this, 0);
1311 //! Informs if the ustring is empty or not.
1312 //! \return True if the ustring is empty, false if not.
1315 return (size_raw() == 0);
1319 //! Returns a pointer to the raw UTF-16 string data.
1320 //! \return pointer to C-style NUL terminated array of UTF-16 code points.
1321 const uchar16_t* c_str() const
1327 //! Compares the first n characters of this string with another.
1328 //! \param other Other string to compare to.
1329 //! \param n Number of characters to compare.
1330 //! \return True if the n first characters of both strings are equal.
1331 bool equalsn(const ustring16<TAlloc>& other, u32 n) const
1334 const uchar16_t* oa = other.c_str();
1335 for(i=0; array[i] && oa[i] && i < n; ++i)
1336 if (array[i] != oa[i])
1339 // if one (or both) of the strings was shorter than n, they
1340 // are only equal if they have the same length
1341 return (i == n) || (used == other.used);
1345 //! Compares the first n characters of this string with another.
1346 //! \param str Other string to compare to.
1347 //! \param n Number of characters to compare.
1348 //! \return True if the n first characters of both strings are equal.
1349 bool equalsn(const uchar16_t* const str, u32 n) const
1354 for(i=0; array[i] && str[i] && i < n; ++i)
1355 if (array[i] != str[i])
1358 // if one (or both) of the strings was shorter than n, they
1359 // are only equal if they have the same length
1360 return (i == n) || (array[i] == 0 && str[i] == 0);
1364 //! Appends a character to this ustring16
1365 //! \param character The character to append.
1366 //! \return A reference to our current string.
1367 ustring16<TAlloc>& append(uchar32_t character)
1369 if (used + 2 >= allocated)
1370 reallocate(used + 2);
1372 if (character > 0xFFFF)
1376 // character will be multibyte, so split it up into a surrogate pair.
1377 uchar16_t x = static_cast<uchar16_t>(character);
1378 uchar16_t vh = UTF16_HI_SURROGATE | ((((character >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1379 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1386 array[used-1] = character;
1394 //! Appends a UTF-8 string to this ustring16
1395 //! \param other The UTF-8 string to append.
1396 //! \param length The length of the string to append.
1397 //! \return A reference to our current string.
1398 ustring16<TAlloc>& append(const uchar8_t* const other, u32 length=0xffffffff)
1403 // Determine if the string is long enough for a BOM.
1405 const uchar8_t* p = other;
1409 } while (*p++ && len < unicode::BOM_ENCODE_UTF8_LEN);
1412 unicode::EUTF_ENCODE c_bom = unicode::EUTFE_NONE;
1413 if (len == unicode::BOM_ENCODE_UTF8_LEN)
1415 if (memcmp(other, unicode::BOM_ENCODE_UTF8, unicode::BOM_ENCODE_UTF8_LEN) == 0)
1416 c_bom = unicode::EUTFE_UTF8;
1419 // If a BOM was found, don't include it in the string.
1420 const uchar8_t* c2 = other;
1421 if (c_bom != unicode::EUTFE_NONE)
1423 c2 = other + unicode::BOM_UTF8_LEN;
1424 length -= unicode::BOM_UTF8_LEN;
1427 // Calculate the size of the string to read in.
1433 } while(*p++ && len < length);
1437 // If we need to grow the array, do it now.
1438 if (used + len >= allocated)
1439 reallocate(used + (len * 2));
1442 // Convert UTF-8 to UTF-16.
1444 for (u32 l = 0; l<len;)
1447 if (((c2[l] >> 6) & 0x03) == 0x02)
1448 { // Invalid continuation byte.
1449 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1452 else if (c2[l] == 0xC0 || c2[l] == 0xC1)
1453 { // Invalid byte - overlong encoding.
1454 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1457 else if ((c2[l] & 0xF8) == 0xF0)
1458 { // 4 bytes UTF-8, 2 bytes UTF-16.
1459 // Check for a full string.
1462 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1470 if (valid && (((c2[l+1] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1471 if (valid && (((c2[l+2] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1472 if (valid && (((c2[l+3] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1475 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1481 uchar8_t b1 = ((c2[l] & 0x7) << 2) | ((c2[l+1] >> 4) & 0x3);
1482 uchar8_t b2 = ((c2[l+1] & 0xF) << 4) | ((c2[l+2] >> 2) & 0xF);
1483 uchar8_t b3 = ((c2[l+2] & 0x3) << 6) | (c2[l+3] & 0x3F);
1484 uchar32_t v = b3 | ((uchar32_t)b2 << 8) | ((uchar32_t)b1 << 16);
1486 // Split v up into a surrogate pair.
1487 uchar16_t x = static_cast<uchar16_t>(v);
1488 uchar16_t vh = UTF16_HI_SURROGATE | ((((v >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1489 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1494 ++used; // Using two shorts this time, so increase used by 1.
1496 else if ((c2[l] & 0xF0) == 0xE0)
1497 { // 3 bytes UTF-8, 1 byte UTF-16.
1498 // Check for a full string.
1501 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1509 if (valid && (((c2[l+1] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1510 if (valid && (((c2[l+2] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1513 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1519 uchar8_t b1 = ((c2[l] & 0xF) << 4) | ((c2[l+1] >> 2) & 0xF);
1520 uchar8_t b2 = ((c2[l+1] & 0x3) << 6) | (c2[l+2] & 0x3F);
1521 uchar16_t ch = b2 | ((uchar16_t)b1 << 8);
1525 else if ((c2[l] & 0xE0) == 0xC0)
1526 { // 2 bytes UTF-8, 1 byte UTF-16.
1527 // Check for a full string.
1530 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1536 if (((c2[l+1] >> 6) & 0x03) != 0x02)
1538 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1544 uchar8_t b1 = (c2[l] >> 2) & 0x7;
1545 uchar8_t b2 = ((c2[l] & 0x3) << 6) | (c2[l+1] & 0x3F);
1546 uchar16_t ch = b2 | ((uchar16_t)b1 << 8);
1551 { // 1 byte UTF-8, 1 byte UTF-16.
1554 { // Values above 0xF4 are restricted and aren't used. By now, anything above 0x7F is invalid.
1555 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1557 else array[pos++] = static_cast<uchar16_t>(c2[l]);
1563 // Validate our new UTF-16 string.
1570 //! Appends a UTF-16 string to this ustring16.
//! A leading UTF-16 byte-order mark in the input selects the source endianness and is not copied.
1571 //! \param other The UTF-16 string to append.
1572 //! \param length The maximum number of UTF-16 code units to read (a leading BOM counts towards it); reading also stops at the first NUL.
1573 //! \return A reference to our current string.
1574 ustring16<TAlloc>& append(const uchar16_t* const other, u32 length=0xffffffff)
1579 // Determine if the string is long enough for a BOM.
1581 const uchar16_t* p = other;
1585 } while (*p++ && len < unicode::BOM_ENCODE_UTF16_LEN);
1587 // Check for the BOM to determine the string's endianness.
1588 unicode::EUTF_ENDIAN c_end = unicode::EUTFEE_NATIVE;
1589 if (memcmp(other, unicode::BOM_ENCODE_UTF16_LE, unicode::BOM_ENCODE_UTF16_LEN) == 0)
1590 c_end = unicode::EUTFEE_LITTLE;
1591 else if (memcmp(other, unicode::BOM_ENCODE_UTF16_BE, unicode::BOM_ENCODE_UTF16_LEN) == 0)
1592 c_end = unicode::EUTFEE_BIG;
1594 // If a BOM was found, don't include it in the string.
1595 const uchar16_t* c2 = other;
1596 if (c_end != unicode::EUTFEE_NATIVE)
1598 c2 = other + unicode::BOM_UTF16_LEN;
1599 length -= unicode::BOM_UTF16_LEN;
1602 // Calculate the size of the string to read in.
1608 } while(*p++ && len < length);
1612 // If we need to grow the size of the array, do it now.
// Twice the incoming length is added so that later appends have spare room.
1613 if (used + len >= allocated)
1614 reallocate(used + (len * 2));
1618 // Copy the string now.
1619 unicode::EUTF_ENDIAN m_end = getEndianness();
// NOTE(review): c2 is indexed with l, which starts at `start` rather than 0 - confirm that c2[l - start] was not intended.
1620 for (u32 l = start; l < start + len; ++l)
1622 array[l] = (uchar16_t)c2[l];
// Swap bytes only when a BOM declared an endianness that differs from ours.
1623 if (c_end != unicode::EUTFEE_NATIVE && c_end != m_end)
1624 array[l] = unicode::swapEndian16(array[l]);
1629 // Validate our new UTF-16 string.
1635 //! Appends a UTF-32 string to this ustring16, converting it to UTF-16.
//! A leading UTF-32 byte-order mark in the input selects the source endianness and is not copied.
1636 //! \param other The UTF-32 string to append.
1637 //! \param length The maximum number of UTF-32 characters to read (a leading BOM counts towards it); reading also stops at the first NUL.
1638 //! \return A reference to our current string.
1639 ustring16<TAlloc>& append(const uchar32_t* const other, u32 length=0xffffffff)
1644 // Check for the BOM to determine the string's endianness.
1645 unicode::EUTF_ENDIAN c_end = unicode::EUTFEE_NATIVE;
1646 if (memcmp(other, unicode::BOM_ENCODE_UTF32_LE, unicode::BOM_ENCODE_UTF32_LEN) == 0)
1647 c_end = unicode::EUTFEE_LITTLE;
1648 else if (memcmp(other, unicode::BOM_ENCODE_UTF32_BE, unicode::BOM_ENCODE_UTF32_LEN) == 0)
1649 c_end = unicode::EUTFEE_BIG;
1651 // If a BOM was found, don't include it in the string.
1652 const uchar32_t* c2 = other;
1653 if (c_end != unicode::EUTFEE_NATIVE)
1655 c2 = other + unicode::BOM_UTF32_LEN;
1656 length -= unicode::BOM_UTF32_LEN;
1659 // Calculate the size of the string to read in.
1661 const uchar32_t* p = c2;
1665 } while(*p++ && len < length);
1669 // If we need to grow the size of the array, do it now.
1670 // In case all of the UTF-32 string is split into surrogate pairs, do len * 2.
1671 if (used + (len * 2) >= allocated)
1672 reallocate(used + ((len * 2) * 2));
1675 // Convert UTF-32 to UTF-16.
1676 unicode::EUTF_ENDIAN m_end = getEndianness();
1678 for (u32 l = 0; l<len; ++l)
1682 uchar32_t ch = c2[l];
// Bring the character into native byte order before inspecting its value.
1683 if (c_end != unicode::EUTFEE_NATIVE && c_end != m_end)
1684 ch = unicode::swapEndian32(ch);
1688 // Split ch up into a surrogate pair as it is over 16 bits long.
1689 uchar16_t x = static_cast<uchar16_t>(ch);
// High surrogate: bits 16-20 of ch minus one, placed above the top six bits of the low 16-bit half.
1690 uchar16_t vh = UTF16_HI_SURROGATE | ((((ch >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
// Low surrogate: the low ten bits of ch.
1691 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1694 ++used; // Using two shorts, so increased used again.
1696 else if (ch >= 0xD800 && ch <= 0xDFFF)
1698 // Between possible UTF-16 surrogates (invalid!)
1699 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1701 else array[pos++] = static_cast<uchar16_t>(ch);
1705 // Validate our new UTF-16 string.
1712 //! Appends a ustring16 to this ustring16.
//! The other string's raw UTF-16 code units are copied as-is, so no conversion is performed.
1713 //! \param other The string to append to this one.
1714 //! \return A reference to our current string.
1715 ustring16<TAlloc>& append(const ustring16<TAlloc>& other)
1717 const uchar16_t* oa = other.c_str();
// Length in raw code units (surrogates counted individually).
1719 u32 len = other.size_raw();
1721 if (used + len >= allocated)
1722 reallocate(used + len);
// Copy directly behind our current contents.
1724 for (u32 l=0; l<len; ++l)
1725 array[used+l] = oa[l];
1734 //! Appends a certain amount of characters of a ustring16 to this ustring16.
//! Characters are read through a const_iterator over the other string, starting at its first character.
1735 //! \param other The string to append to this one.
1736 //! \param length How many characters of the other string to add to this one.
1737 //! \return A reference to our current string.
1738 ustring16<TAlloc>& append(const ustring16<TAlloc>& other, u32 length)
1740 if (other.size() == 0)
1743 if (other.size() < length)
// Reserve two code units per character in case every character needs a surrogate pair.
1749 if (used + length * 2 >= allocated)
1750 reallocate(used + length * 2);
1752 const_iterator iter(other, 0);
1754 while (!iter.atEnd() && l)
1756 uchar32_t c = *iter;
1766 //! Reserves some memory.
1767 //! \param count The amount of characters to reserve.
1768 void reserve(u32 count)
// NOTE(review): requests smaller than the current allocation are special-cased here - presumably they are ignored; confirm.
1770 if (count < allocated)
1777 //! Finds first occurrence of character.
1778 //! \param c The character to search for, as a UTF-32 value.
1779 //! \return Position where the character has been found, or -1 if not found.
1780 s32 findFirst(uchar32_t c) const
// The search walks the string with a const_iterator starting at index 0.
1782 const_iterator i(*this, 0);
1797 //! Finds first occurrence of a character of a list.
1798 //! \param c A list of UTF-32 characters to find. For example if the method should find the first occurrence of 'a' or 'b', this parameter should hold 'a' and 'b'.
1799 //! \param count The amount of characters in the list.
1800 //! \return Position where one of the characters has been found, or -1 if not found.
1801 s32 findFirstChar(const uchar32_t* const c, u32 count=1) const
1806 const_iterator i(*this, 0);
// Compare the current character against each entry of the list.
1812 for (u32 j=0; j<count; ++j)
1823 //! Finds first position of a character not in a given list.
1824 //! \param c A list of UTF-32 characters to NOT find. For example if the method should find the first occurrence of a character not 'a' or 'b', this parameter should hold 'a' and 'b'.
1825 //! \param count The amount of characters in the list.
1826 //! \return Position where the character has been found, or -1 if not found.
1827 s32 findFirstCharNotInList(const uchar32_t* const c, u32 count=1) const
1832 const_iterator i(*this, 0);
// Compare the current character against each entry of the list.
1839 for (j=0; j<count; ++j)
1852 //! Finds last position of a character not in a given list.
1853 //! \param c A list of UTF-32 characters to NOT find. For example if the method should find the last occurrence of a character not 'a' or 'b', this parameter should hold 'a' and 'b'.
1854 //! \param count The amount of characters in the list.
1855 //! \return Position where the character has been found, or -1 if not found.
1856 s32 findLastCharNotInList(const uchar32_t* const c, u32 count=1) const
// Start from the end iterator and walk backwards.
1861 const_iterator i(end());
// Character index that accompanies the iterator, starting at the last character.
1864 s32 pos = size() - 1;
1865 while (!i.atStart())
1869 for (j=0; j<count; ++j)
1882 //! Finds next occurrence of character.
1883 //! \param c The character to search for, as a UTF-32 value.
1884 //! \param startPos The position in the string to start searching.
1885 //! \return Position where the character has been found, or -1 if not found.
1886 s32 findNext(uchar32_t c, u32 startPos) const
// The search iterator is created directly at startPos.
1888 const_iterator i(*this, startPos);
1904 //! Finds last occurrence of character.
1905 //! \param c The character to search for, as a UTF-32 value.
1906 //! \param start The start position of the reverse search ( default = -1, on end ).
1907 //! \return Position where the character has been found, or -1 if not found.
1908 s32 findLast(uchar32_t c, s32 start = -1) const
// A negative start selects s; any other value is clamped to [0, s]. One is then subtracted to obtain the index handed to the iterator.
1911 start = core::clamp ( start < 0 ? (s32)s : start, 0, (s32)s ) - 1;
1913 const_iterator i(*this, start);
1915 while (!i.atStart())
1927 //! Finds last occurrence of a character in a list.
1928 //! \param c A list of UTF-32 characters to find. For example if the method should find the last occurrence of 'a' or 'b', this parameter should hold 'a' and 'b'.
1929 //! \param count The amount of characters in the list.
1930 //! \return Position where one of the characters has been found, or -1 if not found.
1931 s32 findLastChar(const uchar32_t* const c, u32 count=1) const
// Start from the end iterator and walk backwards.
1936 const_iterator i(end());
1940 while (!i.atStart())
// Compare the current character against each entry of the list.
1943 for (u32 j=0; j<count; ++j)
1954 //! Finds another ustring16 in this ustring16.
//! Both strings are compared character by character through const_iterators.
1955 //! \param str The string to find.
1956 //! \param start The start position of the search.
1957 //! \return Position where the ustring16 has been found, or -1 if not found.
1958 s32 find(const ustring16<TAlloc>& str, const u32 start = 0) const
1960 u32 my_size = size();
1961 u32 their_size = str.size();
// NOTE(review): my_size and start are both u32, so my_size - start wraps around when start > my_size - confirm callers never pass such a start.
1963 if (their_size == 0 || my_size - start < their_size)
1966 const_iterator i(*this, start);
// i2 walks this string from the candidate position while j walks str from its beginning.
1971 const_iterator i2(i);
1972 const_iterator j(str, 0);
1973 uchar32_t t1 = (uchar32_t)*i2;
1974 uchar32_t t2 = (uchar32_t)*j;
1981 t1 = (uchar32_t)*i2;
1992 //! Finds another ustring16 in this ustring16, comparing raw UTF-16 code units.
1993 //! \param str The string to find.
1994 //! \param start The start position of the search, as an index into the raw UTF-16 array.
1995 //! \return Position where the string has been found, or -1 if not found.
1996 s32 find_raw(const ustring16<TAlloc>& str, const u32 start = 0) const
1998 const uchar16_t* data = str.c_str();
// Try every raw start index at which the whole pattern still fits.
2009 for (u32 i=start; i<=used-len; ++i)
// Advance while the pattern has not hit its NUL and the code units agree.
2013 while(data[j] && array[i+j] == data[j])
2025 //! Returns a substring.
2026 //! \param begin: Start of substring.
2027 //! \param length: Length of substring.
2028 //! \return The substring as a new ustring16 (returned by value); an empty string if length is not positive or begin lies at or past len.
2029 ustring16<TAlloc> subString(u32 begin, s32 length) const
2032 // if start after ustring16
2033 // or no proper substring length
2034 if ((length <= 0) || (begin>=len))
2035 return ustring16<TAlloc>("");
2036 // clamp length to maximal value
2037 if ((length+begin) > len)
2040 ustring16<TAlloc> o;
// Reserve two code units per character (plus one extra) in case of surrogate pairs.
2041 o.reserve((length+1) * 2);
2043 const_iterator i(*this, begin);
2044 while (!i.atEnd() && length)
2055 //! Appends a character to this ustring16.
2056 //! \param c Character to append.
2057 //! \return A reference to our current string.
2058 ustring16<TAlloc>& operator += (char c)
// NOTE(review): c is converted with a plain cast; where char is signed, negative values presumably become large uchar32_t values - confirm intended handling.
2060 append((uchar32_t)c);
2065 //! Appends a character to this ustring16.
2066 //! \param c Character to append, as a UTF-32 value.
2067 //! \return A reference to our current string.
2068 ustring16<TAlloc>& operator += (uchar32_t c)
2075 //! Appends a number to this ustring16.
2076 //! \param c Number to append.
2077 //! \return A reference to our current string.
2078 ustring16<TAlloc>& operator += (short c)
// The number is turned into text by core::stringc and that text is appended.
2080 append(core::stringc(c));
2085 //! Appends a number to this ustring16.
2086 //! \param c Number to append.
2087 //! \return A reference to our current string.
2088 ustring16<TAlloc>& operator += (unsigned short c)
// The number is turned into text by core::stringc and that text is appended.
2090 append(core::stringc(c));
2095 #ifdef USTRING_CPP0X_NEWLITERALS
2096 //! Appends a number to this ustring16.
2097 //! \param c Number to append.
2098 //! \return A reference to our current string.
2099 ustring16<TAlloc>& operator += (int c)
// The number is turned into text by core::stringc and that text is appended.
2101 append(core::stringc(c));
2106 //! Appends a number to this ustring16.
2107 //! \param c Number to append.
2108 //! \return A reference to our current string.
2109 ustring16<TAlloc>& operator += (unsigned int c)
// The number is turned into text by core::stringc and that text is appended.
2111 append(core::stringc(c));
2117 //! Appends a number to this ustring16.
2118 //! \param c Number to append.
2119 //! \return A reference to our current string.
2120 ustring16<TAlloc>& operator += (long c)
// The number is turned into text by core::stringc and that text is appended.
2122 append(core::stringc(c));
2127 //! Appends a number to this ustring16.
2128 //! \param c Number to append.
2129 //! \return A reference to our current string.
2130 ustring16<TAlloc>& operator += (unsigned long c)
// The number is turned into text by core::stringc and that text is appended.
2132 append(core::stringc(c));
2137 //! Appends a floating-point number to this ustring16.
2138 //! \param c Number to append.
2139 //! \return A reference to our current string.
2140 ustring16<TAlloc>& operator += (double c)
// The number is turned into text by core::stringc and that text is appended.
2142 append(core::stringc(c));
2147 //! Appends a UTF-16 string to this ustring16.
2148 //! \param c UTF-16 string to append.
2149 //! \return A reference to our current string.
2150 ustring16<TAlloc>& operator += (const uchar16_t* const c)
2157 //! Appends a ustring16 to this ustring16.
2158 //! \param other The ustring16 to append.
2159 //! \return A reference to our current string.
2160 ustring16<TAlloc>& operator += (const ustring16<TAlloc>& other)
2167 //! Replaces all characters of a given type with another one.
2168 //! \param toReplace Character to replace, as a UTF-32 value.
2169 //! \param replaceWith Character replacing the old one, as a UTF-32 value.
2170 //! \return A reference to our current string.
2171 ustring16<TAlloc>& replace(uchar32_t toReplace, uchar32_t replaceWith)
2173 iterator i(*this, 0);
// Dereferencing the iterator yields an access proxy; its UTF-32 value is what gets compared.
2176 typename ustring16<TAlloc>::access a = *i;
2177 if ((uchar32_t)a == toReplace)
2185 //! Replaces all instances of a string with another one.
//! Matching and copying are done on raw UTF-16 code units. One of three strategies is chosen
//! depending on whether the replacement is the same size as, shorter than, or longer than the pattern.
2186 //! \param toReplace The string to replace.
2187 //! \param replaceWith The string replacing the old one.
2188 //! \return A reference to our current string.
2189 ustring16<TAlloc>& replace(const ustring16<TAlloc>& toReplace, const ustring16<TAlloc>& replaceWith)
2191 if (toReplace.size() == 0)
2194 const uchar16_t* other = toReplace.c_str();
2195 const uchar16_t* replace = replaceWith.c_str();
2196 const u32 other_size = toReplace.size_raw();
2197 const u32 replace_size = replaceWith.size_raw();
2199 // Determine the delta. The algorithm will change depending on the delta.
// delta is the signed difference, in code units, between the replacement and the pattern.
2200 s32 delta = replace_size - other_size;
2202 // A character for character replace. The string will not shrink or grow.
2206 while ((pos = find_raw(other, pos)) != -1)
// Overwrite the match in place.
2208 for (u32 i = 0; i < replace_size; ++i)
2209 array[pos + i] = replace[i];
2215 // We are going to be removing some characters. The string will shrink.
// pos is the read index and i the write index; both walk the same array.
2219 for (u32 pos = 0; pos <= used; ++i, ++pos)
2221 // Is this potentially a match?
2222 if (array[pos] == *other)
2224 // Check to see if we have a match.
2226 for (j = 0; j < other_size; ++j)
2228 if (array[pos + j] != other[j])
2232 // If we have a match, replace characters.
2233 if (j == other_size)
2235 for (j = 0; j < replace_size; ++j)
2236 array[i + j] = replace[j];
// The loop header adds the remaining +1 to both indices.
2237 i += replace_size - 1;
2238 pos += other_size - 1;
2243 // No match found, just copy characters.
2244 array[i - 1] = array[pos];
2252 // We are going to be adding characters, so the string size will increase.
2253 // Count the number of times toReplace exists in the string so we can allocate the new size.
2256 while ((pos = find_raw(other, pos)) != -1)
2262 // Re-allocate the string now, if needed.
// Total growth: delta extra code units per occurrence.
2263 u32 len = delta * find_count;
2264 if (used + len >= allocated)
2265 reallocate(used + len);
2269 while ((pos = find_raw(other, pos)) != -1)
// start: last code unit of the match; ptr: current end of the data; end: where that end moves to after growing by delta.
2271 uchar16_t* start = array + pos + other_size - 1;
2272 uchar16_t* ptr = array + used;
2273 uchar16_t* end = array + used + delta;
2275 // Shift characters to make room for the string.
2276 while (ptr != start)
2283 // Add the new string now.
2284 for (u32 i = 0; i < replace_size; ++i)
2285 array[pos + i] = replace[i];
// Continue searching behind the text that was just inserted.
2287 pos += replace_size;
2291 // Terminate the string and return ourself.
2297 //! Removes all occurrences of a character from a ustring16.
2298 //! \param c The character to remove, as a UTF-32 value.
2299 //! \return A reference to our current string.
2300 ustring16<TAlloc>& remove(uchar32_t c)
2304 u32 len = (c > 0xFFFF ? 2 : 1); // Remove characters equal to the size of c as a UTF-16 character.
2305 for (u32 i=0; i<=used; ++i)
// Anything that is not a high surrogate is handled as a single code unit.
2308 if (!UTF16_IS_SURROGATE_HI(array[i]))
2310 else if (i + 1 <= used)
2312 // Convert the surrogate pair into a single UTF-32 character.
2313 uc32 = unicode::toUTF32(array[i], array[i + 1]);
// Number of code units the current character occupies.
2315 u32 len2 = (uc32 > 0xFFFF ? 2 : 1);
// Kept characters are compacted towards the front of the array.
2323 array[pos++] = array[i];
2325 array[pos++] = array[++i];
2333 //! Removes a ustring16 from the ustring16.
//! Comparison is done on raw UTF-16 code units. An empty toRemove leaves the string untouched.
2334 //! \param toRemove The string to remove.
2335 //! \return A reference to our current string.
2336 ustring16<TAlloc>& remove(const ustring16<TAlloc>& toRemove)
2338 u32 size = toRemove.size_raw();
2339 if (size == 0) return *this;
2341 const uchar16_t* tra = toRemove.c_str();
2344 for (u32 i=0; i<=used; ++i)
2349 if (array[i + j] != tra[j])
// Kept code units are compacted towards the front of the array.
2360 array[pos++] = array[i];
2368 //! Removes characters from the ustring16.
//! Every character of this string is checked against the characters of the given set.
2369 //! \param characters The characters to remove.
2370 //! \return A reference to our current string.
2371 ustring16<TAlloc>& removeChars(const ustring16<TAlloc>& characters)
2373 if (characters.size_raw() == 0)
2378 const_iterator iter(characters);
2379 for (u32 i=0; i<=used; ++i)
// Anything that is not a high surrogate is handled as a single code unit.
2382 if (!UTF16_IS_SURROGATE_HI(array[i]))
2384 else if (i + 1 <= used)
2386 // Convert the surrogate pair into a single UTF-32 character.
2387 uc32 = unicode::toUTF32(array[i], array[i+1]);
// Number of code units the current character occupies.
2389 u32 len2 = (uc32 > 0xFFFF ? 2 : 1);
2393 while (!iter.atEnd())
2395 uchar32_t c = *iter;
2398 found += (c > 0xFFFF ? 2 : 1); // Remove characters equal to the size of c as a UTF-16 character.
// Kept characters are compacted towards the front of the array.
2407 array[pos++] = array[i];
2409 array[pos++] = array[++i];
2417 //! Trims the ustring16.
2418 //! Removes the specified characters (by default, Latin-1 whitespace) from the beginning and the end of the ustring16.
2419 //! \param whitespace The characters that are to be considered as whitespace.
2420 //! \return A reference to our current string.
2421 ustring16<TAlloc>& trim(const ustring16<TAlloc>& whitespace = " \t\n\r")
// The find*CharNotInList helpers take UTF-32 lists, so convert the set first.
2423 core::array<uchar32_t> utf32white = whitespace.toUTF32();
2425 // find start and end of the substring without the specified characters
// NOTE(review): the list length passed is whitespace.used + 1 (raw UTF-16 units plus one), not the number of UTF-32 entries - confirm this is intended.
2426 const s32 begin = findFirstCharNotInList(utf32white.const_pointer(), whitespace.used + 1);
2430 const s32 end = findLastCharNotInList(utf32white.const_pointer(), whitespace.used + 1);
// Keep the inclusive range [begin, end] and assign it back to this string.
2432 return (*this = subString(begin, (end +1) - begin));
2436 //! Erases a character from the ustring16.
2437 //! May be slow, because all elements following after the erased element have to be copied.
2438 //! \param index Index of element to be erased.
2439 //! \return A reference to our current string.
2440 ustring16<TAlloc>& erase(u32 index)
2442 _IRR_DEBUG_BREAK_IF(index>used) // access violation
2444 iterator i(*this, index);
// A character above 0xFFFF occupies two code units (a surrogate pair).
2447 u32 len = (t > 0xFFFF ? 2 : 1);
// Shift everything behind the erased character down by len code units.
2449 for (u32 j = static_cast<u32>(i.getPos()) + len; j <= used; ++j)
2450 array[j - len] = array[j];
2459 //! Validate the existing ustring16, checking for valid surrogate pairs and checking for proper termination.
//! Offending code units are overwritten with unicode::UTF_REPLACEMENT_CHARACTER.
2460 //! \return A reference to our current string.
2461 ustring16<TAlloc>& validate()
2463 // Validate all unicode characters.
2464 for (u32 i=0; i<allocated; ++i)
2466 // Terminate on existing null.
2472 if (UTF16_IS_SURROGATE(array[i]))
// A surrogate in the very last slot, or a low surrogate encountered here, is replaced.
2474 if (((i+1) >= allocated) || UTF16_IS_SURROGATE_LO(array[i]))
2475 array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
// A high surrogate that is not followed by a low surrogate is replaced.
2476 else if (UTF16_IS_SURROGATE_HI(array[i]) && !UTF16_IS_SURROGATE_LO(array[i+1]))
2477 array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
// Values in the range 0xFDD0 - 0xFDEF are replaced as well.
2480 if (array[i] >= 0xFDD0 && array[i] <= 0xFDEF)
2481 array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
2488 used = allocated - 1;
2495 //! Gets the last char of the ustring16, or 0.
2496 //! \return The last char of the ustring16 as a UTF-32 value, or 0.
2497 uchar32_t lastChar() const
// A trailing low surrogate means the last character may be a surrogate pair.
2502 if (UTF16_IS_SURROGATE_LO(array[used-1]))
2504 // Make sure we have a paired surrogate.
2508 // Check for an invalid surrogate.
2509 if (!UTF16_IS_SURROGATE_HI(array[used-2]))
2512 // Convert the surrogate pair into a single UTF-32 character.
2513 return unicode::toUTF32(array[used-2], array[used-1]);
// Otherwise the last code unit is the character itself.
2517 return array[used-1];
2522 //! Split the ustring16 into parts.
2523 /** This method will split a ustring16 at certain delimiter characters
2524 into the container passed in as reference. The type of the container
2525 has to be given as template parameter. It must provide a push_back and
2527 \param ret The result container
2528 \param c Array of UTF-32 delimiter characters
2529 \param count Number of delimiter characters
2530 \param ignoreEmptyTokens Flag to avoid empty substrings in the result
2531 container. If two delimiters occur without a character in between, an
2532 empty substring would be placed in the result. If this flag is set,
2533 only non-empty strings are stored.
2534 \param keepSeparators Flag which allows to add the separator to the
2535 result ustring16. If this flag is true, the concatenation of the
2536 substrings results in the original ustring16. Otherwise, only the
2537 characters between the delimiters are returned.
2538 \return The number of resulting substrings
2540 template<class container>
2541 u32 split(container& ret, const uchar32_t* const c, u32 count=1, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
2546 const_iterator i(*this);
2547 const u32 oldSize=ret.size();
2551 bool lastWasSeparator = false;
2555 bool foundSeparator = false;
2556 for (u32 j=0; j<count; ++j)
2560 if ((!ignoreEmptyTokens || pos - lastpos != 0) &&
2562 ret.push_back(ustring16<TAlloc>(&array[lastpospos], pos - lastpos));
2563 foundSeparator = true;
2564 lastpos = (keepSeparators ? pos : pos + 1);
2565 lastpospos = (keepSeparators ? i.getPos() : i.getPos() + 1);
2569 lastWasSeparator = foundSeparator;
2575 ret.push_back(ustring16<TAlloc>(&array[lastpospos], s - lastpos));
2576 return ret.size()-oldSize;
2580 //! Split the ustring16 into parts.
2581 /** This method will split a ustring16 at certain delimiter characters
2582 into the container passed in as reference. The type of the container
2583 has to be given as template parameter. It must provide a push_back and
2585 \param ret The result container
2586 \param c A unicode string of delimiter characters; it is converted to UTF-32 and passed on to the array-based split()
2587 \param ignoreEmptyTokens Flag to avoid empty substrings in the result
2588 container. If two delimiters occur without a character in between, an
2589 empty substring would be placed in the result. If this flag is set,
2590 only non-empty strings are stored.
2591 \param keepSeparators Flag which allows to add the separator to the
2592 result ustring16. If this flag is true, the concatenation of the
2593 substrings results in the original ustring16. Otherwise, only the
2594 characters between the delimiters are returned.
2595 \return The number of resulting substrings
2597 template<class container>
2598 u32 split(container& ret, const ustring16<TAlloc>& c, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
2600 core::array<uchar32_t> v = c.toUTF32();
2601 return split(ret, v.pointer(), v.size(), ignoreEmptyTokens, keepSeparators);
2605 //! Gets the size of the allocated memory buffer for the string.
//! NOTE(review): presumably counted in UTF-16 code units rather than bytes - confirm.
2606 //! \return The size of the allocated memory buffer.
2607 u32 capacity() const
2613 //! Returns the raw number of UTF-16 code units in the string, which includes the individual surrogates.
2614 //! \return The raw number of UTF-16 code units, excluding the trailing NUL.
2615 u32 size_raw() const
2621 //! Inserts a character into the string.
2622 //! \param c The character to insert, as a UTF-32 value.
2623 //! \param pos The position to insert the character.
2624 //! \return A reference to our current string.
2625 ustring16<TAlloc>& insert(uchar32_t c, u32 pos)
// A character above 0xFFFF needs two UTF-16 code units.
2627 u8 len = (c > 0xFFFF ? 2 : 1);
2629 if (used + len >= allocated)
2630 reallocate(used + len);
// Translate the character position into a raw array position through an iterator,
// then shift the tail up by len code units.
2634 iterator iter(*this, pos);
2635 for (u32 i = used - 2; i > iter.getPos(); --i)
2636 array[i] = array[i - len];
2640 // c will be multibyte, so split it up into a surrogate pair.
2641 uchar16_t x = static_cast<uchar16_t>(c);
2642 uchar16_t vh = UTF16_HI_SURROGATE | ((((c >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
2643 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
2644 array[iter.getPos()] = vh;
2645 array[iter.getPos()+1] = vl;
2649 array[iter.getPos()] = static_cast<uchar16_t>(c);
2656 //! Inserts a string into the string.
//! An empty string is ignored and the current string is returned unchanged.
2657 //! \param c The string to insert.
2658 //! \param pos The position to insert the string.
2659 //! \return A reference to our current string.
2660 ustring16<TAlloc>& insert(const ustring16<TAlloc>& c, u32 pos)
// Length of the inserted string in raw UTF-16 code units.
2662 u32 len = c.size_raw();
2663 if (len == 0) return *this;
2665 if (used + len >= allocated)
2666 reallocate(used + len);
// Translate the character position into a raw array position through an iterator,
// then shift the tail up by len code units.
2670 iterator iter(*this, pos);
2671 for (u32 i = used - 2; i > iter.getPos() + len; --i)
2672 array[i] = array[i - len];
2674 const uchar16_t* s = c.c_str();
2675 for (u32 i = 0; i < len; ++i)
2686 //! Inserts a raw UTF-16 code unit into the string.
2687 //! \param c The code unit to insert.
2688 //! \param pos The position to insert at, as an index into the raw UTF-16 array.
2689 //! \return A reference to our current string.
2690 ustring16<TAlloc>& insert_raw(uchar16_t c, u32 pos)
2692 if (used + 1 >= allocated)
2693 reallocate(used + 1);
// Shift the tail up by one code unit to open a slot at pos.
2697 for (u32 i = used - 1; i > pos; --i)
2698 array[i] = array[i - 1];
2706 //! Removes a raw UTF-16 code unit from the string.
2707 //! \param pos Position of the code unit to remove, as an index into the raw UTF-16 array.
2708 //! \return A reference to our current string.
2709 ustring16<TAlloc>& erase_raw(u32 pos)
// Shift everything behind pos down by one code unit.
// NOTE(review): at i == used this reads array[used + 1] - confirm the buffer always holds that element.
2711 for (u32 i=pos; i<=used; ++i)
2713 array[i] = array[i + 1];
2721 //! Replaces a raw UTF-16 code unit in the string.
2722 //! \param c The new code unit.
2723 //! \param pos The position of the code unit to replace.
2724 //! \return A reference to our current string.
2725 ustring16<TAlloc>& replace_raw(uchar16_t c, u32 pos)
2732 //! Returns an iterator to the beginning of the string.
2733 //! \return An iterator positioned at index 0 of the string.
2736 iterator i(*this, 0);
2741 //! Returns a constant iterator to the beginning of the string.
2742 //! \return A constant iterator positioned at index 0 of the string.
2743 const_iterator begin() const
2745 const_iterator i(*this, 0);
2750 //! Returns a constant iterator to the beginning of the string.
2751 //! \return A constant iterator positioned at index 0 of the string.
2752 const_iterator cbegin() const
2754 const_iterator i(*this, 0);
2759 //! Returns an iterator to the end of the string.
2760 //! \return An iterator to the end of the string.
// The iterator is created at index 0; presumably it is moved to the end before being returned - confirm.
2763 iterator i(*this, 0);
2769 //! Returns a constant iterator to the end of the string.
2770 //! \return A constant iterator to the end of the string.
2771 const_iterator end() const
// The iterator is created at index 0; presumably it is moved to the end before being returned - confirm.
2773 const_iterator i(*this, 0);
2779 //! Returns a constant iterator to the end of the string.
2780 //! \return A constant iterator to the end of the string.
2781 const_iterator cend() const
// The iterator is created at index 0; presumably it is moved to the end before being returned - confirm.
2783 const_iterator i(*this, 0);
2789 //! Converts the string to a UTF-8 encoded string.
2790 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2791 //! \return A string containing the UTF-8 encoded string.
2792 core::string<uchar8_t> toUTF8_s(const bool addBOM = false) const
2794 core::string<uchar8_t> ret;
// Reserve four bytes per UTF-16 code unit, plus room for the optional BOM and one extra element.
2795 ret.reserve(used * 4 + (addBOM ? unicode::BOM_UTF8_LEN : 0) + 1);
2796 const_iterator iter(*this, 0);
2798 // Add the byte order mark if the user wants it.
2801 ret.append(unicode::BOM_ENCODE_UTF8[0]);
2802 ret.append(unicode::BOM_ENCODE_UTF8[1]);
2803 ret.append(unicode::BOM_ENCODE_UTF8[2]);
2806 while (!iter.atEnd())
2808 uchar32_t c = *iter;
// Four-byte form: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
2811 uchar8_t b1 = (0x1E << 3) | ((c >> 18) & 0x7);
2812 uchar8_t b2 = (0x2 << 6) | ((c >> 12) & 0x3F);
2813 uchar8_t b3 = (0x2 << 6) | ((c >> 6) & 0x3F);
2814 uchar8_t b4 = (0x2 << 6) | (c & 0x3F);
// Three-byte form: 1110xxxx 10xxxxxx 10xxxxxx.
2822 uchar8_t b1 = (0xE << 4) | ((c >> 12) & 0xF);
2823 uchar8_t b2 = (0x2 << 6) | ((c >> 6) & 0x3F);
2824 uchar8_t b3 = (0x2 << 6) | (c & 0x3F);
// Two-byte form: 110xxxxx 10xxxxxx.
2831 uchar8_t b1 = (0x6 << 5) | ((c >> 6) & 0x1F);
2832 uchar8_t b2 = (0x2 << 6) | (c & 0x3F);
// Single-byte form: the value is stored unchanged.
2838 ret.append(static_cast<uchar8_t>(c));
2846 //! Converts the string to a UTF-8 encoded string array.
2847 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2848 //! \return An array containing the UTF-8 encoded string.
2849 core::array<uchar8_t> toUTF8(const bool addBOM = false) const
// Size the array for four bytes per UTF-16 code unit, plus room for the optional BOM and one extra element.
2851 core::array<uchar8_t> ret(used * 4 + (addBOM ? unicode::BOM_UTF8_LEN : 0) + 1);
2852 const_iterator iter(*this, 0);
2854 // Add the byte order mark if the user wants it.
2857 ret.push_back(unicode::BOM_ENCODE_UTF8[0]);
2858 ret.push_back(unicode::BOM_ENCODE_UTF8[1]);
2859 ret.push_back(unicode::BOM_ENCODE_UTF8[2]);
2862 while (!iter.atEnd())
2864 uchar32_t c = *iter;
// Four-byte form: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
2867 uchar8_t b1 = (0x1E << 3) | ((c >> 18) & 0x7);
2868 uchar8_t b2 = (0x2 << 6) | ((c >> 12) & 0x3F);
2869 uchar8_t b3 = (0x2 << 6) | ((c >> 6) & 0x3F);
2870 uchar8_t b4 = (0x2 << 6) | (c & 0x3F);
// Three-byte form: 1110xxxx 10xxxxxx 10xxxxxx.
2878 uchar8_t b1 = (0xE << 4) | ((c >> 12) & 0xF);
2879 uchar8_t b2 = (0x2 << 6) | ((c >> 6) & 0x3F);
2880 uchar8_t b3 = (0x2 << 6) | (c & 0x3F);
// Two-byte form: 110xxxxx 10xxxxxx.
2887 uchar8_t b1 = (0x6 << 5) | ((c >> 6) & 0x1F);
2888 uchar8_t b2 = (0x2 << 6) | (c & 0x3F);
// Single-byte form: the value is stored unchanged.
2894 ret.push_back(static_cast<uchar8_t>(c));
2903 #ifdef USTRING_CPP0X_NEWLITERALS // C++0x
2904 //! Converts the string to a UTF-16 encoded string.
2905 //! \param endian The desired endianness of the string.
2906 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2907 //! \return A string containing the UTF-16 encoded string.
2908 core::string<char16_t> toUTF16_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2910 core::string<char16_t> ret;
2911 ret.reserve(used + (addBOM ? unicode::BOM_UTF16_LEN : 0) + 1);
2913 // Add the BOM if specified.
// Native order stores the BOM value directly; explicit orders write the encoded BOM byte by byte.
2916 if (endian == unicode::EUTFEE_NATIVE)
2917 ret[0] = unicode::BOM;
2918 else if (endian == unicode::EUTFEE_LITTLE)
2920 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ret.c_str());
2921 *ptr8++ = unicode::BOM_ENCODE_UTF16_LE[0];
2922 *ptr8 = unicode::BOM_ENCODE_UTF16_LE[1];
2926 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ret.c_str());
2927 *ptr8++ = unicode::BOM_ENCODE_UTF16_BE[0];
2928 *ptr8 = unicode::BOM_ENCODE_UTF16_BE[1];
// Byte-swap the result only when an explicit, non-native order was requested.
2933 if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
2935 char16_t* ptr = ret.c_str();
2936 for (u32 i = 0; i < ret.size(); ++i)
// NOTE(review): ptr is incremented on the left and read on the right of the same assignment; the evaluation order is only defined from C++17 on - confirm.
2937 *ptr++ = unicode::swapEndian16(*ptr);
2944 //! Converts the string to a UTF-16 encoded string array.
2945 //! Unfortunately, no toUTF16_s() version exists due to limitations with Irrlicht's string class.
2946 //! \param endian The desired endianness of the string.
2947 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2948 //! \return An array containing the UTF-16 encoded string.
2949 core::array<uchar16_t> toUTF16(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
// Size the array for every code unit, the optional BOM and one extra element.
2951 core::array<uchar16_t> ret(used + (addBOM ? unicode::BOM_UTF16_LEN : 0) + 1);
2952 uchar16_t* ptr = ret.pointer();
2954 // Add the BOM if specified.
// Native order stores the BOM value directly; explicit orders write the encoded BOM byte by byte.
2957 if (endian == unicode::EUTFEE_NATIVE)
2958 *ptr = unicode::BOM;
2959 else if (endian == unicode::EUTFEE_LITTLE)
2961 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ptr);
2962 *ptr8++ = unicode::BOM_ENCODE_UTF16_LE[0];
2963 *ptr8 = unicode::BOM_ENCODE_UTF16_LE[1];
2967 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ptr);
2968 *ptr8++ = unicode::BOM_ENCODE_UTF16_BE[0];
2969 *ptr8 = unicode::BOM_ENCODE_UTF16_BE[1];
// Copy the raw code units in one go.
2974 memcpy((void*)ptr, (void*)array, used * sizeof(uchar16_t));
// Byte-swap the result only when an explicit, non-native order was requested.
2975 if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
// NOTE(review): this loop covers used + 1 elements while the memcpy above copied only used - confirm the extra element is intended.
2977 for (u32 i = 0; i <= used; ++i)
2978 ptr[i] = unicode::swapEndian16(ptr[i]);
// The array was filled through its raw pointer, so its element count is set explicitly.
2980 ret.set_used(used + (addBOM ? unicode::BOM_UTF16_LEN : 0));
2986 #ifdef USTRING_CPP0X_NEWLITERALS // C++0x
2987 //! Converts the string to a UTF-32 encoded string.
2988 //! \param endian The desired endianness of the string.
2989 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2990 //! \return A string containing the UTF-32 encoded string.
2991 core::string<char32_t> toUTF32_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2993 core::string<char32_t> ret;
// One element per character, plus one extra and room for the optional BOM.
2994 ret.reserve(size() + 1 + (addBOM ? unicode::BOM_UTF32_LEN : 0));
2995 const_iterator iter(*this, 0);
2997 // Add the BOM if specified.
3000 if (endian == unicode::EUTFEE_NATIVE)
3001 ret.append(unicode::BOM);
// For an explicit byte order the encoded BOM is assembled byte by byte.
3010 if (endian == unicode::EUTFEE_LITTLE)
3012 t.chunk[0] = unicode::BOM_ENCODE_UTF32_LE[0];
3013 t.chunk[1] = unicode::BOM_ENCODE_UTF32_LE[1];
3014 t.chunk[2] = unicode::BOM_ENCODE_UTF32_LE[2];
3015 t.chunk[3] = unicode::BOM_ENCODE_UTF32_LE[3];
3019 t.chunk[0] = unicode::BOM_ENCODE_UTF32_BE[0];
3020 t.chunk[1] = unicode::BOM_ENCODE_UTF32_BE[1];
3021 t.chunk[2] = unicode::BOM_ENCODE_UTF32_BE[2];
3022 t.chunk[3] = unicode::BOM_ENCODE_UTF32_BE[3];
3028 while (!iter.atEnd())
3030 uchar32_t c = *iter;
// Byte-swap each character only when an explicit, non-native order was requested.
3031 if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
3032 c = unicode::swapEndian32(c);
3041 //! Converts the string to a UTF-32 encoded string array.
3042 //! Unfortunately, no toUTF32_s() version exists due to limitations with Irrlicht's string class.
3043 //! \param endian The desired endianness of the string.
3044 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3045 //! \return An array containing the UTF-32 encoded string.
3046 core::array<uchar32_t> toUTF32(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3048 core::array<uchar32_t> ret(size() + (addBOM ? unicode::BOM_UTF32_LEN : 0) + 1);
3049 const_iterator iter(*this, 0);
3051 // Add the BOM if specified.
3054 if (endian == unicode::EUTFEE_NATIVE)
3055 ret.push_back(unicode::BOM);
3064 if (endian == unicode::EUTFEE_LITTLE)
3066 t.chunk[0] = unicode::BOM_ENCODE_UTF32_LE[0];
3067 t.chunk[1] = unicode::BOM_ENCODE_UTF32_LE[1];
3068 t.chunk[2] = unicode::BOM_ENCODE_UTF32_LE[2];
3069 t.chunk[3] = unicode::BOM_ENCODE_UTF32_LE[3];
3073 t.chunk[0] = unicode::BOM_ENCODE_UTF32_BE[0];
3074 t.chunk[1] = unicode::BOM_ENCODE_UTF32_BE[1];
3075 t.chunk[2] = unicode::BOM_ENCODE_UTF32_BE[2];
3076 t.chunk[3] = unicode::BOM_ENCODE_UTF32_BE[3];
3078 ret.push_back(t.full);
3083 while (!iter.atEnd())
3085 uchar32_t c = *iter;
3086 if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
3087 c = unicode::swapEndian32(c);
3095 //! Converts the string to a wchar_t encoded string.
3096 /** The size of a wchar_t changes depending on the platform. This function will store a
3097 correct UTF-8, -16, or -32 encoded string depending on the size of a wchar_t. **/
3098 //! \param endian The desired endianness of the string.
3099 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3100 //! \return A string containing the wchar_t encoded string.
3101 core::string<wchar_t> toWCHAR_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3103 if (sizeof(wchar_t) == 4)
3105 core::array<uchar32_t> a(toUTF32(endian, addBOM));
3106 core::stringw ret(a.pointer());
3109 else if (sizeof(wchar_t) == 2)
3111 if (endian == unicode::EUTFEE_NATIVE && addBOM == false)
3113 core::stringw ret(array);
3118 core::array<uchar16_t> a(toUTF16(endian, addBOM));
3119 core::stringw ret(a.pointer());
3123 else if (sizeof(wchar_t) == 1)
3125 core::array<uchar8_t> a(toUTF8(addBOM));
3126 core::stringw ret(a.pointer());
3130 // Shouldn't happen.
3131 return core::stringw();
3135 //! Converts the string to a wchar_t encoded string array.
3136 /** The size of a wchar_t changes depending on the platform. This function will store a
3137 correct UTF-8, -16, or -32 encoded string depending on the size of a wchar_t. **/
3138 //! \param endian The desired endianness of the string.
3139 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3140 //! \return An array containing the wchar_t encoded string.
3141 core::array<wchar_t> toWCHAR(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3143 if (sizeof(wchar_t) == 4)
3145 core::array<uchar32_t> a(toUTF32(endian, addBOM));
3146 core::array<wchar_t> ret(a.size());
3147 ret.set_used(a.size());
3148 memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar32_t));
3151 if (sizeof(wchar_t) == 2)
3153 if (endian == unicode::EUTFEE_NATIVE && addBOM == false)
3155 core::array<wchar_t> ret(used);
3157 memcpy((void*)ret.pointer(), (void*)array, used * sizeof(uchar16_t));
3162 core::array<uchar16_t> a(toUTF16(endian, addBOM));
3163 core::array<wchar_t> ret(a.size());
3164 ret.set_used(a.size());
3165 memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar16_t));
3169 if (sizeof(wchar_t) == 1)
3171 core::array<uchar8_t> a(toUTF8(addBOM));
3172 core::array<wchar_t> ret(a.size());
3173 ret.set_used(a.size());
3174 memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar8_t));
3178 // Shouldn't happen.
3179 return core::array<wchar_t>();
3182 //! Converts the string to a properly encoded io::path string.
3183 //! \param endian The desired endianness of the string.
3184 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3185 //! \return An io::path string containing the properly encoded string.
3186 io::path toPATH_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3188 #if defined(_IRR_WCHAR_FILESYSTEM)
3189 return toWCHAR_s(endian, addBOM);
3191 return toUTF8_s(addBOM);
3195 //! Loads an unknown stream of data.
3196 //! Will attempt to determine if the stream is unicode data. Useful for loading from files.
3197 //! \param data The data stream to load from.
3198 //! \param data_size The length of the data string.
3199 //! \return A reference to our current string.
3200 ustring16<TAlloc>& loadDataStream(const char* data, size_t data_size)
3202 // Clear our string.
3207 unicode::EUTF_ENCODE e = unicode::determineUnicodeBOM(data);
3211 case unicode::EUTFE_UTF8:
3212 append((uchar8_t*)data, data_size);
3215 case unicode::EUTFE_UTF16:
3216 case unicode::EUTFE_UTF16_BE:
3217 case unicode::EUTFE_UTF16_LE:
3218 append((uchar16_t*)data, data_size / 2);
3221 case unicode::EUTFE_UTF32:
3222 case unicode::EUTFE_UTF32_BE:
3223 case unicode::EUTFE_UTF32_LE:
3224 append((uchar32_t*)data, data_size / 4);
3231 //! Gets the encoding of the Unicode string this class contains.
3232 //! \return An enum describing the current encoding of this string.
3233 const unicode::EUTF_ENCODE getEncoding() const
3238 //! Gets the endianness of the Unicode string this class contains.
3239 //! \return An enum describing the endianness of this string.
3240 const unicode::EUTF_ENDIAN getEndianness() const
3242 if (encoding == unicode::EUTFE_UTF16_LE ||
3243 encoding == unicode::EUTFE_UTF32_LE)
3244 return unicode::EUTFEE_LITTLE;
3245 else return unicode::EUTFEE_BIG;
3250 //! Reallocate the string, making it bigger or smaller.
3251 //! \param new_size The new size of the string.
3252 void reallocate(u32 new_size)
3254 uchar16_t* old_array = array;
3256 array = allocator.allocate(new_size + 1); //new u16[new_size];
3257 allocated = new_size + 1;
3258 if (old_array == 0) return;
3260 u32 amount = used < new_size ? used : new_size;
3261 for (u32 i=0; i<=amount; ++i)
3262 array[i] = old_array[i];
3264 if (allocated <= used)
3265 used = allocated - 1;
3269 allocator.deallocate(old_array); // delete [] old_array;
3272 //--- member variables
3275 unicode::EUTF_ENCODE encoding;
3279 //irrAllocator<uchar16_t> allocator;
//! Convenience name for a ustring16 whose storage comes from irrAllocator<uchar16_t>.
typedef ustring16<irrAllocator<uchar16_t> > ustring;
3285 //! Appends two ustring16s.
3286 template <typename TAlloc>
3287 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const ustring16<TAlloc>& right)
3289 ustring16<TAlloc> ret(left);
3295 //! Appends a ustring16 and a null-terminated unicode string.
3296 template <typename TAlloc, class B>
3297 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const B* const right)
3299 ustring16<TAlloc> ret(left);
3305 //! Appends a ustring16 and a null-terminated unicode string.
3306 template <class B, typename TAlloc>
3307 inline ustring16<TAlloc> operator+(const B* const left, const ustring16<TAlloc>& right)
3309 ustring16<TAlloc> ret(left);
3315 //! Appends a ustring16 and an Irrlicht string.
3316 template <typename TAlloc, typename B, typename BAlloc>
3317 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const string<B, BAlloc>& right)
3319 ustring16<TAlloc> ret(left);
3325 //! Appends a ustring16 and an Irrlicht string.
3326 template <typename TAlloc, typename B, typename BAlloc>
3327 inline ustring16<TAlloc> operator+(const string<B, BAlloc>& left, const ustring16<TAlloc>& right)
3329 ustring16<TAlloc> ret(left);
3335 //! Appends a ustring16 and a std::basic_string.
3336 template <typename TAlloc, typename B, typename A, typename BAlloc>
3337 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const std::basic_string<B, A, BAlloc>& right)
3339 ustring16<TAlloc> ret(left);
3345 //! Appends a ustring16 and a std::basic_string.
3346 template <typename TAlloc, typename B, typename A, typename BAlloc>
3347 inline ustring16<TAlloc> operator+(const std::basic_string<B, A, BAlloc>& left, const ustring16<TAlloc>& right)
3349 ustring16<TAlloc> ret(left);
3355 //! Appends a ustring16 and a char.
3356 template <typename TAlloc>
3357 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const char right)
3359 ustring16<TAlloc> ret(left);
3365 //! Appends a ustring16 and a char.
3366 template <typename TAlloc>
3367 inline ustring16<TAlloc> operator+(const char left, const ustring16<TAlloc>& right)
3369 ustring16<TAlloc> ret(left);
#ifdef USTRING_CPP0X_NEWLITERALS
//! Concatenates a ustring16 with a single uchar32_t code point.
//! \param left The string that forms the start of the result.
//! \param right The code point appended after left.
//! \return A new ustring16 holding left followed by right.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const uchar32_t right)
{
	ustring16<TAlloc> joined(left);
	joined += right;
	return joined;
}
#endif
#ifdef USTRING_CPP0X_NEWLITERALS
//! Concatenates a single uchar32_t code point with a ustring16.
//! \param left The code point that forms the start of the result.
//! \param right The string appended after left.
//! \return A new ustring16 built from left, with right appended.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(const uchar32_t left, const ustring16<TAlloc>& right)
{
	ustring16<TAlloc> joined(left);
	joined += right;
	return joined;
}
#endif
3397 //! Appends a ustring16 and a short.
3398 template <typename TAlloc>
3399 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const short right)
3401 ustring16<TAlloc> ret(left);
3402 ret += core::stringc(right);
3407 //! Appends a ustring16 and a short.
3408 template <typename TAlloc>
3409 inline ustring16<TAlloc> operator+(const short left, const ustring16<TAlloc>& right)
3411 ustring16<TAlloc> ret((core::stringc(left)));
3417 //! Appends a ustring16 and an unsigned short.
3418 template <typename TAlloc>
3419 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned short right)
3421 ustring16<TAlloc> ret(left);
3422 ret += core::stringc(right);
3427 //! Appends a ustring16 and an unsigned short.
3428 template <typename TAlloc>
3429 inline ustring16<TAlloc> operator+(const unsigned short left, const ustring16<TAlloc>& right)
3431 ustring16<TAlloc> ret((core::stringc(left)));
3437 //! Appends a ustring16 and an int.
3438 template <typename TAlloc>
3439 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const int right)
3441 ustring16<TAlloc> ret(left);
3442 ret += core::stringc(right);
3447 //! Appends a ustring16 and an int.
3448 template <typename TAlloc>
3449 inline ustring16<TAlloc> operator+(const int left, const ustring16<TAlloc>& right)
3451 ustring16<TAlloc> ret((core::stringc(left)));
3457 //! Appends a ustring16 and an unsigned int.
3458 template <typename TAlloc>
3459 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned int right)
3461 ustring16<TAlloc> ret(left);
3462 ret += core::stringc(right);
3467 //! Appends a ustring16 and an unsigned int.
3468 template <typename TAlloc>
3469 inline ustring16<TAlloc> operator+(const unsigned int left, const ustring16<TAlloc>& right)
3471 ustring16<TAlloc> ret((core::stringc(left)));
3477 //! Appends a ustring16 and a long.
3478 template <typename TAlloc>
3479 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const long right)
3481 ustring16<TAlloc> ret(left);
3482 ret += core::stringc(right);
3487 //! Appends a ustring16 and a long.
3488 template <typename TAlloc>
3489 inline ustring16<TAlloc> operator+(const long left, const ustring16<TAlloc>& right)
3491 ustring16<TAlloc> ret((core::stringc(left)));
3497 //! Appends a ustring16 and an unsigned long.
3498 template <typename TAlloc>
3499 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned long right)
3501 ustring16<TAlloc> ret(left);
3502 ret += core::stringc(right);
3507 //! Appends a ustring16 and an unsigned long.
3508 template <typename TAlloc>
3509 inline ustring16<TAlloc> operator+(const unsigned long left, const ustring16<TAlloc>& right)
3511 ustring16<TAlloc> ret((core::stringc(left)));
3517 //! Appends a ustring16 and a float.
3518 template <typename TAlloc>
3519 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const float right)
3521 ustring16<TAlloc> ret(left);
3522 ret += core::stringc(right);
3527 //! Appends a ustring16 and a float.
3528 template <typename TAlloc>
3529 inline ustring16<TAlloc> operator+(const float left, const ustring16<TAlloc>& right)
3531 ustring16<TAlloc> ret((core::stringc(left)));
3537 //! Appends a ustring16 and a double.
3538 template <typename TAlloc>
3539 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const double right)
3541 ustring16<TAlloc> ret(left);
3542 ret += core::stringc(right);
3547 //! Appends a ustring16 and a double.
3548 template <typename TAlloc>
3549 inline ustring16<TAlloc> operator+(const double left, const ustring16<TAlloc>& right)
3551 ustring16<TAlloc> ret((core::stringc(left)));
#ifdef USTRING_CPP0X
//! Appends two ustring16s, reusing the storage of the temporary on the right.
//! \param left The string that forms the start of the result.
//! \param right A temporary; left is inserted at its front and its storage is moved into the result.
//! \return The concatenation, by value.  Returning a reference to \p right would dangle
//! as soon as the temporary it refers to is destroyed.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, ustring16<TAlloc>&& right)
{
	right.insert(left, 0);
	return std::move(right);
}
#endif
#ifdef USTRING_CPP0X
//! Appends two ustring16s, reusing the storage of the temporary on the left.
//! \param left A temporary; right is appended to it and its storage is moved into the result.
//! \param right The string appended after left.
//! \return The concatenation, by value.  Returning a reference to \p left would dangle
//! as soon as the temporary it refers to is destroyed.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const ustring16<TAlloc>& right)
{
	left.append(right);
	return std::move(left);
}
#endif
#ifdef USTRING_CPP0X
//! Appends two temporary ustring16s, reusing the storage of one of them.
//! \param left A temporary that forms the start of the result.
//! \param right A temporary appended after left.
//! \return The concatenation, by value.  Returning a reference to either parameter would
//! dangle as soon as the temporary it refers to is destroyed.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, ustring16<TAlloc>&& right)
{
	// Grow left when right fits into left's spare capacity, or when right
	// has too little spare capacity to take left at its front; otherwise
	// put left at the front of right.
	if ((right.size_raw() <= left.capacity() - left.size_raw()) ||
		(right.capacity() - right.size_raw() < left.size_raw()))
	{
		left.append(right);
		return std::move(left);
	}

	right.insert(left, 0);
	return std::move(right);
}
#endif
#ifdef USTRING_CPP0X
//! Appends a null-terminated unicode string to a temporary ustring16.
//! \param left A temporary; right is appended to it and its storage is moved into the result.
//! \param right The null-terminated string appended after left.
//! \return The concatenation, by value.  Returning a reference to \p left would dangle
//! as soon as the temporary it refers to is destroyed.
template <typename TAlloc, class B>
inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const B* const right)
{
	left.append(right);
	return std::move(left);
}
#endif
#ifdef USTRING_CPP0X
//! Puts a null-terminated unicode string in front of a temporary ustring16.
//! \param left The null-terminated string that forms the start of the result.
//! \param right A temporary; left is inserted at its front and its storage is moved into the result.
//! \return The concatenation, by value.  Returning a reference to \p right would dangle
//! as soon as the temporary it refers to is destroyed.
template <class B, typename TAlloc>
inline ustring16<TAlloc> operator+(const B* const left, ustring16<TAlloc>&& right)
{
	right.insert(left, 0);
	return std::move(right);
}
#endif
#ifdef USTRING_CPP0X
//! Puts an Irrlicht string in front of a temporary ustring16.
//! \param left The Irrlicht string that forms the start of the result.
//! \param right A temporary; left is inserted at its front and its storage is moved into the result.
//! \return The concatenation, by value.  Returning a reference to \p right would dangle
//! as soon as the temporary it refers to is destroyed.
template <typename TAlloc, typename B, typename BAlloc>
inline ustring16<TAlloc> operator+(const string<B, BAlloc>& left, ustring16<TAlloc>&& right)
{
	right.insert(left, 0);
	return std::move(right);
}
#endif
#ifdef USTRING_CPP0X
//! Appends an Irrlicht string to a temporary ustring16.
//! \param left A temporary; right is appended to it and its storage is moved into the result.
//! \param right The Irrlicht string appended after left.
//! \return The concatenation, by value.  Returning a reference to \p left would dangle
//! as soon as the temporary it refers to is destroyed.
template <typename TAlloc, typename B, typename BAlloc>
inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const string<B, BAlloc>& right)
{
	left.append(right);
	return std::move(left);
}
#endif
#ifdef USTRING_CPP0X
//! Puts a std::basic_string in front of a temporary ustring16.
//! \param left The std::basic_string that forms the start of the result; it is converted to a ustring16 first.
//! \param right A temporary; left is inserted at its front and its storage is moved into the result.
//! \return The concatenation, by value.  Returning a reference to \p right would dangle
//! as soon as the temporary it refers to is destroyed.
template <typename TAlloc, typename B, typename A, typename BAlloc>
inline ustring16<TAlloc> operator+(const std::basic_string<B, A, BAlloc>& left, ustring16<TAlloc>&& right)
{
	right.insert(core::ustring16<TAlloc>(left), 0);
	return std::move(right);
}
#endif
#ifdef USTRING_CPP0X
//! Appends a std::basic_string to a temporary ustring16.
//! \param left A temporary; right is appended to it and its storage is moved into the result.
//! \param right The std::basic_string appended after left.
//! \return The concatenation, by value.  Returning a reference to \p left would dangle
//! as soon as the temporary it refers to is destroyed.
template <typename TAlloc, typename B, typename A, typename BAlloc>
inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const std::basic_string<B, A, BAlloc>& right)
{
	left.append(right);
	return std::move(left);
}
#endif
#ifdef USTRING_CPP0X
//! Appends a single char to a temporary ustring16.
//! \param left A temporary; the character is appended to it and its storage is moved into the result.
//! \param right The character to append; it is converted to uchar32_t first.
//! \return The resulting string, by value.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const char right)
{
	const uchar32_t codePoint = static_cast<uchar32_t>(right);
	left.append(codePoint);
	return std::move(left);
}
#endif
#ifdef USTRING_CPP0X
//! Puts a single char in front of a temporary ustring16.
//! \param left The character to insert at position 0; it is converted to uchar32_t first.
//! \param right A temporary; its storage is moved into the result.
//! \return The resulting string, by value.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(const char left, ustring16<TAlloc>&& right)
{
	const uchar32_t codePoint = static_cast<uchar32_t>(left);
	right.insert(codePoint, 0);
	return std::move(right);
}
#endif
#if defined(USTRING_CPP0X) && defined(USTRING_CPP0X_NEWLITERALS)
//! Appends a single uchar32_t code point to a temporary ustring16.
//! \param left A temporary; the code point is appended to it and its storage is moved into the result.
//! \param right The code point to append.
//! \return The resulting string, by value.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const uchar32_t right)
{
	left.append(right);
	return std::move(left);
}
#endif
#if defined(USTRING_CPP0X) && defined(USTRING_CPP0X_NEWLITERALS)
//! Puts a single uchar32_t code point in front of a temporary ustring16.
//! \param left The code point to insert at position 0.
//! \param right A temporary; its storage is moved into the result.
//! \return The resulting string, by value.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(const uchar32_t left, ustring16<TAlloc>&& right)
{
	ustring16<TAlloc>& target = right;
	target.insert(left, 0);
	return std::move(target);
}
#endif
#ifdef USTRING_CPP0X
//! Appends the text form of a short to a temporary ustring16.
//! \param left A temporary; the text is appended to it and its storage is moved into the result.
//! \param right The number; it is turned into text through core::stringc.
//! \return The resulting string, by value.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const short right)
{
	const core::stringc digits(right);
	left.append(digits);
	return std::move(left);
}
#endif
#ifdef USTRING_CPP0X
//! Puts the text form of a short in front of a temporary ustring16.
//! \param left The number; it is turned into text through core::stringc.
//! \param right A temporary; the text is inserted at position 0 and its storage is moved into the result.
//! \return The resulting string, by value.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(const short left, ustring16<TAlloc>&& right)
{
	const core::stringc digits(left);
	right.insert(digits, 0);
	return std::move(right);
}
#endif
#ifdef USTRING_CPP0X
//! Appends the text form of an unsigned short to a temporary ustring16.
//! \param left A temporary; the text is appended to it and its storage is moved into the result.
//! \param right The number; it is turned into text through core::stringc.
//! \return The resulting string, by value.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned short right)
{
	const core::stringc digits(right);
	left.append(digits);
	return std::move(left);
}
#endif
#ifdef USTRING_CPP0X
//! Puts the text form of an unsigned short in front of a temporary ustring16.
//! \param left The number; it is turned into text through core::stringc.
//! \param right A temporary; the text is inserted at position 0 and its storage is moved into the result.
//! \return The resulting string, by value.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(const unsigned short left, ustring16<TAlloc>&& right)
{
	const core::stringc digits(left);
	right.insert(digits, 0);
	return std::move(right);
}
#endif
#ifdef USTRING_CPP0X
//! Appends the text form of an int to a temporary ustring16.
//! \param left A temporary; the text is appended to it and its storage is moved into the result.
//! \param right The number; it is turned into text through core::stringc.
//! \return The resulting string, by value.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const int right)
{
	const core::stringc digits(right);
	left.append(digits);
	return std::move(left);
}
#endif
#ifdef USTRING_CPP0X
//! Puts the text form of an int in front of a temporary ustring16.
//! \param left The number; it is turned into text through core::stringc.
//! \param right A temporary; the text is inserted at position 0 and its storage is moved into the result.
//! \return The resulting string, by value.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(const int left, ustring16<TAlloc>&& right)
{
	const core::stringc digits(left);
	right.insert(digits, 0);
	return std::move(right);
}
#endif
#ifdef USTRING_CPP0X
//! Appends the text form of an unsigned int to a temporary ustring16.
//! \param left A temporary; the text is appended to it and its storage is moved into the result.
//! \param right The number; it is turned into text through core::stringc.
//! \return The resulting string, by value.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned int right)
{
	const core::stringc digits(right);
	left.append(digits);
	return std::move(left);
}
#endif
#ifdef USTRING_CPP0X
//! Puts the text form of an unsigned int in front of a temporary ustring16.
//! \param left The number; it is turned into text through core::stringc.
//! \param right A temporary; the text is inserted at position 0 and its storage is moved into the result.
//! \return The resulting string, by value.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(const unsigned int left, ustring16<TAlloc>&& right)
{
	const core::stringc digits(left);
	right.insert(digits, 0);
	return std::move(right);
}
#endif
#ifdef USTRING_CPP0X
//! Appends the text form of a long to a temporary ustring16.
//! \param left A temporary; the text is appended to it and its storage is moved into the result.
//! \param right The number; it is turned into text through core::stringc.
//! \return The resulting string, by value.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const long right)
{
	const core::stringc digits(right);
	left.append(digits);
	return std::move(left);
}
#endif
#ifdef USTRING_CPP0X
//! Puts the text form of a long in front of a temporary ustring16.
//! \param left The number; it is turned into text through core::stringc.
//! \param right A temporary; the text is inserted at position 0 and its storage is moved into the result.
//! \return The resulting string, by value.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(const long left, ustring16<TAlloc>&& right)
{
	const core::stringc digits(left);
	right.insert(digits, 0);
	return std::move(right);
}
#endif
#ifdef USTRING_CPP0X
//! Appends the text form of an unsigned long to a temporary ustring16.
//! \param left A temporary; the text is appended to it and its storage is moved into the result.
//! \param right The number; it is turned into text through core::stringc.
//! \return The resulting string, by value.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned long right)
{
	const core::stringc digits(right);
	left.append(digits);
	return std::move(left);
}
#endif
#ifdef USTRING_CPP0X
//! Puts the text form of an unsigned long in front of a temporary ustring16.
//! \param left The number; it is turned into text through core::stringc.
//! \param right A temporary; the text is inserted at position 0 and its storage is moved into the result.
//! \return The resulting string, by value.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(const unsigned long left, ustring16<TAlloc>&& right)
{
	const core::stringc digits(left);
	right.insert(digits, 0);
	return std::move(right);
}
#endif
#ifdef USTRING_CPP0X
//! Appends the text form of a float to a temporary ustring16.
//! \param left A temporary; the text is appended to it and its storage is moved into the result.
//! \param right The number; it is turned into text through core::stringc.
//! \return The resulting string, by value.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const float right)
{
	const core::stringc digits(right);
	left.append(digits);
	return std::move(left);
}
#endif
#ifdef USTRING_CPP0X
//! Puts the text form of a float in front of a temporary ustring16.
//! \param left The number; it is turned into text through core::stringc.
//! \param right A temporary; the text is inserted at position 0 and its storage is moved into the result.
//! \return The resulting string, by value.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(const float left, ustring16<TAlloc>&& right)
{
	const core::stringc digits(left);
	right.insert(digits, 0);
	return std::move(right);
}
#endif
#ifdef USTRING_CPP0X
//! Appends the text form of a double to a temporary ustring16.
//! \param left A temporary; the text is appended to it and its storage is moved into the result.
//! \param right The number; it is turned into text through core::stringc.
//! \return The resulting string, by value.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const double right)
{
	const core::stringc digits(right);
	left.append(digits);
	return std::move(left);
}
#endif
#ifdef USTRING_CPP0X
//! Puts the text form of a double in front of a temporary ustring16.
//! \param left The number; it is turned into text through core::stringc.
//! \param right A temporary; the text is inserted at position 0 and its storage is moved into the result.
//! \return The resulting string, by value.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(const double left, ustring16<TAlloc>&& right)
{
	const core::stringc digits(left);
	right.insert(digits, 0);
	return std::move(right);
}
#endif
3840 #ifndef USTRING_NO_STL
3841 //! Writes a ustring16 to an ostream.
3842 template <typename TAlloc>
3843 inline std::ostream& operator<<(std::ostream& out, const ustring16<TAlloc>& in)
3845 out << in.toUTF8_s().c_str();
3849 //! Writes a ustring16 to a wostream.
3850 template <typename TAlloc>
3851 inline std::wostream& operator<<(std::wostream& out, const ustring16<TAlloc>& in)
3853 out << in.toWCHAR_s().c_str();
3859 #ifndef USTRING_NO_STL
3864 //! Hashing algorithm for hashing a ustring. Used for things like unordered_maps.
3865 //! Algorithm taken from std::hash<std::string>.
3866 class hash : public std::unary_function<core::ustring, size_t>
3869 size_t operator()(const core::ustring& s) const
3871 size_t ret = 2166136261U;
3873 size_t stride = 1 + s.size_raw() / 10;
3875 core::ustring::const_iterator i = s.begin();
3876 while (i != s.end())
3878 // TODO: Don't force u32 on an x64 OS. Make it agnostic.
3879 ret = 16777619U * ret ^ (size_t)s[(u32)index];
3887 } // end namespace unicode
3891 } // end namespace core
3892 } // end namespace irr