2 Basic Unicode string class for Irrlicht.
3 Copyright (c) 2009-2011 John Norman
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any
7 damages arising from the use of this software.
9 Permission is granted to anyone to use this software for any
10 purpose, including commercial applications, and to alter it and
11 redistribute it freely, subject to the following restrictions:
13 1. The origin of this software must not be misrepresented; you
14 must not claim that you wrote the original software. If you use
15 this software in a product, an acknowledgment in the product
16 documentation would be appreciated but is not required.
18 2. Altered source versions must be plainly marked as such, and
19 must not be misrepresented as being the original software.
21 3. This notice may not be removed or altered from any source
24 The original version of this class can be located at:
25 http://irrlicht.suckerfreegames.com/
28 john@suckerfreegames.com
31 #ifndef __IRR_USTRING_H_INCLUDED__
32 #define __IRR_USTRING_H_INCLUDED__
34 #if (__cplusplus > 199711L) || (_MSC_VER >= 1600) || defined(__GXX_EXPERIMENTAL_CXX0X__)
35 # define USTRING_CPP0X
36 # if defined(__GXX_EXPERIMENTAL_CXX0X__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 5)))
37 # define USTRING_CPP0X_NEWLITERALS
45 #define __BYTE_ORDER 0
46 #define __LITTLE_ENDIAN 0
47 #define __BIG_ENDIAN 1
56 #ifndef USTRING_NO_STL
63 #include "irrAllocator.h"
66 #include "irrString.h"
69 //! UTF-16 surrogate start values.
70 static const irr::u16 UTF16_HI_SURROGATE = 0xD800;
71 static const irr::u16 UTF16_LO_SURROGATE = 0xDC00;
//! Is a UTF-16 code unit a surrogate?
//! UTF16_IS_SURROGATE matches 0xD800-0xDFFF (either half of a pair),
//! UTF16_IS_SURROGATE_HI matches 0xD800-0xDBFF (first unit of a pair) and
//! UTF16_IS_SURROGATE_LO matches 0xDC00-0xDFFF (second unit of a pair).
//! Only the low 16 bits of the argument take part in the comparison.
#define UTF16_IS_SURROGATE(c)		((0xF800 & (c)) == 0xD800)
#define UTF16_IS_SURROGATE_HI(c)	((0xFC00 & (c)) == 0xD800)
#define UTF16_IS_SURROGATE_LO(c)	((0xFC00 & (c)) == 0xDC00)
82 // Define our character types.
83 #ifdef USTRING_CPP0X_NEWLITERALS // C++0x
84 typedef char32_t uchar32_t;
85 typedef char16_t uchar16_t;
86 typedef char uchar8_t;
88 typedef u32 uchar32_t;
89 typedef u16 uchar16_t;
99 //! The unicode replacement character. Used to replace invalid characters.
100 const irr::u16 UTF_REPLACEMENT_CHARACTER = 0xFFFD;
102 //! Convert a UTF-16 surrogate pair into a UTF-32 character.
103 //! \param high The high value of the pair.
104 //! \param low The low value of the pair.
105 //! \return The UTF-32 character expressed by the surrogate pair.
106 inline uchar32_t toUTF32(uchar16_t high, uchar16_t low)
108 // Convert the surrogate pair into a single UTF-32 character.
109 uchar32_t x = ((high & ((1 << 6) -1)) << 10) | (low & ((1 << 10) -1));
110 uchar32_t wu = ((high >> 6) & ((1 << 5) - 1)) + 1;
111 return (wu << 16) | x;
114 //! Swaps the endianness of a 16-bit value.
115 //! \return The new value.
116 inline uchar16_t swapEndian16(const uchar16_t& c)
118 return ((c >> 8) & 0x00FF) | ((c << 8) & 0xFF00);
121 //! Swaps the endianness of a 32-bit value.
122 //! \return The new value.
123 inline uchar32_t swapEndian32(const uchar32_t& c)
125 return ((c >> 24) & 0x000000FF) |
126 ((c >> 8) & 0x0000FF00) |
127 ((c << 8) & 0x00FF0000) |
128 ((c << 24) & 0xFF000000);
131 //! The Unicode byte order mark.
132 const u16 BOM = 0xFEFF;
134 //! The size of the Unicode byte order mark in terms of the Unicode character size.
135 const u8 BOM_UTF8_LEN = 3;
136 const u8 BOM_UTF16_LEN = 1;
137 const u8 BOM_UTF32_LEN = 1;
139 //! Unicode byte order marks for file operations.
140 const u8 BOM_ENCODE_UTF8[3] = { 0xEF, 0xBB, 0xBF };
141 const u8 BOM_ENCODE_UTF16_BE[2] = { 0xFE, 0xFF };
142 const u8 BOM_ENCODE_UTF16_LE[2] = { 0xFF, 0xFE };
143 const u8 BOM_ENCODE_UTF32_BE[4] = { 0x00, 0x00, 0xFE, 0xFF };
144 const u8 BOM_ENCODE_UTF32_LE[4] = { 0xFF, 0xFE, 0x00, 0x00 };
146 //! The size in bytes of the Unicode byte order marks for file operations.
147 const u8 BOM_ENCODE_UTF8_LEN = 3;
148 const u8 BOM_ENCODE_UTF16_LEN = 2;
149 const u8 BOM_ENCODE_UTF32_LEN = 4;
151 //! Unicode encoding type.
164 //! Unicode endianness.
172 //! Returns the specified unicode byte order mark in a byte array.
173 //! The byte order mark is the first few bytes in a text file that signifies its encoding.
174 /** \param mode The Unicode encoding method that we want to get the byte order mark for.
175 If EUTFE_UTF16 or EUTFE_UTF32 is passed, it uses the native system endianness. **/
176 //! \return An array that contains a byte order mark.
177 inline core::array<u8> getUnicodeBOM(EUTF_ENCODE mode)
179 #define COPY_ARRAY(source, size) \
180 memcpy(ret.pointer(), source, size); \
183 core::array<u8> ret(4);
187 COPY_ARRAY(BOM_ENCODE_UTF8, BOM_ENCODE_UTF8_LEN);
190 #ifdef __BIG_ENDIAN__
191 COPY_ARRAY(BOM_ENCODE_UTF16_BE, BOM_ENCODE_UTF16_LEN);
193 COPY_ARRAY(BOM_ENCODE_UTF16_LE, BOM_ENCODE_UTF16_LEN);
197 COPY_ARRAY(BOM_ENCODE_UTF16_BE, BOM_ENCODE_UTF16_LEN);
200 COPY_ARRAY(BOM_ENCODE_UTF16_LE, BOM_ENCODE_UTF16_LEN);
203 #ifdef __BIG_ENDIAN__
204 COPY_ARRAY(BOM_ENCODE_UTF32_BE, BOM_ENCODE_UTF32_LEN);
206 COPY_ARRAY(BOM_ENCODE_UTF32_LE, BOM_ENCODE_UTF32_LEN);
210 COPY_ARRAY(BOM_ENCODE_UTF32_BE, BOM_ENCODE_UTF32_LEN);
213 COPY_ARRAY(BOM_ENCODE_UTF32_LE, BOM_ENCODE_UTF32_LEN);
216 // TODO sapier: fixed warning only,
217 // don't know if something needs to be done here
225 //! Detects if the given data stream starts with a unicode BOM.
226 //! \param data The data stream to check.
227 //! \return The unicode BOM associated with the data stream, or EUTFE_NONE if none was found.
228 inline EUTF_ENCODE determineUnicodeBOM(const char* data)
230 if (memcmp(data, BOM_ENCODE_UTF8, 3) == 0) return EUTFE_UTF8;
231 if (memcmp(data, BOM_ENCODE_UTF16_BE, 2) == 0) return EUTFE_UTF16_BE;
232 if (memcmp(data, BOM_ENCODE_UTF16_LE, 2) == 0) return EUTFE_UTF16_LE;
233 if (memcmp(data, BOM_ENCODE_UTF32_BE, 4) == 0) return EUTFE_UTF32_BE;
234 if (memcmp(data, BOM_ENCODE_UTF32_LE, 4) == 0) return EUTFE_UTF32_LE;
238 } // end namespace unicode
241 //! UTF-16 string class.
242 template <typename TAlloc = irrAllocator<uchar16_t> >
247 ///------------------///
248 /// iterator classes ///
249 ///------------------///
251 //! Access an element in a unicode string, allowing one to change it.
252 class _ustring16_iterator_access
255 _ustring16_iterator_access(const ustring16<TAlloc>* s, u32 p) : ref(s), pos(p) {}
257 //! Allow the class to be interpreted as a single UTF-32 character.
258 operator uchar32_t() const
263 //! Allow one to change the character in the unicode string.
264 //! \param c The new character to use.
266 _ustring16_iterator_access& operator=(const uchar32_t c)
272 //! Increments the value by 1.
274 _ustring16_iterator_access& operator++()
280 //! Increments the value by 1, returning the old value.
281 //! \return A unicode character.
282 uchar32_t operator++(int)
284 uchar32_t old = _get();
289 //! Decrements the value by 1.
291 _ustring16_iterator_access& operator--()
297 //! Decrements the value by 1, returning the old value.
298 //! \return A unicode character.
299 uchar32_t operator--(int)
301 uchar32_t old = _get();
306 //! Adds to the value by a specified amount.
307 //! \param val The amount to add to this character.
309 _ustring16_iterator_access& operator+=(int val)
315 //! Subtracts from the value by a specified amount.
316 //! \param val The amount to subtract from this character.
318 _ustring16_iterator_access& operator-=(int val)
324 //! Multiplies the value by a specified amount.
325 //! \param val The amount to multiply this character by.
327 _ustring16_iterator_access& operator*=(int val)
333 //! Divides the value by a specified amount.
334 //! \param val The amount to divide this character by.
336 _ustring16_iterator_access& operator/=(int val)
342 //! Modulos the value by a specified amount.
343 //! \param val The amount to modulo this character by.
345 _ustring16_iterator_access& operator%=(int val)
351 //! Adds to the value by a specified amount.
352 //! \param val The amount to add to this character.
353 //! \return A unicode character.
354 uchar32_t operator+(int val) const
359 //! Subtracts from the value by a specified amount.
360 //! \param val The amount to subtract from this character.
361 //! \return A unicode character.
362 uchar32_t operator-(int val) const
367 //! Multiplies the value by a specified amount.
368 //! \param val The amount to multiply this character by.
369 //! \return A unicode character.
370 uchar32_t operator*(int val) const
375 //! Divides the value by a specified amount.
376 //! \param val The amount to divide this character by.
377 //! \return A unicode character.
378 uchar32_t operator/(int val) const
383 //! Modulos the value by a specified amount.
384 //! \param val The amount to modulo this character by.
385 //! \return A unicode character.
386 uchar32_t operator%(int val) const
392 //! Gets a uchar32_t from our current position.
393 uchar32_t _get() const
395 const uchar16_t* a = ref->c_str();
396 if (!UTF16_IS_SURROGATE(a[pos]))
397 return static_cast<uchar32_t>(a[pos]);
400 if (pos + 1 >= ref->size_raw())
403 return unicode::toUTF32(a[pos], a[pos + 1]);
407 //! Sets a uchar32_t at our current position.
408 void _set(uchar32_t c)
410 ustring16<TAlloc>* ref2 = const_cast<ustring16<TAlloc>*>(ref);
411 const uchar16_t* a = ref2->c_str();
414 // c will be multibyte, so split it up into the high and low surrogate pairs.
415 uchar16_t x = static_cast<uchar16_t>(c);
416 uchar16_t vh = UTF16_HI_SURROGATE | ((((c >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
417 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
419 // If the previous position was a surrogate pair, just replace them. Else, insert the low pair.
420 if (UTF16_IS_SURROGATE_HI(a[pos]) && pos + 1 != ref2->size_raw())
421 ref2->replace_raw(vl, static_cast<u32>(pos) + 1);
422 else ref2->insert_raw(vl, static_cast<u32>(pos) + 1);
424 ref2->replace_raw(vh, static_cast<u32>(pos));
428 // c will be a single byte.
429 uchar16_t vh = static_cast<uchar16_t>(c);
431 // If the previous position was a surrogate pair, remove the extra byte.
432 if (UTF16_IS_SURROGATE_HI(a[pos]))
433 ref2->erase_raw(static_cast<u32>(pos) + 1);
435 ref2->replace_raw(vh, static_cast<u32>(pos));
439 const ustring16<TAlloc>* ref;
442 typedef typename ustring16<TAlloc>::_ustring16_iterator_access access;
445 //! Iterator to iterate through a UTF-16 string.
446 #ifndef USTRING_NO_STL
447 class _ustring16_const_iterator : public std::iterator<
448 std::bidirectional_iterator_tag, // iterator_category
449 access, // value_type
450 ptrdiff_t, // difference_type
451 const access, // pointer
452 const access // reference
455 class _ustring16_const_iterator
459 typedef _ustring16_const_iterator _Iter;
460 typedef std::iterator<std::bidirectional_iterator_tag, access, ptrdiff_t, const access, const access> _Base;
461 typedef const access const_pointer;
462 typedef const access const_reference;
464 #ifndef USTRING_NO_STL
465 typedef typename _Base::value_type value_type;
466 typedef typename _Base::difference_type difference_type;
467 typedef typename _Base::difference_type distance_type;
468 typedef typename _Base::pointer pointer;
469 typedef const_reference reference;
471 typedef access value_type;
472 typedef u32 difference_type;
473 typedef u32 distance_type;
474 typedef const_pointer pointer;
475 typedef const_reference reference;
479 _ustring16_const_iterator(const _Iter& i) : ref(i.ref), pos(i.pos) {}
480 _ustring16_const_iterator(const ustring16<TAlloc>& s) : ref(&s), pos(0) {}
481 _ustring16_const_iterator(const ustring16<TAlloc>& s, const u32 p) : ref(&s), pos(0)
483 if (ref->size_raw() == 0 || p == 0)
486 // Go to the appropriate position.
488 u32 sr = ref->size_raw();
489 const uchar16_t* a = ref->c_str();
490 while (i != 0 && pos < sr)
492 if (UTF16_IS_SURROGATE_HI(a[pos]))
499 //! Test for equality.
500 bool operator==(const _Iter& iter) const
502 if (ref == iter.ref && pos == iter.pos)
507 //! Test for inequality.
508 bool operator!=(const _Iter& iter) const
510 if (ref != iter.ref || pos != iter.pos)
515 //! Switch to the next full character in the string.
518 if (pos == ref->size_raw()) return *this;
519 const uchar16_t* a = ref->c_str();
520 if (UTF16_IS_SURROGATE_HI(a[pos]))
521 pos += 2; // TODO: check for valid low surrogate?
523 if (pos > ref->size_raw()) pos = ref->size_raw();
527 //! Switch to the next full character in the string, returning the previous position.
528 _Iter operator++(int)
535 //! Switch to the previous full character in the string.
538 if (pos == 0) return *this;
539 const uchar16_t* a = ref->c_str();
541 if (UTF16_IS_SURROGATE_LO(a[pos]) && pos != 0) // low surrogate, go back one more.
546 //! Switch to the previous full character in the string, returning the previous position.
547 _Iter operator--(int)
554 //! Advance a specified number of full characters in the string.
556 _Iter& operator+=(const difference_type v)
558 if (v == 0) return *this;
559 if (v < 0) return operator-=(v * -1);
561 if (pos >= ref->size_raw())
564 // Go to the appropriate position.
565 // TODO: Don't force u32 on an x64 OS. Make it agnostic.
567 u32 sr = ref->size_raw();
568 const uchar16_t* a = ref->c_str();
569 while (i != 0 && pos < sr)
571 if (UTF16_IS_SURROGATE_HI(a[pos]))
582 //! Go back a specified number of full characters in the string.
584 _Iter& operator-=(const difference_type v)
586 if (v == 0) return *this;
587 if (v > 0) return operator+=(v * -1);
592 // Go to the appropriate position.
593 // TODO: Don't force u32 on an x64 OS. Make it agnostic.
595 const uchar16_t* a = ref->c_str();
596 while (i != 0 && pos != 0)
599 if (UTF16_IS_SURROGATE_LO(a[pos]) != 0 && pos != 0)
607 //! Return a new iterator that is a variable number of full characters forward from the current position.
608 _Iter operator+(const difference_type v) const
615 //! Return a new iterator that is a variable number of full characters backward from the current position.
616 _Iter operator-(const difference_type v) const
623 //! Returns the distance between two iterators.
624 difference_type operator-(const _Iter& iter) const
626 // Make sure we reference the same object!
628 return difference_type();
653 //! Accesses the full character at the iterator's position.
654 const_reference operator*() const
656 if (pos >= ref->size_raw())
658 const uchar16_t* a = ref->c_str();
659 u32 p = ref->size_raw();
660 if (UTF16_IS_SURROGATE_LO(a[p]))
662 reference ret(ref, p);
665 const_reference ret(ref, pos);
669 //! Accesses the full character at the iterator's position.
670 reference operator*()
672 if (pos >= ref->size_raw())
674 const uchar16_t* a = ref->c_str();
675 u32 p = ref->size_raw();
676 if (UTF16_IS_SURROGATE_LO(a[p]))
678 reference ret(ref, p);
681 reference ret(ref, pos);
685 //! Accesses the full character at the iterator's position.
686 const_pointer operator->() const
691 //! Accesses the full character at the iterator's position.
697 //! Is the iterator at the start of the string?
703 //! Is the iterator at the end of the string?
706 const uchar16_t* a = ref->c_str();
707 if (UTF16_IS_SURROGATE(a[pos]))
708 return (pos + 1) >= ref->size_raw();
709 else return pos >= ref->size_raw();
712 //! Moves the iterator to the start of the string.
718 //! Moves the iterator to the end of the string.
721 pos = ref->size_raw();
724 //! Returns the iterator's position.
725 //! \return The iterator's position.
732 const ustring16<TAlloc>* ref;
736 //! Iterator to iterate through a UTF-16 string.
737 class _ustring16_iterator : public _ustring16_const_iterator
740 typedef _ustring16_iterator _Iter;
741 typedef _ustring16_const_iterator _Base;
742 typedef typename _Base::const_pointer const_pointer;
743 typedef typename _Base::const_reference const_reference;
746 typedef typename _Base::value_type value_type;
747 typedef typename _Base::difference_type difference_type;
748 typedef typename _Base::distance_type distance_type;
749 typedef access pointer;
750 typedef access reference;
756 _ustring16_iterator(const _Iter& i) : _ustring16_const_iterator(i) {}
757 _ustring16_iterator(const ustring16<TAlloc>& s) : _ustring16_const_iterator(s) {}
758 _ustring16_iterator(const ustring16<TAlloc>& s, const u32 p) : _ustring16_const_iterator(s, p) {}
760 //! Accesses the full character at the iterator's position.
761 reference operator*() const
763 if (pos >= ref->size_raw())
765 const uchar16_t* a = ref->c_str();
766 u32 p = ref->size_raw();
767 if (UTF16_IS_SURROGATE_LO(a[p]))
769 reference ret(ref, p);
772 reference ret(ref, pos);
776 //! Accesses the full character at the iterator's position.
777 reference operator*()
779 if (pos >= ref->size_raw())
781 const uchar16_t* a = ref->c_str();
782 u32 p = ref->size_raw();
783 if (UTF16_IS_SURROGATE_LO(a[p]))
785 reference ret(ref, p);
788 reference ret(ref, pos);
792 //! Accesses the full character at the iterator's position.
793 pointer operator->() const
798 //! Accesses the full character at the iterator's position.
805 typedef typename ustring16<TAlloc>::_ustring16_iterator iterator;
806 typedef typename ustring16<TAlloc>::_ustring16_const_iterator const_iterator;
808 ///----------------------///
809 /// end iterator classes ///
810 ///----------------------///
812 //! Default constructor
814 : array(0), allocated(1), used(0)
816 #if __BYTE_ORDER == __BIG_ENDIAN
817 encoding = unicode::EUTFE_UTF16_BE;
819 encoding = unicode::EUTFE_UTF16_LE;
821 array = allocator.allocate(1); // new u16[1];
827 ustring16(const ustring16<TAlloc>& other)
828 : array(0), allocated(0), used(0)
830 #if __BYTE_ORDER == __BIG_ENDIAN
831 encoding = unicode::EUTFE_UTF16_BE;
833 encoding = unicode::EUTFE_UTF16_LE;
839 //! Constructor from other string types
840 template <class B, class A>
841 ustring16(const string<B, A>& other)
842 : array(0), allocated(0), used(0)
844 #if __BYTE_ORDER == __BIG_ENDIAN
845 encoding = unicode::EUTFE_UTF16_BE;
847 encoding = unicode::EUTFE_UTF16_LE;
853 #ifndef USTRING_NO_STL
854 //! Constructor from std::string
855 template <class B, class A, typename Alloc>
856 ustring16(const std::basic_string<B, A, Alloc>& other)
857 : array(0), allocated(0), used(0)
859 #if __BYTE_ORDER == __BIG_ENDIAN
860 encoding = unicode::EUTFE_UTF16_BE;
862 encoding = unicode::EUTFE_UTF16_LE;
864 *this = other.c_str();
868 //! Constructor from iterator.
869 template <typename Itr>
870 ustring16(Itr first, Itr last)
871 : array(0), allocated(0), used(0)
873 #if __BYTE_ORDER == __BIG_ENDIAN
874 encoding = unicode::EUTFE_UTF16_BE;
876 encoding = unicode::EUTFE_UTF16_LE;
878 reserve(std::distance(first, last));
881 for (; first != last; ++first)
882 append((uchar32_t)*first);
887 #ifndef USTRING_CPP0X_NEWLITERALS
888 //! Constructor for copying a character string from a pointer.
889 ustring16(const char* const c)
890 : array(0), allocated(0), used(0)
892 #if __BYTE_ORDER == __BIG_ENDIAN
893 encoding = unicode::EUTFE_UTF16_BE;
895 encoding = unicode::EUTFE_UTF16_LE;
898 loadDataStream(c, strlen(c));
899 //append((uchar8_t*)c);
903 //! Constructor for copying a character string from a pointer with a given length.
904 ustring16(const char* const c, u32 length)
905 : array(0), allocated(0), used(0)
907 #if __BYTE_ORDER == __BIG_ENDIAN
908 encoding = unicode::EUTFE_UTF16_BE;
910 encoding = unicode::EUTFE_UTF16_LE;
913 loadDataStream(c, length);
918 //! Constructor for copying a UTF-8 string from a pointer.
919 ustring16(const uchar8_t* const c)
920 : array(0), allocated(0), used(0)
922 #if __BYTE_ORDER == __BIG_ENDIAN
923 encoding = unicode::EUTFE_UTF16_BE;
925 encoding = unicode::EUTFE_UTF16_LE;
932 //! Constructor for copying a UTF-8 string from a single char.
933 ustring16(const char c)
934 : array(0), allocated(0), used(0)
936 #if __BYTE_ORDER == __BIG_ENDIAN
937 encoding = unicode::EUTFE_UTF16_BE;
939 encoding = unicode::EUTFE_UTF16_LE;
942 append((uchar32_t)c);
946 //! Constructor for copying a UTF-8 string from a pointer with a given length.
947 ustring16(const uchar8_t* const c, u32 length)
948 : array(0), allocated(0), used(0)
950 #if __BYTE_ORDER == __BIG_ENDIAN
951 encoding = unicode::EUTFE_UTF16_BE;
953 encoding = unicode::EUTFE_UTF16_LE;
960 //! Constructor for copying a UTF-16 string from a pointer.
961 ustring16(const uchar16_t* const c)
962 : array(0), allocated(0), used(0)
964 #if __BYTE_ORDER == __BIG_ENDIAN
965 encoding = unicode::EUTFE_UTF16_BE;
967 encoding = unicode::EUTFE_UTF16_LE;
974 //! Constructor for copying a UTF-16 string from a pointer with a given length
975 ustring16(const uchar16_t* const c, u32 length)
976 : array(0), allocated(0), used(0)
978 #if __BYTE_ORDER == __BIG_ENDIAN
979 encoding = unicode::EUTFE_UTF16_BE;
981 encoding = unicode::EUTFE_UTF16_LE;
988 //! Constructor for copying a UTF-32 string from a pointer.
989 ustring16(const uchar32_t* const c)
990 : array(0), allocated(0), used(0)
992 #if __BYTE_ORDER == __BIG_ENDIAN
993 encoding = unicode::EUTFE_UTF16_BE;
995 encoding = unicode::EUTFE_UTF16_LE;
1002 //! Constructor for copying a UTF-32 from a pointer with a given length.
1003 ustring16(const uchar32_t* const c, u32 length)
1004 : array(0), allocated(0), used(0)
1006 #if __BYTE_ORDER == __BIG_ENDIAN
1007 encoding = unicode::EUTFE_UTF16_BE;
1009 encoding = unicode::EUTFE_UTF16_LE;
1016 //! Constructor for copying a wchar_t string from a pointer.
1017 ustring16(const wchar_t* const c)
1018 : array(0), allocated(0), used(0)
1020 #if __BYTE_ORDER == __BIG_ENDIAN
1021 encoding = unicode::EUTFE_UTF16_BE;
1023 encoding = unicode::EUTFE_UTF16_LE;
1026 if (sizeof(wchar_t) == 4)
1027 append(reinterpret_cast<const uchar32_t* const>(c));
1028 else if (sizeof(wchar_t) == 2)
1029 append(reinterpret_cast<const uchar16_t* const>(c));
1030 else if (sizeof(wchar_t) == 1)
1031 append(reinterpret_cast<const uchar8_t* const>(c));
1035 //! Constructor for copying a wchar_t string from a pointer with a given length.
1036 ustring16(const wchar_t* const c, u32 length)
1037 : array(0), allocated(0), used(0)
1039 #if __BYTE_ORDER == __BIG_ENDIAN
1040 encoding = unicode::EUTFE_UTF16_BE;
1042 encoding = unicode::EUTFE_UTF16_LE;
1045 if (sizeof(wchar_t) == 4)
1046 append(reinterpret_cast<const uchar32_t* const>(c), length);
1047 else if (sizeof(wchar_t) == 2)
1048 append(reinterpret_cast<const uchar16_t* const>(c), length);
1049 else if (sizeof(wchar_t) == 1)
1050 append(reinterpret_cast<const uchar8_t* const>(c), length);
1054 #ifdef USTRING_CPP0X
1055 //! Constructor for moving a ustring16
1056 ustring16(ustring16<TAlloc>&& other)
1057 : array(other.array), encoding(other.encoding), allocated(other.allocated), used(other.used)
1059 //std::cout << "MOVE constructor" << std::endl;
1061 other.allocated = 0;
1070 allocator.deallocate(array); // delete [] array;
1074 //! Assignment operator
1075 ustring16& operator=(const ustring16<TAlloc>& other)
1080 used = other.size_raw();
1081 if (used >= allocated)
1083 allocator.deallocate(array); // delete [] array;
1084 allocated = used + 1;
1085 array = allocator.allocate(used + 1); //new u16[used];
1088 const uchar16_t* p = other.c_str();
1089 for (u32 i=0; i<=used; ++i, ++p)
1094 // Validate our new UTF-16 string.
1101 #ifdef USTRING_CPP0X
1102 //! Move assignment operator
1103 ustring16& operator=(ustring16<TAlloc>&& other)
1107 //std::cout << "MOVE operator=" << std::endl;
1108 allocator.deallocate(array);
1110 array = other.array;
1111 allocated = other.allocated;
1112 encoding = other.encoding;
1122 //! Assignment operator for other string types
1123 template <class B, class A>
1124 ustring16<TAlloc>& operator=(const string<B, A>& other)
1126 *this = other.c_str();
1131 //! Assignment operator for UTF-8 strings
1132 ustring16<TAlloc>& operator=(const uchar8_t* const c)
1136 array = allocator.allocate(1); //new u16[1];
1141 if (!c) return *this;
1143 //! Append our string now.
1149 //! Assignment operator for UTF-16 strings
1150 ustring16<TAlloc>& operator=(const uchar16_t* const c)
1154 array = allocator.allocate(1); //new u16[1];
1159 if (!c) return *this;
1161 //! Append our string now.
1167 //! Assignment operator for UTF-32 strings
1168 ustring16<TAlloc>& operator=(const uchar32_t* const c)
1172 array = allocator.allocate(1); //new u16[1];
1177 if (!c) return *this;
1179 //! Append our string now.
1185 //! Assignment operator for wchar_t strings.
1186 /** Note that this assumes that a correct unicode string is stored in the wchar_t string.
1187 Since wchar_t changes depending on its platform, it could either be a UTF-8, -16, or -32 string.
1188 This function assumes you are storing the correct unicode encoding inside the wchar_t string. **/
1189 ustring16<TAlloc>& operator=(const wchar_t* const c)
1191 if (sizeof(wchar_t) == 4)
1192 *this = reinterpret_cast<const uchar32_t* const>(c);
1193 else if (sizeof(wchar_t) == 2)
1194 *this = reinterpret_cast<const uchar16_t* const>(c);
1195 else if (sizeof(wchar_t) == 1)
1196 *this = reinterpret_cast<const uchar8_t* const>(c);
1202 //! Assignment operator for other strings.
1203 /** Note that this assumes that a correct unicode string is stored in the string. **/
1205 ustring16<TAlloc>& operator=(const B* const c)
1208 *this = reinterpret_cast<const uchar32_t* const>(c);
1209 else if (sizeof(B) == 2)
1210 *this = reinterpret_cast<const uchar16_t* const>(c);
1211 else if (sizeof(B) == 1)
1212 *this = reinterpret_cast<const uchar8_t* const>(c);
1218 //! Direct access operator
1219 access operator [](const u32 index)
1221 _IRR_DEBUG_BREAK_IF(index>=size()) // bad index
1222 iterator iter(*this, index);
1223 return iter.operator*();
1227 //! Direct access operator
1228 const access operator [](const u32 index) const
1230 _IRR_DEBUG_BREAK_IF(index>=size()) // bad index
1231 const_iterator iter(*this, index);
1232 return iter.operator*();
1236 //! Equality operator
1237 bool operator ==(const uchar16_t* const str) const
1243 for(i=0; array[i] && str[i]; ++i)
1244 if (array[i] != str[i])
1247 return !array[i] && !str[i];
1251 //! Equality operator
1252 bool operator ==(const ustring16<TAlloc>& other) const
1254 for(u32 i=0; array[i] && other.array[i]; ++i)
1255 if (array[i] != other.array[i])
1258 return used == other.used;
1262 //! Is smaller comparator
1263 bool operator <(const ustring16<TAlloc>& other) const
1265 for(u32 i=0; array[i] && other.array[i]; ++i)
1267 s32 diff = array[i] - other.array[i];
1272 return used < other.used;
1276 //! Inequality operator
1277 bool operator !=(const uchar16_t* const str) const
1279 return !(*this == str);
1283 //! Inequality operator
1284 bool operator !=(const ustring16<TAlloc>& other) const
1286 return !(*this == other);
1290 //! Returns the length of a ustring16 in full characters.
1291 //! \return Length of a ustring16 in full characters.
1294 const_iterator i(*this, 0);
1305 //! Informs if the ustring is empty or not.
1306 //! \return True if the ustring is empty, false if not.
1309 return (size_raw() == 0);
1313 //! Returns a pointer to the raw UTF-16 string data.
1314 //! \return pointer to C-style NUL terminated array of UTF-16 code points.
1315 const uchar16_t* c_str() const
1321 //! Compares the first n characters of this string with another.
1322 //! \param other Other string to compare to.
1323 //! \param n Number of characters to compare.
1324 //! \return True if the n first characters of both strings are equal.
1325 bool equalsn(const ustring16<TAlloc>& other, u32 n) const
1328 const uchar16_t* oa = other.c_str();
1329 for(i=0; array[i] && oa[i] && i < n; ++i)
1330 if (array[i] != oa[i])
1333 // if one (or both) of the strings was smaller then they
1334 // are only equal if they have the same length
1335 return (i == n) || (used == other.used);
1339 //! Compares the first n characters of this string with another.
1340 //! \param str Other string to compare to.
1341 //! \param n Number of characters to compare.
1342 //! \return True if the n first characters of both strings are equal.
1343 bool equalsn(const uchar16_t* const str, u32 n) const
1348 for(i=0; array[i] && str[i] && i < n; ++i)
1349 if (array[i] != str[i])
1352 // if one (or both) of the strings was smaller then they
1353 // are only equal if they have the same length
1354 return (i == n) || (array[i] == 0 && str[i] == 0);
1358 //! Appends a character to this ustring16
1359 //! \param character The character to append.
1360 //! \return A reference to our current string.
1361 ustring16<TAlloc>& append(uchar32_t character)
1363 if (used + 2 >= allocated)
1364 reallocate(used + 2);
1366 if (character > 0xFFFF)
1370 // character will be multibyte, so split it up into a surrogate pair.
1371 uchar16_t x = static_cast<uchar16_t>(character);
1372 uchar16_t vh = UTF16_HI_SURROGATE | ((((character >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1373 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1380 array[used-1] = character;
1388 //! Appends a UTF-8 string to this ustring16
1389 //! \param other The UTF-8 string to append.
1390 //! \param length The length of the string to append.
1391 //! \return A reference to our current string.
1392 ustring16<TAlloc>& append(const uchar8_t* const other, u32 length=0xffffffff)
1397 // Determine if the string is long enough for a BOM.
1399 const uchar8_t* p = other;
1403 } while (*p++ && len < unicode::BOM_ENCODE_UTF8_LEN);
1406 unicode::EUTF_ENCODE c_bom = unicode::EUTFE_NONE;
1407 if (len == unicode::BOM_ENCODE_UTF8_LEN)
1409 if (memcmp(other, unicode::BOM_ENCODE_UTF8, unicode::BOM_ENCODE_UTF8_LEN) == 0)
1410 c_bom = unicode::EUTFE_UTF8;
1413 // If a BOM was found, don't include it in the string.
1414 const uchar8_t* c2 = other;
1415 if (c_bom != unicode::EUTFE_NONE)
1417 c2 = other + unicode::BOM_UTF8_LEN;
1418 length -= unicode::BOM_UTF8_LEN;
1421 // Calculate the size of the string to read in.
1427 } while(*p++ && len < length);
1431 // If we need to grow the array, do it now.
1432 if (used + len >= allocated)
1433 reallocate(used + (len * 2));
1436 // Convert UTF-8 to UTF-16.
1438 for (u32 l = 0; l<len;)
1441 if (((c2[l] >> 6) & 0x03) == 0x02)
1442 { // Invalid continuation byte.
1443 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1446 else if (c2[l] == 0xC0 || c2[l] == 0xC1)
1447 { // Invalid byte - overlong encoding.
1448 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1451 else if ((c2[l] & 0xF8) == 0xF0)
1452 { // 4 bytes UTF-8, 2 bytes UTF-16.
1453 // Check for a full string.
1456 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1464 if (valid && (((c2[l+1] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1465 if (valid && (((c2[l+2] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1466 if (valid && (((c2[l+3] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1469 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1475 uchar8_t b1 = ((c2[l] & 0x7) << 2) | ((c2[l+1] >> 4) & 0x3);
1476 uchar8_t b2 = ((c2[l+1] & 0xF) << 4) | ((c2[l+2] >> 2) & 0xF);
1477 uchar8_t b3 = ((c2[l+2] & 0x3) << 6) | (c2[l+3] & 0x3F);
1478 uchar32_t v = b3 | ((uchar32_t)b2 << 8) | ((uchar32_t)b1 << 16);
1480 // Split v up into a surrogate pair.
1481 uchar16_t x = static_cast<uchar16_t>(v);
1482 uchar16_t vh = UTF16_HI_SURROGATE | ((((v >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1483 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1488 ++used; // Using two shorts this time, so increase used by 1.
1490 else if ((c2[l] & 0xF0) == 0xE0)
1491 { // 3 bytes UTF-8, 1 byte UTF-16.
1492 // Check for a full string.
1495 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1503 if (valid && (((c2[l+1] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1504 if (valid && (((c2[l+2] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1507 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1513 uchar8_t b1 = ((c2[l] & 0xF) << 4) | ((c2[l+1] >> 2) & 0xF);
1514 uchar8_t b2 = ((c2[l+1] & 0x3) << 6) | (c2[l+2] & 0x3F);
1515 uchar16_t ch = b2 | ((uchar16_t)b1 << 8);
1519 else if ((c2[l] & 0xE0) == 0xC0)
1520 { // 2 bytes UTF-8, 1 byte UTF-16.
1521 // Check for a full string.
1524 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1530 if (((c2[l+1] >> 6) & 0x03) != 0x02)
1532 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1538 uchar8_t b1 = (c2[l] >> 2) & 0x7;
1539 uchar8_t b2 = ((c2[l] & 0x3) << 6) | (c2[l+1] & 0x3F);
1540 uchar16_t ch = b2 | ((uchar16_t)b1 << 8);
1545 { // 1 byte UTF-8, 1 byte UTF-16.
1548 { // Values above 0xF4 are restricted and aren't used. By now, anything above 0x7F is invalid.
1549 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1551 else array[pos++] = static_cast<uchar16_t>(c2[l]);
1557 // Validate our new UTF-16 string.
1564 //! Appends a UTF-16 string to this ustring16
1565 //! \param other The UTF-16 string to append.
1566 //! \param length The length of the string to append.
1567 //! \return A reference to our current string.
1568 ustring16<TAlloc>& append(const uchar16_t* const other, u32 length=0xffffffff)
1573 // Determine if the string is long enough for a BOM.
1575 const uchar16_t* p = other;
1579 } while (*p++ && len < unicode::BOM_ENCODE_UTF16_LEN);
1581 // Check for the BOM to determine the string's endianness.
1582 unicode::EUTF_ENDIAN c_end = unicode::EUTFEE_NATIVE;
1583 if (memcmp(other, unicode::BOM_ENCODE_UTF16_LE, unicode::BOM_ENCODE_UTF16_LEN) == 0)
1584 c_end = unicode::EUTFEE_LITTLE;
1585 else if (memcmp(other, unicode::BOM_ENCODE_UTF16_BE, unicode::BOM_ENCODE_UTF16_LEN) == 0)
1586 c_end = unicode::EUTFEE_BIG;
1588 // If a BOM was found, don't include it in the string.
1589 const uchar16_t* c2 = other;
1590 if (c_end != unicode::EUTFEE_NATIVE)
1592 c2 = other + unicode::BOM_UTF16_LEN;
1593 length -= unicode::BOM_UTF16_LEN;
1596 // Calculate the size of the string to read in.
1602 } while(*p++ && len < length);
1606 // If we need to grow the size of the array, do it now.
1607 if (used + len >= allocated)
1608 reallocate(used + (len * 2));
1612 // Copy the string now.
1613 unicode::EUTF_ENDIAN m_end = getEndianness();
1614 for (u32 l = start; l < start + len; ++l)
1616 array[l] = (uchar16_t)c2[l];
1617 if (c_end != unicode::EUTFEE_NATIVE && c_end != m_end)
1618 array[l] = unicode::swapEndian16(array[l]);
1623 // Validate our new UTF-16 string.
1629 //! Appends a UTF-32 string to this ustring16, converting it to UTF-16.
1630 //! \param other The UTF-32 string to append. A leading byte-order mark, if present, selects the source endianness and is skipped.
1631 //! \param length The length of the string to append. A skipped BOM is subtracted from it.
1632 //! \return A reference to our current string.
1633 ustring16<TAlloc>& append(const uchar32_t* const other, u32 length=0xffffffff)
1638 // Check for the BOM to determine the string's endianness.
// c_end stays EUTFEE_NATIVE unless the first BOM_ENCODE_UTF32_LEN bytes of other match one of the two BOM patterns.
1639 unicode::EUTF_ENDIAN c_end = unicode::EUTFEE_NATIVE;
1640 if (memcmp(other, unicode::BOM_ENCODE_UTF32_LE, unicode::BOM_ENCODE_UTF32_LEN) == 0)
1641 c_end = unicode::EUTFEE_LITTLE;
1642 else if (memcmp(other, unicode::BOM_ENCODE_UTF32_BE, unicode::BOM_ENCODE_UTF32_LEN) == 0)
1643 c_end = unicode::EUTFEE_BIG;
1645 // If a BOM was found, don't include it in the string.
1646 const uchar32_t* c2 = other;
1647 if (c_end != unicode::EUTFEE_NATIVE)
1649 c2 = other + unicode::BOM_UTF32_LEN;
1650 length -= unicode::BOM_UTF32_LEN;
1653 // Calculate the size of the string to read in.
// p walks c2; counting stops at a zero value or once len reaches length.
1655 const uchar32_t* p = c2;
1659 } while(*p++ && len < length);
1663 // If we need to grow the size of the array, do it now.
1664 // In case all of the UTF-32 string is split into surrogate pairs, do len * 2.
1665 if (used + (len * 2) >= allocated)
1666 reallocate(used + ((len * 2) * 2));
1669 // Convert UTF-32 to UTF-16.
1670 unicode::EUTF_ENDIAN m_end = getEndianness();
1672 for (u32 l = 0; l<len; ++l)
1676 uchar32_t ch = c2[l];
// Byte-swap only when a BOM declared an endianness that differs from ours.
1677 if (c_end != unicode::EUTFEE_NATIVE && c_end != m_end)
1678 ch = unicode::swapEndian32(ch);
1682 // Split ch up into a surrogate pair as it is over 16 bits long.
// x is the low 16 bits of ch; vh and vl are the high and low surrogates built from ch.
1683 uchar16_t x = static_cast<uchar16_t>(ch);
1684 uchar16_t vh = UTF16_HI_SURROGATE | ((((ch >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1685 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1688 ++used; // Using two shorts, so increased used again.
1690 else if (ch >= 0xD800 && ch <= 0xDFFF)
1692 // Between possible UTF-16 surrogates (invalid!)
1693 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
// Everything else fits in a single UTF-16 code unit.
1695 else array[pos++] = static_cast<uchar16_t>(ch);
1699 // Validate our new UTF-16 string.
1706 //! Appends a ustring16 to this ustring16
1707 //! \param other The string to append to this one.
1708 //! \return A reference to our current string.
1709 ustring16<TAlloc>& append(const ustring16<TAlloc>& other)
1711 const uchar16_t* oa = other.c_str();
1713 u32 len = other.size_raw();
1715 if (used + len >= allocated)
1716 reallocate(used + len);
1718 for (u32 l=0; l<len; ++l)
1719 array[used+l] = oa[l];
1728 //! Appends a certain amount of characters of a ustring16 to this ustring16.
1729 //! \param other The string to append to this one.
1730 //! \param length How many characters of the other string to add to this one.
1731 //! \return A reference to our current string.
1732 ustring16<TAlloc>& append(const ustring16<TAlloc>& other, u32 length)
// An empty source and a length larger than the source are both tested before any copying.
1734 if (other.size() == 0)
1737 if (other.size() < length)
// Two code units are requested per character; presumably the worst case where every character needs a surrogate pair - confirm.
1743 if (used + length * 2 >= allocated)
1744 reallocate(used + length * 2);
// Characters are read from other one at a time as UTF-32 values, until its end or until the counter l reaches zero.
1746 const_iterator iter(other, 0);
1748 while (!iter.atEnd() && l)
1750 uchar32_t c = *iter;
1760 //! Reserves some memory.
1761 //! \param count The amount of characters to reserve.
1762 void reserve(u32 count)
// The request is compared against the current buffer size (allocated) first.
1764 if (count < allocated)
1771 //! Finds first occurrence of character.
1772 //! \param c The character to search for.
1773 //! \return Position where the character has been found, or -1 if not found.
1774 s32 findFirst(uchar32_t c) const
// The search walks the string with a const_iterator that starts at position 0.
1776 const_iterator i(*this, 0);
1791 //! Finds first occurrence of a character of a list.
1792 //! \param c A list of characters to find. For example if the method should find the first occurrence of 'a' or 'b', this parameter should be "ab".
1793 //! \param count The number of characters in the list. Usually, this should be strlen(c).
1794 //! \return Position where one of the characters has been found, or -1 if not found.
1795 s32 findFirstChar(const uchar32_t* const c, u32 count=1) const
// The string is walked from position 0.
1800 const_iterator i(*this, 0);
// j indexes the count entries of the list c.
1806 for (u32 j=0; j<count; ++j)
1817 //! Finds first position of a character not in a given list.
1818 //! \param c A list of characters to NOT find. For example if the method should find the first occurrence of a character not 'a' or 'b', this parameter should be "ab".
1819 //! \param count The number of characters in the list. Usually, this should be strlen(c).
1820 //! \return Position where the character has been found, or -1 if not found.
1821 s32 findFirstCharNotInList(const uchar32_t* const c, u32 count=1) const
// The string is walked from position 0.
1826 const_iterator i(*this, 0);
// j indexes the count entries of the list c; it is declared outside this loop.
1833 for (j=0; j<count; ++j)
1846 //! Finds last position of a character not in a given list.
1847 //! \param c A list of characters to NOT find. For example if the method should find the last occurrence of a character not 'a' or 'b', this parameter should be "ab".
1848 //! \param count The number of characters in the list. Usually, this should be strlen(c).
1849 //! \return Position where the character has been found, or -1 if not found.
1850 s32 findLastCharNotInList(const uchar32_t* const c, u32 count=1) const
// The walk starts from end() and continues while the iterator has not reached the start.
1855 const_iterator i(end());
// pos starts at the index of the last character.
1858 s32 pos = size() - 1;
1859 while (!i.atStart())
// j indexes the count entries of the list c; it is declared outside this loop.
1863 for (j=0; j<count; ++j)
1876 //! Finds next occurrence of character.
1877 //! \param c The character to search for.
1878 //! \param startPos The position in the string to start searching.
1879 //! \return Position where the character has been found, or -1 if not found.
1880 s32 findNext(uchar32_t c, u32 startPos) const
// The search iterator is positioned at startPos rather than at the beginning.
1882 const_iterator i(*this, startPos);
1898 //! Finds last occurrence of character.
1899 //! \param c The character to search for.
1900 //! \param start The start position of the reverse search ( default = -1, on end ).
1901 //! \return Position where the character has been found, or -1 if not found.
1902 s32 findLast(uchar32_t c, s32 start = -1) const
// A negative start selects s; the value is clamped to [0, s] and then reduced by one.
// s is declared on a line not shown here (presumably the string size - confirm).
1905 start = core::clamp ( start < 0 ? (s32)s : start, 0, (s32)s ) - 1;
1907 const_iterator i(*this, start);
// The loop runs while the iterator has not reached the start of the string.
1909 while (!i.atStart())
1921 //! Finds last occurrence of a character in a list.
1922 //! \param c A list of characters to find. For example if the method should find the last occurrence of 'a' or 'b', this parameter should be "ab".
1923 //! \param count The number of characters in the list. Usually, this should be strlen(c).
1924 //! \return Position where one of the characters has been found, or -1 if not found.
1925 s32 findLastChar(const uchar32_t* const c, u32 count=1) const
// The walk starts from end() and continues while the iterator has not reached the start.
1930 const_iterator i(end());
1934 while (!i.atStart())
// j indexes the count entries of the list c.
1937 for (u32 j=0; j<count; ++j)
1948 //! Finds another ustring16 in this ustring16.
1949 //! \param str The string to find.
1950 //! \param start The start position of the search.
1951 //! \return Position where the ustring16 has been found, or -1 if not found.
1952 s32 find(const ustring16<TAlloc>& str, const u32 start = 0) const
1954 u32 my_size = size();
1955 u32 their_size = str.size();
// An empty needle, or one longer than what remains after start, is tested first.
// NOTE(review): my_size and start are both u32, so my_size - start wraps around when start > my_size - confirm callers never pass that.
1957 if (their_size == 0 || my_size - start < their_size)
// i marks the candidate start position in this string.
1960 const_iterator i(*this, start);
// i2 is a copy of i used to walk this string alongside j, which walks str from its beginning.
1965 const_iterator i2(i);
1966 const_iterator j(str, 0);
// t1 and t2 hold the current characters of this string and of str as UTF-32 values.
1967 uchar32_t t1 = (uchar32_t)*i2;
1968 uchar32_t t2 = (uchar32_t)*j;
1975 t1 = (uchar32_t)*i2;
1986 //! Finds another ustring16 in this ustring16, comparing raw UTF-16 values.
1987 //! \param str The string to find.
1988 //! \param start The start position of the search, as an index into the raw array.
1989 //! \return Position where the string has been found, or -1 if not found.
1990 s32 find_raw(const ustring16<TAlloc>& str, const u32 start = 0) const
1992 const uchar16_t* data = str.c_str();
// NOTE(review): used-len is unsigned arithmetic and wraps if len > used; confirm a guard precedes this loop.
2003 for (u32 i=start; i<=used-len; ++i)
// Advance j while the needle has not ended and the values at i+j match.
2007 while(data[j] && array[i+j] == data[j])
2019 //! Returns a substring.
2020 //! \param begin: Start of substring.
2021 //! \param length: Length of substring.
2022 //! \return The substring as a new ustring16 (returned by value).
2023 ustring16<TAlloc> subString(u32 begin, s32 length) const
2026 // if start after ustring16
2027 // or no proper substring length
2028 if ((length <= 0) || (begin>=len))
2029 return ustring16<TAlloc>("");
2030 // clamp length to maximal value
2031 if ((length+begin) > len)
// o is the result; room for (length+1) * 2 units is reserved up front.
2034 ustring16<TAlloc> o;
2035 o.reserve((length+1) * 2);
// Iterate from begin until the end of this string or until length reaches zero.
2037 const_iterator i(*this, begin);
2038 while (!i.atEnd() && length)
2049 //! Appends a character to this ustring16.
2050 //! \param c Character to append.
2051 //! \return A reference to our current string.
2052 ustring16<TAlloc>& operator += (char c)
2054 append((uchar32_t)c);
2059 //! Appends a character to this ustring16.
2060 //! \param c Character to append, given as a UTF-32 value.
2061 //! \return A reference to our current string.
2062 ustring16<TAlloc>& operator += (uchar32_t c)
2069 //! Appends a number to this ustring16.
2070 //! \param c Number to append.
2071 //! \return A reference to our current string.
2072 ustring16<TAlloc>& operator += (short c)
// The number is turned into text by constructing a core::stringc from it, and that text is appended.
2074 append(core::stringc(c));
2079 //! Appends a number to this ustring16.
2080 //! \param c Number to append.
2081 //! \return A reference to our current string.
2082 ustring16<TAlloc>& operator += (unsigned short c)
// The number is turned into text by constructing a core::stringc from it, and that text is appended.
2084 append(core::stringc(c));
// This overload sits behind USTRING_CPP0X_NEWLITERALS; the matching #endif is not shown here.
2089 #ifdef USTRING_CPP0X_NEWLITERALS
2090 //! Appends a number to this ustring16.
2091 //! \param c Number to append.
2092 //! \return A reference to our current string.
2093 ustring16<TAlloc>& operator += (int c)
// The number is turned into text by constructing a core::stringc from it, and that text is appended.
2095 append(core::stringc(c));
2100 //! Appends a number to this ustring16.
2101 //! \param c Number to append.
2102 //! \return A reference to our current string.
2103 ustring16<TAlloc>& operator += (unsigned int c)
// The number is turned into text by constructing a core::stringc from it, and that text is appended.
2105 append(core::stringc(c));
2111 //! Appends a number to this ustring16.
2112 //! \param c Number to append.
2113 //! \return A reference to our current string.
2114 ustring16<TAlloc>& operator += (long c)
// The number is turned into text by constructing a core::stringc from it, and that text is appended.
2116 append(core::stringc(c));
2121 //! Appends a number to this ustring16.
2122 //! \param c Number to append.
2123 //! \return A reference to our current string.
2124 ustring16<TAlloc>& operator += (unsigned long c)
// The number is turned into text by constructing a core::stringc from it, and that text is appended.
2126 append(core::stringc(c));
2131 //! Appends a number to this ustring16.
2132 //! \param c Number to append.
2133 //! \return A reference to our current string.
2134 ustring16<TAlloc>& operator += (double c)
// The number is turned into text by constructing a core::stringc from it, and that text is appended.
2136 append(core::stringc(c));
2141 //! Appends a UTF-16 string to this ustring16.
2142 //! \param c Pointer to the UTF-16 string to append.
2143 //! \return A reference to our current string.
2144 ustring16<TAlloc>& operator += (const uchar16_t* const c)
2151 //! Appends a ustring16 to this ustring16.
2152 //! \param other The ustring16 to append.
2153 //! \return A reference to our current string.
2154 ustring16<TAlloc>& operator += (const ustring16<TAlloc>& other)
2161 //! Replaces all characters of a given type with another one.
2162 //! \param toReplace Character to replace.
2163 //! \param replaceWith Character replacing the old one.
2164 //! \return A reference to our current string.
2165 ustring16<TAlloc>& replace(uchar32_t toReplace, uchar32_t replaceWith)
// The string is walked from position 0 with a mutable iterator.
2167 iterator i(*this, 0);
// Dereferencing the iterator yields an access object; its uchar32_t conversion is compared with toReplace.
2170 typename ustring16<TAlloc>::access a = *i;
2171 if ((uchar32_t)a == toReplace)
2179 //! Replaces all instances of a string with another one.
2180 //! Works on raw UTF-16 values; three strategies are used depending on whether the
2180 //! replacement is the same size as, shorter than, or longer than toReplace.
2181 //! \param toReplace The string to replace.
2182 //! \param replaceWith The string replacing the old one.
2183 //! \return A reference to our current string.
2183 ustring16<TAlloc>& replace(const ustring16<TAlloc>& toReplace, const ustring16<TAlloc>& replaceWith)
2185 if (toReplace.size() == 0)
2188 const uchar16_t* other = toReplace.c_str();
2189 const uchar16_t* replace = replaceWith.c_str();
2190 const u32 other_size = toReplace.size_raw();
2191 const u32 replace_size = replaceWith.size_raw();
2193 // Determine the delta. The algorithm will change depending on the delta.
// delta is the signed size difference (replacement minus original) in raw UTF-16 units.
2194 s32 delta = replace_size - other_size;
2196 // A character for character replace. The string will not shrink or grow.
// NOTE(review): find_raw takes a ustring16, but 'other' is a raw pointer, so each call presumably builds a temporary ustring16 - confirm.
2200 while ((pos = find_raw(other, pos)) != -1)
// Overwrite the match in place.
2202 for (u32 i = 0; i < replace_size; ++i)
2203 array[pos + i] = replace[i];
2209 // We are going to be removing some characters. The string will shrink.
// pos is the read index and i the write index; both advance each iteration.
2213 for (u32 pos = 0; pos <= used; ++i, ++pos)
2215 // Is this potentially a match?
2216 if (array[pos] == *other)
2218 // Check to see if we have a match.
2220 for (j = 0; j < other_size; ++j)
2222 if (array[pos + j] != other[j])
2226 // If we have a match, replace characters.
2227 if (j == other_size)
2229 for (j = 0; j < replace_size; ++j)
2230 array[i + j] = replace[j];
// The adjustments are one short of the full sizes because the loop header adds one to both i and pos.
2231 i += replace_size - 1;
2232 pos += other_size - 1;
2237 // No match found, just copy characters.
2238 array[i - 1] = array[pos];
2246 // We are going to be adding characters, so the string size will increase.
2247 // Count the number of times toReplace exists in the string so we can allocate the new size.
2250 while ((pos = find_raw(other, pos)) != -1)
2256 // Re-allocate the string now, if needed.
// len is the total growth: the per-match delta times the number of matches.
2257 u32 len = delta * find_count;
2258 if (used + len >= allocated)
2259 reallocate(used + len);
2263 while ((pos = find_raw(other, pos)) != -1)
// start, ptr and end are pointers into array used to move the tail of the string up by delta.
2265 uchar16_t* start = array + pos + other_size - 1;
2266 uchar16_t* ptr = array + used;
2267 uchar16_t* end = array + used + delta;
2269 // Shift characters to make room for the string.
2270 while (ptr != start)
2277 // Add the new string now.
2278 for (u32 i = 0; i < replace_size; ++i)
2279 array[pos + i] = replace[i];
// Continue searching after the text just inserted.
2281 pos += replace_size;
2285 // Terminate the string and return ourself.
2291 //! Removes characters from a ustring16.
2292 //! \param c The character to remove.
2293 //! \return A reference to our current string.
2294 ustring16<TAlloc>& remove(uchar32_t c)
2298 u32 len = (c > 0xFFFF ? 2 : 1); // Remove characters equal to the size of c as a UTF-16 character.
// i is the read index over the raw array (terminator included); pos is the write index.
2299 for (u32 i=0; i<=used; ++i)
// Values that are not a high surrogate are handled on their own; a high surrogate is combined with the next value.
2302 if (!UTF16_IS_SURROGATE_HI(array[i]))
2304 else if (i + 1 <= used)
2306 // Convert the surrogate pair into a single UTF-32 character.
2307 uc32 = unicode::toUTF32(array[i], array[i + 1]);
// len2 is the number of UTF-16 units the current character occupies.
2309 u32 len2 = (uc32 > 0xFFFF ? 2 : 1);
// Kept characters are copied down to the write position; the second copy is the other half of a pair.
2317 array[pos++] = array[i];
2319 array[pos++] = array[++i];
2327 //! Removes a ustring16 from the ustring16.
2328 //! \param toRemove The string to remove. An empty string leaves this string unchanged.
2329 //! \return A reference to our current string.
2330 ustring16<TAlloc>& remove(const ustring16<TAlloc>& toRemove)
2332 u32 size = toRemove.size_raw();
2333 if (size == 0) return *this;
// tra is the raw UTF-16 data of toRemove; matching compares raw values.
2335 const uchar16_t* tra = toRemove.c_str();
// i is the read index over the raw array (terminator included); pos is the write index.
2338 for (u32 i=0; i<=used; ++i)
2343 if (array[i + j] != tra[j])
2354 array[pos++] = array[i];
2362 //! Removes characters from the ustring16.
2363 //! \param characters The characters to remove.
2364 //! \return A reference to our current string.
2365 ustring16<TAlloc>& removeChars(const ustring16<TAlloc>& characters)
2367 if (characters.size_raw() == 0)
// iter walks the list of characters to remove.
2372 const_iterator iter(characters);
// i is the read index over the raw array (terminator included); pos is the write index.
2373 for (u32 i=0; i<=used; ++i)
2376 if (!UTF16_IS_SURROGATE_HI(array[i]))
2378 else if (i + 1 <= used)
2380 // Convert the surrogate pair into a single UTF-32 character.
2381 uc32 = unicode::toUTF32(array[i], array[i+1]);
// len2 is the number of UTF-16 units the current character occupies.
2383 u32 len2 = (uc32 > 0xFFFF ? 2 : 1);
2387 while (!iter.atEnd())
2389 uchar32_t c = *iter;
2392 found += (c > 0xFFFF ? 2 : 1); // Remove characters equal to the size of c as a UTF-16 character.
// Kept characters are copied down to the write position; the second copy is the other half of a pair.
2401 array[pos++] = array[i];
2403 array[pos++] = array[++i];
2411 //! Trims the ustring16.
2412 //! Removes the specified characters (by default, Latin-1 whitespace) from the beginning and the end of the ustring16.
2413 //! \param whitespace The characters that are to be considered as whitespace.
2414 //! \return A reference to our current string.
2415 ustring16<TAlloc>& trim(const ustring16<TAlloc>& whitespace = " \t\n\r")
// The list-search helpers take UTF-32 characters, so the whitespace set is converted first.
2417 core::array<uchar32_t> utf32white = whitespace.toUTF32();
2419 // find start and end of the substring without the specified characters
// NOTE(review): the count passed is whitespace.used + 1 (raw UTF-16 units plus one), not the number of entries in utf32white - confirm these agree when whitespace contains surrogate pairs.
2420 const s32 begin = findFirstCharNotInList(utf32white.const_pointer(), whitespace.used + 1);
2424 const s32 end = findLastCharNotInList(utf32white.const_pointer(), whitespace.used + 1);
// Keep the inclusive range [begin, end].
2426 return (*this = subString(begin, (end +1) - begin));
2430 //! Erases a character from the ustring16.
2431 //! May be slow, because all elements following after the erased element have to be copied.
2432 //! \param index Index of element to be erased.
2433 //! \return A reference to our current string.
2434 ustring16<TAlloc>& erase(u32 index)
2436 _IRR_DEBUG_BREAK_IF(index>used) // access violation
// The iterator translates the character index into a position in the raw array.
2438 iterator i(*this, index);
// len is the number of UTF-16 units occupied by the character being erased (t).
2441 u32 len = (t > 0xFFFF ? 2 : 1);
// Move everything after the erased character, terminator included, down by len.
2443 for (u32 j = static_cast<u32>(i.getPos()) + len; j <= used; ++j)
2444 array[j - len] = array[j];
2453 //! Validate the existing ustring16, checking for valid surrogate pairs and checking for proper termination.
2453 //! Invalid values are overwritten with unicode::UTF_REPLACEMENT_CHARACTER.
2454 //! \return A reference to our current string.
2455 ustring16<TAlloc>& validate()
2457 // Validate all unicode characters.
2458 for (u32 i=0; i<allocated; ++i)
2460 // Terminate on existing null.
2466 if (UTF16_IS_SURROGATE(array[i]))
// A surrogate in the last slot of the buffer, or a low surrogate in leading position, is replaced.
2468 if (((i+1) >= allocated) || UTF16_IS_SURROGATE_LO(array[i]))
2469 array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
// A high surrogate that is not followed by a low surrogate is replaced.
2470 else if (UTF16_IS_SURROGATE_HI(array[i]) && !UTF16_IS_SURROGATE_LO(array[i+1]))
2471 array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
// Values in 0xFDD0..0xFDEF are replaced as well (this range is reserved as noncharacters by Unicode).
2474 if (array[i] >= 0xFDD0 && array[i] <= 0xFDEF)
2475 array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
// Here used is set to the last slot of the buffer.
2482 used = allocated - 1;
2489 //! Gets the last char of the ustring16, or 0.
2490 //! \return The last char of the ustring16 as a UTF-32 value, or 0.
2491 uchar32_t lastChar() const
// If the final UTF-16 value is a low surrogate, it is the second half of a pair.
2496 if (UTF16_IS_SURROGATE_LO(array[used-1]))
2498 // Make sure we have a paired surrogate.
2502 // Check for an invalid surrogate.
2503 if (!UTF16_IS_SURROGATE_HI(array[used-2]))
2506 // Convert the surrogate pair into a single UTF-32 character.
2507 return unicode::toUTF32(array[used-2], array[used-1]);
// Otherwise the final UTF-16 value is the character itself.
2511 return array[used-1];
// Implementation notes: substrings are built from &array[lastpospos] with a length of pos - lastpos.
// The container is used through push_back() and size(); the return value is the growth of ret.size().
// lastpos is updated from pos and lastpospos from i.getPos() each time a separator is found.
2516 //! Split the ustring16 into parts.
2517 /** This method will split a ustring16 at certain delimiter characters
2518 into the container passed in as reference. The type of the container
2519 has to be given as template parameter. It must provide a push_back and
2521 \param ret The result container
2522 \param c C-style ustring16 of delimiter characters
2523 \param count Number of delimiter characters
2524 \param ignoreEmptyTokens Flag to avoid empty substrings in the result
2525 container. If two delimiters occur without a character in between, an
2526 empty substring would be placed in the result. If this flag is set,
2527 only non-empty strings are stored.
2528 \param keepSeparators Flag which allows to add the separator to the
2529 result ustring16. If this flag is true, the concatenation of the
2530 substrings results in the original ustring16. Otherwise, only the
2531 characters between the delimiters are returned.
2532 \return The number of resulting substrings
2534 template<class container>
2535 u32 split(container& ret, const uchar32_t* const c, u32 count=1, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
2540 const_iterator i(*this);
2541 const u32 oldSize=ret.size();
2545 bool lastWasSeparator = false;
2549 bool foundSeparator = false;
2550 for (u32 j=0; j<count; ++j)
2554 if ((!ignoreEmptyTokens || pos - lastpos != 0) &&
2556 ret.push_back(ustring16<TAlloc>(&array[lastpospos], pos - lastpos));
2557 foundSeparator = true;
2558 lastpos = (keepSeparators ? pos : pos + 1);
2559 lastpospos = (keepSeparators ? i.getPos() : i.getPos() + 1);
2563 lastWasSeparator = foundSeparator;
2569 ret.push_back(ustring16<TAlloc>(&array[lastpospos], s - lastpos));
2570 return ret.size()-oldSize;
// Implementation note: the delimiter string is converted to a UTF-32 array, then the
// pointer-and-count overload of split() is called with that array's pointer and size.
2574 //! Split the ustring16 into parts.
2575 /** This method will split a ustring16 at certain delimiter characters
2576 into the container passed in as reference. The type of the container
2577 has to be given as template parameter. It must provide a push_back and
2579 \param ret The result container
2580 \param c A unicode string of delimiter characters
2581 \param ignoreEmptyTokens Flag to avoid empty substrings in the result
2582 container. If two delimiters occur without a character in between, an
2583 empty substring would be placed in the result. If this flag is set,
2584 only non-empty strings are stored.
2585 \param keepSeparators Flag which allows to add the separator to the
2586 result ustring16. If this flag is true, the concatenation of the
2587 substrings results in the original ustring16. Otherwise, only the
2588 characters between the delimiters are returned.
2589 \return The number of resulting substrings
2591 template<class container>
2592 u32 split(container& ret, const ustring16<TAlloc>& c, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
2594 core::array<uchar32_t> v = c.toUTF32();
2595 return split(ret, v.pointer(), v.size(), ignoreEmptyTokens, keepSeparators);
2599 //! Gets the size of the allocated memory buffer for the string.
2600 //! \return The size of the allocated memory buffer.
// NOTE(review): the unit is not stated; presumably UTF-16 code units, as with 'allocated' elsewhere in this class - confirm.
2601 u32 capacity() const
2607 //! Returns the raw number of UTF-16 code units in the string, which counts each surrogate of a pair individually.
2608 //! \return The raw number of UTF-16 code units, excluding the trailing NUL.
2609 u32 size_raw() const
2615 //! Inserts a character into the string.
2616 //! \param c The character to insert.
2617 //! \param pos The position to insert the character.
2618 //! \return A reference to our current string.
2619 ustring16<TAlloc>& insert(uchar32_t c, u32 pos)
// len is the number of UTF-16 units c needs: two above 0xFFFF, otherwise one.
2621 u8 len = (c > 0xFFFF ? 2 : 1);
2623 if (used + len >= allocated)
2624 reallocate(used + len);
// The iterator translates the character position into an index into the raw array.
2628 iterator iter(*this, pos);
// Shift existing values up by len to open a gap at the insertion point.
// NOTE(review): the loop starts at used - 2 and stops at getPos() regardless of len; confirm the last character is preserved and that used cannot be below 2 here.
2629 for (u32 i = used - 2; i > iter.getPos(); --i)
2630 array[i] = array[i - len];
2634 // c will be multibyte, so split it up into a surrogate pair.
// x is the low 16 bits of c; vh and vl are the high and low surrogates built from c.
2635 uchar16_t x = static_cast<uchar16_t>(c);
2636 uchar16_t vh = UTF16_HI_SURROGATE | ((((c >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
2637 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
2638 array[iter.getPos()] = vh;
2639 array[iter.getPos()+1] = vl;
// Single-unit case: store c directly.
2643 array[iter.getPos()] = static_cast<uchar16_t>(c);
2650 //! Inserts a string into the string.
2651 //! \param c The string to insert. An empty string leaves this string unchanged.
2652 //! \param pos The position to insert the string.
2653 //! \return A reference to our current string.
2654 ustring16<TAlloc>& insert(const ustring16<TAlloc>& c, u32 pos)
2656 u32 len = c.size_raw();
2657 if (len == 0) return *this;
2659 if (used + len >= allocated)
2660 reallocate(used + len);
// The iterator translates the character position into an index into the raw array.
2664 iterator iter(*this, pos);
// Shift existing values up by len to open a gap for the inserted string.
// NOTE(review): the loop starts at used - 2 and stops at getPos() + len; confirm the boundary values are moved as intended.
2665 for (u32 i = used - 2; i > iter.getPos() + len; --i)
2666 array[i] = array[i - len];
// s is the raw UTF-16 data of the string being inserted.
2668 const uchar16_t* s = c.c_str();
2669 for (u32 i = 0; i < len; ++i)
2680 //! Inserts a raw UTF-16 value into the string.
2681 //! \param c The UTF-16 value to insert.
2682 //! \param pos The position to insert the value, as an index into the raw array.
2683 //! \return A reference to our current string.
2684 ustring16<TAlloc>& insert_raw(uchar16_t c, u32 pos)
2686 if (used + 1 >= allocated)
2687 reallocate(used + 1);
// Shift values above pos up by one to open a slot.
2691 for (u32 i = used - 1; i > pos; --i)
2692 array[i] = array[i - 1];
2700 //! Removes a character from string.
2701 //! \param pos Position of the character to remove.
2702 //! \return A reference to our current string.
2703 ustring16<TAlloc>& erase_raw(u32 pos)
2705 for (u32 i=pos; i<=used; ++i)
2707 array[i] = array[i + 1];
2715 //! Replaces a raw UTF-16 value in the string.
2716 //! \param c The new UTF-16 value.
2717 //! \param pos The position of the value to replace (presumably an index into the raw array, like the other *_raw methods - confirm).
2718 //! \return A reference to our current string.
2719 ustring16<TAlloc>& replace_raw(uchar16_t c, u32 pos)
2726 //! Returns an iterator to the beginning of the string.
2727 //! \return A mutable iterator positioned at the first character.
2730 iterator i(*this, 0);
2735 //! Returns an iterator to the beginning of the string.
2736 //! \return A const iterator positioned at the first character.
2737 const_iterator begin() const
2739 const_iterator i(*this, 0);
2744 //! Returns a const iterator to the beginning of the string.
2745 //! \return A const iterator positioned at the first character.
2746 const_iterator cbegin() const
2748 const_iterator i(*this, 0);
2753 //! Returns an iterator to the end of the string.
2754 //! \return An iterator to the end of the string.
// The iterator is first constructed at position 0; the lines that follow are not shown here.
2757 iterator i(*this, 0);
2763 //! Returns an iterator to the end of the string.
2764 //! \return A const iterator to the end of the string.
2765 const_iterator end() const
// The iterator is first constructed at position 0; the lines that follow are not shown here.
2767 const_iterator i(*this, 0);
2773 //! Returns a const iterator to the end of the string.
2774 //! \return A const iterator to the end of the string.
2775 const_iterator cend() const
// The iterator is first constructed at position 0; the lines that follow are not shown here.
2777 const_iterator i(*this, 0);
2783 //! Converts the string to a UTF-8 encoded string.
2784 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2785 //! \return A string containing the UTF-8 encoded string.
2786 core::string<uchar8_t> toUTF8_s(const bool addBOM = false) const
2788 core::string<uchar8_t> ret;
// Reserve four bytes per UTF-16 unit, plus the BOM if requested, plus one.
2789 ret.reserve(used * 4 + (addBOM ? unicode::BOM_UTF8_LEN : 0) + 1);
2790 const_iterator iter(*this, 0);
2792 // Add the byte order mark if the user wants it.
2795 ret.append(unicode::BOM_ENCODE_UTF8[0]);
2796 ret.append(unicode::BOM_ENCODE_UTF8[1]);
2797 ret.append(unicode::BOM_ENCODE_UTF8[2]);
// Each character is read as a UTF-32 value and encoded as one to four bytes.
2800 while (!iter.atEnd())
2802 uchar32_t c = *iter;
// Four-byte form: lead byte 0xF0 | top 3 bits, then three 0x80 | 6-bit continuation bytes.
2805 uchar8_t b1 = (0x1E << 3) | ((c >> 18) & 0x7);
2806 uchar8_t b2 = (0x2 << 6) | ((c >> 12) & 0x3F);
2807 uchar8_t b3 = (0x2 << 6) | ((c >> 6) & 0x3F);
2808 uchar8_t b4 = (0x2 << 6) | (c & 0x3F);
// Three-byte form: lead byte 0xE0 | top 4 bits, then two continuation bytes.
2816 uchar8_t b1 = (0xE << 4) | ((c >> 12) & 0xF);
2817 uchar8_t b2 = (0x2 << 6) | ((c >> 6) & 0x3F);
2818 uchar8_t b3 = (0x2 << 6) | (c & 0x3F);
// Two-byte form: lead byte 0xC0 | top 5 bits, then one continuation byte.
2825 uchar8_t b1 = (0x6 << 5) | ((c >> 6) & 0x1F);
2826 uchar8_t b2 = (0x2 << 6) | (c & 0x3F);
// One-byte form: the value is stored as is.
2832 ret.append(static_cast<uchar8_t>(c));
2840 //! Converts the string to a UTF-8 encoded string array.
2841 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2842 //! \return An array containing the UTF-8 encoded string.
2843 core::array<uchar8_t> toUTF8(const bool addBOM = false) const
// Size the array for four bytes per UTF-16 unit, plus the BOM if requested, plus one.
2845 core::array<uchar8_t> ret(used * 4 + (addBOM ? unicode::BOM_UTF8_LEN : 0) + 1);
2846 const_iterator iter(*this, 0);
2848 // Add the byte order mark if the user wants it.
2851 ret.push_back(unicode::BOM_ENCODE_UTF8[0]);
2852 ret.push_back(unicode::BOM_ENCODE_UTF8[1]);
2853 ret.push_back(unicode::BOM_ENCODE_UTF8[2]);
// Each character is read as a UTF-32 value and encoded as one to four bytes.
2856 while (!iter.atEnd())
2858 uchar32_t c = *iter;
// Four-byte form: lead byte 0xF0 | top 3 bits, then three 0x80 | 6-bit continuation bytes.
2861 uchar8_t b1 = (0x1E << 3) | ((c >> 18) & 0x7);
2862 uchar8_t b2 = (0x2 << 6) | ((c >> 12) & 0x3F);
2863 uchar8_t b3 = (0x2 << 6) | ((c >> 6) & 0x3F);
2864 uchar8_t b4 = (0x2 << 6) | (c & 0x3F);
// Three-byte form: lead byte 0xE0 | top 4 bits, then two continuation bytes.
2872 uchar8_t b1 = (0xE << 4) | ((c >> 12) & 0xF);
2873 uchar8_t b2 = (0x2 << 6) | ((c >> 6) & 0x3F);
2874 uchar8_t b3 = (0x2 << 6) | (c & 0x3F);
// Two-byte form: lead byte 0xC0 | top 5 bits, then one continuation byte.
2881 uchar8_t b1 = (0x6 << 5) | ((c >> 6) & 0x1F);
2882 uchar8_t b2 = (0x2 << 6) | (c & 0x3F);
// One-byte form: the value is stored as is.
2888 ret.push_back(static_cast<uchar8_t>(c));
// Only compiled when USTRING_CPP0X_NEWLITERALS is defined, because it relies on char16_t.
2897 #ifdef USTRING_CPP0X_NEWLITERALS // C++0x
2898 //! Converts the string to a UTF-16 encoded string.
2899 //! \param endian The desired endianness of the string.
2900 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2901 //! \return A string containing the UTF-16 encoded string.
2902 core::string<char16_t> toUTF16_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2904 core::string<char16_t> ret;
2905 ret.reserve(used + (addBOM ? unicode::BOM_UTF16_LEN : 0) + 1);
2907 // Add the BOM if specified.
// NOTE(review): ret holds no characters yet; writing ret[0] or through ret.c_str() relies on reserve() having made that storage addressable - confirm against core::string.
2910 if (endian == unicode::EUTFEE_NATIVE)
2911 ret[0] = unicode::BOM;
2912 else if (endian == unicode::EUTFEE_LITTLE)
// For an explicit endianness the BOM is written byte by byte so its byte order is exact.
2914 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ret.c_str());
2915 *ptr8++ = unicode::BOM_ENCODE_UTF16_LE[0];
2916 *ptr8 = unicode::BOM_ENCODE_UTF16_LE[1];
2920 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ret.c_str());
2921 *ptr8++ = unicode::BOM_ENCODE_UTF16_BE[0];
2922 *ptr8 = unicode::BOM_ENCODE_UTF16_BE[1];
// Byte-swap every value when the requested endianness differs from ours.
2927 if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
2929 char16_t* ptr = ret.c_str();
2930 for (u32 i = 0; i < ret.size(); ++i)
// NOTE(review): ptr is both incremented and read in one expression; the order of those two is not guaranteed before C++17.
2931 *ptr++ = unicode::swapEndian16(*ptr);
2938 //! Converts the string to a UTF-16 encoded string array.
2939 //! The toUTF16_s() variant is only available when USTRING_CPP0X_NEWLITERALS is defined, due to limitations with Irrlicht's string class.
2940 //! \param endian The desired endianness of the string.
2941 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2942 //! \return An array containing the UTF-16 encoded string.
2943 core::array<uchar16_t> toUTF16(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
// Size the array for the raw string, an optional BOM and one extra slot; ptr is the write position.
2945 core::array<uchar16_t> ret(used + (addBOM ? unicode::BOM_UTF16_LEN : 0) + 1);
2946 uchar16_t* ptr = ret.pointer();
2948 // Add the BOM if specified.
2951 if (endian == unicode::EUTFEE_NATIVE)
2952 *ptr = unicode::BOM;
2953 else if (endian == unicode::EUTFEE_LITTLE)
// For an explicit endianness the BOM is written byte by byte so its byte order is exact.
2955 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ptr);
2956 *ptr8++ = unicode::BOM_ENCODE_UTF16_LE[0];
2957 *ptr8 = unicode::BOM_ENCODE_UTF16_LE[1];
2961 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ptr);
2962 *ptr8++ = unicode::BOM_ENCODE_UTF16_BE[0];
2963 *ptr8 = unicode::BOM_ENCODE_UTF16_BE[1];
// Copy the used UTF-16 values (the terminator is not copied).
2968 memcpy((void*)ptr, (void*)array, used * sizeof(uchar16_t));
2969 if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
// NOTE(review): the bound i <= used also swaps ptr[used], which the memcpy above did not write - confirm this is harmless.
2971 for (u32 i = 0; i <= used; ++i)
2972 ptr[i] = unicode::swapEndian16(ptr[i]);
// The array was filled through its raw pointer, so its element count is set explicitly.
2974 ret.set_used(used + (addBOM ? unicode::BOM_UTF16_LEN : 0));
// Only compiled when USTRING_CPP0X_NEWLITERALS is defined, because it relies on char32_t.
2980 #ifdef USTRING_CPP0X_NEWLITERALS // C++0x
2981 //! Converts the string to a UTF-32 encoded string.
2982 //! \param endian The desired endianness of the string.
2983 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2984 //! \return A string containing the UTF-32 encoded string.
2985 core::string<char32_t> toUTF32_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2987 core::string<char32_t> ret;
// Reserve one slot per character, plus one, plus the BOM if requested.
2988 ret.reserve(size() + 1 + (addBOM ? unicode::BOM_UTF32_LEN : 0));
2989 const_iterator iter(*this, 0);
2991 // Add the BOM if specified.
2994 if (endian == unicode::EUTFEE_NATIVE)
2995 ret.append(unicode::BOM);
// For an explicit endianness the BOM is assembled byte by byte in t.chunk.
// t is declared on lines not shown here (presumably a union of four bytes and one 32-bit value - confirm).
3004 if (endian == unicode::EUTFEE_LITTLE)
3006 t.chunk[0] = unicode::BOM_ENCODE_UTF32_LE[0];
3007 t.chunk[1] = unicode::BOM_ENCODE_UTF32_LE[1];
3008 t.chunk[2] = unicode::BOM_ENCODE_UTF32_LE[2];
3009 t.chunk[3] = unicode::BOM_ENCODE_UTF32_LE[3];
3013 t.chunk[0] = unicode::BOM_ENCODE_UTF32_BE[0];
3014 t.chunk[1] = unicode::BOM_ENCODE_UTF32_BE[1];
3015 t.chunk[2] = unicode::BOM_ENCODE_UTF32_BE[2];
3016 t.chunk[3] = unicode::BOM_ENCODE_UTF32_BE[3];
// Each character is read as a UTF-32 value and byte-swapped when the requested endianness differs from ours.
3022 while (!iter.atEnd())
3024 uchar32_t c = *iter;
3025 if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
3026 c = unicode::swapEndian32(c);
3035 //! Converts the string to a UTF-32 encoded string array.
3036 //! A string-returning variant, toUTF32_s(), is only provided when USTRING_CPP0X_NEWLITERALS is defined.
3037 //! \param endian The desired endianness of the string.
3038 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3039 //! \return An array containing the UTF-32 encoded string.
3040 core::array<uchar32_t> toUTF32(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
// Capacity: size() elements, the optional BOM, and one extra element.
3042 core::array<uchar32_t> ret(size() + (addBOM ? unicode::BOM_UTF32_LEN : 0) + 1);
// Iterator positioned at index 0 of this string.
3043 const_iterator iter(*this, 0);
3045 // Add the BOM if specified.
// Native order: the BOM constant is pushed as-is.
3048 if (endian == unicode::EUTFEE_NATIVE)
3049 ret.push_back(unicode::BOM);
// Explicit order: fill the four bytes of `t` individually, then push the
// resulting 32-bit value (t.full) so the byte sequence in memory is fixed.
3058 if (endian == unicode::EUTFEE_LITTLE)
3060 t.chunk[0] = unicode::BOM_ENCODE_UTF32_LE[0];
3061 t.chunk[1] = unicode::BOM_ENCODE_UTF32_LE[1];
3062 t.chunk[2] = unicode::BOM_ENCODE_UTF32_LE[2];
3063 t.chunk[3] = unicode::BOM_ENCODE_UTF32_LE[3];
3067 t.chunk[0] = unicode::BOM_ENCODE_UTF32_BE[0];
3068 t.chunk[1] = unicode::BOM_ENCODE_UTF32_BE[1];
3069 t.chunk[2] = unicode::BOM_ENCODE_UTF32_BE[2];
3070 t.chunk[3] = unicode::BOM_ENCODE_UTF32_BE[3];
3072 ret.push_back(t.full);
// Walk the string; dereferencing the iterator yields one uchar32_t per step.
3077 while (!iter.atEnd())
3079 uchar32_t c = *iter;
// Swap bytes only when a specific endianness was requested and it differs from the stored one.
3080 if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
3081 c = unicode::swapEndian32(c);
3089 //! Converts the string to a wchar_t encoded string.
3090 /** The size of a wchar_t changes depending on the platform. This function will store a
3091 correct UTF-8, -16, or -32 encoded string depending on the size of a wchar_t. **/
3092 //! \param endian The desired endianness of the string.
3093 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3094 //! \return A string containing the wchar_t encoded string.
3095 core::string<wchar_t> toWCHAR_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
// 4-byte wchar_t: go through the UTF-32 array and build the stringw from its buffer.
3097 if (sizeof(wchar_t) == 4)
3099 core::array<uchar32_t> a(toUTF32(endian, addBOM));
3100 core::stringw ret(a.pointer());
// 2-byte wchar_t: UTF-16.
3103 else if (sizeof(wchar_t) == 2)
// Native order without a BOM needs no conversion: build directly from the internal buffer.
3105 if (endian == unicode::EUTFEE_NATIVE && addBOM == false)
3107 core::stringw ret(array);
3112 core::array<uchar16_t> a(toUTF16(endian, addBOM));
3113 core::stringw ret(a.pointer());
// 1-byte wchar_t: UTF-8. `endian` is not forwarded; toUTF8() is called with addBOM only.
3117 else if (sizeof(wchar_t) == 1)
3119 core::array<uchar8_t> a(toUTF8(addBOM));
3120 core::stringw ret(a.pointer());
3124 // Shouldn't happen.
3125 return core::stringw();
3129 //! Converts the string to a wchar_t encoded string array.
3130 /** The size of a wchar_t changes depending on the platform. This function will store a
3131 correct UTF-8, -16, or -32 encoded string depending on the size of a wchar_t. **/
3132 //! \param endian The desired endianness of the string.
3133 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3134 //! \return An array containing the wchar_t encoded string.
3135 core::array<wchar_t> toWCHAR(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
// 4-byte wchar_t: produce UTF-32, then copy it element-for-element into a wchar_t array.
3137 if (sizeof(wchar_t) == 4)
3139 core::array<uchar32_t> a(toUTF32(endian, addBOM));
3140 core::array<wchar_t> ret(a.size());
// set_used() first: the data is written through the raw pointer by memcpy below.
3141 ret.set_used(a.size());
3142 memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar32_t));
// 2-byte wchar_t: UTF-16.
3145 if (sizeof(wchar_t) == 2)
// Native order without a BOM: copy straight from the internal buffer.
3147 if (endian == unicode::EUTFEE_NATIVE && addBOM == false)
// NOTE(review): this array is sized to exactly `used` and exactly `used` code units are copied,
// whereas toUTF16() sizes its array with one extra element — confirm whether callers of this
// fast path expect the same trailing element as the other paths.
3149 core::array<wchar_t> ret(used);
3151 memcpy((void*)ret.pointer(), (void*)array, used * sizeof(uchar16_t));
3156 core::array<uchar16_t> a(toUTF16(endian, addBOM));
3157 core::array<wchar_t> ret(a.size());
3158 ret.set_used(a.size());
3159 memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar16_t));
// 1-byte wchar_t: UTF-8. `endian` is not forwarded; toUTF8() is called with addBOM only.
3163 if (sizeof(wchar_t) == 1)
3165 core::array<uchar8_t> a(toUTF8(addBOM));
3166 core::array<wchar_t> ret(a.size());
3167 ret.set_used(a.size());
3168 memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar8_t));
3172 // Shouldn't happen.
3173 return core::array<wchar_t>();
3176 //! Converts the string to a properly encoded io::path string.
//! The encoding is chosen at compile time: wchar_t when _IRR_WCHAR_FILESYSTEM is defined, UTF-8 otherwise.
3177 //! \param endian The desired endianness of the string.
3178 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3179 //! \return An io::path string containing the properly encoded string.
3180 io::path toPATH_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3182 #if defined(_IRR_WCHAR_FILESYSTEM)
3183 return toWCHAR_s(endian, addBOM);
// UTF-8 build: `endian` is unused here; only addBOM is passed on.
3185 return toUTF8_s(addBOM);
3189 //! Loads an unknown stream of data.
3190 //! Will attempt to determine if the stream is unicode data. Useful for loading from files.
3191 //! \param data The data stream to load from.
3192 //! \param data_size The length of the data string.
//! data_size is a byte count: it is divided by 2 or 4 below before being passed on as an element count.
3193 //! \return A reference to our current string.
3194 ustring16<TAlloc>& loadDataStream(const char* data, size_t data_size)
3196 // Clear our string.
// Classify the stream from its leading byte-order mark.
// NOTE(review): data_size is not passed to determineUnicodeBOM() — confirm it cannot read past
// the end of a buffer that is shorter than a BOM.
3201 unicode::EUTF_ENCODE e = unicode::determineUnicodeBOM(data);
3205 case unicode::EUTFE_UTF8:
3206 append((uchar8_t*)data, data_size);
3209 case unicode::EUTFE_UTF16:
3210 case unicode::EUTFE_UTF16_BE:
3211 case unicode::EUTFE_UTF16_LE:
// Integer division: a trailing odd byte is not included in the element count.
3212 append((uchar16_t*)data, data_size / 2);
3215 case unicode::EUTFE_UTF32:
3216 case unicode::EUTFE_UTF32_BE:
3217 case unicode::EUTFE_UTF32_LE:
// Integer division: up to three trailing bytes are not included in the element count.
3218 append((uchar32_t*)data, data_size / 4);
3225 //! Gets the encoding of the Unicode string this class contains.
3226 //! \return An enum describing the current encoding of this string.
//! The value is returned by copy, so the leading const on the return type has no effect for callers.
3227 const unicode::EUTF_ENCODE getEncoding() const
3232 //! Gets the endianness of the Unicode string this class contains.
3233 //! \return An enum describing the endianness of this string.
//! Derived from the `encoding` member; never returns EUTFEE_NATIVE.
3234 const unicode::EUTF_ENDIAN getEndianness() const
// Only the two explicit little-endian encodings report EUTFEE_LITTLE.
3236 if (encoding == unicode::EUTFE_UTF16_LE ||
3237 encoding == unicode::EUTFE_UTF32_LE)
3238 return unicode::EUTFEE_LITTLE;
// Every other encoding value falls through to big-endian.
3239 else return unicode::EUTFEE_BIG;
3244 //! Reallocate the string, making it bigger or smaller.
//! Allocates a fresh buffer, copies over as much of the old content as fits, and frees the old buffer.
3245 //! \param new_size The new size of the string.
//! One additional element beyond new_size is always allocated.
3246 void reallocate(u32 new_size)
// Keep the old buffer so its contents can be copied and it can be freed afterwards.
3248 uchar16_t* old_array = array;
3250 array = allocator.allocate(new_size + 1); //new u16[new_size];
3251 allocated = new_size + 1;
// First allocation: there is nothing to copy or free.
3252 if (old_array == 0) return;
// Copy min(used, new_size) elements; the inclusive bound copies one element beyond that count.
3254 u32 amount = used < new_size ? used : new_size;
3255 for (u32 i=0; i<=amount; ++i)
3256 array[i] = old_array[i];
// When shrinking below the current length, clamp `used` so it stays inside the new buffer.
3258 if (allocated <= used)
3259 used = allocated - 1;
3263 allocator.deallocate(old_array); // delete [] old_array;
3266 //--- member variables
// Encoding tag of the stored data; getEndianness() derives its result from this value.
3269 unicode::EUTF_ENCODE encoding;
3273 //irrAllocator<uchar16_t> allocator;
//! Convenience alias: ustring16 instantiated with irrAllocator<uchar16_t>.
3276 typedef ustring16<irrAllocator<uchar16_t> > ustring;
3279 //! Appends two ustring16s.
//! \param left The string the result starts from.
//! \param right The string to append.
//! \return The combined string, by value.
3280 template <typename TAlloc>
3281 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const ustring16<TAlloc>& right)
// Build the result from a copy; `left` is taken by const reference.
3283 ustring16<TAlloc> ret(left);
3289 //! Appends a ustring16 and a null-terminated unicode string.
//! \param left The string the result starts from.
//! \param right Pointer to the null-terminated characters to append; B is the character type.
//! \return The combined string, by value.
3290 template <typename TAlloc, class B>
3291 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const B* const right)
// Build the result from a copy; `left` is taken by const reference.
3293 ustring16<TAlloc> ret(left);
3299 //! Appends a ustring16 and a null-terminated unicode string.
//! \param left Pointer to the null-terminated characters the result starts from; B is the character type.
//! \param right The string to append.
//! \return The combined string, by value.
3300 template <class B, typename TAlloc>
3301 inline ustring16<TAlloc> operator+(const B* const left, const ustring16<TAlloc>& right)
// The result is constructed from the raw character pointer.
3303 ustring16<TAlloc> ret(left);
3309 //! Appends a ustring16 and an Irrlicht string.
//! \param left The string the result starts from.
//! \param right The Irrlicht string (any character type B and allocator BAlloc) to append.
//! \return The combined string, by value.
3310 template <typename TAlloc, typename B, typename BAlloc>
3311 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const string<B, BAlloc>& right)
// Build the result from a copy; `left` is taken by const reference.
3313 ustring16<TAlloc> ret(left);
3319 //! Appends a ustring16 and an Irrlicht string.
//! \param left The Irrlicht string (any character type B and allocator BAlloc) the result starts from.
//! \param right The string to append.
//! \return The combined string, by value.
3320 template <typename TAlloc, typename B, typename BAlloc>
3321 inline ustring16<TAlloc> operator+(const string<B, BAlloc>& left, const ustring16<TAlloc>& right)
// The result is constructed from the Irrlicht string.
3323 ustring16<TAlloc> ret(left);
3329 //! Appends a ustring16 and a std::basic_string.
//! \param left The string the result starts from.
//! \param right The std::basic_string to append.
//! \return The combined string, by value.
3330 template <typename TAlloc, typename B, typename A, typename BAlloc>
3331 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const std::basic_string<B, A, BAlloc>& right)
// Build the result from a copy; `left` is taken by const reference.
3333 ustring16<TAlloc> ret(left);
3339 //! Appends a ustring16 and a std::basic_string.
//! \param left The std::basic_string the result starts from.
//! \param right The string to append.
//! \return The combined string, by value.
3340 template <typename TAlloc, typename B, typename A, typename BAlloc>
3341 inline ustring16<TAlloc> operator+(const std::basic_string<B, A, BAlloc>& left, const ustring16<TAlloc>& right)
// The result is constructed from the std::basic_string.
3343 ustring16<TAlloc> ret(left);
3349 //! Appends a ustring16 and a char.
//! \param left The string the result starts from.
//! \param right The single character to append.
//! \return The combined string, by value.
3350 template <typename TAlloc>
3351 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const char right)
// Build the result from a copy; `left` is taken by const reference.
3353 ustring16<TAlloc> ret(left);
3359 //! Appends a ustring16 and a char.
//! \param left The single character the result starts from.
//! \param right The string to append.
//! \return The combined string, by value.
3360 template <typename TAlloc>
3361 inline ustring16<TAlloc> operator+(const char left, const ustring16<TAlloc>& right)
// The result is constructed from the single character.
3363 ustring16<TAlloc> ret(left);
3369 #ifdef USTRING_CPP0X_NEWLITERALS
3370 //! Appends a ustring16 and a uchar32_t.
//! Compiled only when USTRING_CPP0X_NEWLITERALS is defined.
//! \param left The string the result starts from.
//! \param right The 32-bit character to append.
//! \return The combined string, by value.
3371 template <typename TAlloc>
3372 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const uchar32_t right)
// Build the result from a copy; `left` is taken by const reference.
3374 ustring16<TAlloc> ret(left);
3380 //! Appends a ustring16 and a uchar32_t.
//! \param left The 32-bit character the result starts from.
//! \param right The string to append.
//! \return The combined string, by value.
3381 template <typename TAlloc>
3382 inline ustring16<TAlloc> operator+(const uchar32_t left, const ustring16<TAlloc>& right)
// The result is constructed from the single 32-bit character.
3384 ustring16<TAlloc> ret(left);
3391 //! Appends a ustring16 and a short.
//! \param left The string the result starts from.
//! \param right The number to append; it is turned into text through core::stringc.
//! \return The combined string, by value.
3392 template <typename TAlloc>
3393 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const short right)
// Build the result from a copy; `left` is taken by const reference.
3395 ustring16<TAlloc> ret(left);
3396 ret += core::stringc(right);
3401 //! Appends a ustring16 to the text form of a short.
//! \param left The number the result starts from; it is turned into text through core::stringc.
//! \param right The string to append.
//! \return The combined string, by value.
3402 template <typename TAlloc>
3403 inline ustring16<TAlloc> operator+(const short left, const ustring16<TAlloc>& right)
// The extra parentheses keep this from being parsed as a function declaration.
3405 ustring16<TAlloc> ret((core::stringc(left)));
3411 //! Appends a ustring16 and an unsigned short.
//! \param left The string the result starts from.
//! \param right The number to append; it is turned into text through core::stringc.
//! \return The combined string, by value.
3412 template <typename TAlloc>
3413 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned short right)
// Build the result from a copy; `left` is taken by const reference.
3415 ustring16<TAlloc> ret(left);
3416 ret += core::stringc(right);
3421 //! Appends a ustring16 to the text form of an unsigned short.
//! \param left The number the result starts from; it is turned into text through core::stringc.
//! \param right The string to append.
//! \return The combined string, by value.
3422 template <typename TAlloc>
3423 inline ustring16<TAlloc> operator+(const unsigned short left, const ustring16<TAlloc>& right)
// The extra parentheses keep this from being parsed as a function declaration.
3425 ustring16<TAlloc> ret((core::stringc(left)));
3431 //! Appends a ustring16 and an int.
//! \param left The string the result starts from.
//! \param right The number to append; it is turned into text through core::stringc.
//! \return The combined string, by value.
3432 template <typename TAlloc>
3433 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const int right)
// Build the result from a copy; `left` is taken by const reference.
3435 ustring16<TAlloc> ret(left);
3436 ret += core::stringc(right);
3441 //! Appends a ustring16 to the text form of an int.
//! \param left The number the result starts from; it is turned into text through core::stringc.
//! \param right The string to append.
//! \return The combined string, by value.
3442 template <typename TAlloc>
3443 inline ustring16<TAlloc> operator+(const int left, const ustring16<TAlloc>& right)
// The extra parentheses keep this from being parsed as a function declaration.
3445 ustring16<TAlloc> ret((core::stringc(left)));
3451 //! Appends a ustring16 and an unsigned int.
//! \param left The string the result starts from.
//! \param right The number to append; it is turned into text through core::stringc.
//! \return The combined string, by value.
3452 template <typename TAlloc>
3453 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned int right)
// Build the result from a copy; `left` is taken by const reference.
3455 ustring16<TAlloc> ret(left);
3456 ret += core::stringc(right);
3461 //! Appends a ustring16 to the text form of an unsigned int.
//! \param left The number the result starts from; it is turned into text through core::stringc.
//! \param right The string to append.
//! \return The combined string, by value.
3462 template <typename TAlloc>
3463 inline ustring16<TAlloc> operator+(const unsigned int left, const ustring16<TAlloc>& right)
// The extra parentheses keep this from being parsed as a function declaration.
3465 ustring16<TAlloc> ret((core::stringc(left)));
3471 //! Appends a ustring16 and a long.
//! \param left The string the result starts from.
//! \param right The number to append; it is turned into text through core::stringc.
//! \return The combined string, by value.
3472 template <typename TAlloc>
3473 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const long right)
// Build the result from a copy; `left` is taken by const reference.
3475 ustring16<TAlloc> ret(left);
3476 ret += core::stringc(right);
3481 //! Appends a ustring16 to the text form of a long.
//! \param left The number the result starts from; it is turned into text through core::stringc.
//! \param right The string to append.
//! \return The combined string, by value.
3482 template <typename TAlloc>
3483 inline ustring16<TAlloc> operator+(const long left, const ustring16<TAlloc>& right)
// The extra parentheses keep this from being parsed as a function declaration.
3485 ustring16<TAlloc> ret((core::stringc(left)));
3491 //! Appends a ustring16 and an unsigned long.
//! \param left The string the result starts from.
//! \param right The number to append; it is turned into text through core::stringc.
//! \return The combined string, by value.
3492 template <typename TAlloc>
3493 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned long right)
// Build the result from a copy; `left` is taken by const reference.
3495 ustring16<TAlloc> ret(left);
3496 ret += core::stringc(right);
3501 //! Appends a ustring16 to the text form of an unsigned long.
//! \param left The number the result starts from; it is turned into text through core::stringc.
//! \param right The string to append.
//! \return The combined string, by value.
3502 template <typename TAlloc>
3503 inline ustring16<TAlloc> operator+(const unsigned long left, const ustring16<TAlloc>& right)
// The extra parentheses keep this from being parsed as a function declaration.
3505 ustring16<TAlloc> ret((core::stringc(left)));
3511 //! Appends a ustring16 and a float.
//! \param left The string the result starts from.
//! \param right The number to append; it is turned into text through core::stringc.
//! \return The combined string, by value.
3512 template <typename TAlloc>
3513 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const float right)
// Build the result from a copy; `left` is taken by const reference.
3515 ustring16<TAlloc> ret(left);
3516 ret += core::stringc(right);
3521 //! Appends a ustring16 to the text form of a float.
//! \param left The number the result starts from; it is turned into text through core::stringc.
//! \param right The string to append.
//! \return The combined string, by value.
3522 template <typename TAlloc>
3523 inline ustring16<TAlloc> operator+(const float left, const ustring16<TAlloc>& right)
// The extra parentheses keep this from being parsed as a function declaration.
3525 ustring16<TAlloc> ret((core::stringc(left)));
3531 //! Appends a ustring16 and a double.
//! \param left The string the result starts from.
//! \param right The number to append; it is turned into text through core::stringc.
//! \return The combined string, by value.
3532 template <typename TAlloc>
3533 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const double right)
// Build the result from a copy; `left` is taken by const reference.
3535 ustring16<TAlloc> ret(left);
3536 ret += core::stringc(right);
3541 //! Appends a ustring16 to the text form of a double.
//! \param left The number the result starts from; it is turned into text through core::stringc.
//! \param right The string to append.
//! \return The combined string, by value.
3542 template <typename TAlloc>
3543 inline ustring16<TAlloc> operator+(const double left, const ustring16<TAlloc>& right)
// The extra parentheses keep this from being parsed as a function declaration.
3545 ustring16<TAlloc> ret((core::stringc(left)));
3551 #ifdef USTRING_CPP0X
3552 //! Appends two ustring16s.
//! Rvalue overload: reuses the expiring `right` instead of building a new string.
//! \param left The string whose contents are placed in front.
//! \param right The expiring string that is modified in place.
//! \return An rvalue reference to `right` itself.
//! NOTE(review): the result aliases the caller's argument; if that argument is a temporary,
//! binding the result to a reference leaves it dangling after the full expression. Other
//! rvalue overloads in this file (e.g. the `const char` ones) return by value instead.
3553 template <typename TAlloc>
3554 inline ustring16<TAlloc>&& operator+(const ustring16<TAlloc>& left, ustring16<TAlloc>&& right)
3556 //std::cout << "MOVE operator+(&, &&)" << std::endl;
// Insert `left` at position 0 of `right`.
3557 right.insert(left, 0);
3558 return std::move(right);
3562 //! Appends two ustring16s.
//! Rvalue overload: hands back the expiring `left` instead of building a new string.
//! \param left The expiring string that is reused for the result.
//! \param right The string to append.
//! \return An rvalue reference to `left` itself.
//! NOTE(review): the result aliases the caller's argument; if that argument is a temporary,
//! binding the result to a reference leaves it dangling after the full expression.
3563 template <typename TAlloc>
3564 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const ustring16<TAlloc>& right)
3566 //std::cout << "MOVE operator+(&&, &)" << std::endl;
3568 return std::move(left);
3572 //! Appends two ustring16s.
//! Both operands are expiring, so either buffer may be reused; the capacity test below picks one.
//! \param left The expiring string whose contents come first.
//! \param right The expiring string whose contents come second.
//! \return An rvalue reference to whichever operand was reused.
//! NOTE(review): the result aliases one of the caller's arguments; if that argument is a
//! temporary, binding the result to a reference leaves it dangling after the full expression.
3573 template <typename TAlloc>
3574 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, ustring16<TAlloc>&& right)
3576 //std::cout << "MOVE operator+(&&, &&)" << std::endl;
// Reuse `left` when `right` fits into left's spare capacity, or when right's spare
// capacity is too small to take `left`.
3577 if ((right.size_raw() <= left.capacity() - left.size_raw()) ||
3578 (right.capacity() - right.size_raw() < left.size_raw()))
3581 return std::move(left);
// Otherwise reuse `right`: insert `left` at position 0.
3585 right.insert(left, 0);
3586 return std::move(right);
3591 //! Appends a ustring16 and a null-terminated unicode string.
//! Rvalue overload: hands back the expiring `left` instead of building a new string.
//! \param left The expiring string that is reused for the result.
//! \param right Pointer to the null-terminated characters to append; B is the character type.
//! \return An rvalue reference to `left` itself.
//! NOTE(review): the result aliases the caller's argument; if that argument is a temporary,
//! binding the result to a reference leaves it dangling after the full expression.
3592 template <typename TAlloc, class B>
3593 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const B* const right)
3595 //std::cout << "MOVE operator+(&&, B*)" << std::endl;
3597 return std::move(left);
3601 //! Appends a ustring16 and a null-terminated unicode string.
//! Rvalue overload: reuses the expiring `right` instead of building a new string.
//! \param left Pointer to the null-terminated characters placed in front; B is the character type.
//! \param right The expiring string that is modified in place.
//! \return An rvalue reference to `right` itself.
//! NOTE(review): the result aliases the caller's argument; if that argument is a temporary,
//! binding the result to a reference leaves it dangling after the full expression.
3602 template <class B, typename TAlloc>
3603 inline ustring16<TAlloc>&& operator+(const B* const left, ustring16<TAlloc>&& right)
3605 //std::cout << "MOVE operator+(B*, &&)" << std::endl;
// Insert the characters at position 0 of `right`.
3606 right.insert(left, 0);
3607 return std::move(right);
3611 //! Appends a ustring16 and an Irrlicht string.
//! Rvalue overload: reuses the expiring `right` instead of building a new string.
//! \param left The Irrlicht string placed in front.
//! \param right The expiring string that is modified in place.
//! \return An rvalue reference to `right` itself.
//! NOTE(review): the result aliases the caller's argument; if that argument is a temporary,
//! binding the result to a reference leaves it dangling after the full expression.
3612 template <typename TAlloc, typename B, typename BAlloc>
3613 inline ustring16<TAlloc>&& operator+(const string<B, BAlloc>& left, ustring16<TAlloc>&& right)
3615 //std::cout << "MOVE operator+(&, &&)" << std::endl;
// Insert `left` at position 0 of `right`.
3616 right.insert(left, 0);
3617 return std::move(right);
3621 //! Appends a ustring16 and an Irrlicht string.
//! Rvalue overload: hands back the expiring `left` instead of building a new string.
//! \param left The expiring string that is reused for the result.
//! \param right The Irrlicht string to append.
//! \return An rvalue reference to `left` itself.
//! NOTE(review): the result aliases the caller's argument; if that argument is a temporary,
//! binding the result to a reference leaves it dangling after the full expression.
3622 template <typename TAlloc, typename B, typename BAlloc>
3623 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const string<B, BAlloc>& right)
3625 //std::cout << "MOVE operator+(&&, &)" << std::endl;
3627 return std::move(left);
3631 //! Appends a ustring16 and a std::basic_string.
//! Rvalue overload: reuses the expiring `right` instead of building a new string.
//! \param left The std::basic_string placed in front.
//! \param right The expiring string that is modified in place.
//! \return An rvalue reference to `right` itself.
//! NOTE(review): the result aliases the caller's argument; if that argument is a temporary,
//! binding the result to a reference leaves it dangling after the full expression.
3632 template <typename TAlloc, typename B, typename A, typename BAlloc>
3633 inline ustring16<TAlloc>&& operator+(const std::basic_string<B, A, BAlloc>& left, ustring16<TAlloc>&& right)
3635 //std::cout << "MOVE operator+(&, &&)" << std::endl;
// `left` is first converted to a temporary ustring16, which is then inserted at position 0.
3636 right.insert(core::ustring16<TAlloc>(left), 0);
3637 return std::move(right);
3641 //! Appends a ustring16 and a std::basic_string.
//! Rvalue overload: hands back the expiring `left` instead of building a new string.
//! \param left The expiring string that is reused for the result.
//! \param right The std::basic_string to append.
//! \return An rvalue reference to `left` itself.
//! NOTE(review): the result aliases the caller's argument; if that argument is a temporary,
//! binding the result to a reference leaves it dangling after the full expression.
3642 template <typename TAlloc, typename B, typename A, typename BAlloc>
3643 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const std::basic_string<B, A, BAlloc>& right)
3645 //std::cout << "MOVE operator+(&&, &)" << std::endl;
3647 return std::move(left);
3651 //! Appends a ustring16 and a char.
//! Rvalue overload: the character is appended to the expiring `left` in place.
//! \param left The expiring string that is modified.
//! \param right The character to append.
//! \return A ustring16 by value, constructed from std::move(left).
3652 template <typename TAlloc>
3653 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const char right)
// The char is converted to uchar32_t before being appended.
// NOTE(review): where plain char is signed, a negative value converts to a large code point — confirm inputs are ASCII.
3655 left.append((uchar32_t)right);
3656 return std::move(left);
3660 //! Appends a ustring16 and a char.
//! Rvalue overload: the character is inserted at the front of the expiring `right` in place.
//! \param left The character placed in front.
//! \param right The expiring string that is modified.
//! \return A ustring16 by value, constructed from std::move(right).
3661 template <typename TAlloc>
3662 inline ustring16<TAlloc> operator+(const char left, ustring16<TAlloc>&& right)
// The char is converted to uchar32_t and inserted at position 0.
3664 right.insert((uchar32_t)left, 0);
3665 return std::move(right);
3669 #ifdef USTRING_CPP0X_NEWLITERALS
3670 //! Appends a ustring16 and a uchar32_t.
//! Rvalue overload, compiled only when USTRING_CPP0X_NEWLITERALS is defined.
//! \param left The expiring string that is reused for the result.
//! \param right The 32-bit character to append.
//! \return A ustring16 by value, constructed from std::move(left).
3671 template <typename TAlloc>
3672 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const uchar32_t right)
3675 return std::move(left);
3679 //! Appends a ustring16 and a uchar32_t.
//! Rvalue overload: the character is inserted at the front of the expiring `right` in place.
//! \param left The 32-bit character placed in front.
//! \param right The expiring string that is modified.
//! \return A ustring16 by value, constructed from std::move(right).
3680 template <typename TAlloc>
3681 inline ustring16<TAlloc> operator+(const uchar32_t left, ustring16<TAlloc>&& right)
// Insert the character at position 0.
3683 right.insert(left, 0);
3684 return std::move(right);
3689 //! Appends a ustring16 and a short.
//! Rvalue overload: the text is appended to the expiring `left` in place.
//! \param left The expiring string that is modified.
//! \param right The number to append; it is turned into text through core::stringc.
//! \return A ustring16 by value, constructed from std::move(left).
3690 template <typename TAlloc>
3691 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const short right)
3693 left.append(core::stringc(right));
3694 return std::move(left);
3698 //! Appends a ustring16 to the text form of a short.
//! Rvalue overload: the text is inserted at the front of the expiring `right` in place.
//! \param left The number placed in front; it is turned into text through core::stringc.
//! \param right The expiring string that is modified.
//! \return A ustring16 by value, constructed from std::move(right).
3699 template <typename TAlloc>
3700 inline ustring16<TAlloc> operator+(const short left, ustring16<TAlloc>&& right)
// Insert the text at position 0.
3702 right.insert(core::stringc(left), 0);
3703 return std::move(right);
3707 //! Appends a ustring16 and an unsigned short.
//! Rvalue overload: the text is appended to the expiring `left` in place.
//! \param left The expiring string that is modified.
//! \param right The number to append; it is turned into text through core::stringc.
//! \return A ustring16 by value, constructed from std::move(left).
3708 template <typename TAlloc>
3709 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned short right)
3711 left.append(core::stringc(right));
3712 return std::move(left);
3716 //! Appends a ustring16 to the text form of an unsigned short.
//! Rvalue overload: the text is inserted at the front of the expiring `right` in place.
//! \param left The number placed in front; it is turned into text through core::stringc.
//! \param right The expiring string that is modified.
//! \return A ustring16 by value, constructed from std::move(right).
3717 template <typename TAlloc>
3718 inline ustring16<TAlloc> operator+(const unsigned short left, ustring16<TAlloc>&& right)
// Insert the text at position 0.
3720 right.insert(core::stringc(left), 0);
3721 return std::move(right);
3725 //! Appends a ustring16 and an int.
//! Rvalue overload: the text is appended to the expiring `left` in place.
//! \param left The expiring string that is modified.
//! \param right The number to append; it is turned into text through core::stringc.
//! \return A ustring16 by value, constructed from std::move(left).
3726 template <typename TAlloc>
3727 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const int right)
3729 left.append(core::stringc(right));
3730 return std::move(left);
3734 //! Appends a ustring16 to the text form of an int.
//! Rvalue overload: the text is inserted at the front of the expiring `right` in place.
//! \param left The number placed in front; it is turned into text through core::stringc.
//! \param right The expiring string that is modified.
//! \return A ustring16 by value, constructed from std::move(right).
3735 template <typename TAlloc>
3736 inline ustring16<TAlloc> operator+(const int left, ustring16<TAlloc>&& right)
// Insert the text at position 0.
3738 right.insert(core::stringc(left), 0);
3739 return std::move(right);
3743 //! Appends a ustring16 and an unsigned int.
//! Rvalue overload: the text is appended to the expiring `left` in place.
//! \param left The expiring string that is modified.
//! \param right The number to append; it is turned into text through core::stringc.
//! \return A ustring16 by value, constructed from std::move(left).
3744 template <typename TAlloc>
3745 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned int right)
3747 left.append(core::stringc(right));
3748 return std::move(left);
3752 //! Appends a ustring16 to the text form of an unsigned int.
//! Rvalue overload: the text is inserted at the front of the expiring `right` in place.
//! \param left The number placed in front; it is turned into text through core::stringc.
//! \param right The expiring string that is modified.
//! \return A ustring16 by value, constructed from std::move(right).
3753 template <typename TAlloc>
3754 inline ustring16<TAlloc> operator+(const unsigned int left, ustring16<TAlloc>&& right)
// Insert the text at position 0.
3756 right.insert(core::stringc(left), 0);
3757 return std::move(right);
3761 //! Appends a ustring16 and a long.
//! Rvalue overload: the text is appended to the expiring `left` in place.
//! \param left The expiring string that is modified.
//! \param right The number to append; it is turned into text through core::stringc.
//! \return A ustring16 by value, constructed from std::move(left).
3762 template <typename TAlloc>
3763 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const long right)
3765 left.append(core::stringc(right));
3766 return std::move(left);
3770 //! Appends a ustring16 to the text form of a long.
//! Rvalue overload: the text is inserted at the front of the expiring `right` in place.
//! \param left The number placed in front; it is turned into text through core::stringc.
//! \param right The expiring string that is modified.
//! \return A ustring16 by value, constructed from std::move(right).
3771 template <typename TAlloc>
3772 inline ustring16<TAlloc> operator+(const long left, ustring16<TAlloc>&& right)
// Insert the text at position 0.
3774 right.insert(core::stringc(left), 0);
3775 return std::move(right);
3779 //! Appends a ustring16 and an unsigned long.
//! Rvalue overload: the text is appended to the expiring `left` in place.
//! \param left The expiring string that is modified.
//! \param right The number to append; it is turned into text through core::stringc.
//! \return A ustring16 by value, constructed from std::move(left).
3780 template <typename TAlloc>
3781 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned long right)
3783 left.append(core::stringc(right));
3784 return std::move(left);
3788 //! Appends a ustring16 to the text form of an unsigned long.
//! Rvalue overload: the text is inserted at the front of the expiring `right` in place.
//! \param left The number placed in front; it is turned into text through core::stringc.
//! \param right The expiring string that is modified.
//! \return A ustring16 by value, constructed from std::move(right).
3789 template <typename TAlloc>
3790 inline ustring16<TAlloc> operator+(const unsigned long left, ustring16<TAlloc>&& right)
// Insert the text at position 0.
3792 right.insert(core::stringc(left), 0);
3793 return std::move(right);
3797 //! Appends a ustring16 and a float.
//! Rvalue overload: the text is appended to the expiring `left` in place.
//! \param left The expiring string that is modified.
//! \param right The number to append; it is turned into text through core::stringc.
//! \return A ustring16 by value, constructed from std::move(left).
3798 template <typename TAlloc>
3799 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const float right)
3801 left.append(core::stringc(right));
3802 return std::move(left);
3806 //! Appends a ustring16 to the text form of a float.
//! Rvalue overload: the text is inserted at the front of the expiring `right` in place.
//! \param left The number placed in front; it is turned into text through core::stringc.
//! \param right The expiring string that is modified.
//! \return A ustring16 by value, constructed from std::move(right).
3807 template <typename TAlloc>
3808 inline ustring16<TAlloc> operator+(const float left, ustring16<TAlloc>&& right)
// Insert the text at position 0.
3810 right.insert(core::stringc(left), 0);
3811 return std::move(right);
3815 //! Appends a ustring16 and a double.
//! Rvalue overload: the text is appended to the expiring `left` in place.
//! \param left The expiring string that is modified.
//! \param right The number to append; it is turned into text through core::stringc.
//! \return A ustring16 by value, constructed from std::move(left).
3816 template <typename TAlloc>
3817 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const double right)
3819 left.append(core::stringc(right));
3820 return std::move(left);
3824 //! Appends a ustring16 to the text form of a double.
//! Rvalue overload: the text is inserted at the front of the expiring `right` in place.
//! \param left The number placed in front; it is turned into text through core::stringc.
//! \param right The expiring string that is modified.
//! \return A ustring16 by value, constructed from std::move(right).
3825 template <typename TAlloc>
3826 inline ustring16<TAlloc> operator+(const double left, ustring16<TAlloc>&& right)
// Insert the text at position 0.
3828 right.insert(core::stringc(left), 0);
3829 return std::move(right);
3834 #ifndef USTRING_NO_STL
3835 //! Writes a ustring16 to an ostream.
//! \param out The narrow output stream to write to.
//! \param in The string to write.
3836 template <typename TAlloc>
3837 inline std::ostream& operator<<(std::ostream& out, const ustring16<TAlloc>& in)
// The string is converted with toUTF8_s() (default arguments) and written as a C string.
3839 out << in.toUTF8_s().c_str();
3843 //! Writes a ustring16 to a wostream.
//! \param out The wide output stream to write to.
//! \param in The string to write.
3844 template <typename TAlloc>
3845 inline std::wostream& operator<<(std::wostream& out, const ustring16<TAlloc>& in)
// The string is converted with toWCHAR_s() (default arguments) and written as a wide C string.
3847 out << in.toWCHAR_s().c_str();
3853 #ifndef USTRING_NO_STL
3858 //! Hashing algorithm for hashing a ustring. Used for things like unordered_maps.
3859 //! Algorithm taken from std::hash<std::string>.
// NOTE(review): std::unary_function was deprecated in C++11 and removed in C++17; this base
// class will not compile on standard libraries that have dropped it.
3860 class hash : public std::unary_function<core::ustring, size_t>
//! \param s The string to hash.
//! \return The hash value.
3863 size_t operator()(const core::ustring& s) const
// 2166136261 and 16777619 (used below) are the 32-bit FNV offset basis and FNV prime.
3865 size_t ret = 2166136261U;
// Sampling step: 1 plus one tenth of the raw length (integer division).
3867 size_t stride = 1 + s.size_raw() / 10;
3869 core::ustring::const_iterator i = s.begin();
3870 while (i != s.end())
3872 // TODO: Don't force u32 on an x64 OS. Make it agnostic.
// Multiply by the prime first, then xor in the character (`*` binds tighter than `^`).
3873 ret = 16777619U * ret ^ (size_t)s[(u32)index];
3881 } // end namespace unicode
3885 } // end namespace core
3886 } // end namespace irr