2 Basic Unicode string class for Irrlicht.
3 Copyright (c) 2009-2011 John Norman
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any
7 damages arising from the use of this software.
9 Permission is granted to anyone to use this software for any
10 purpose, including commercial applications, and to alter it and
11 redistribute it freely, subject to the following restrictions:
13 1. The origin of this software must not be misrepresented; you
14 must not claim that you wrote the original software. If you use
15 this software in a product, an acknowledgment in the product
16 documentation would be appreciated but is not required.
18 2. Altered source versions must be plainly marked as such, and
19 must not be misrepresented as being the original software.
21 3. This notice may not be removed or altered from any source
24 The original version of this class can be located at:
25 http://irrlicht.suckerfreegames.com/
28 john@suckerfreegames.com
31 #ifndef __IRR_USTRING_H_INCLUDED__
32 #define __IRR_USTRING_H_INCLUDED__
34 #if (__cplusplus > 199711L) || (_MSC_VER >= 1600) || defined(__GXX_EXPERIMENTAL_CXX0X__)
35 # define USTRING_CPP0X
36 # if defined(__GXX_EXPERIMENTAL_CXX0X__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 5)))
37 # define USTRING_CPP0X_NEWLITERALS
45 #define __BYTE_ORDER 0
46 #define __LITTLE_ENDIAN 0
47 #define __BIG_ENDIAN 1
49 #include <machine/endian.h>
58 #ifndef USTRING_NO_STL
65 #include "irrAllocator.h"
68 #include "irrString.h"
71 //! UTF-16 surrogate start values.
72 static const irr::u16 UTF16_HI_SURROGATE = 0xD800;
73 static const irr::u16 UTF16_LO_SURROGATE = 0xDC00;
75 //! Is a UTF-16 code unit a surrogate?
76 #define UTF16_IS_SURROGATE(c) (((c) & 0xF800) == 0xD800)
77 #define UTF16_IS_SURROGATE_HI(c) (((c) & 0xFC00) == 0xD800)
78 #define UTF16_IS_SURROGATE_LO(c) (((c) & 0xFC00) == 0xDC00)
84 // Define our character types.
85 #ifdef USTRING_CPP0X_NEWLITERALS // C++0x
86 typedef char32_t uchar32_t;
87 typedef char16_t uchar16_t;
88 typedef char uchar8_t;
90 typedef u32 uchar32_t;
91 typedef u16 uchar16_t;
101 //! The unicode replacement character. Used to replace invalid characters.
102 const irr::u16 UTF_REPLACEMENT_CHARACTER = 0xFFFD;
104 //! Convert a UTF-16 surrogate pair into a UTF-32 character.
105 //! \param high The high value of the pair.
106 //! \param low The low value of the pair.
107 //! \return The UTF-32 character expressed by the surrogate pair.
108 inline uchar32_t toUTF32(uchar16_t high, uchar16_t low)
110 // Convert the surrogate pair into a single UTF-32 character.
111 uchar32_t x = ((high & ((1 << 6) -1)) << 10) | (low & ((1 << 10) -1));
112 uchar32_t wu = ((high >> 6) & ((1 << 5) - 1)) + 1;
113 return (wu << 16) | x;
116 //! Swaps the endianness of a 16-bit value.
117 //! \return The new value.
118 inline uchar16_t swapEndian16(const uchar16_t& c)
120 return ((c >> 8) & 0x00FF) | ((c << 8) & 0xFF00);
123 //! Swaps the endianness of a 32-bit value.
124 //! \return The new value.
125 inline uchar32_t swapEndian32(const uchar32_t& c)
127 return ((c >> 24) & 0x000000FF) |
128 ((c >> 8) & 0x0000FF00) |
129 ((c << 8) & 0x00FF0000) |
130 ((c << 24) & 0xFF000000);
133 //! The Unicode byte order mark.
134 const u16 BOM = 0xFEFF;
136 //! The size of the Unicode byte order mark in terms of the Unicode character size.
137 const u8 BOM_UTF8_LEN = 3;
138 const u8 BOM_UTF16_LEN = 1;
139 const u8 BOM_UTF32_LEN = 1;
141 //! Unicode byte order marks for file operations.
142 const u8 BOM_ENCODE_UTF8[3] = { 0xEF, 0xBB, 0xBF };
143 const u8 BOM_ENCODE_UTF16_BE[2] = { 0xFE, 0xFF };
144 const u8 BOM_ENCODE_UTF16_LE[2] = { 0xFF, 0xFE };
145 const u8 BOM_ENCODE_UTF32_BE[4] = { 0x00, 0x00, 0xFE, 0xFF };
146 const u8 BOM_ENCODE_UTF32_LE[4] = { 0xFF, 0xFE, 0x00, 0x00 };
148 //! The size in bytes of the Unicode byte order marks for file operations.
149 const u8 BOM_ENCODE_UTF8_LEN = 3;
150 const u8 BOM_ENCODE_UTF16_LEN = 2;
151 const u8 BOM_ENCODE_UTF32_LEN = 4;
153 //! Unicode encoding type.
166 //! Unicode endianness.
174 //! Returns the specified unicode byte order mark in a byte array.
175 //! The byte order mark is the first few bytes in a text file that signifies its encoding.
176 /** \param mode The Unicode encoding method that we want to get the byte order mark for.
177 If EUTFE_UTF16 or EUTFE_UTF32 is passed, it uses the native system endianness. **/
178 //! \return An array that contains a byte order mark.
179 inline core::array<u8> getUnicodeBOM(EUTF_ENCODE mode)
181 #define COPY_ARRAY(source, size) \
182 memcpy(ret.pointer(), source, size); \
185 core::array<u8> ret(4);
189 COPY_ARRAY(BOM_ENCODE_UTF8, BOM_ENCODE_UTF8_LEN);
192 #ifdef __BIG_ENDIAN__
193 COPY_ARRAY(BOM_ENCODE_UTF16_BE, BOM_ENCODE_UTF16_LEN);
195 COPY_ARRAY(BOM_ENCODE_UTF16_LE, BOM_ENCODE_UTF16_LEN);
199 COPY_ARRAY(BOM_ENCODE_UTF16_BE, BOM_ENCODE_UTF16_LEN);
202 COPY_ARRAY(BOM_ENCODE_UTF16_LE, BOM_ENCODE_UTF16_LEN);
205 #ifdef __BIG_ENDIAN__
206 COPY_ARRAY(BOM_ENCODE_UTF32_BE, BOM_ENCODE_UTF32_LEN);
208 COPY_ARRAY(BOM_ENCODE_UTF32_LE, BOM_ENCODE_UTF32_LEN);
212 COPY_ARRAY(BOM_ENCODE_UTF32_BE, BOM_ENCODE_UTF32_LEN);
215 COPY_ARRAY(BOM_ENCODE_UTF32_LE, BOM_ENCODE_UTF32_LEN);
218 // TODO sapier: fixed warning only,
219 // don't know if something needs to be done here
227 //! Detects if the given data stream starts with a unicode BOM.
228 //! \param data The data stream to check.
229 //! \return The unicode BOM associated with the data stream, or EUTFE_NONE if none was found.
230 inline EUTF_ENCODE determineUnicodeBOM(const char* data)
232 if (memcmp(data, BOM_ENCODE_UTF8, 3) == 0) return EUTFE_UTF8;
233 if (memcmp(data, BOM_ENCODE_UTF16_BE, 2) == 0) return EUTFE_UTF16_BE;
234 if (memcmp(data, BOM_ENCODE_UTF16_LE, 2) == 0) return EUTFE_UTF16_LE;
235 if (memcmp(data, BOM_ENCODE_UTF32_BE, 4) == 0) return EUTFE_UTF32_BE;
236 if (memcmp(data, BOM_ENCODE_UTF32_LE, 4) == 0) return EUTFE_UTF32_LE;
240 } // end namespace unicode
243 //! UTF-16 string class.
244 template <typename TAlloc = irrAllocator<uchar16_t> >
249 ///------------------///
250 /// iterator classes ///
251 ///------------------///
253 //! Access an element in a unicode string, allowing one to change it.
254 class _ustring16_iterator_access
257 _ustring16_iterator_access(const ustring16<TAlloc>* s, u32 p) : ref(s), pos(p) {}
259 //! Allow the class to be interpreted as a single UTF-32 character.
260 operator uchar32_t() const
265 //! Allow one to change the character in the unicode string.
266 //! \param c The new character to use.
268 _ustring16_iterator_access& operator=(const uchar32_t c)
274 //! Increments the value by 1.
276 _ustring16_iterator_access& operator++()
282 //! Increments the value by 1, returning the old value.
283 //! \return A unicode character.
284 uchar32_t operator++(int)
286 uchar32_t old = _get();
291 //! Decrements the value by 1.
293 _ustring16_iterator_access& operator--()
299 //! Decrements the value by 1, returning the old value.
300 //! \return A unicode character.
301 uchar32_t operator--(int)
303 uchar32_t old = _get();
308 //! Adds to the value by a specified amount.
309 //! \param val The amount to add to this character.
311 _ustring16_iterator_access& operator+=(int val)
317 //! Subtracts from the value by a specified amount.
318 //! \param val The amount to subtract from this character.
320 _ustring16_iterator_access& operator-=(int val)
326 //! Multiplies the value by a specified amount.
327 //! \param val The amount to multiply this character by.
329 _ustring16_iterator_access& operator*=(int val)
335 //! Divides the value by a specified amount.
336 //! \param val The amount to divide this character by.
338 _ustring16_iterator_access& operator/=(int val)
344 //! Modulos the value by a specified amount.
345 //! \param val The amount to modulo this character by.
347 _ustring16_iterator_access& operator%=(int val)
353 //! Adds to the value by a specified amount.
354 //! \param val The amount to add to this character.
355 //! \return A unicode character.
356 uchar32_t operator+(int val) const
361 //! Subtracts from the value by a specified amount.
362 //! \param val The amount to subtract from this character.
363 //! \return A unicode character.
364 uchar32_t operator-(int val) const
369 //! Multiplies the value by a specified amount.
370 //! \param val The amount to multiply this character by.
371 //! \return A unicode character.
372 uchar32_t operator*(int val) const
377 //! Divides the value by a specified amount.
378 //! \param val The amount to divide this character by.
379 //! \return A unicode character.
380 uchar32_t operator/(int val) const
385 //! Modulos the value by a specified amount.
386 //! \param val The amount to modulo this character by.
387 //! \return A unicode character.
388 uchar32_t operator%(int val) const
394 //! Gets a uchar32_t from our current position.
395 uchar32_t _get() const
397 const uchar16_t* a = ref->c_str();
398 if (!UTF16_IS_SURROGATE(a[pos]))
399 return static_cast<uchar32_t>(a[pos]);
402 if (pos + 1 >= ref->size_raw())
405 return unicode::toUTF32(a[pos], a[pos + 1]);
409 //! Sets a uchar32_t at our current position.
410 void _set(uchar32_t c)
412 ustring16<TAlloc>* ref2 = const_cast<ustring16<TAlloc>*>(ref);
413 const uchar16_t* a = ref2->c_str();
416 // c will be multibyte, so split it up into the high and low surrogate pairs.
417 uchar16_t x = static_cast<uchar16_t>(c);
418 uchar16_t vh = UTF16_HI_SURROGATE | ((((c >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
419 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
421 // If the previous position was a surrogate pair, just replace them. Else, insert the low pair.
422 if (UTF16_IS_SURROGATE_HI(a[pos]) && pos + 1 != ref2->size_raw())
423 ref2->replace_raw(vl, static_cast<u32>(pos) + 1);
424 else ref2->insert_raw(vl, static_cast<u32>(pos) + 1);
426 ref2->replace_raw(vh, static_cast<u32>(pos));
430 // c will be a single byte.
431 uchar16_t vh = static_cast<uchar16_t>(c);
433 // If the previous position was a surrogate pair, remove the extra byte.
434 if (UTF16_IS_SURROGATE_HI(a[pos]))
435 ref2->erase_raw(static_cast<u32>(pos) + 1);
437 ref2->replace_raw(vh, static_cast<u32>(pos));
441 const ustring16<TAlloc>* ref;
444 typedef typename ustring16<TAlloc>::_ustring16_iterator_access access;
447 //! Iterator to iterate through a UTF-16 string.
448 #ifndef USTRING_NO_STL
449 class _ustring16_const_iterator : public std::iterator<
450 std::bidirectional_iterator_tag, // iterator_category
451 access, // value_type
452 ptrdiff_t, // difference_type
453 const access, // pointer
454 const access // reference
457 class _ustring16_const_iterator
461 typedef _ustring16_const_iterator _Iter;
462 typedef std::iterator<std::bidirectional_iterator_tag, access, ptrdiff_t, const access, const access> _Base;
463 typedef const access const_pointer;
464 typedef const access const_reference;
466 #ifndef USTRING_NO_STL
467 typedef typename _Base::value_type value_type;
468 typedef typename _Base::difference_type difference_type;
469 typedef typename _Base::difference_type distance_type;
470 typedef typename _Base::pointer pointer;
471 typedef const_reference reference;
473 typedef access value_type;
474 typedef u32 difference_type;
475 typedef u32 distance_type;
476 typedef const_pointer pointer;
477 typedef const_reference reference;
481 _ustring16_const_iterator(const _Iter& i) : ref(i.ref), pos(i.pos) {}
482 _ustring16_const_iterator(const ustring16<TAlloc>& s) : ref(&s), pos(0) {}
483 _ustring16_const_iterator(const ustring16<TAlloc>& s, const u32 p) : ref(&s), pos(0)
485 if (ref->size_raw() == 0 || p == 0)
488 // Go to the appropriate position.
490 u32 sr = ref->size_raw();
491 const uchar16_t* a = ref->c_str();
492 while (i != 0 && pos < sr)
494 if (UTF16_IS_SURROGATE_HI(a[pos]))
501 //! Test for equalness.
502 bool operator==(const _Iter& iter) const
504 if (ref == iter.ref && pos == iter.pos)
509 //! Test for unequalness.
510 bool operator!=(const _Iter& iter) const
512 if (ref != iter.ref || pos != iter.pos)
517 //! Switch to the next full character in the string.
520 if (pos == ref->size_raw()) return *this;
521 const uchar16_t* a = ref->c_str();
522 if (UTF16_IS_SURROGATE_HI(a[pos]))
523 pos += 2; // TODO: check for valid low surrogate?
525 if (pos > ref->size_raw()) pos = ref->size_raw();
529 //! Switch to the next full character in the string, returning the previous position.
530 _Iter operator++(int)
537 //! Switch to the previous full character in the string.
540 if (pos == 0) return *this;
541 const uchar16_t* a = ref->c_str();
543 if (UTF16_IS_SURROGATE_LO(a[pos]) && pos != 0) // low surrogate, go back one more.
548 //! Switch to the previous full character in the string, returning the previous position.
549 _Iter operator--(int)
556 //! Advance a specified number of full characters in the string.
558 _Iter& operator+=(const difference_type v)
560 if (v == 0) return *this;
561 if (v < 0) return operator-=(v * -1);
563 if (pos >= ref->size_raw())
566 // Go to the appropriate position.
567 // TODO: Don't force u32 on an x64 OS. Make it agnostic.
569 u32 sr = ref->size_raw();
570 const uchar16_t* a = ref->c_str();
571 while (i != 0 && pos < sr)
573 if (UTF16_IS_SURROGATE_HI(a[pos]))
584 //! Go back a specified number of full characters in the string.
586 _Iter& operator-=(const difference_type v)
588 if (v == 0) return *this;
589 if (v > 0) return operator+=(v * -1);
594 // Go to the appropriate position.
595 // TODO: Don't force u32 on an x64 OS. Make it agnostic.
597 const uchar16_t* a = ref->c_str();
598 while (i != 0 && pos != 0)
601 if (UTF16_IS_SURROGATE_LO(a[pos]) != 0 && pos != 0)
609 //! Return a new iterator that is a variable number of full characters forward from the current position.
610 _Iter operator+(const difference_type v) const
617 //! Return a new iterator that is a variable number of full characters backward from the current position.
618 _Iter operator-(const difference_type v) const
625 //! Returns the distance between two iterators.
626 difference_type operator-(const _Iter& iter) const
628 // Make sure we reference the same object!
630 return difference_type();
655 //! Accesses the full character at the iterator's position.
656 const_reference operator*() const
658 if (pos >= ref->size_raw())
660 const uchar16_t* a = ref->c_str();
661 u32 p = ref->size_raw();
662 if (UTF16_IS_SURROGATE_LO(a[p]))
664 reference ret(ref, p);
667 const_reference ret(ref, pos);
671 //! Accesses the full character at the iterator's position.
672 reference operator*()
674 if (pos >= ref->size_raw())
676 const uchar16_t* a = ref->c_str();
677 u32 p = ref->size_raw();
678 if (UTF16_IS_SURROGATE_LO(a[p]))
680 reference ret(ref, p);
683 reference ret(ref, pos);
687 //! Accesses the full character at the iterator's position.
688 const_pointer operator->() const
693 //! Accesses the full character at the iterator's position.
699 //! Is the iterator at the start of the string?
705 //! Is the iterator at the end of the string?
708 const uchar16_t* a = ref->c_str();
709 if (UTF16_IS_SURROGATE(a[pos]))
710 return (pos + 1) >= ref->size_raw();
711 else return pos >= ref->size_raw();
714 //! Moves the iterator to the start of the string.
720 //! Moves the iterator to the end of the string.
723 pos = ref->size_raw();
726 //! Returns the iterator's position.
727 //! \return The iterator's position.
734 const ustring16<TAlloc>* ref;
738 //! Iterator to iterate through a UTF-16 string.
739 class _ustring16_iterator : public _ustring16_const_iterator
742 typedef _ustring16_iterator _Iter;
743 typedef _ustring16_const_iterator _Base;
744 typedef typename _Base::const_pointer const_pointer;
745 typedef typename _Base::const_reference const_reference;
748 typedef typename _Base::value_type value_type;
749 typedef typename _Base::difference_type difference_type;
750 typedef typename _Base::distance_type distance_type;
751 typedef access pointer;
752 typedef access reference;
758 _ustring16_iterator(const _Iter& i) : _ustring16_const_iterator(i) {}
759 _ustring16_iterator(const ustring16<TAlloc>& s) : _ustring16_const_iterator(s) {}
760 _ustring16_iterator(const ustring16<TAlloc>& s, const u32 p) : _ustring16_const_iterator(s, p) {}
762 //! Accesses the full character at the iterator's position.
763 reference operator*() const
765 if (pos >= ref->size_raw())
767 const uchar16_t* a = ref->c_str();
768 u32 p = ref->size_raw();
769 if (UTF16_IS_SURROGATE_LO(a[p]))
771 reference ret(ref, p);
774 reference ret(ref, pos);
778 //! Accesses the full character at the iterator's position.
779 reference operator*()
781 if (pos >= ref->size_raw())
783 const uchar16_t* a = ref->c_str();
784 u32 p = ref->size_raw();
785 if (UTF16_IS_SURROGATE_LO(a[p]))
787 reference ret(ref, p);
790 reference ret(ref, pos);
794 //! Accesses the full character at the iterator's position.
795 pointer operator->() const
800 //! Accesses the full character at the iterator's position.
807 typedef typename ustring16<TAlloc>::_ustring16_iterator iterator;
808 typedef typename ustring16<TAlloc>::_ustring16_const_iterator const_iterator;
810 ///----------------------///
811 /// end iterator classes ///
812 ///----------------------///
814 //! Default constructor
816 : array(0), allocated(1), used(0)
818 #if __BYTE_ORDER == __BIG_ENDIAN
819 encoding = unicode::EUTFE_UTF16_BE;
821 encoding = unicode::EUTFE_UTF16_LE;
823 array = allocator.allocate(1); // new u16[1];
829 ustring16(const ustring16<TAlloc>& other)
830 : array(0), allocated(0), used(0)
832 #if __BYTE_ORDER == __BIG_ENDIAN
833 encoding = unicode::EUTFE_UTF16_BE;
835 encoding = unicode::EUTFE_UTF16_LE;
841 //! Constructor from other string types
842 template <class B, class A>
843 ustring16(const string<B, A>& other)
844 : array(0), allocated(0), used(0)
846 #if __BYTE_ORDER == __BIG_ENDIAN
847 encoding = unicode::EUTFE_UTF16_BE;
849 encoding = unicode::EUTFE_UTF16_LE;
855 #ifndef USTRING_NO_STL
856 //! Constructor from std::string
857 template <class B, class A, typename Alloc>
858 ustring16(const std::basic_string<B, A, Alloc>& other)
859 : array(0), allocated(0), used(0)
861 #if __BYTE_ORDER == __BIG_ENDIAN
862 encoding = unicode::EUTFE_UTF16_BE;
864 encoding = unicode::EUTFE_UTF16_LE;
866 *this = other.c_str();
870 //! Constructor from iterator.
871 template <typename Itr>
872 ustring16(Itr first, Itr last)
873 : array(0), allocated(0), used(0)
875 #if __BYTE_ORDER == __BIG_ENDIAN
876 encoding = unicode::EUTFE_UTF16_BE;
878 encoding = unicode::EUTFE_UTF16_LE;
880 reserve(std::distance(first, last));
883 for (; first != last; ++first)
884 append((uchar32_t)*first);
889 #ifndef USTRING_CPP0X_NEWLITERALS
890 //! Constructor for copying a character string from a pointer.
891 ustring16(const char* const c)
892 : array(0), allocated(0), used(0)
894 #if __BYTE_ORDER == __BIG_ENDIAN
895 encoding = unicode::EUTFE_UTF16_BE;
897 encoding = unicode::EUTFE_UTF16_LE;
900 loadDataStream(c, strlen(c));
901 //append((uchar8_t*)c);
905 //! Constructor for copying a character string from a pointer with a given length.
906 ustring16(const char* const c, u32 length)
907 : array(0), allocated(0), used(0)
909 #if __BYTE_ORDER == __BIG_ENDIAN
910 encoding = unicode::EUTFE_UTF16_BE;
912 encoding = unicode::EUTFE_UTF16_LE;
915 loadDataStream(c, length);
920 //! Constructor for copying a UTF-8 string from a pointer.
921 ustring16(const uchar8_t* const c)
922 : array(0), allocated(0), used(0)
924 #if __BYTE_ORDER == __BIG_ENDIAN
925 encoding = unicode::EUTFE_UTF16_BE;
927 encoding = unicode::EUTFE_UTF16_LE;
934 //! Constructor for copying a UTF-8 string from a single char.
935 ustring16(const char c)
936 : array(0), allocated(0), used(0)
938 #if __BYTE_ORDER == __BIG_ENDIAN
939 encoding = unicode::EUTFE_UTF16_BE;
941 encoding = unicode::EUTFE_UTF16_LE;
944 append((uchar32_t)c);
948 //! Constructor for copying a UTF-8 string from a pointer with a given length.
949 ustring16(const uchar8_t* const c, u32 length)
950 : array(0), allocated(0), used(0)
952 #if __BYTE_ORDER == __BIG_ENDIAN
953 encoding = unicode::EUTFE_UTF16_BE;
955 encoding = unicode::EUTFE_UTF16_LE;
962 //! Constructor for copying a UTF-16 string from a pointer.
963 ustring16(const uchar16_t* const c)
964 : array(0), allocated(0), used(0)
966 #if __BYTE_ORDER == __BIG_ENDIAN
967 encoding = unicode::EUTFE_UTF16_BE;
969 encoding = unicode::EUTFE_UTF16_LE;
976 //! Constructor for copying a UTF-16 string from a pointer with a given length
977 ustring16(const uchar16_t* const c, u32 length)
978 : array(0), allocated(0), used(0)
980 #if __BYTE_ORDER == __BIG_ENDIAN
981 encoding = unicode::EUTFE_UTF16_BE;
983 encoding = unicode::EUTFE_UTF16_LE;
990 //! Constructor for copying a UTF-32 string from a pointer.
991 ustring16(const uchar32_t* const c)
992 : array(0), allocated(0), used(0)
994 #if __BYTE_ORDER == __BIG_ENDIAN
995 encoding = unicode::EUTFE_UTF16_BE;
997 encoding = unicode::EUTFE_UTF16_LE;
1004 //! Constructor for copying a UTF-32 from a pointer with a given length.
1005 ustring16(const uchar32_t* const c, u32 length)
1006 : array(0), allocated(0), used(0)
1008 #if __BYTE_ORDER == __BIG_ENDIAN
1009 encoding = unicode::EUTFE_UTF16_BE;
1011 encoding = unicode::EUTFE_UTF16_LE;
1018 //! Constructor for copying a wchar_t string from a pointer.
1019 ustring16(const wchar_t* const c)
1020 : array(0), allocated(0), used(0)
1022 #if __BYTE_ORDER == __BIG_ENDIAN
1023 encoding = unicode::EUTFE_UTF16_BE;
1025 encoding = unicode::EUTFE_UTF16_LE;
1028 if (sizeof(wchar_t) == 4)
1029 append(reinterpret_cast<const uchar32_t* const>(c));
1030 else if (sizeof(wchar_t) == 2)
1031 append(reinterpret_cast<const uchar16_t* const>(c));
1032 else if (sizeof(wchar_t) == 1)
1033 append(reinterpret_cast<const uchar8_t* const>(c));
1037 //! Constructor for copying a wchar_t string from a pointer with a given length.
1038 ustring16(const wchar_t* const c, u32 length)
1039 : array(0), allocated(0), used(0)
1041 #if __BYTE_ORDER == __BIG_ENDIAN
1042 encoding = unicode::EUTFE_UTF16_BE;
1044 encoding = unicode::EUTFE_UTF16_LE;
1047 if (sizeof(wchar_t) == 4)
1048 append(reinterpret_cast<const uchar32_t* const>(c), length);
1049 else if (sizeof(wchar_t) == 2)
1050 append(reinterpret_cast<const uchar16_t* const>(c), length);
1051 else if (sizeof(wchar_t) == 1)
1052 append(reinterpret_cast<const uchar8_t* const>(c), length);
1056 #ifdef USTRING_CPP0X
1057 //! Constructor for moving a ustring16
1058 ustring16(ustring16<TAlloc>&& other)
1059 : array(other.array), encoding(other.encoding), allocated(other.allocated), used(other.used)
1061 //std::cout << "MOVE constructor" << std::endl;
1063 other.allocated = 0;
1072 allocator.deallocate(array); // delete [] array;
1076 //! Assignment operator
1077 ustring16& operator=(const ustring16<TAlloc>& other)
1082 used = other.size_raw();
1083 if (used >= allocated)
1085 allocator.deallocate(array); // delete [] array;
1086 allocated = used + 1;
1087 array = allocator.allocate(used + 1); //new u16[used];
1090 const uchar16_t* p = other.c_str();
1091 for (u32 i=0; i<=used; ++i, ++p)
1096 // Validate our new UTF-16 string.
1103 #ifdef USTRING_CPP0X
1104 //! Move assignment operator
1105 ustring16& operator=(ustring16<TAlloc>&& other)
1109 //std::cout << "MOVE operator=" << std::endl;
1110 allocator.deallocate(array);
1112 array = other.array;
1113 allocated = other.allocated;
1114 encoding = other.encoding;
1124 //! Assignment operator for other string types
1125 template <class B, class A>
1126 ustring16<TAlloc>& operator=(const string<B, A>& other)
1128 *this = other.c_str();
1133 //! Assignment operator for UTF-8 strings
1134 ustring16<TAlloc>& operator=(const uchar8_t* const c)
1138 array = allocator.allocate(1); //new u16[1];
1143 if (!c) return *this;
1145 //! Append our string now.
1151 //! Assignment operator for UTF-16 strings
1152 ustring16<TAlloc>& operator=(const uchar16_t* const c)
1156 array = allocator.allocate(1); //new u16[1];
1161 if (!c) return *this;
1163 //! Append our string now.
1169 //! Assignment operator for UTF-32 strings
1170 ustring16<TAlloc>& operator=(const uchar32_t* const c)
1174 array = allocator.allocate(1); //new u16[1];
1179 if (!c) return *this;
1181 //! Append our string now.
1187 //! Assignment operator for wchar_t strings.
1188 /** Note that this assumes that a correct unicode string is stored in the wchar_t string.
1189 Since wchar_t changes depending on its platform, it could either be a UTF-8, -16, or -32 string.
1190 This function assumes you are storing the correct unicode encoding inside the wchar_t string. **/
1191 ustring16<TAlloc>& operator=(const wchar_t* const c)
1193 if (sizeof(wchar_t) == 4)
1194 *this = reinterpret_cast<const uchar32_t* const>(c);
1195 else if (sizeof(wchar_t) == 2)
1196 *this = reinterpret_cast<const uchar16_t* const>(c);
1197 else if (sizeof(wchar_t) == 1)
1198 *this = reinterpret_cast<const uchar8_t* const>(c);
1204 //! Assignment operator for other strings.
1205 /** Note that this assumes that a correct unicode string is stored in the string. **/
1207 ustring16<TAlloc>& operator=(const B* const c)
1210 *this = reinterpret_cast<const uchar32_t* const>(c);
1211 else if (sizeof(B) == 2)
1212 *this = reinterpret_cast<const uchar16_t* const>(c);
1213 else if (sizeof(B) == 1)
1214 *this = reinterpret_cast<const uchar8_t* const>(c);
1220 //! Direct access operator
1221 access operator [](const u32 index)
1223 _IRR_DEBUG_BREAK_IF(index>=size()) // bad index
1224 iterator iter(*this, index);
1225 return iter.operator*();
1229 //! Direct access operator
1230 const access operator [](const u32 index) const
1232 _IRR_DEBUG_BREAK_IF(index>=size()) // bad index
1233 const_iterator iter(*this, index);
1234 return iter.operator*();
1238 //! Equality operator
1239 bool operator ==(const uchar16_t* const str) const
1245 for(i=0; array[i] && str[i]; ++i)
1246 if (array[i] != str[i])
1249 return !array[i] && !str[i];
1253 //! Equality operator
1254 bool operator ==(const ustring16<TAlloc>& other) const
1256 for(u32 i=0; array[i] && other.array[i]; ++i)
1257 if (array[i] != other.array[i])
1260 return used == other.used;
1264 //! Is smaller comparator
1265 bool operator <(const ustring16<TAlloc>& other) const
1267 for(u32 i=0; array[i] && other.array[i]; ++i)
1269 s32 diff = array[i] - other.array[i];
1274 return used < other.used;
1278 //! Inequality operator
1279 bool operator !=(const uchar16_t* const str) const
1281 return !(*this == str);
1285 //! Inequality operator
1286 bool operator !=(const ustring16<TAlloc>& other) const
1288 return !(*this == other);
1292 //! Returns the length of a ustring16 in full characters.
1293 //! \return Length of a ustring16 in full characters.
1296 const_iterator i(*this, 0);
1307 //! Informs if the ustring is empty or not.
1308 //! \return True if the ustring is empty, false if not.
1311 return (size_raw() == 0);
1315 //! Returns a pointer to the raw UTF-16 string data.
1316 //! \return Pointer to a C-style NUL-terminated array of UTF-16 code units.
1317 const uchar16_t* c_str() const
1323 //! Compares the first n characters of this string with another.
1324 //! \param other Other string to compare to.
1325 //! \param n Number of characters to compare.
1326 //! \return True if the n first characters of both strings are equal.
1327 bool equalsn(const ustring16<TAlloc>& other, u32 n) const
1330 const uchar16_t* oa = other.c_str();
1331 for(i=0; array[i] && oa[i] && i < n; ++i)
1332 if (array[i] != oa[i])
1335 // if one (or both) of the strings was smaller then they
1336 // are only equal if they have the same length
1337 return (i == n) || (used == other.used);
1341 //! Compares the first n characters of this string with another.
1342 //! \param str Other string to compare to.
1343 //! \param n Number of characters to compare.
1344 //! \return True if the n first characters of both strings are equal.
1345 bool equalsn(const uchar16_t* const str, u32 n) const
1350 for(i=0; array[i] && str[i] && i < n; ++i)
1351 if (array[i] != str[i])
1354 // if one (or both) of the strings was smaller then they
1355 // are only equal if they have the same length
1356 return (i == n) || (array[i] == 0 && str[i] == 0);
1360 //! Appends a character to this ustring16
1361 //! \param character The character to append.
1362 //! \return A reference to our current string.
1363 ustring16<TAlloc>& append(uchar32_t character)
1365 if (used + 2 >= allocated)
1366 reallocate(used + 2);
1368 if (character > 0xFFFF)
1372 // character will be multibyte, so split it up into a surrogate pair.
1373 uchar16_t x = static_cast<uchar16_t>(character);
1374 uchar16_t vh = UTF16_HI_SURROGATE | ((((character >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1375 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1382 array[used-1] = character;
1390 //! Appends a UTF-8 string to this ustring16
1391 //! \param other The UTF-8 string to append.
1392 //! \param length The length of the string to append.
1393 //! \return A reference to our current string.
1394 ustring16<TAlloc>& append(const uchar8_t* const other, u32 length=0xffffffff)
1399 // Determine if the string is long enough for a BOM.
1401 const uchar8_t* p = other;
1405 } while (*p++ && len < unicode::BOM_ENCODE_UTF8_LEN);
1408 unicode::EUTF_ENCODE c_bom = unicode::EUTFE_NONE;
1409 if (len == unicode::BOM_ENCODE_UTF8_LEN)
1411 if (memcmp(other, unicode::BOM_ENCODE_UTF8, unicode::BOM_ENCODE_UTF8_LEN) == 0)
1412 c_bom = unicode::EUTFE_UTF8;
1415 // If a BOM was found, don't include it in the string.
1416 const uchar8_t* c2 = other;
1417 if (c_bom != unicode::EUTFE_NONE)
1419 c2 = other + unicode::BOM_UTF8_LEN;
1420 length -= unicode::BOM_UTF8_LEN;
1423 // Calculate the size of the string to read in.
1429 } while(*p++ && len < length);
1433 // If we need to grow the array, do it now.
1434 if (used + len >= allocated)
1435 reallocate(used + (len * 2));
1438 // Convert UTF-8 to UTF-16.
1440 for (u32 l = 0; l<len;)
1443 if (((c2[l] >> 6) & 0x03) == 0x02)
1444 { // Invalid continuation byte.
1445 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1448 else if (c2[l] == 0xC0 || c2[l] == 0xC1)
1449 { // Invalid byte - overlong encoding.
1450 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1453 else if ((c2[l] & 0xF8) == 0xF0)
1454 { // 4 bytes UTF-8, 2 bytes UTF-16.
1455 // Check for a full string.
1458 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1466 if (valid && (((c2[l+1] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1467 if (valid && (((c2[l+2] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1468 if (valid && (((c2[l+3] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1471 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1477 uchar8_t b1 = ((c2[l] & 0x7) << 2) | ((c2[l+1] >> 4) & 0x3);
1478 uchar8_t b2 = ((c2[l+1] & 0xF) << 4) | ((c2[l+2] >> 2) & 0xF);
1479 uchar8_t b3 = ((c2[l+2] & 0x3) << 6) | (c2[l+3] & 0x3F);
1480 uchar32_t v = b3 | ((uchar32_t)b2 << 8) | ((uchar32_t)b1 << 16);
1482 // Split v up into a surrogate pair.
1483 uchar16_t x = static_cast<uchar16_t>(v);
1484 uchar16_t vh = UTF16_HI_SURROGATE | ((((v >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1485 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1490 ++used; // Using two shorts this time, so increase used by 1.
1492 else if ((c2[l] & 0xF0) == 0xE0)
1493 { // 3 bytes UTF-8, 1 byte UTF-16.
1494 // Check for a full string.
1497 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1505 if (valid && (((c2[l+1] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1506 if (valid && (((c2[l+2] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1509 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1515 uchar8_t b1 = ((c2[l] & 0xF) << 4) | ((c2[l+1] >> 2) & 0xF);
1516 uchar8_t b2 = ((c2[l+1] & 0x3) << 6) | (c2[l+2] & 0x3F);
1517 uchar16_t ch = b2 | ((uchar16_t)b1 << 8);
1521 else if ((c2[l] & 0xE0) == 0xC0)
1522 { // 2 bytes UTF-8, 1 byte UTF-16.
1523 // Check for a full string.
1526 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1532 if (((c2[l+1] >> 6) & 0x03) != 0x02)
1534 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1540 uchar8_t b1 = (c2[l] >> 2) & 0x7;
1541 uchar8_t b2 = ((c2[l] & 0x3) << 6) | (c2[l+1] & 0x3F);
1542 uchar16_t ch = b2 | ((uchar16_t)b1 << 8);
1547 { // 1 byte UTF-8, 1 byte UTF-16.
1550 { // Values above 0xF4 are restricted and aren't used. By now, anything above 0x7F is invalid.
1551 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1553 else array[pos++] = static_cast<uchar16_t>(c2[l]);
1559 // Validate our new UTF-16 string.
1566 //! Appends a UTF-16 string to this ustring16
1567 //! \param other The UTF-16 string to append.
1568 //! \param length The length of the string to append.
1569 //! \return A reference to our current string.
1570 ustring16<TAlloc>& append(const uchar16_t* const other, u32 length=0xffffffff)
// Appends UTF-16 data read from `other`. A leading byte-order mark, when present,
// selects the source endianness and is not copied into this string.
1575 // Determine if the string is long enough for a BOM.
1577 const uchar16_t* p = other;
// Counting stops at the first NUL code unit or once the BOM length has been reached.
1581 } while (*p++ && len < unicode::BOM_ENCODE_UTF16_LEN);
1583 // Check for the BOM to determine the string's endianness.
// c_end stays EUTFEE_NATIVE when neither encoded BOM matches the first bytes of `other`.
1584 unicode::EUTF_ENDIAN c_end = unicode::EUTFEE_NATIVE;
1585 if (memcmp(other, unicode::BOM_ENCODE_UTF16_LE, unicode::BOM_ENCODE_UTF16_LEN) == 0)
1586 c_end = unicode::EUTFEE_LITTLE;
1587 else if (memcmp(other, unicode::BOM_ENCODE_UTF16_BE, unicode::BOM_ENCODE_UTF16_LEN) == 0)
1588 c_end = unicode::EUTFEE_BIG;
1590 // If a BOM was found, don't include it in the string.
1591 const uchar16_t* c2 = other;
1592 if (c_end != unicode::EUTFEE_NATIVE)
1594 c2 = other + unicode::BOM_UTF16_LEN;
// NOTE(review): length is unsigned; an explicit length smaller than BOM_UTF16_LEN
// would wrap around here - confirm callers never pass one together with a BOM.
1595 length -= unicode::BOM_UTF16_LEN;
1598 // Calculate the size of the string to read in.
// The scan ends at the first NUL code unit or after `length` code units, whichever comes first.
1604 } while(*p++ && len < length);
1608 // If we need to grow the size of the array, do it now.
// Growth requests room for twice the incoming length on top of what is already used.
1609 if (used + len >= allocated)
1610 reallocate(used + (len * 2));
1614 // Copy the string now.
1615 unicode::EUTF_ENDIAN m_end = getEndianness();
// NOTE(review): the source is read as c2[l] with l starting at `start`, not at 0.
// If `start` is non-zero the read position in c2 is offset by the same amount as
// the write position in array - confirm this is intended.
1616 for (u32 l = start; l < start + len; ++l)
1618 array[l] = (uchar16_t)c2[l];
// Byte-swap only when a BOM declared an endianness that differs from getEndianness().
1619 if (c_end != unicode::EUTFEE_NATIVE && c_end != m_end)
1620 array[l] = unicode::swapEndian16(array[l]);
1625 // Validate our new UTF-16 string.
1631 //! Appends a UTF-32 string to this ustring16
1632 //! \param other The UTF-32 string to append.
1633 //! \param length The length of the string to append.
1634 //! \return A reference to our current string.
1635 ustring16<TAlloc>& append(const uchar32_t* const other, u32 length=0xffffffff)
// Appends UTF-32 data read from `other`, converting each value to one or two
// UTF-16 code units. A leading byte-order mark selects the source endianness.
1640 // Check for the BOM to determine the string's endianness.
// c_end stays EUTFEE_NATIVE when neither encoded BOM matches the first bytes of `other`.
1641 unicode::EUTF_ENDIAN c_end = unicode::EUTFEE_NATIVE;
1642 if (memcmp(other, unicode::BOM_ENCODE_UTF32_LE, unicode::BOM_ENCODE_UTF32_LEN) == 0)
1643 c_end = unicode::EUTFEE_LITTLE;
1644 else if (memcmp(other, unicode::BOM_ENCODE_UTF32_BE, unicode::BOM_ENCODE_UTF32_LEN) == 0)
1645 c_end = unicode::EUTFEE_BIG;
1647 // If a BOM was found, don't include it in the string.
1648 const uchar32_t* c2 = other;
1649 if (c_end != unicode::EUTFEE_NATIVE)
1651 c2 = other + unicode::BOM_UTF32_LEN;
// NOTE(review): length is unsigned; an explicit length smaller than BOM_UTF32_LEN
// would wrap around here - confirm callers never pass one together with a BOM.
1652 length -= unicode::BOM_UTF32_LEN;
1655 // Calculate the size of the string to read in.
1657 const uchar32_t* p = c2;
// The scan ends at the first zero value or after `length` values, whichever comes first.
1661 } while(*p++ && len < length);
1665 // If we need to grow the size of the array, do it now.
1666 // In case all of the UTF-32 string is split into surrogate pairs, do len * 2.
1667 if (used + (len * 2) >= allocated)
1668 reallocate(used + ((len * 2) * 2));
1671 // Convert UTF-32 to UTF-16.
1672 unicode::EUTF_ENDIAN m_end = getEndianness();
1674 for (u32 l = 0; l<len; ++l)
1678 uchar32_t ch = c2[l];
// Byte-swap only when a BOM declared an endianness that differs from getEndianness().
1679 if (c_end != unicode::EUTFEE_NATIVE && c_end != m_end)
1680 ch = unicode::swapEndian32(ch);
1684 // Split ch up into a surrogate pair as it is over 16 bits long.
// x keeps the low 16 bits of ch. The high surrogate combines bits 16-20 of ch
// (minus one, shifted left by 6) with the top 6 bits of x; the low surrogate
// carries the low 10 bits of x.
1685 uchar16_t x = static_cast<uchar16_t>(ch);
1686 uchar16_t vh = UTF16_HI_SURROGATE | ((((ch >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1687 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1690 ++used; // Using two shorts, so increase used again.
1692 else if (ch >= 0xD800 && ch <= 0xDFFF)
1694 // Between possible UTF-16 surrogates (invalid!)
1695 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
// Any other value is stored as a single UTF-16 code unit.
1697 else array[pos++] = static_cast<uchar16_t>(ch);
1701 // Validate our new UTF-16 string.
1701 // Validate our new UTF-16 string.
1708 //! Appends a ustring16 to this ustring16
1709 //! \param other The string to append to this one.
1710 //! \return A reference to our current string.
1711 ustring16<TAlloc>& append(const ustring16<TAlloc>& other)
// Appends the raw UTF-16 code units of `other`; no re-encoding takes place.
1713 const uchar16_t* oa = other.c_str();
// len counts raw code units (size_raw), so surrogate pairs count as two.
1715 u32 len = other.size_raw();
// Grow the buffer when the combined length would reach the current allocation.
1717 if (used + len >= allocated)
1718 reallocate(used + len);
// Copy code unit by code unit, starting at the current end of this string.
1720 for (u32 l=0; l<len; ++l)
1721 array[used+l] = oa[l];
1730 //! Appends a certain amount of characters of a ustring16 to this ustring16.
1731 //! \param other The string to append to this one.
1732 //! \param length How many characters of the other string to add to this one.
1733 //! \return A reference to our current string.
1734 ustring16<TAlloc>& append(const ustring16<TAlloc>& other, u32 length)
// Appends up to `length` characters of `other`, walking it with a const_iterator
// that yields one uchar32_t per step.
// An empty `other` and a `length` larger than other.size() are handled as special cases first.
1736 if (other.size() == 0)
1739 if (other.size() < length)
// Two code units are budgeted per requested character before deciding to grow.
1745 if (used + length * 2 >= allocated)
1746 reallocate(used + length * 2);
1748 const_iterator iter(other, 0);
// The loop stops at the end of `other` or when the counter l reaches zero.
1750 while (!iter.atEnd() && l)
1752 uchar32_t c = *iter;
1762 //! Reserves some memory.
1763 //! \param count The amount of characters to reserve.
1764 void reserve(u32 count)
// The request is compared against the current allocation size first.
1766 if (count < allocated)
1773 //! Finds first occurrence of character.
1774 //! \param c The character to search for.
1775 //! \return Position where the character has been found, or -1 if not found.
1776 s32 findFirst(uchar32_t c) const
// The search uses a const_iterator positioned at index 0 of this string.
1778 const_iterator i(*this, 0);
1793 //! Finds first occurrence of a character of a list.
1794 //! \param c A list of characters to find. For example if the method should find the first occurrence of 'a' or 'b', this parameter should be "ab".
1795 //! \param count The amount of characters in the list. Usually, this should be strlen(c).
1796 //! \return Position where one of the characters has been found, or -1 if not found.
1797 s32 findFirstChar(const uchar32_t* const c, u32 count=1) const
// The search uses a const_iterator positioned at index 0 of this string.
1802 const_iterator i(*this, 0);
// The inner loop visits each of the `count` entries of the list c.
1808 for (u32 j=0; j<count; ++j)
1819 //! Finds first position of a character not in a given list.
1820 //! \param c A list of characters to NOT find. For example if the method should find the first occurrence of a character not 'a' or 'b', this parameter should be "ab".
1821 //! \param count The amount of characters in the list. Usually, this should be strlen(c).
1822 //! \return Position where the character has been found, or -1 if not found.
1823 s32 findFirstCharNotInList(const uchar32_t* const c, u32 count=1) const
// The search uses a const_iterator positioned at index 0 of this string.
1828 const_iterator i(*this, 0);
// The inner loop visits each of the `count` entries of the list c; j is declared
// outside the loop header.
1835 for (j=0; j<count; ++j)
1848 //! Finds last position of a character not in a given list.
1849 //! \param c A list of characters to NOT find. For example if the method should find the last occurrence of a character not 'a' or 'b', this parameter should be "ab".
1850 //! \param count The amount of characters in the list. Usually, this should be strlen(c).
1851 //! \return Position where the character has been found, or -1 if not found.
1852 s32 findLastCharNotInList(const uchar32_t* const c, u32 count=1) const
// The iterator is copy-constructed from end(), so the scan runs back to front.
1857 const_iterator i(end());
// pos starts at the index of the last character (size() - 1).
1860 s32 pos = size() - 1;
// The loop condition is checked against the iterator's start position.
1861 while (!i.atStart())
// The inner loop visits each of the `count` entries of the list c.
1865 for (j=0; j<count; ++j)
1878 //! Finds next occurrence of character.
1879 //! \param c The character to search for.
1880 //! \param startPos The position in the string to start searching.
1881 //! \return Position where the character has been found, or -1 if not found.
1882 s32 findNext(uchar32_t c, u32 startPos) const
// The search uses a const_iterator positioned at startPos.
1884 const_iterator i(*this, startPos);
1900 //! Finds last occurrence of character.
1901 //! \param c The character to search for.
1902 //! \param start The start position of the reverse search ( default = -1, on end ).
1903 //! \return Position where the character has been found, or -1 if not found.
1904 s32 findLast(uchar32_t c, s32 start = -1) const
// A negative start selects s; the chosen value is clamped to [0, s] and then
// decremented by one.
// NOTE(review): the result is -1 when the clamped value is 0 - confirm that
// const_iterator accepts such a position.
1907 start = core::clamp ( start < 0 ? (s32)s : start, 0, (s32)s ) - 1;
1909 const_iterator i(*this, start);
// The loop condition is checked against the iterator's start position.
1911 while (!i.atStart())
1923 //! Finds last occurrence of a character in a list.
1924 //! \param c A list of characters to find. For example if the method should find the last occurrence of 'a' or 'b', this parameter should be "ab".
1925 //! \param count The amount of characters in the list. Usually, this should be strlen(c).
1926 //! \return Position where one of the characters has been found, or -1 if not found.
1927 s32 findLastChar(const uchar32_t* const c, u32 count=1) const
// The iterator is copy-constructed from end(), so the scan runs back to front.
1932 const_iterator i(end());
// The loop condition is checked against the iterator's start position.
1936 while (!i.atStart())
// The inner loop visits each of the `count` entries of the list c.
1939 for (u32 j=0; j<count; ++j)
1950 //! Finds another ustring16 in this ustring16.
1951 //! \param str The string to find.
1952 //! \param start The start position of the search.
1953 //! \return Position where the ustring16 has been found, or -1 if not found.
1954 s32 find(const ustring16<TAlloc>& str, const u32 start = 0) const
// Both sizes are taken with size(), not size_raw(), and the comparison below
// works on uchar32_t values obtained through iterators.
1956 u32 my_size = size();
1957 u32 their_size = str.size();
// Guard for an empty needle or a needle longer than the remaining text.
// NOTE(review): my_size - start is unsigned; a start beyond my_size would wrap
// to a large value and pass this guard - confirm callers keep start in range.
1959 if (their_size == 0 || my_size - start < their_size)
1962 const_iterator i(*this, start);
// i2 is a copy of the current position in this string and j walks the needle;
// t1 and t2 hold the characters being compared.
1967 const_iterator i2(i);
1968 const_iterator j(str, 0);
1969 uchar32_t t1 = (uchar32_t)*i2;
1970 uchar32_t t2 = (uchar32_t)*j;
1977 t1 = (uchar32_t)*i2;
1988 //! Finds another ustring16 in this ustring16.
1989 //! \param str The string to find.
1990 //! \param start The start position of the search.
1991 //! \return Position where the string has been found, or -1 if not found.
1992 s32 find_raw(const ustring16<TAlloc>& str, const u32 start = 0) const
// Works on raw UTF-16 code units: `start` and the loop index address array directly.
1994 const uchar16_t* data = str.c_str();
// NOTE(review): used-len is evaluated on `used`; if len can exceed used and both
// are unsigned, the bound wraps around - confirm an earlier guard rules that out.
2005 for (u32 i=start; i<=used-len; ++i)
// Advances while the needle has not hit its NUL and the code units match.
2009 while(data[j] && array[i+j] == data[j])
2021 //! Returns a substring.
2022 //! \param begin: Start of substring.
2023 //! \param length: Length of substring.
2024 //! \return A new ustring16 containing the requested substring.
2025 ustring16<TAlloc> subString(u32 begin, s32 length) const
// Builds the result in a separate ustring16; `begin` and `length` are used with
// a const_iterator that yields one character per step.
2028 // if start after ustring16
2029 // or no proper substring length
2030 if ((length <= 0) || (begin>=len))
2031 return ustring16<TAlloc>("");
2032 // clamp length to maximal value
2033 if ((length+begin) > len)
2036 ustring16<TAlloc> o;
// Reserves two code units per character plus room for one extra character.
2037 o.reserve((length+1) * 2);
2039 const_iterator i(*this, begin);
// The loop stops at the end of this string or when length reaches zero.
2040 while (!i.atEnd() && length)
2051 //! Appends a character to this ustring16.
2052 //! \param c Character to append.
2053 //! \return A reference to our current string.
2054 ustring16<TAlloc>& operator += (char c)
// The char is converted to uchar32_t and handed to append().
// NOTE(review): if char is signed on the target, values above 0x7F convert to
// very large uchar32_t values - confirm callers only pass 7-bit characters.
2056 append((uchar32_t)c);
2061 //! Appends a character to this ustring16.
2062 //! \param c Character to append.
2063 //! \return A reference to our current string.
2064 ustring16<TAlloc>& operator += (uchar32_t c)
2071 //! Appends a number to this ustring16.
2072 //! \param c Number to append.
2073 //! \return A reference to our current string.
2074 ustring16<TAlloc>& operator += (short c)
// The number is turned into text by constructing a core::stringc from it; that
// text is then passed to append().
2076 append(core::stringc(c));
2081 //! Appends a number to this ustring16.
2082 //! \param c Number to append.
2083 //! \return A reference to our current string.
2084 ustring16<TAlloc>& operator += (unsigned short c)
// The number is turned into text by constructing a core::stringc from it; that
// text is then passed to append().
2086 append(core::stringc(c));
2091 #ifdef USTRING_CPP0X_NEWLITERALS
2092 //! Appends a number to this ustring16.
2093 //! \param c Number to append.
2094 //! \return A reference to our current string.
2095 ustring16<TAlloc>& operator += (int c)
// The number is turned into text by constructing a core::stringc from it; that
// text is then passed to append().
2097 append(core::stringc(c));
2102 //! Appends a number to this ustring16.
2103 //! \param c Number to append.
2104 //! \return A reference to our current string.
2105 ustring16<TAlloc>& operator += (unsigned int c)
// The number is turned into text by constructing a core::stringc from it; that
// text is then passed to append().
2107 append(core::stringc(c));
2113 //! Appends a number to this ustring16.
2114 //! \param c Number to append.
2115 //! \return A reference to our current string.
2116 ustring16<TAlloc>& operator += (long c)
// The number is turned into text by constructing a core::stringc from it; that
// text is then passed to append().
2118 append(core::stringc(c));
2123 //! Appends a number to this ustring16.
2124 //! \param c Number to append.
2125 //! \return A reference to our current string.
2126 ustring16<TAlloc>& operator += (unsigned long c)
// The number is turned into text by constructing a core::stringc from it; that
// text is then passed to append().
2128 append(core::stringc(c));
2133 //! Appends a number to this ustring16.
2134 //! \param c Number to append.
2135 //! \return A reference to our current string.
2136 ustring16<TAlloc>& operator += (double c)
// The number is turned into text by constructing a core::stringc from it; that
// text is then passed to append().
2138 append(core::stringc(c));
2143 //! Appends a UTF-16 string to this ustring16.
2144 //! \param c UTF-16 string to append.
2145 //! \return A reference to our current string.
2146 ustring16<TAlloc>& operator += (const uchar16_t* const c)
2153 //! Appends a ustring16 to this ustring16.
2154 //! \param other ustring16 to append.
2155 //! \return A reference to our current string.
2156 ustring16<TAlloc>& operator += (const ustring16<TAlloc>& other)
2163 //! Replaces all characters of a given type with another one.
2164 //! \param toReplace Character to replace.
2165 //! \param replaceWith Character replacing the old one.
2166 //! \return A reference to our current string.
2167 ustring16<TAlloc>& replace(uchar32_t toReplace, uchar32_t replaceWith)
// Walks the string with a mutable iterator starting at position 0.
2169 iterator i(*this, 0);
// Dereferencing yields an access proxy; converting it to uchar32_t gives the
// character that is compared against toReplace.
2172 typename ustring16<TAlloc>::access a = *i;
2173 if ((uchar32_t)a == toReplace)
2181 //! Replaces all instances of a string with another one.
2182 //! \param toReplace The string to replace.
2183 //! \param replaceWith The string replacing the old one.
2184 //! \return A reference to our current string.
2185 ustring16<TAlloc>& replace(const ustring16<TAlloc>& toReplace, const ustring16<TAlloc>& replaceWith)
// Replaces occurrences of toReplace with replaceWith, operating on raw UTF-16
// code units. Three strategies are used depending on whether the replacement is
// the same length, shorter, or longer than the text being replaced.
2187 if (toReplace.size() == 0)
2190 const uchar16_t* other = toReplace.c_str();
2191 const uchar16_t* replace = replaceWith.c_str();
2192 const u32 other_size = toReplace.size_raw();
2193 const u32 replace_size = replaceWith.size_raw();
2195 // Determine the delta. The algorithm will change depending on the delta.
// The subtraction is performed on unsigned operands and stored in a signed s32.
2196 s32 delta = replace_size - other_size;
2198 // A character for character replace. The string will not shrink or grow.
// find_raw() takes a ustring16, so passing the raw pointer `other` goes through
// a conversion on every call.
2202 while ((pos = find_raw(other, pos)) != -1)
2204 for (u32 i = 0; i < replace_size; ++i)
2205 array[pos + i] = replace[i];
2211 // We are going to be removing some characters. The string will shrink.
// pos is the read index, i the write index; both advance once per iteration.
2215 for (u32 pos = 0; pos <= used; ++i, ++pos)
2217 // Is this potentially a match?
2218 if (array[pos] == *other)
2220 // Check to see if we have a match.
2222 for (j = 0; j < other_size; ++j)
2224 if (array[pos + j] != other[j])
2228 // If we have a match, replace characters.
2229 if (j == other_size)
2231 for (j = 0; j < replace_size; ++j)
2232 array[i + j] = replace[j];
// The "- 1" on both indexes compensates for the ++i, ++pos in the loop header.
2233 i += replace_size - 1;
2234 pos += other_size - 1;
2239 // No match found, just copy characters.
// NOTE(review): the match path above writes at array[i + j] while this copy
// path writes at array[i - 1]; confirm the two write indexes agree with how i
// is initialised.
2240 array[i - 1] = array[pos];
2248 // We are going to be adding characters, so the string size will increase.
2249 // Count the number of times toReplace exists in the string so we can allocate the new size.
2252 while ((pos = find_raw(other, pos)) != -1)
2258 // Re-allocate the string now, if needed.
// len is the total number of extra code units needed for all occurrences.
2259 u32 len = delta * find_count;
2260 if (used + len >= allocated)
2261 reallocate(used + len);
2265 while ((pos = find_raw(other, pos)) != -1)
// start points at the last code unit of the match, ptr at index used, and end
// delta code units beyond ptr.
2267 uchar16_t* start = array + pos + other_size - 1;
2268 uchar16_t* ptr = array + used;
2269 uchar16_t* end = array + used + delta;
2271 // Shift characters to make room for the string.
2272 while (ptr != start)
2279 // Add the new string now.
2280 for (u32 i = 0; i < replace_size; ++i)
2281 array[pos + i] = replace[i];
// Continue searching after the text that was just written.
2283 pos += replace_size;
2287 // Terminate the string and return ourself.
2293 //! Removes characters from a ustring16.
2294 //! \param c The character to remove.
2295 //! \return A reference to our current string.
2296 ustring16<TAlloc>& remove(uchar32_t c)
// Compacts the buffer in place: i is the read index, pos the write index.
2300 u32 len = (c > 0xFFFF ? 2 : 1); // Remove characters equal to the size of c as a UTF-16 character.
// The loop bound is inclusive, so index `used` itself is visited as well.
2301 for (u32 i=0; i<=used; ++i)
// Code units that are not high surrogates are handled on their own; a high
// surrogate is combined with the following code unit when one is in range.
2304 if (!UTF16_IS_SURROGATE_HI(array[i]))
2306 else if (i + 1 <= used)
2308 // Convert the surrogate pair into a single UTF-32 character.
2309 uc32 = unicode::toUTF32(array[i], array[i + 1]);
// len2 is the number of code units occupied by the character just decoded.
2311 u32 len2 = (uc32 > 0xFFFF ? 2 : 1);
// Kept characters are copied down to the write position; the second copy
// advances i to carry over the other half of a surrogate pair.
2319 array[pos++] = array[i];
2321 array[pos++] = array[++i];
2329 //! Removes a ustring16 from the ustring16.
2330 //! \param toRemove The string to remove.
2331 //! \return A reference to our current string.
2332 ustring16<TAlloc>& remove(const ustring16<TAlloc>& toRemove)
// Works on raw UTF-16 code units; an empty toRemove leaves the string untouched.
2334 u32 size = toRemove.size_raw();
2335 if (size == 0) return *this;
2337 const uchar16_t* tra = toRemove.c_str();
// The loop bound is inclusive, so index `used` itself is visited as well.
2340 for (u32 i=0; i<=used; ++i)
// Code units of this string are compared against toRemove at offset j.
2345 if (array[i + j] != tra[j])
// Code units that are kept are copied down to the write position pos.
2356 array[pos++] = array[i];
2364 //! Removes characters from the ustring16.
2365 //! \param characters The characters to remove.
2366 //! \return A reference to our current string.
2367 ustring16<TAlloc>& removeChars(const ustring16<TAlloc>& characters)
// Compacts the buffer in place: i is the read index, pos the write index. The
// list of characters to drop is walked with a const_iterator.
2369 if (characters.size_raw() == 0)
2374 const_iterator iter(characters);
// The loop bound is inclusive, so index `used` itself is visited as well.
2375 for (u32 i=0; i<=used; ++i)
// Code units that are not high surrogates are handled on their own; a high
// surrogate is combined with the following code unit when one is in range.
2378 if (!UTF16_IS_SURROGATE_HI(array[i]))
2380 else if (i + 1 <= used)
2382 // Convert the surrogate pair into a single UTF-32 character.
2383 uc32 = unicode::toUTF32(array[i], array[i+1]);
// len2 is the number of code units occupied by the character just decoded.
2385 u32 len2 = (uc32 > 0xFFFF ? 2 : 1);
// Each character of the removal list is visited in turn.
2389 while (!iter.atEnd())
2391 uchar32_t c = *iter;
2394 found += (c > 0xFFFF ? 2 : 1); // Remove characters equal to the size of c as a UTF-16 character.
// Kept characters are copied down to the write position; the second copy
// advances i to carry over the other half of a surrogate pair.
2403 array[pos++] = array[i];
2405 array[pos++] = array[++i];
2413 //! Trims the ustring16.
2414 //! Removes the specified characters (by default, Latin-1 whitespace) from the beginning and the end of the ustring16.
2415 //! \param whitespace The characters that are to be considered as whitespace.
2416 //! \return A reference to our current string.
2417 ustring16<TAlloc>& trim(const ustring16<TAlloc>& whitespace = " \t\n\r")
// The whitespace list is converted to UTF-32 once and reused for both searches.
2419 core::array<uchar32_t> utf32white = whitespace.toUTF32();
2421 // find start and end of the substring without the specified characters
// NOTE(review): the list length passed is whitespace.used + 1, a raw code-unit
// count plus one, while utf32white holds one entry per character. Confirm the
// array really has that many entries, especially if the list contains
// surrogate pairs.
2422 const s32 begin = findFirstCharNotInList(utf32white.const_pointer(), whitespace.used + 1);
2426 const s32 end = findLastCharNotInList(utf32white.const_pointer(), whitespace.used + 1);
// The string is replaced by the inclusive range [begin, end].
2428 return (*this = subString(begin, (end +1) - begin));
2432 //! Erases a character from the ustring16.
2433 //! May be slow, because all elements following after the erased element have to be copied.
2434 //! \param index Index of element to be erased.
2435 //! \return A reference to our current string.
2436 ustring16<TAlloc>& erase(u32 index)
// `index` is a character position; the iterator translates it into a raw
// array position via getPos().
2438 _IRR_DEBUG_BREAK_IF(index>used) // access violation
2440 iterator i(*this, index);
// len is the number of code units the erased character occupies.
2443 u32 len = (t > 0xFFFF ? 2 : 1);
// Everything after the erased character, up to and including index `used`, is
// shifted down by len code units.
2445 for (u32 j = static_cast<u32>(i.getPos()) + len; j <= used; ++j)
2446 array[j - len] = array[j];
2455 //! Validate the existing ustring16, checking for valid surrogate pairs and checking for proper termination.
2456 //! \return A reference to our current string.
2457 ustring16<TAlloc>& validate()
// Scans the whole allocated buffer and overwrites invalid code units with
// unicode::UTF_REPLACEMENT_CHARACTER.
2459 // Validate all unicode characters.
2460 for (u32 i=0; i<allocated; ++i)
2462 // Terminate on existing null.
2468 if (UTF16_IS_SURROGATE(array[i]))
// A surrogate in the last slot of the buffer, or a low surrogate encountered
// at this position, is replaced.
2470 if (((i+1) >= allocated) || UTF16_IS_SURROGATE_LO(array[i]))
2471 array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
// A high surrogate that is not followed by a low surrogate is replaced too.
2472 else if (UTF16_IS_SURROGATE_HI(array[i]) && !UTF16_IS_SURROGATE_LO(array[i+1]))
2473 array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
// Values in 0xFDD0..0xFDEF (a range of Unicode noncharacters) are replaced as well.
2476 if (array[i] >= 0xFDD0 && array[i] <= 0xFDEF)
2477 array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
// Here the used count is set to the full buffer minus one slot.
2484 used = allocated - 1;
2491 //! Gets the last char of the ustring16, or 0.
2492 //! \return The last char of the ustring16, or 0.
2493 uchar32_t lastChar() const
// array[used-1] is the final code unit of the string. If it is a low surrogate
// the preceding code unit is needed to form the complete character.
2498 if (UTF16_IS_SURROGATE_LO(array[used-1]))
2500 // Make sure we have a paired surrogate.
2504 // Check for an invalid surrogate.
2505 if (!UTF16_IS_SURROGATE_HI(array[used-2]))
2508 // Convert the surrogate pair into a single UTF-32 character.
2509 return unicode::toUTF32(array[used-2], array[used-1]);
// Otherwise the final code unit is the character itself.
2513 return array[used-1];
2518 //! Split the ustring16 into parts.
2519 /** This method will split a ustring16 at certain delimiter characters
2520 into the container passed in as reference. The type of the container
2521 has to be given as template parameter. It must provide a push_back and
2523 \param ret The result container
2524 \param c C-style ustring16 of delimiter characters
2525 \param count Number of delimiter characters
2526 \param ignoreEmptyTokens Flag to avoid empty substrings in the result
2527 container. If two delimiters occur without a character in between, an
2528 empty substring would be placed in the result. If this flag is set,
2529 only non-empty strings are stored.
2530 \param keepSeparators Flag which allows to add the separator to the
2531 result ustring16. If this flag is true, the concatenation of the
2532 substrings results in the original ustring16. Otherwise, only the
2533 characters between the delimiters are returned.
2534 \return The number of resulting substrings
2536 template<class container>
2537 u32 split(container& ret, const uchar32_t* const c, u32 count=1, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
// Two positions are tracked in parallel: pos/lastpos are used for token lengths,
// while lastpospos is a raw index into array taken from the iterator's getPos().
2542 const_iterator i(*this);
// Remember the container size on entry so only newly added tokens are counted.
2543 const u32 oldSize=ret.size();
2547 bool lastWasSeparator = false;
2551 bool foundSeparator = false;
// Each of the `count` delimiter entries in c is visited for the current character.
2552 for (u32 j=0; j<count; ++j)
// A token is emitted when empty tokens are allowed or the token is non-empty.
2556 if ((!ignoreEmptyTokens || pos - lastpos != 0) &&
2558 ret.push_back(ustring16<TAlloc>(&array[lastpospos], pos - lastpos));
2559 foundSeparator = true;
// With keepSeparators the next token starts on the separator itself, otherwise
// one position after it.
// NOTE(review): the raw index skips the separator with getPos() + 1; if the
// separator is stored as a surrogate pair this lands on its second code unit -
// confirm.
2560 lastpos = (keepSeparators ? pos : pos + 1);
2561 lastpospos = (keepSeparators ? i.getPos() : i.getPos() + 1);
2565 lastWasSeparator = foundSeparator;
// The remainder after the last separator is pushed as a token as well.
2571 ret.push_back(ustring16<TAlloc>(&array[lastpospos], s - lastpos));
// The result is the number of tokens added by this call.
2572 return ret.size()-oldSize;
2576 //! Split the ustring16 into parts.
2577 /** This method will split a ustring16 at certain delimiter characters
2578 into the container passed in as reference. The type of the container
2579 has to be given as template parameter. It must provide a push_back and
2581 \param ret The result container
2582 \param c A unicode string of delimiter characters
2583 \param ignoreEmptyTokens Flag to avoid empty substrings in the result
2584 container. If two delimiters occur without a character in between, an
2585 empty substring would be placed in the result. If this flag is set,
2586 only non-empty strings are stored.
2587 \param keepSeparators Flag which allows to add the separator to the
2588 result ustring16. If this flag is true, the concatenation of the
2589 substrings results in the original ustring16. Otherwise, only the
2590 characters between the delimiters are returned.
2591 \return The number of resulting substrings
2593 template<class container>
2594 u32 split(container& ret, const ustring16<TAlloc>& c, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
// Converts the delimiter string to a UTF-32 array and forwards to the
// pointer/count overload, passing the array's size as the delimiter count.
2596 core::array<uchar32_t> v = c.toUTF32();
2597 return split(ret, v.pointer(), v.size(), ignoreEmptyTokens, keepSeparators);
2601 //! Gets the size of the allocated memory buffer for the string.
2602 //! \return The size of the allocated memory buffer.
2603 u32 capacity() const
2609 //! Returns the raw number of UTF-16 code units in the string, counting each surrogate of a pair individually.
2610 //! \return The raw number of UTF-16 code units, excluding the trailing NUL.
2611 u32 size_raw() const
2617 //! Inserts a character into the string.
2618 //! \param c The character to insert.
2619 //! \param pos The position to insert the character.
2620 //! \return A reference to our current string.
2621 ustring16<TAlloc>& insert(uchar32_t c, u32 pos)
// len is the number of UTF-16 code units needed to store c.
2623 u8 len = (c > 0xFFFF ? 2 : 1);
2625 if (used + len >= allocated)
2626 reallocate(used + len);
// `pos` is a character position; the iterator translates it into a raw array
// position via getPos().
2630 iterator iter(*this, pos);
// Existing code units are moved up by len, walking from the back.
// NOTE(review): the loop bound does not depend on len while the source index
// i - len does; confirm the bounds are right when len is 2.
2631 for (u32 i = used - 2; i > iter.getPos(); --i)
2632 array[i] = array[i - len];
2636 // c will be multibyte, so split it up into a surrogate pair.
// x keeps the low 16 bits of c. The high surrogate combines bits 16-20 of c
// (minus one, shifted left by 6) with the top 6 bits of x; the low surrogate
// carries the low 10 bits of x.
2637 uchar16_t x = static_cast<uchar16_t>(c);
2638 uchar16_t vh = UTF16_HI_SURROGATE | ((((c >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
2639 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
2640 array[iter.getPos()] = vh;
2641 array[iter.getPos()+1] = vl;
// Single code unit case: c is stored directly.
2645 array[iter.getPos()] = static_cast<uchar16_t>(c);
2652 //! Inserts a string into the string.
2653 //! \param c The string to insert.
2654 //! \param pos The position to insert the string.
2655 //! \return A reference to our current string.
2656 ustring16<TAlloc>& insert(const ustring16<TAlloc>& c, u32 pos)
// len is the raw code-unit length of the inserted string; an empty string is a no-op.
2658 u32 len = c.size_raw();
2659 if (len == 0) return *this;
2661 if (used + len >= allocated)
2662 reallocate(used + len);
// `pos` is a character position; the iterator translates it into a raw array
// position via getPos().
2666 iterator iter(*this, pos);
// Existing code units are moved up by len, walking from the back.
2667 for (u32 i = used - 2; i > iter.getPos() + len; --i)
2668 array[i] = array[i - len];
// The code units of c are then copied in one by one.
2670 const uchar16_t* s = c.c_str();
2671 for (u32 i = 0; i < len; ++i)
2682 //! Inserts a raw UTF-16 code unit into the string.
2683 //! \param c The code unit to insert.
2684 //! \param pos The raw array position at which to insert the code unit.
2685 //! \return A reference to our current string.
2686 ustring16<TAlloc>& insert_raw(uchar16_t c, u32 pos)
// Operates directly on array: pos is a raw index, not a character position.
2688 if (used + 1 >= allocated)
2689 reallocate(used + 1);
// Move every code unit above pos up by one slot, walking from the back.
2693 for (u32 i = used - 1; i > pos; --i)
2694 array[i] = array[i - 1];
2702 //! Removes a raw UTF-16 code unit from the string.
2703 //! \param pos Raw array position of the code unit to remove.
2704 //! \return A reference to our current string.
2705 ustring16<TAlloc>& erase_raw(u32 pos)
// Operates directly on array: pos is a raw index, not a character position.
// Every code unit from pos onward is overwritten by its successor.
// NOTE(review): on the last iteration (i == used) this reads array[used + 1];
// confirm the allocation always extends that far.
2707 for (u32 i=pos; i<=used; ++i)
2709 array[i] = array[i + 1];
2717 //! Replaces a character in the string.
2718 //! \param c The new character.
2719 //! \param pos The position of the character to replace.
2720 //! \return A reference to our current string.
2721 ustring16<TAlloc>& replace_raw(uchar16_t c, u32 pos)
2728 //! Returns an iterator to the beginning of the string.
2729 //! \return An iterator to the beginning of the string.
2732 iterator i(*this, 0);
2737 //! Returns an iterator to the beginning of the string.
2738 //! \return An iterator to the beginning of the string.
2739 const_iterator begin() const
2741 const_iterator i(*this, 0);
2746 //! Returns an iterator to the beginning of the string.
2747 //! \return An iterator to the beginning of the string.
2748 const_iterator cbegin() const
2750 const_iterator i(*this, 0);
2755 //! Returns an iterator to the end of the string.
2756 //! \return An iterator to the end of the string.
2759 iterator i(*this, 0);
2765 //! Returns an iterator to the end of the string.
2766 //! \return An iterator to the end of the string.
2767 const_iterator end() const
2769 const_iterator i(*this, 0);
2775 //! Returns an iterator to the end of the string.
2776 //! \return An iterator to the end of the string.
2777 const_iterator cend() const
2779 const_iterator i(*this, 0);
2785 //! Converts the string to a UTF-8 encoded string.
2786 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2787 //! \return A string containing the UTF-8 encoded string.
2788 core::string<uchar8_t> toUTF8_s(const bool addBOM = false) const
2790 core::string<uchar8_t> ret;
// Reserve four bytes per code unit, plus the optional BOM and one spare byte.
2791 ret.reserve(used * 4 + (addBOM ? unicode::BOM_UTF8_LEN : 0) + 1);
2792 const_iterator iter(*this, 0);
2794 // Add the byte order mark if the user wants it.
2797 ret.append(unicode::BOM_ENCODE_UTF8[0]);
2798 ret.append(unicode::BOM_ENCODE_UTF8[1]);
2799 ret.append(unicode::BOM_ENCODE_UTF8[2]);
// Each character is read as a uchar32_t and encoded into one to four bytes.
2802 while (!iter.atEnd())
2804 uchar32_t c = *iter;
// Four-byte form: lead byte 0xF0 | bits 18-20, then three continuation bytes
// (0x80 | 6 bits each) for bits 12-17, 6-11 and 0-5.
2807 uchar8_t b1 = (0x1E << 3) | ((c >> 18) & 0x7);
2808 uchar8_t b2 = (0x2 << 6) | ((c >> 12) & 0x3F);
2809 uchar8_t b3 = (0x2 << 6) | ((c >> 6) & 0x3F);
2810 uchar8_t b4 = (0x2 << 6) | (c & 0x3F);
// Three-byte form: lead byte 0xE0 | bits 12-15, then two continuation bytes.
2818 uchar8_t b1 = (0xE << 4) | ((c >> 12) & 0xF);
2819 uchar8_t b2 = (0x2 << 6) | ((c >> 6) & 0x3F);
2820 uchar8_t b3 = (0x2 << 6) | (c & 0x3F);
// Two-byte form: lead byte 0xC0 | bits 6-10, then one continuation byte.
2827 uchar8_t b1 = (0x6 << 5) | ((c >> 6) & 0x1F);
2828 uchar8_t b2 = (0x2 << 6) | (c & 0x3F);
// Single-byte form: the value is stored as is.
2834 ret.append(static_cast<uchar8_t>(c));
2842 //! Converts the string to a UTF-8 encoded string array.
2843 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2844 //! \return An array containing the UTF-8 encoded string.
2845 core::array<uchar8_t> toUTF8(const bool addBOM = false) const
// The array is constructed with room for four bytes per code unit, plus the
// optional BOM and one spare byte.
2847 core::array<uchar8_t> ret(used * 4 + (addBOM ? unicode::BOM_UTF8_LEN : 0) + 1);
2848 const_iterator iter(*this, 0);
2850 // Add the byte order mark if the user wants it.
2853 ret.push_back(unicode::BOM_ENCODE_UTF8[0]);
2854 ret.push_back(unicode::BOM_ENCODE_UTF8[1]);
2855 ret.push_back(unicode::BOM_ENCODE_UTF8[2]);
// Each character is read as a uchar32_t and encoded into one to four bytes.
2858 while (!iter.atEnd())
2860 uchar32_t c = *iter;
// Four-byte form: lead byte 0xF0 | bits 18-20, then three continuation bytes
// (0x80 | 6 bits each) for bits 12-17, 6-11 and 0-5.
2863 uchar8_t b1 = (0x1E << 3) | ((c >> 18) & 0x7);
2864 uchar8_t b2 = (0x2 << 6) | ((c >> 12) & 0x3F);
2865 uchar8_t b3 = (0x2 << 6) | ((c >> 6) & 0x3F);
2866 uchar8_t b4 = (0x2 << 6) | (c & 0x3F);
// Three-byte form: lead byte 0xE0 | bits 12-15, then two continuation bytes.
2874 uchar8_t b1 = (0xE << 4) | ((c >> 12) & 0xF);
2875 uchar8_t b2 = (0x2 << 6) | ((c >> 6) & 0x3F);
2876 uchar8_t b3 = (0x2 << 6) | (c & 0x3F);
// Two-byte form: lead byte 0xC0 | bits 6-10, then one continuation byte.
2883 uchar8_t b1 = (0x6 << 5) | ((c >> 6) & 0x1F);
2884 uchar8_t b2 = (0x2 << 6) | (c & 0x3F);
// Single-byte form: the value is stored as is.
2890 ret.push_back(static_cast<uchar8_t>(c));
2899 #ifdef USTRING_CPP0X_NEWLITERALS // C++0x
2900 //! Converts the string to a UTF-16 encoded string.
2901 //! \param endian The desired endianness of the string.
2902 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2903 //! \return A string containing the UTF-16 encoded string.
2904 core::string<char16_t> toUTF16_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2906 core::string<char16_t> ret;
// Reserve one slot per code unit, plus the optional BOM and one spare slot.
2907 ret.reserve(used + (addBOM ? unicode::BOM_UTF16_LEN : 0) + 1);
2909 // Add the BOM if specified.
// For native endianness the BOM value is stored directly; otherwise the encoded
// BOM bytes are written through a byte pointer into the string's buffer.
// NOTE(review): these writes go through ret[0] / ret.c_str() right after
// reserve(); confirm core::string permits writing to reserved but unused
// storage and that c_str() can be cast to a writable pointer.
2912 if (endian == unicode::EUTFEE_NATIVE)
2913 ret[0] = unicode::BOM;
2914 else if (endian == unicode::EUTFEE_LITTLE)
2916 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ret.c_str());
2917 *ptr8++ = unicode::BOM_ENCODE_UTF16_LE[0];
2918 *ptr8 = unicode::BOM_ENCODE_UTF16_LE[1];
2922 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ret.c_str());
2923 *ptr8++ = unicode::BOM_ENCODE_UTF16_BE[0];
2924 *ptr8 = unicode::BOM_ENCODE_UTF16_BE[1];
// Byte-swap the result only when a specific endianness was requested and it
// differs from what getEndianness() reports.
2929 if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
2931 char16_t* ptr = ret.c_str();
2932 for (u32 i = 0; i < ret.size(); ++i)
// NOTE(review): ptr is both incremented and read in the same expression; the
// relative order of the two is only guaranteed from C++17 onwards - confirm
// the targeted language version or split the statement.
2933 *ptr++ = unicode::swapEndian16(*ptr);
2940 //! Converts the string to a UTF-16 encoded string array.
2941 //! A core::string based variant, toUTF16_s(), is only provided when USTRING_CPP0X_NEWLITERALS is defined.
2942 //! \param endian The desired endianness of the string.
2943 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2944 //! \return An array containing the UTF-16 encoded string.
2945 core::array<uchar16_t> toUTF16(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
// The array is constructed with one slot per code unit, plus the optional BOM
// and one spare slot; data is then written through the raw pointer.
2947 core::array<uchar16_t> ret(used + (addBOM ? unicode::BOM_UTF16_LEN : 0) + 1);
2948 uchar16_t* ptr = ret.pointer();
2950 // Add the BOM if specified.
// For native endianness the BOM value is stored directly; otherwise the encoded
// BOM bytes are written through a byte pointer.
2953 if (endian == unicode::EUTFEE_NATIVE)
2954 *ptr = unicode::BOM;
2955 else if (endian == unicode::EUTFEE_LITTLE)
2957 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ptr);
2958 *ptr8++ = unicode::BOM_ENCODE_UTF16_LE[0];
2959 *ptr8 = unicode::BOM_ENCODE_UTF16_LE[1];
2963 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ptr);
2964 *ptr8++ = unicode::BOM_ENCODE_UTF16_BE[0];
2965 *ptr8 = unicode::BOM_ENCODE_UTF16_BE[1];
// The string's code units are copied in one block.
2970 memcpy((void*)ptr, (void*)array, used * sizeof(uchar16_t));
// Byte-swap the result only when a specific endianness was requested and it
// differs from what getEndianness() reports.
2971 if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
// NOTE(review): memcpy above copies `used` code units while this loop swaps
// used + 1 of them (inclusive bound) - confirm the extra slot is intended.
2973 for (u32 i = 0; i <= used; ++i)
2974 ptr[i] = unicode::swapEndian16(ptr[i]);
// The array was filled through its raw pointer, so its element count is set explicitly.
2976 ret.set_used(used + (addBOM ? unicode::BOM_UTF16_LEN : 0));
2982 #ifdef USTRING_CPP0X_NEWLITERALS // C++0x
2983 //! Converts the string to a UTF-32 encoded string.
2984 //! \param endian The desired endianness of the string.
2985 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2986 //! \return A string containing the UTF-32 encoded string.
2987 core::string<char32_t> toUTF32_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2989 core::string<char32_t> ret;
// Reserve one slot per character (size()), plus one spare and the optional BOM.
2990 ret.reserve(size() + 1 + (addBOM ? unicode::BOM_UTF32_LEN : 0));
2991 const_iterator iter(*this, 0);
2993 // Add the BOM if specified.
// For native endianness the BOM value is appended directly; otherwise the four
// encoded BOM bytes are assembled in t.chunk first.
2996 if (endian == unicode::EUTFEE_NATIVE)
2997 ret.append(unicode::BOM);
3006 if (endian == unicode::EUTFEE_LITTLE)
3008 t.chunk[0] = unicode::BOM_ENCODE_UTF32_LE[0];
3009 t.chunk[1] = unicode::BOM_ENCODE_UTF32_LE[1];
3010 t.chunk[2] = unicode::BOM_ENCODE_UTF32_LE[2];
3011 t.chunk[3] = unicode::BOM_ENCODE_UTF32_LE[3];
3015 t.chunk[0] = unicode::BOM_ENCODE_UTF32_BE[0];
3016 t.chunk[1] = unicode::BOM_ENCODE_UTF32_BE[1];
3017 t.chunk[2] = unicode::BOM_ENCODE_UTF32_BE[2];
3018 t.chunk[3] = unicode::BOM_ENCODE_UTF32_BE[3];
// Each character is read as a uchar32_t and byte-swapped only when a specific
// endianness was requested and it differs from what getEndianness() reports.
3024 while (!iter.atEnd())
3026 uchar32_t c = *iter;
3027 if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
3028 c = unicode::swapEndian32(c);
3037 //! Converts the string to a UTF-32 encoded string array.
3038 //! A string-returning toUTF32_s() version is only available when USTRING_CPP0X_NEWLITERALS is defined, due to limitations with Irrlicht's string class.
3039 //! \param endian The desired endianness of the string.
3040 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3041 //! \return An array containing the UTF-32 encoded string.
3042 core::array<uchar32_t> toUTF32(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3044 core::array<uchar32_t> ret(size() + (addBOM ? unicode::BOM_UTF32_LEN : 0) + 1); // size() elements + optional BOM + one extra
3045 const_iterator iter(*this, 0); // iterator constructed at position 0 of this string
3047 // Add the BOM if specified.
3050 if (endian == unicode::EUTFEE_NATIVE)
3051 ret.push_back(unicode::BOM); // native order: push the BOM value as-is
3060 if (endian == unicode::EUTFEE_LITTLE)
3062 t.chunk[0] = unicode::BOM_ENCODE_UTF32_LE[0]; // explicit order: fill the BOM byte by byte through t.chunk
3063 t.chunk[1] = unicode::BOM_ENCODE_UTF32_LE[1];
3064 t.chunk[2] = unicode::BOM_ENCODE_UTF32_LE[2];
3065 t.chunk[3] = unicode::BOM_ENCODE_UTF32_LE[3];
3069 t.chunk[0] = unicode::BOM_ENCODE_UTF32_BE[0]; // same byte-wise fill, using the big-endian BOM bytes
3070 t.chunk[1] = unicode::BOM_ENCODE_UTF32_BE[1];
3071 t.chunk[2] = unicode::BOM_ENCODE_UTF32_BE[2];
3072 t.chunk[3] = unicode::BOM_ENCODE_UTF32_BE[3];
3074 ret.push_back(t.full); // t.full: the four chunk bytes pushed as one array element
3079 while (!iter.atEnd())
3081 uchar32_t c = *iter; // dereferencing the iterator yields a 32-bit value
3082 if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian) // swap only when an explicit, different byte order was requested
3083 c = unicode::swapEndian32(c);
3091 //! Converts the string to a wchar_t encoded string.
3092 /** The size of a wchar_t changes depending on the platform. This function will store a
3093 correct UTF-8, -16, or -32 encoded string depending on the size of a wchar_t. **/
3094 //! \param endian The desired endianness of the string.
3095 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3096 //! \return A string containing the wchar_t encoded string.
3097 core::string<wchar_t> toWCHAR_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3099 if (sizeof(wchar_t) == 4) // 4-byte wchar_t: go through the UTF-32 conversion
3101 core::array<uchar32_t> a(toUTF32(endian, addBOM));
3102 core::stringw ret(a.pointer()); // NOTE(review): built from a bare pointer - assumes the converted array is zero-terminated; confirm
3105 else if (sizeof(wchar_t) == 2) // 2-byte wchar_t: UTF-16
3107 if (endian == unicode::EUTFEE_NATIVE && addBOM == false) // nothing to swap or prefix
3109 core::stringw ret(array); // so build directly from the internal buffer
3114 core::array<uchar16_t> a(toUTF16(endian, addBOM));
3115 core::stringw ret(a.pointer());
3119 else if (sizeof(wchar_t) == 1) // 1-byte wchar_t: UTF-8; endian is not passed on in this branch
3121 core::array<uchar8_t> a(toUTF8(addBOM));
3122 core::stringw ret(a.pointer());
3126 // Shouldn't happen.
3127 return core::stringw(); // empty string for any other wchar_t size
3131 //! Converts the string to a wchar_t encoded string array.
3132 /** The size of a wchar_t changes depending on the platform. This function will store a
3133 correct UTF-8, -16, or -32 encoded string depending on the size of a wchar_t. **/
3134 //! \param endian The desired endianness of the string.
3135 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3136 //! \return An array containing the wchar_t encoded string.
3137 core::array<wchar_t> toWCHAR(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3139 if (sizeof(wchar_t) == 4) // 4-byte wchar_t: go through the UTF-32 conversion
3141 core::array<uchar32_t> a(toUTF32(endian, addBOM));
3142 core::array<wchar_t> ret(a.size());
3143 ret.set_used(a.size());
3144 memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar32_t)); // element sizes match in this branch, so a byte copy is enough
3147 if (sizeof(wchar_t) == 2) // 2-byte wchar_t: UTF-16
3149 if (endian == unicode::EUTFEE_NATIVE && addBOM == false) // nothing to swap or prefix
3151 core::array<wchar_t> ret(used);
3153 memcpy((void*)ret.pointer(), (void*)array, used * sizeof(uchar16_t)); // so copy the internal buffer unchanged
3158 core::array<uchar16_t> a(toUTF16(endian, addBOM));
3159 core::array<wchar_t> ret(a.size());
3160 ret.set_used(a.size());
3161 memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar16_t));
3165 if (sizeof(wchar_t) == 1) // 1-byte wchar_t: UTF-8; endian is not passed on in this branch
3167 core::array<uchar8_t> a(toUTF8(addBOM));
3168 core::array<wchar_t> ret(a.size());
3169 ret.set_used(a.size());
3170 memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar8_t));
3174 // Shouldn't happen.
3175 return core::array<wchar_t>(); // empty array for any other wchar_t size
3178 //! Converts the string to a properly encoded io::path string.
//! Forwards to toWCHAR_s() when _IRR_WCHAR_FILESYSTEM is defined and to toUTF8_s() otherwise.
3179 //! \param endian The desired endianness of the string.
3180 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3181 //! \return An io::path string containing the properly encoded string.
3182 io::path toPATH_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3184 #if defined(_IRR_WCHAR_FILESYSTEM)
3185 return toWCHAR_s(endian, addBOM);
3187 return toUTF8_s(addBOM); // endian is not used on this branch
3191 //! Loads an unknown stream of data.
3192 //! Will attempt to determine if the stream is unicode data. Useful for loading from files.
3193 //! \param data The data stream to load from.
3194 //! \param data_size The length of the data string, in bytes (it is divided by 2 or 4 for the wider encodings below).
3195 //! \return A reference to our current string.
3196 ustring16<TAlloc>& loadDataStream(const char* data, size_t data_size)
3198 // Clear our string.
3203 unicode::EUTF_ENCODE e = unicode::determineUnicodeBOM(data); // NOTE(review): called without data_size - confirm it cannot read past a very short buffer
3207 case unicode::EUTFE_UTF8:
3208 append((uchar8_t*)data, data_size); // byte-sized units: length passed unchanged
3211 case unicode::EUTFE_UTF16:
3212 case unicode::EUTFE_UTF16_BE:
3213 case unicode::EUTFE_UTF16_LE:
3214 append((uchar16_t*)data, data_size / 2); // bytes -> 16-bit units; integer division drops a trailing odd byte. NOTE(review): cast assumes suitable alignment of data - confirm
3217 case unicode::EUTFE_UTF32:
3218 case unicode::EUTFE_UTF32_BE:
3219 case unicode::EUTFE_UTF32_LE:
3220 append((uchar32_t*)data, data_size / 4); // bytes -> 32-bit units; integer division drops up to three trailing bytes
3227 //! Gets the encoding of the Unicode string this class contains.
3228 //! \return An enum describing the current encoding of this string.
//! \note The value is returned by copy; the const on the return type does not restrict callers.
3229 const unicode::EUTF_ENCODE getEncoding() const
3234 //! Gets the endianness of the Unicode string this class contains.
3235 //! \return An enum describing the endianness of this string.
//! \note Only the two *_LE encodings report little-endian; every other encoding value reports big-endian.
3236 const unicode::EUTF_ENDIAN getEndianness() const
3238 if (encoding == unicode::EUTFE_UTF16_LE ||
3239 encoding == unicode::EUTFE_UTF32_LE)
3240 return unicode::EUTFEE_LITTLE;
3241 else return unicode::EUTFEE_BIG; // catch-all, not an explicit big-endian test
3246 //! Reallocate the string, making it bigger or smaller.
//! Allocates new_size + 1 elements, copies over what fits from the old buffer and releases the old buffer.
3247 //! \param new_size The new size of the string.
3248 void reallocate(u32 new_size)
3250 uchar16_t* old_array = array; // keep the old buffer so its contents can be copied and then released
3252 array = allocator.allocate(new_size + 1); //new u16[new_size];
3253 allocated = new_size + 1; // capacity is always one element more than the requested size
3254 if (old_array == 0) return; // first allocation: nothing to copy or free
3256 u32 amount = used < new_size ? used : new_size; // the smaller of the current length and the new size
3257 for (u32 i=0; i<=amount; ++i) // inclusive bound: copies amount + 1 elements
3258 array[i] = old_array[i];
3260 if (allocated <= used) // the buffer shrank below the current length
3261 used = allocated - 1; // clamp the length to the last index of the new buffer
3265 allocator.deallocate(old_array); // delete [] old_array;
3268 //--- member variables
3271 unicode::EUTF_ENCODE encoding; // encoding tag; getEndianness() compares it against the *_LE values
3275 //irrAllocator<uchar16_t> allocator;
//! Shorthand for ustring16 instantiated with irrAllocator<uchar16_t>.
3278 typedef ustring16<irrAllocator<uchar16_t> > ustring;
3281 //! Appends two ustring16s.
//! Copying overload: the result is returned by value and is built from a copy of \p left.
3282 template <typename TAlloc>
3283 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const ustring16<TAlloc>& right)
3285 ustring16<TAlloc> ret(left); // both operands are const references, so work on a copy
3291 //! Appends a ustring16 and a null-terminated unicode string.
//! Copying overload: the result is built from a copy of \p left.
3292 template <typename TAlloc, class B>
3293 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const B* const right)
3295 ustring16<TAlloc> ret(left); // left is a const reference, so work on a copy
3301 //! Appends a ustring16 and a null-terminated unicode string.
//! Mirror overload: the raw string is the left-hand operand.
3302 template <class B, typename TAlloc>
3303 inline ustring16<TAlloc> operator+(const B* const left, const ustring16<TAlloc>& right)
3305 ustring16<TAlloc> ret(left); // the result starts as a ustring16 constructed from the raw string
3311 //! Appends a ustring16 and an Irrlicht string.
//! Copying overload: the result is built from a copy of \p left.
3312 template <typename TAlloc, typename B, typename BAlloc>
3313 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const string<B, BAlloc>& right)
3315 ustring16<TAlloc> ret(left); // left is a const reference, so work on a copy
3321 //! Appends a ustring16 and an Irrlicht string.
//! Mirror overload: the Irrlicht string is the left-hand operand.
3322 template <typename TAlloc, typename B, typename BAlloc>
3323 inline ustring16<TAlloc> operator+(const string<B, BAlloc>& left, const ustring16<TAlloc>& right)
3325 ustring16<TAlloc> ret(left); // the result starts as a ustring16 constructed from the Irrlicht string
3331 //! Appends a ustring16 and a std::basic_string.
//! Copying overload: the result is built from a copy of \p left.
3332 template <typename TAlloc, typename B, typename A, typename BAlloc>
3333 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const std::basic_string<B, A, BAlloc>& right)
3335 ustring16<TAlloc> ret(left); // left is a const reference, so work on a copy
3341 //! Appends a ustring16 and a std::basic_string.
//! Mirror overload: the std::basic_string is the left-hand operand.
3342 template <typename TAlloc, typename B, typename A, typename BAlloc>
3343 inline ustring16<TAlloc> operator+(const std::basic_string<B, A, BAlloc>& left, const ustring16<TAlloc>& right)
3345 ustring16<TAlloc> ret(left); // the result starts as a ustring16 constructed from the std::basic_string
3351 //! Appends a ustring16 and a char.
//! Copying overload: the result is built from a copy of \p left.
3352 template <typename TAlloc>
3353 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const char right)
3355 ustring16<TAlloc> ret(left); // left is a const reference, so work on a copy
3361 //! Appends a ustring16 and a char.
//! Mirror overload: the char is the left-hand operand.
3362 template <typename TAlloc>
3363 inline ustring16<TAlloc> operator+(const char left, const ustring16<TAlloc>& right)
3365 ustring16<TAlloc> ret(left); // the result starts as a ustring16 constructed from the single char
3371 #ifdef USTRING_CPP0X_NEWLITERALS
3372 //! Appends a ustring16 and a uchar32_t.
//! Copying overload: the result is built from a copy of \p left.
3373 template <typename TAlloc>
3374 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const uchar32_t right)
3376 ustring16<TAlloc> ret(left); // left is a const reference, so work on a copy
3382 //! Appends a ustring16 and a uchar32_t.
//! Mirror overload: the uchar32_t is the left-hand operand.
3383 template <typename TAlloc>
3384 inline ustring16<TAlloc> operator+(const uchar32_t left, const ustring16<TAlloc>& right)
3386 ustring16<TAlloc> ret(left); // the result starts as a ustring16 constructed from the single uchar32_t
3393 //! Appends a ustring16 and a short.
//! Copying overload: a copy of \p left followed by the core::stringc text of \p right.
3394 template <typename TAlloc>
3395 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const short right)
3397 ustring16<TAlloc> ret(left);
3398 ret += core::stringc(right); // number-to-text conversion is delegated to core::stringc
3403 //! Appends a ustring16 and a short.
//! Mirror overload: the number is the left-hand operand.
3404 template <typename TAlloc>
3405 inline ustring16<TAlloc> operator+(const short left, const ustring16<TAlloc>& right)
3407 ustring16<TAlloc> ret((core::stringc(left))); // extra parentheses keep this from parsing as a function declaration
3413 //! Appends a ustring16 and an unsigned short.
//! Copying overload: a copy of \p left followed by the core::stringc text of \p right.
3414 template <typename TAlloc>
3415 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned short right)
3417 ustring16<TAlloc> ret(left);
3418 ret += core::stringc(right); // number-to-text conversion is delegated to core::stringc
3423 //! Appends a ustring16 and an unsigned short.
//! Mirror overload: the number is the left-hand operand.
3424 template <typename TAlloc>
3425 inline ustring16<TAlloc> operator+(const unsigned short left, const ustring16<TAlloc>& right)
3427 ustring16<TAlloc> ret((core::stringc(left))); // extra parentheses keep this from parsing as a function declaration
3433 //! Appends a ustring16 and an int.
//! Copying overload: a copy of \p left followed by the core::stringc text of \p right.
3434 template <typename TAlloc>
3435 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const int right)
3437 ustring16<TAlloc> ret(left);
3438 ret += core::stringc(right); // number-to-text conversion is delegated to core::stringc
3443 //! Appends a ustring16 and an int.
//! Mirror overload: the number is the left-hand operand.
3444 template <typename TAlloc>
3445 inline ustring16<TAlloc> operator+(const int left, const ustring16<TAlloc>& right)
3447 ustring16<TAlloc> ret((core::stringc(left))); // extra parentheses keep this from parsing as a function declaration
3453 //! Appends a ustring16 and an unsigned int.
//! Copying overload: a copy of \p left followed by the core::stringc text of \p right.
3454 template <typename TAlloc>
3455 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned int right)
3457 ustring16<TAlloc> ret(left);
3458 ret += core::stringc(right); // number-to-text conversion is delegated to core::stringc
3463 //! Appends a ustring16 and an unsigned int.
//! Mirror overload: the number is the left-hand operand.
3464 template <typename TAlloc>
3465 inline ustring16<TAlloc> operator+(const unsigned int left, const ustring16<TAlloc>& right)
3467 ustring16<TAlloc> ret((core::stringc(left))); // extra parentheses keep this from parsing as a function declaration
3473 //! Appends a ustring16 and a long.
//! Copying overload: a copy of \p left followed by the core::stringc text of \p right.
3474 template <typename TAlloc>
3475 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const long right)
3477 ustring16<TAlloc> ret(left);
3478 ret += core::stringc(right); // number-to-text conversion is delegated to core::stringc
3483 //! Appends a ustring16 and a long.
//! Mirror overload: the number is the left-hand operand.
3484 template <typename TAlloc>
3485 inline ustring16<TAlloc> operator+(const long left, const ustring16<TAlloc>& right)
3487 ustring16<TAlloc> ret((core::stringc(left))); // extra parentheses keep this from parsing as a function declaration
3493 //! Appends a ustring16 and an unsigned long.
//! Copying overload: a copy of \p left followed by the core::stringc text of \p right.
3494 template <typename TAlloc>
3495 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned long right)
3497 ustring16<TAlloc> ret(left);
3498 ret += core::stringc(right); // number-to-text conversion is delegated to core::stringc
3503 //! Appends a ustring16 and an unsigned long.
//! Mirror overload: the number is the left-hand operand.
3504 template <typename TAlloc>
3505 inline ustring16<TAlloc> operator+(const unsigned long left, const ustring16<TAlloc>& right)
3507 ustring16<TAlloc> ret((core::stringc(left))); // extra parentheses keep this from parsing as a function declaration
3513 //! Appends a ustring16 and a float.
//! Copying overload: a copy of \p left followed by the core::stringc text of \p right.
3514 template <typename TAlloc>
3515 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const float right)
3517 ustring16<TAlloc> ret(left);
3518 ret += core::stringc(right); // number-to-text conversion is delegated to core::stringc
3523 //! Appends a ustring16 and a float.
//! Mirror overload: the number is the left-hand operand.
3524 template <typename TAlloc>
3525 inline ustring16<TAlloc> operator+(const float left, const ustring16<TAlloc>& right)
3527 ustring16<TAlloc> ret((core::stringc(left))); // extra parentheses keep this from parsing as a function declaration
3533 //! Appends a ustring16 and a double.
//! Copying overload: a copy of \p left followed by the core::stringc text of \p right.
3534 template <typename TAlloc>
3535 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const double right)
3537 ustring16<TAlloc> ret(left);
3538 ret += core::stringc(right); // number-to-text conversion is delegated to core::stringc
3543 //! Appends a ustring16 and a double.
//! Mirror overload: the number is the left-hand operand.
3544 template <typename TAlloc>
3545 inline ustring16<TAlloc> operator+(const double left, const ustring16<TAlloc>& right)
3547 ustring16<TAlloc> ret((core::stringc(left))); // extra parentheses keep this from parsing as a function declaration
3553 #ifdef USTRING_CPP0X
3554 //! Appends two ustring16s.
//! Rvalue overload: reuses the temporary \p right by inserting \p left into it.
//! \note Returns an rvalue reference to the \p right argument itself, not a new object; the result is only valid while that argument is alive.
3555 template <typename TAlloc>
3556 inline ustring16<TAlloc>&& operator+(const ustring16<TAlloc>& left, ustring16<TAlloc>&& right)
3558 //std::cout << "MOVE operator+(&, &&)" << std::endl;
3559 right.insert(left, 0); // second argument 0: presumably the insert position, i.e. prepend
3560 return std::move(right);
3564 //! Appends two ustring16s.
//! Rvalue overload taking a temporary \p left.
//! \note Returns an rvalue reference to the \p left argument itself, not a new object; the result is only valid while that argument is alive.
3565 template <typename TAlloc>
3566 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const ustring16<TAlloc>& right)
3568 //std::cout << "MOVE operator+(&&, &)" << std::endl;
3570 return std::move(left);
3574 //! Appends two ustring16s.
//! Rvalue overload for two temporaries: picks which operand's buffer to reuse from their spare capacities.
//! \note Returns an rvalue reference to one of the arguments, not a new object; the result is only valid while that argument is alive.
3575 template <typename TAlloc>
3576 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, ustring16<TAlloc>&& right)
3578 //std::cout << "MOVE operator+(&&, &&)" << std::endl;
3579 if ((right.size_raw() <= left.capacity() - left.size_raw()) || // right fits into left's spare capacity, or ...
3580 (right.capacity() - right.size_raw() < left.size_raw())) // ... right's spare capacity is too small to take left
3583 return std::move(left); // in either case left is the operand that is returned
3587 right.insert(left, 0); // otherwise reuse right; second argument 0 is presumably the insert position, i.e. prepend
3588 return std::move(right);
3593 //! Appends a ustring16 and a null-terminated unicode string.
//! Rvalue overload taking a temporary \p left.
//! \note Returns an rvalue reference to the \p left argument itself, not a new object; the result is only valid while that argument is alive.
3594 template <typename TAlloc, class B>
3595 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const B* const right)
3597 //std::cout << "MOVE operator+(&&, B*)" << std::endl;
3599 return std::move(left);
3603 //! Appends a ustring16 and a null-terminated unicode string.
//! Rvalue overload: reuses the temporary \p right by inserting the raw string into it.
//! \note Returns an rvalue reference to the \p right argument itself, not a new object; the result is only valid while that argument is alive.
3604 template <class B, typename TAlloc>
3605 inline ustring16<TAlloc>&& operator+(const B* const left, ustring16<TAlloc>&& right)
3607 //std::cout << "MOVE operator+(B*, &&)" << std::endl;
3608 right.insert(left, 0); // second argument 0: presumably the insert position, i.e. prepend
3609 return std::move(right);
3613 //! Appends a ustring16 and an Irrlicht string.
//! Rvalue overload: reuses the temporary \p right by inserting the Irrlicht string into it.
//! \note Returns an rvalue reference to the \p right argument itself, not a new object; the result is only valid while that argument is alive.
3614 template <typename TAlloc, typename B, typename BAlloc>
3615 inline ustring16<TAlloc>&& operator+(const string<B, BAlloc>& left, ustring16<TAlloc>&& right)
3617 //std::cout << "MOVE operator+(&, &&)" << std::endl;
3618 right.insert(left, 0); // second argument 0: presumably the insert position, i.e. prepend
3619 return std::move(right);
3623 //! Appends a ustring16 and an Irrlicht string.
//! Rvalue overload taking a temporary \p left.
//! \note Returns an rvalue reference to the \p left argument itself, not a new object; the result is only valid while that argument is alive.
3624 template <typename TAlloc, typename B, typename BAlloc>
3625 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const string<B, BAlloc>& right)
3627 //std::cout << "MOVE operator+(&&, &)" << std::endl;
3629 return std::move(left);
3633 //! Appends a ustring16 and a std::basic_string.
//! Rvalue overload: reuses the temporary \p right by inserting the converted std::basic_string into it.
//! \note Returns an rvalue reference to the \p right argument itself, not a new object; the result is only valid while that argument is alive.
3634 template <typename TAlloc, typename B, typename A, typename BAlloc>
3635 inline ustring16<TAlloc>&& operator+(const std::basic_string<B, A, BAlloc>& left, ustring16<TAlloc>&& right)
3637 //std::cout << "MOVE operator+(&, &&)" << std::endl;
3638 right.insert(core::ustring16<TAlloc>(left), 0); // left is first converted to a temporary ustring16; 0 is presumably the insert position
3639 return std::move(right);
3643 //! Appends a ustring16 and a std::basic_string.
//! Rvalue overload taking a temporary \p left.
//! \note Returns an rvalue reference to the \p left argument itself, not a new object; the result is only valid while that argument is alive.
3644 template <typename TAlloc, typename B, typename A, typename BAlloc>
3645 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const std::basic_string<B, A, BAlloc>& right)
3647 //std::cout << "MOVE operator+(&&, &)" << std::endl;
3649 return std::move(left);
3653 //! Appends a ustring16 and a char.
//! Rvalue overload: modifies the temporary \p left in place and returns it by value.
3654 template <typename TAlloc>
3655 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const char right)
3657 left.append((uchar32_t)right); // the char is widened to uchar32_t before being appended
3658 return std::move(left); // the returned value is move-constructed from left
3662 //! Appends a ustring16 and a char.
//! Rvalue overload: modifies the temporary \p right in place and returns it by value.
3663 template <typename TAlloc>
3664 inline ustring16<TAlloc> operator+(const char left, ustring16<TAlloc>&& right)
3666 right.insert((uchar32_t)left, 0); // widened char; second argument 0 is presumably the insert position, i.e. prepend
3667 return std::move(right); // the returned value is move-constructed from right
3671 #ifdef USTRING_CPP0X_NEWLITERALS
3672 //! Appends a ustring16 and a uchar32_t.
//! Rvalue overload taking a temporary \p left; the result is returned by value.
3673 template <typename TAlloc>
3674 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const uchar32_t right)
3677 return std::move(left); // the returned value is move-constructed from left
3681 //! Appends a ustring16 and a uchar32_t.
//! Rvalue overload: modifies the temporary \p right in place and returns it by value.
3682 template <typename TAlloc>
3683 inline ustring16<TAlloc> operator+(const uchar32_t left, ustring16<TAlloc>&& right)
3685 right.insert(left, 0); // second argument 0: presumably the insert position, i.e. prepend
3686 return std::move(right); // the returned value is move-constructed from right
3691 //! Appends a ustring16 and a short.
//! Rvalue overload: appends the core::stringc text of \p right to the temporary \p left and returns it by value.
3692 template <typename TAlloc>
3693 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const short right)
3695 left.append(core::stringc(right));
3696 return std::move(left); // the returned value is move-constructed from left
3700 //! Appends a ustring16 and a short.
//! Rvalue overload: inserts the core::stringc text of \p left into the temporary \p right and returns it by value.
3701 template <typename TAlloc>
3702 inline ustring16<TAlloc> operator+(const short left, ustring16<TAlloc>&& right)
3704 right.insert(core::stringc(left), 0); // second argument 0: presumably the insert position, i.e. prepend
3705 return std::move(right); // the returned value is move-constructed from right
3709 //! Appends a ustring16 and an unsigned short.
//! Rvalue overload: appends the core::stringc text of \p right to the temporary \p left and returns it by value.
3710 template <typename TAlloc>
3711 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned short right)
3713 left.append(core::stringc(right));
3714 return std::move(left); // the returned value is move-constructed from left
3718 //! Appends a ustring16 and an unsigned short.
//! Rvalue overload: inserts the core::stringc text of \p left into the temporary \p right and returns it by value.
3719 template <typename TAlloc>
3720 inline ustring16<TAlloc> operator+(const unsigned short left, ustring16<TAlloc>&& right)
3722 right.insert(core::stringc(left), 0); // second argument 0: presumably the insert position, i.e. prepend
3723 return std::move(right); // the returned value is move-constructed from right
3727 //! Appends a ustring16 and an int.
//! Rvalue overload: appends the core::stringc text of \p right to the temporary \p left and returns it by value.
3728 template <typename TAlloc>
3729 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const int right)
3731 left.append(core::stringc(right));
3732 return std::move(left); // the returned value is move-constructed from left
3736 //! Appends a ustring16 and an int.
//! Rvalue overload: inserts the core::stringc text of \p left into the temporary \p right and returns it by value.
3737 template <typename TAlloc>
3738 inline ustring16<TAlloc> operator+(const int left, ustring16<TAlloc>&& right)
3740 right.insert(core::stringc(left), 0); // second argument 0: presumably the insert position, i.e. prepend
3741 return std::move(right); // the returned value is move-constructed from right
3745 //! Appends a ustring16 and an unsigned int.
//! Rvalue overload: appends the core::stringc text of \p right to the temporary \p left and returns it by value.
3746 template <typename TAlloc>
3747 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned int right)
3749 left.append(core::stringc(right));
3750 return std::move(left); // the returned value is move-constructed from left
3754 //! Appends a ustring16 and an unsigned int.
//! Rvalue overload: inserts the core::stringc text of \p left into the temporary \p right and returns it by value.
3755 template <typename TAlloc>
3756 inline ustring16<TAlloc> operator+(const unsigned int left, ustring16<TAlloc>&& right)
3758 right.insert(core::stringc(left), 0); // second argument 0: presumably the insert position, i.e. prepend
3759 return std::move(right); // the returned value is move-constructed from right
3763 //! Appends a ustring16 and a long.
//! Rvalue overload: appends the core::stringc text of \p right to the temporary \p left and returns it by value.
3764 template <typename TAlloc>
3765 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const long right)
3767 left.append(core::stringc(right));
3768 return std::move(left); // the returned value is move-constructed from left
3772 //! Appends a ustring16 and a long.
//! Rvalue overload: inserts the core::stringc text of \p left into the temporary \p right and returns it by value.
3773 template <typename TAlloc>
3774 inline ustring16<TAlloc> operator+(const long left, ustring16<TAlloc>&& right)
3776 right.insert(core::stringc(left), 0); // second argument 0: presumably the insert position, i.e. prepend
3777 return std::move(right); // the returned value is move-constructed from right
3781 //! Appends a ustring16 and an unsigned long.
//! Rvalue overload: appends the core::stringc text of \p right to the temporary \p left and returns it by value.
3782 template <typename TAlloc>
3783 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned long right)
3785 left.append(core::stringc(right));
3786 return std::move(left); // the returned value is move-constructed from left
3790 //! Appends a ustring16 and an unsigned long.
//! Rvalue overload: inserts the core::stringc text of \p left into the temporary \p right and returns it by value.
3791 template <typename TAlloc>
3792 inline ustring16<TAlloc> operator+(const unsigned long left, ustring16<TAlloc>&& right)
3794 right.insert(core::stringc(left), 0); // second argument 0: presumably the insert position, i.e. prepend
3795 return std::move(right); // the returned value is move-constructed from right
3799 //! Appends a ustring16 and a float.
//! Rvalue overload: appends the core::stringc text of \p right to the temporary \p left and returns it by value.
3800 template <typename TAlloc>
3801 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const float right)
3803 left.append(core::stringc(right));
3804 return std::move(left); // the returned value is move-constructed from left
3808 //! Appends a ustring16 and a float.
//! Rvalue overload: inserts the core::stringc text of \p left into the temporary \p right and returns it by value.
3809 template <typename TAlloc>
3810 inline ustring16<TAlloc> operator+(const float left, ustring16<TAlloc>&& right)
3812 right.insert(core::stringc(left), 0); // second argument 0: presumably the insert position, i.e. prepend
3813 return std::move(right); // the returned value is move-constructed from right
3817 //! Appends a ustring16 and a double.
//! Rvalue overload: appends the core::stringc text of \p right to the temporary \p left and returns it by value.
3818 template <typename TAlloc>
3819 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const double right)
3821 left.append(core::stringc(right));
3822 return std::move(left); // the returned value is move-constructed from left
3826 //! Appends a ustring16 and a double.
//! Rvalue overload: inserts the core::stringc text of \p left into the temporary \p right and returns it by value.
3827 template <typename TAlloc>
3828 inline ustring16<TAlloc> operator+(const double left, ustring16<TAlloc>&& right)
3830 right.insert(core::stringc(left), 0); // second argument 0: presumably the insert position, i.e. prepend
3831 return std::move(right); // the returned value is move-constructed from right
3836 #ifndef USTRING_NO_STL
3837 //! Writes a ustring16 to an ostream.
//! The string is converted with toUTF8_s() (default arguments) and written as a C string.
3838 template <typename TAlloc>
3839 inline std::ostream& operator<<(std::ostream& out, const ustring16<TAlloc>& in)
3841 out << in.toUTF8_s().c_str(); // written via c_str(), so output stops at the first zero terminator
3845 //! Writes a ustring16 to a wostream.
//! The string is converted with toWCHAR_s() using its defaults (native endianness, no BOM) and written as a C string.
3846 template <typename TAlloc>
3847 inline std::wostream& operator<<(std::wostream& out, const ustring16<TAlloc>& in)
3849 out << in.toWCHAR_s().c_str(); // written via c_str(), so output stops at the first zero terminator
3855 #ifndef USTRING_NO_STL
3860 //! Hashing algorithm for hashing a ustring. Used for things like unordered_maps.
3861 //! Algorithm taken from std::hash<std::string>.
//! NOTE(review): std::unary_function was deprecated in C++11 and removed in C++17 - this base class will not compile with a C++17 standard library unless it is re-enabled.
3862 class hash : public std::unary_function<core::ustring, size_t>
3865 size_t operator()(const core::ustring& s) const
3867 size_t ret = 2166136261U; // starting value; matches the 32-bit FNV offset basis
3869 size_t stride = 1 + s.size_raw() / 10; // at least 1, grows with the raw length; presumably the step between hashed elements - TODO confirm
3871 core::ustring::const_iterator i = s.begin();
3872 while (i != s.end())
3874 // TODO: Don't force u32 on an x64 OS. Make it agnostic.
3875 ret = 16777619U * ret ^ (size_t)s[(u32)index]; // '*' binds tighter than '^': (16777619U * ret) ^ element; 16777619 matches the 32-bit FNV prime
3883 } // end namespace unicode
3887 } // end namespace core
3888 } // end namespace irr