2 Basic Unicode string class for Irrlicht.
3 Copyright (c) 2009-2011 John Norman
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any
7 damages arising from the use of this software.
9 Permission is granted to anyone to use this software for any
10 purpose, including commercial applications, and to alter it and
11 redistribute it freely, subject to the following restrictions:
13 1. The origin of this software must not be misrepresented; you
14 must not claim that you wrote the original software. If you use
15 this software in a product, an acknowledgment in the product
16 documentation would be appreciated but is not required.
18 2. Altered source versions must be plainly marked as such, and
19 must not be misrepresented as being the original software.
21 3. This notice may not be removed or altered from any source
24 The original version of this class can be located at:
25 http://irrlicht.suckerfreegames.com/
28 john@suckerfreegames.com
31 #ifndef __IRR_USTRING_H_INCLUDED__
32 #define __IRR_USTRING_H_INCLUDED__
34 #if (__cplusplus > 199711L) || (_MSC_VER >= 1600) || defined(__GXX_EXPERIMENTAL_CXX0X__)
35 # define USTRING_CPP0X
36 # if defined(__GXX_EXPERIMENTAL_CXX0X__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 5)))
37 # define USTRING_CPP0X_NEWLITERALS
45 #define __BYTE_ORDER 0
46 #define __LITTLE_ENDIAN 0
47 #define __BIG_ENDIAN 1
48 #elif defined(__MACH__) && defined(__APPLE__)
49 #include <machine/endian.h>
50 #elif defined(__FreeBSD__)
51 #include <sys/endian.h>
60 #ifndef USTRING_NO_STL
67 #include "irrAllocator.h"
70 #include "irrString.h"
73 //! UTF-16 surrogate start values.
74 static const irr::u16 UTF16_HI_SURROGATE = 0xD800;
75 static const irr::u16 UTF16_LO_SURROGATE = 0xDC00;
77 //! Is a UTF-16 code unit a surrogate?
78 #define UTF16_IS_SURROGATE(c) (((c) & 0xF800) == 0xD800)
79 #define UTF16_IS_SURROGATE_HI(c) (((c) & 0xFC00) == 0xD800)
80 #define UTF16_IS_SURROGATE_LO(c) (((c) & 0xFC00) == 0xDC00)
86 // Define our character types.
87 #ifdef USTRING_CPP0X_NEWLITERALS // C++0x
88 typedef char32_t uchar32_t;
89 typedef char16_t uchar16_t;
90 typedef char uchar8_t;
92 typedef u32 uchar32_t;
93 typedef u16 uchar16_t;
103 //! The unicode replacement character. Used to replace invalid characters.
104 const irr::u16 UTF_REPLACEMENT_CHARACTER = 0xFFFD;
106 //! Convert a UTF-16 surrogate pair into a UTF-32 character.
107 //! \param high The high value of the pair.
108 //! \param low The low value of the pair.
109 //! \return The UTF-32 character expressed by the surrogate pair.
110 inline uchar32_t toUTF32(uchar16_t high, uchar16_t low)
112 // Convert the surrogate pair into a single UTF-32 character.
113 uchar32_t x = ((high & ((1 << 6) -1)) << 10) | (low & ((1 << 10) -1));
114 uchar32_t wu = ((high >> 6) & ((1 << 5) - 1)) + 1;
115 return (wu << 16) | x;
118 //! Swaps the endianness of a 16-bit value.
119 //! \return The new value.
120 inline uchar16_t swapEndian16(const uchar16_t& c)
122 return ((c >> 8) & 0x00FF) | ((c << 8) & 0xFF00);
125 //! Swaps the endianness of a 32-bit value.
126 //! \return The new value.
127 inline uchar32_t swapEndian32(const uchar32_t& c)
129 return ((c >> 24) & 0x000000FF) |
130 ((c >> 8) & 0x0000FF00) |
131 ((c << 8) & 0x00FF0000) |
132 ((c << 24) & 0xFF000000);
135 //! The Unicode byte order mark.
136 const u16 BOM = 0xFEFF;
138 //! The size of the Unicode byte order mark in terms of the Unicode character size.
139 const u8 BOM_UTF8_LEN = 3;
140 const u8 BOM_UTF16_LEN = 1;
141 const u8 BOM_UTF32_LEN = 1;
143 //! Unicode byte order marks for file operations.
144 const u8 BOM_ENCODE_UTF8[3] = { 0xEF, 0xBB, 0xBF };
145 const u8 BOM_ENCODE_UTF16_BE[2] = { 0xFE, 0xFF };
146 const u8 BOM_ENCODE_UTF16_LE[2] = { 0xFF, 0xFE };
147 const u8 BOM_ENCODE_UTF32_BE[4] = { 0x00, 0x00, 0xFE, 0xFF };
148 const u8 BOM_ENCODE_UTF32_LE[4] = { 0xFF, 0xFE, 0x00, 0x00 };
150 //! The size in bytes of the Unicode byte order marks for file operations.
151 const u8 BOM_ENCODE_UTF8_LEN = 3;
152 const u8 BOM_ENCODE_UTF16_LEN = 2;
153 const u8 BOM_ENCODE_UTF32_LEN = 4;
155 //! Unicode encoding type.
168 //! Unicode endianness.
176 //! Returns the specified unicode byte order mark in a byte array.
177 //! The byte order mark is the first few bytes in a text file that signifies its encoding.
178 /** \param mode The Unicode encoding method that we want to get the byte order mark for.
179 If EUTFE_UTF16 or EUTFE_UTF32 is passed, it uses the native system endianness. **/
180 //! \return An array that contains a byte order mark.
181 inline core::array<u8> getUnicodeBOM(EUTF_ENCODE mode)
183 #define COPY_ARRAY(source, size) \
184 memcpy(ret.pointer(), source, size); \
187 core::array<u8> ret(4);
191 COPY_ARRAY(BOM_ENCODE_UTF8, BOM_ENCODE_UTF8_LEN);
194 #ifdef __BIG_ENDIAN__
195 COPY_ARRAY(BOM_ENCODE_UTF16_BE, BOM_ENCODE_UTF16_LEN);
197 COPY_ARRAY(BOM_ENCODE_UTF16_LE, BOM_ENCODE_UTF16_LEN);
201 COPY_ARRAY(BOM_ENCODE_UTF16_BE, BOM_ENCODE_UTF16_LEN);
204 COPY_ARRAY(BOM_ENCODE_UTF16_LE, BOM_ENCODE_UTF16_LEN);
207 #ifdef __BIG_ENDIAN__
208 COPY_ARRAY(BOM_ENCODE_UTF32_BE, BOM_ENCODE_UTF32_LEN);
210 COPY_ARRAY(BOM_ENCODE_UTF32_LE, BOM_ENCODE_UTF32_LEN);
214 COPY_ARRAY(BOM_ENCODE_UTF32_BE, BOM_ENCODE_UTF32_LEN);
217 COPY_ARRAY(BOM_ENCODE_UTF32_LE, BOM_ENCODE_UTF32_LEN);
220 // TODO sapier: fixed warning only,
221 // don't know if something needs to be done here
229 //! Detects if the given data stream starts with a unicode BOM.
230 //! \param data The data stream to check.
231 //! \return The unicode BOM associated with the data stream, or EUTFE_NONE if none was found.
232 inline EUTF_ENCODE determineUnicodeBOM(const char* data)
234 if (memcmp(data, BOM_ENCODE_UTF8, 3) == 0) return EUTFE_UTF8;
235 if (memcmp(data, BOM_ENCODE_UTF16_BE, 2) == 0) return EUTFE_UTF16_BE;
236 if (memcmp(data, BOM_ENCODE_UTF16_LE, 2) == 0) return EUTFE_UTF16_LE;
237 if (memcmp(data, BOM_ENCODE_UTF32_BE, 4) == 0) return EUTFE_UTF32_BE;
238 if (memcmp(data, BOM_ENCODE_UTF32_LE, 4) == 0) return EUTFE_UTF32_LE;
242 } // end namespace unicode
245 //! UTF-16 string class.
246 template <typename TAlloc = irrAllocator<uchar16_t> >
251 ///------------------///
252 /// iterator classes ///
253 ///------------------///
255 //! Access an element in a unicode string, allowing one to change it.
256 class _ustring16_iterator_access
259 _ustring16_iterator_access(const ustring16<TAlloc>* s, u32 p) : ref(s), pos(p) {}
261 //! Allow the class to be interpreted as a single UTF-32 character.
262 operator uchar32_t() const
267 //! Allow one to change the character in the unicode string.
268 //! \param c The new character to use.
270 _ustring16_iterator_access& operator=(const uchar32_t c)
276 //! Increments the value by 1.
278 _ustring16_iterator_access& operator++()
284 //! Increments the value by 1, returning the old value.
285 //! \return A unicode character.
286 uchar32_t operator++(int)
288 uchar32_t old = _get();
293 //! Decrements the value by 1.
295 _ustring16_iterator_access& operator--()
301 //! Decrements the value by 1, returning the old value.
302 //! \return A unicode character.
303 uchar32_t operator--(int)
305 uchar32_t old = _get();
310 //! Adds to the value by a specified amount.
311 //! \param val The amount to add to this character.
313 _ustring16_iterator_access& operator+=(int val)
319 //! Subtracts from the value by a specified amount.
320 //! \param val The amount to subtract from this character.
322 _ustring16_iterator_access& operator-=(int val)
328 //! Multiplies the value by a specified amount.
329 //! \param val The amount to multiply this character by.
331 _ustring16_iterator_access& operator*=(int val)
337 //! Divides the value by a specified amount.
338 //! \param val The amount to divide this character by.
340 _ustring16_iterator_access& operator/=(int val)
346 //! Modulos the value by a specified amount.
347 //! \param val The amount to modulo this character by.
349 _ustring16_iterator_access& operator%=(int val)
355 //! Adds to the value by a specified amount.
356 //! \param val The amount to add to this character.
357 //! \return A unicode character.
358 uchar32_t operator+(int val) const
363 //! Subtracts from the value by a specified amount.
364 //! \param val The amount to subtract from this character.
365 //! \return A unicode character.
366 uchar32_t operator-(int val) const
371 //! Multiplies the value by a specified amount.
372 //! \param val The amount to multiply this character by.
373 //! \return A unicode character.
374 uchar32_t operator*(int val) const
379 //! Divides the value by a specified amount.
380 //! \param val The amount to divide this character by.
381 //! \return A unicode character.
382 uchar32_t operator/(int val) const
387 //! Modulos the value by a specified amount.
388 //! \param val The amount to modulo this character by.
389 //! \return A unicode character.
390 uchar32_t operator%(int val) const
396 //! Gets a uchar32_t from our current position.
397 uchar32_t _get() const
399 const uchar16_t* a = ref->c_str();
400 if (!UTF16_IS_SURROGATE(a[pos]))
401 return static_cast<uchar32_t>(a[pos]);
404 if (pos + 1 >= ref->size_raw())
407 return unicode::toUTF32(a[pos], a[pos + 1]);
411 //! Sets a uchar32_t at our current position.
412 void _set(uchar32_t c)
414 ustring16<TAlloc>* ref2 = const_cast<ustring16<TAlloc>*>(ref);
415 const uchar16_t* a = ref2->c_str();
418 // c will be multibyte, so split it up into the high and low surrogate pairs.
419 uchar16_t x = static_cast<uchar16_t>(c);
420 uchar16_t vh = UTF16_HI_SURROGATE | ((((c >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
421 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
423 // If the previous position was a surrogate pair, just replace them. Else, insert the low pair.
424 if (UTF16_IS_SURROGATE_HI(a[pos]) && pos + 1 != ref2->size_raw())
425 ref2->replace_raw(vl, static_cast<u32>(pos) + 1);
426 else ref2->insert_raw(vl, static_cast<u32>(pos) + 1);
428 ref2->replace_raw(vh, static_cast<u32>(pos));
432 // c fits in a single 16-bit code unit.
433 uchar16_t vh = static_cast<uchar16_t>(c);
435 // If the previous position was a surrogate pair, remove the extra code unit.
436 if (UTF16_IS_SURROGATE_HI(a[pos]))
437 ref2->erase_raw(static_cast<u32>(pos) + 1);
439 ref2->replace_raw(vh, static_cast<u32>(pos));
443 const ustring16<TAlloc>* ref;
446 typedef typename ustring16<TAlloc>::_ustring16_iterator_access access;
449 //! Iterator to iterate through a UTF-16 string.
450 #ifndef USTRING_NO_STL
451 class _ustring16_const_iterator : public std::iterator<
452 std::bidirectional_iterator_tag, // iterator_category
453 access, // value_type
454 ptrdiff_t, // difference_type
455 const access, // pointer
456 const access // reference
459 class _ustring16_const_iterator
463 typedef _ustring16_const_iterator _Iter;
464 typedef std::iterator<std::bidirectional_iterator_tag, access, ptrdiff_t, const access, const access> _Base;
465 typedef const access const_pointer;
466 typedef const access const_reference;
468 #ifndef USTRING_NO_STL
469 typedef typename _Base::value_type value_type;
470 typedef typename _Base::difference_type difference_type;
471 typedef typename _Base::difference_type distance_type;
472 typedef typename _Base::pointer pointer;
473 typedef const_reference reference;
475 typedef access value_type;
476 typedef u32 difference_type;
477 typedef u32 distance_type;
478 typedef const_pointer pointer;
479 typedef const_reference reference;
483 _ustring16_const_iterator(const _Iter& i) : ref(i.ref), pos(i.pos) {}
484 _ustring16_const_iterator(const ustring16<TAlloc>& s) : ref(&s), pos(0) {}
485 _ustring16_const_iterator(const ustring16<TAlloc>& s, const u32 p) : ref(&s), pos(0)
487 if (ref->size_raw() == 0 || p == 0)
490 // Go to the appropriate position.
492 u32 sr = ref->size_raw();
493 const uchar16_t* a = ref->c_str();
494 while (i != 0 && pos < sr)
496 if (UTF16_IS_SURROGATE_HI(a[pos]))
503 //! Test for equalness.
504 bool operator==(const _Iter& iter) const
506 if (ref == iter.ref && pos == iter.pos)
511 //! Test for unequalness.
512 bool operator!=(const _Iter& iter) const
514 if (ref != iter.ref || pos != iter.pos)
519 //! Switch to the next full character in the string.
522 if (pos == ref->size_raw()) return *this;
523 const uchar16_t* a = ref->c_str();
524 if (UTF16_IS_SURROGATE_HI(a[pos]))
525 pos += 2; // TODO: check for valid low surrogate?
527 if (pos > ref->size_raw()) pos = ref->size_raw();
531 //! Switch to the next full character in the string, returning the previous position.
532 _Iter operator++(int)
539 //! Switch to the previous full character in the string.
542 if (pos == 0) return *this;
543 const uchar16_t* a = ref->c_str();
545 if (UTF16_IS_SURROGATE_LO(a[pos]) && pos != 0) // low surrogate, go back one more.
550 //! Switch to the previous full character in the string, returning the previous position.
551 _Iter operator--(int)
558 //! Advance a specified number of full characters in the string.
560 _Iter& operator+=(const difference_type v)
562 if (v == 0) return *this;
563 if (v < 0) return operator-=(v * -1);
565 if (pos >= ref->size_raw())
568 // Go to the appropriate position.
569 // TODO: Don't force u32 on an x64 OS. Make it agnostic.
571 u32 sr = ref->size_raw();
572 const uchar16_t* a = ref->c_str();
573 while (i != 0 && pos < sr)
575 if (UTF16_IS_SURROGATE_HI(a[pos]))
586 //! Go back a specified number of full characters in the string.
588 _Iter& operator-=(const difference_type v)
590 if (v == 0) return *this;
591 if (v > 0) return operator+=(v * -1);
596 // Go to the appropriate position.
597 // TODO: Don't force u32 on an x64 OS. Make it agnostic.
599 const uchar16_t* a = ref->c_str();
600 while (i != 0 && pos != 0)
603 if (UTF16_IS_SURROGATE_LO(a[pos]) != 0 && pos != 0)
611 //! Return a new iterator that is a variable number of full characters forward from the current position.
612 _Iter operator+(const difference_type v) const
619 //! Return a new iterator that is a variable number of full characters backward from the current position.
620 _Iter operator-(const difference_type v) const
627 //! Returns the distance between two iterators.
628 difference_type operator-(const _Iter& iter) const
630 // Make sure we reference the same object!
632 return difference_type();
657 //! Accesses the full character at the iterator's position.
658 const_reference operator*() const
660 if (pos >= ref->size_raw())
662 const uchar16_t* a = ref->c_str();
663 u32 p = ref->size_raw();
664 if (UTF16_IS_SURROGATE_LO(a[p]))
666 reference ret(ref, p);
669 const_reference ret(ref, pos);
673 //! Accesses the full character at the iterator's position.
674 reference operator*()
676 if (pos >= ref->size_raw())
678 const uchar16_t* a = ref->c_str();
679 u32 p = ref->size_raw();
680 if (UTF16_IS_SURROGATE_LO(a[p]))
682 reference ret(ref, p);
685 reference ret(ref, pos);
689 //! Accesses the full character at the iterator's position.
690 const_pointer operator->() const
695 //! Accesses the full character at the iterator's position.
701 //! Is the iterator at the start of the string?
707 //! Is the iterator at the end of the string?
710 const uchar16_t* a = ref->c_str();
711 if (UTF16_IS_SURROGATE(a[pos]))
712 return (pos + 1) >= ref->size_raw();
713 else return pos >= ref->size_raw();
716 //! Moves the iterator to the start of the string.
722 //! Moves the iterator to the end of the string.
725 pos = ref->size_raw();
728 //! Returns the iterator's position.
729 //! \return The iterator's position.
736 const ustring16<TAlloc>* ref;
740 //! Iterator to iterate through a UTF-16 string.
741 class _ustring16_iterator : public _ustring16_const_iterator
744 typedef _ustring16_iterator _Iter;
745 typedef _ustring16_const_iterator _Base;
746 typedef typename _Base::const_pointer const_pointer;
747 typedef typename _Base::const_reference const_reference;
750 typedef typename _Base::value_type value_type;
751 typedef typename _Base::difference_type difference_type;
752 typedef typename _Base::distance_type distance_type;
753 typedef access pointer;
754 typedef access reference;
760 _ustring16_iterator(const _Iter& i) : _ustring16_const_iterator(i) {}
761 _ustring16_iterator(const ustring16<TAlloc>& s) : _ustring16_const_iterator(s) {}
762 _ustring16_iterator(const ustring16<TAlloc>& s, const u32 p) : _ustring16_const_iterator(s, p) {}
764 //! Accesses the full character at the iterator's position.
765 reference operator*() const
767 if (pos >= ref->size_raw())
769 const uchar16_t* a = ref->c_str();
770 u32 p = ref->size_raw();
771 if (UTF16_IS_SURROGATE_LO(a[p]))
773 reference ret(ref, p);
776 reference ret(ref, pos);
780 //! Accesses the full character at the iterator's position.
781 reference operator*()
783 if (pos >= ref->size_raw())
785 const uchar16_t* a = ref->c_str();
786 u32 p = ref->size_raw();
787 if (UTF16_IS_SURROGATE_LO(a[p]))
789 reference ret(ref, p);
792 reference ret(ref, pos);
796 //! Accesses the full character at the iterator's position.
797 pointer operator->() const
802 //! Accesses the full character at the iterator's position.
809 typedef typename ustring16<TAlloc>::_ustring16_iterator iterator;
810 typedef typename ustring16<TAlloc>::_ustring16_const_iterator const_iterator;
812 ///----------------------///
813 /// end iterator classes ///
814 ///----------------------///
816 //! Default constructor
818 : array(0), allocated(1), used(0)
820 #if __BYTE_ORDER == __BIG_ENDIAN
821 encoding = unicode::EUTFE_UTF16_BE;
823 encoding = unicode::EUTFE_UTF16_LE;
825 array = allocator.allocate(1); // new u16[1];
831 ustring16(const ustring16<TAlloc>& other)
832 : array(0), allocated(0), used(0)
834 #if __BYTE_ORDER == __BIG_ENDIAN
835 encoding = unicode::EUTFE_UTF16_BE;
837 encoding = unicode::EUTFE_UTF16_LE;
843 //! Constructor from other string types
844 template <class B, class A>
845 ustring16(const string<B, A>& other)
846 : array(0), allocated(0), used(0)
848 #if __BYTE_ORDER == __BIG_ENDIAN
849 encoding = unicode::EUTFE_UTF16_BE;
851 encoding = unicode::EUTFE_UTF16_LE;
857 #ifndef USTRING_NO_STL
858 //! Constructor from std::string
859 template <class B, class A, typename Alloc>
860 ustring16(const std::basic_string<B, A, Alloc>& other)
861 : array(0), allocated(0), used(0)
863 #if __BYTE_ORDER == __BIG_ENDIAN
864 encoding = unicode::EUTFE_UTF16_BE;
866 encoding = unicode::EUTFE_UTF16_LE;
868 *this = other.c_str();
872 //! Constructor from iterator.
873 template <typename Itr>
874 ustring16(Itr first, Itr last)
875 : array(0), allocated(0), used(0)
877 #if __BYTE_ORDER == __BIG_ENDIAN
878 encoding = unicode::EUTFE_UTF16_BE;
880 encoding = unicode::EUTFE_UTF16_LE;
882 reserve(std::distance(first, last));
885 for (; first != last; ++first)
886 append((uchar32_t)*first);
891 #ifndef USTRING_CPP0X_NEWLITERALS
892 //! Constructor for copying a character string from a pointer.
893 ustring16(const char* const c)
894 : array(0), allocated(0), used(0)
896 #if __BYTE_ORDER == __BIG_ENDIAN
897 encoding = unicode::EUTFE_UTF16_BE;
899 encoding = unicode::EUTFE_UTF16_LE;
902 loadDataStream(c, strlen(c));
903 //append((uchar8_t*)c);
907 //! Constructor for copying a character string from a pointer with a given length.
908 ustring16(const char* const c, u32 length)
909 : array(0), allocated(0), used(0)
911 #if __BYTE_ORDER == __BIG_ENDIAN
912 encoding = unicode::EUTFE_UTF16_BE;
914 encoding = unicode::EUTFE_UTF16_LE;
917 loadDataStream(c, length);
922 //! Constructor for copying a UTF-8 string from a pointer.
923 ustring16(const uchar8_t* const c)
924 : array(0), allocated(0), used(0)
926 #if __BYTE_ORDER == __BIG_ENDIAN
927 encoding = unicode::EUTFE_UTF16_BE;
929 encoding = unicode::EUTFE_UTF16_LE;
936 //! Constructor for copying a UTF-8 string from a single char.
937 ustring16(const char c)
938 : array(0), allocated(0), used(0)
940 #if __BYTE_ORDER == __BIG_ENDIAN
941 encoding = unicode::EUTFE_UTF16_BE;
943 encoding = unicode::EUTFE_UTF16_LE;
946 append((uchar32_t)c);
950 //! Constructor for copying a UTF-8 string from a pointer with a given length.
951 ustring16(const uchar8_t* const c, u32 length)
952 : array(0), allocated(0), used(0)
954 #if __BYTE_ORDER == __BIG_ENDIAN
955 encoding = unicode::EUTFE_UTF16_BE;
957 encoding = unicode::EUTFE_UTF16_LE;
964 //! Constructor for copying a UTF-16 string from a pointer.
965 ustring16(const uchar16_t* const c)
966 : array(0), allocated(0), used(0)
968 #if __BYTE_ORDER == __BIG_ENDIAN
969 encoding = unicode::EUTFE_UTF16_BE;
971 encoding = unicode::EUTFE_UTF16_LE;
978 //! Constructor for copying a UTF-16 string from a pointer with a given length
979 ustring16(const uchar16_t* const c, u32 length)
980 : array(0), allocated(0), used(0)
982 #if __BYTE_ORDER == __BIG_ENDIAN
983 encoding = unicode::EUTFE_UTF16_BE;
985 encoding = unicode::EUTFE_UTF16_LE;
992 //! Constructor for copying a UTF-32 string from a pointer.
993 ustring16(const uchar32_t* const c)
994 : array(0), allocated(0), used(0)
996 #if __BYTE_ORDER == __BIG_ENDIAN
997 encoding = unicode::EUTFE_UTF16_BE;
999 encoding = unicode::EUTFE_UTF16_LE;
1006 //! Constructor for copying a UTF-32 from a pointer with a given length.
1007 ustring16(const uchar32_t* const c, u32 length)
1008 : array(0), allocated(0), used(0)
1010 #if __BYTE_ORDER == __BIG_ENDIAN
1011 encoding = unicode::EUTFE_UTF16_BE;
1013 encoding = unicode::EUTFE_UTF16_LE;
1020 //! Constructor for copying a wchar_t string from a pointer.
1021 ustring16(const wchar_t* const c)
1022 : array(0), allocated(0), used(0)
1024 #if __BYTE_ORDER == __BIG_ENDIAN
1025 encoding = unicode::EUTFE_UTF16_BE;
1027 encoding = unicode::EUTFE_UTF16_LE;
1030 if (sizeof(wchar_t) == 4)
1031 append(reinterpret_cast<const uchar32_t* const>(c));
1032 else if (sizeof(wchar_t) == 2)
1033 append(reinterpret_cast<const uchar16_t* const>(c));
1034 else if (sizeof(wchar_t) == 1)
1035 append(reinterpret_cast<const uchar8_t* const>(c));
1039 //! Constructor for copying a wchar_t string from a pointer with a given length.
1040 ustring16(const wchar_t* const c, u32 length)
1041 : array(0), allocated(0), used(0)
1043 #if __BYTE_ORDER == __BIG_ENDIAN
1044 encoding = unicode::EUTFE_UTF16_BE;
1046 encoding = unicode::EUTFE_UTF16_LE;
1049 if (sizeof(wchar_t) == 4)
1050 append(reinterpret_cast<const uchar32_t* const>(c), length);
1051 else if (sizeof(wchar_t) == 2)
1052 append(reinterpret_cast<const uchar16_t* const>(c), length);
1053 else if (sizeof(wchar_t) == 1)
1054 append(reinterpret_cast<const uchar8_t* const>(c), length);
1058 #ifdef USTRING_CPP0X
1059 //! Constructor for moving a ustring16
1060 ustring16(ustring16<TAlloc>&& other)
1061 : array(other.array), encoding(other.encoding), allocated(other.allocated), used(other.used)
1063 //std::cout << "MOVE constructor" << std::endl;
1065 other.allocated = 0;
1074 allocator.deallocate(array); // delete [] array;
1078 //! Assignment operator
1079 ustring16& operator=(const ustring16<TAlloc>& other)
1084 used = other.size_raw();
1085 if (used >= allocated)
1087 allocator.deallocate(array); // delete [] array;
1088 allocated = used + 1;
1089 array = allocator.allocate(used + 1); //new u16[used];
1092 const uchar16_t* p = other.c_str();
1093 for (u32 i=0; i<=used; ++i, ++p)
1098 // Validate our new UTF-16 string.
1105 #ifdef USTRING_CPP0X
1106 //! Move assignment operator
1107 ustring16& operator=(ustring16<TAlloc>&& other)
1111 //std::cout << "MOVE operator=" << std::endl;
1112 allocator.deallocate(array);
1114 array = other.array;
1115 allocated = other.allocated;
1116 encoding = other.encoding;
1126 //! Assignment operator for other string types
1127 template <class B, class A>
1128 ustring16<TAlloc>& operator=(const string<B, A>& other)
1130 *this = other.c_str();
1135 //! Assignment operator for UTF-8 strings
1136 ustring16<TAlloc>& operator=(const uchar8_t* const c)
1140 array = allocator.allocate(1); //new u16[1];
1145 if (!c) return *this;
1147 //! Append our string now.
1153 //! Assignment operator for UTF-16 strings
1154 ustring16<TAlloc>& operator=(const uchar16_t* const c)
1158 array = allocator.allocate(1); //new u16[1];
1163 if (!c) return *this;
1165 //! Append our string now.
1171 //! Assignment operator for UTF-32 strings
1172 ustring16<TAlloc>& operator=(const uchar32_t* const c)
1176 array = allocator.allocate(1); //new u16[1];
1181 if (!c) return *this;
1183 //! Append our string now.
1189 //! Assignment operator for wchar_t strings.
1190 /** Note that this assumes that a correct unicode string is stored in the wchar_t string.
1191 Since wchar_t changes depending on its platform, it could either be a UTF-8, -16, or -32 string.
1192 This function assumes you are storing the correct unicode encoding inside the wchar_t string. **/
1193 ustring16<TAlloc>& operator=(const wchar_t* const c)
1195 if (sizeof(wchar_t) == 4)
1196 *this = reinterpret_cast<const uchar32_t* const>(c);
1197 else if (sizeof(wchar_t) == 2)
1198 *this = reinterpret_cast<const uchar16_t* const>(c);
1199 else if (sizeof(wchar_t) == 1)
1200 *this = reinterpret_cast<const uchar8_t* const>(c);
1206 //! Assignment operator for other strings.
1207 /** Note that this assumes that a correct unicode string is stored in the string. **/
1209 ustring16<TAlloc>& operator=(const B* const c)
1212 *this = reinterpret_cast<const uchar32_t* const>(c);
1213 else if (sizeof(B) == 2)
1214 *this = reinterpret_cast<const uchar16_t* const>(c);
1215 else if (sizeof(B) == 1)
1216 *this = reinterpret_cast<const uchar8_t* const>(c);
1222 //! Direct access operator
1223 access operator [](const u32 index)
1225 _IRR_DEBUG_BREAK_IF(index>=size()) // bad index
1226 iterator iter(*this, index);
1227 return iter.operator*();
1231 //! Direct access operator
1232 const access operator [](const u32 index) const
1234 _IRR_DEBUG_BREAK_IF(index>=size()) // bad index
1235 const_iterator iter(*this, index);
1236 return iter.operator*();
1240 //! Equality operator
1241 bool operator ==(const uchar16_t* const str) const
1247 for(i=0; array[i] && str[i]; ++i)
1248 if (array[i] != str[i])
1251 return !array[i] && !str[i];
1255 //! Equality operator
1256 bool operator ==(const ustring16<TAlloc>& other) const
1258 for(u32 i=0; array[i] && other.array[i]; ++i)
1259 if (array[i] != other.array[i])
1262 return used == other.used;
1266 //! Is smaller comparator
1267 bool operator <(const ustring16<TAlloc>& other) const
1269 for(u32 i=0; array[i] && other.array[i]; ++i)
1271 s32 diff = array[i] - other.array[i];
1276 return used < other.used;
1280 //! Inequality operator
1281 bool operator !=(const uchar16_t* const str) const
1283 return !(*this == str);
1287 //! Inequality operator
1288 bool operator !=(const ustring16<TAlloc>& other) const
1290 return !(*this == other);
1294 //! Returns the length of a ustring16 in full characters.
1295 //! \return Length of a ustring16 in full characters.
1298 const_iterator i(*this, 0);
1309 //! Informs if the ustring is empty or not.
1310 //! \return True if the ustring is empty, false if not.
1313 return (size_raw() == 0);
1317 //! Returns a pointer to the raw UTF-16 string data.
1318 //! \return pointer to C-style NUL terminated array of UTF-16 code points.
1319 const uchar16_t* c_str() const
1325 //! Compares the first n characters of this string with another.
1326 //! \param other Other string to compare to.
1327 //! \param n Number of characters to compare.
1328 //! \return True if the n first characters of both strings are equal.
1329 bool equalsn(const ustring16<TAlloc>& other, u32 n) const
1332 const uchar16_t* oa = other.c_str();
1333 for(i=0; array[i] && oa[i] && i < n; ++i)
1334 if (array[i] != oa[i])
1337 // if one (or both) of the strings was smaller then they
1338 // are only equal if they have the same length
1339 return (i == n) || (used == other.used);
1343 //! Compares the first n characters of this string with another.
1344 //! \param str Other string to compare to.
1345 //! \param n Number of characters to compare.
1346 //! \return True if the n first characters of both strings are equal.
1347 bool equalsn(const uchar16_t* const str, u32 n) const
1352 for(i=0; array[i] && str[i] && i < n; ++i)
1353 if (array[i] != str[i])
1356 // if one (or both) of the strings was smaller then they
1357 // are only equal if they have the same length
1358 return (i == n) || (array[i] == 0 && str[i] == 0);
1362 //! Appends a character to this ustring16
1363 //! \param character The character to append.
1364 //! \return A reference to our current string.
1365 ustring16<TAlloc>& append(uchar32_t character)
1367 if (used + 2 >= allocated)
1368 reallocate(used + 2);
1370 if (character > 0xFFFF)
1374 // character will be multibyte, so split it up into a surrogate pair.
1375 uchar16_t x = static_cast<uchar16_t>(character);
1376 uchar16_t vh = UTF16_HI_SURROGATE | ((((character >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1377 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1384 array[used-1] = character;
1392 //! Appends a UTF-8 string to this ustring16
1393 //! \param other The UTF-8 string to append.
1394 //! \param length The length of the string to append.
1395 //! \return A reference to our current string.
1396 ustring16<TAlloc>& append(const uchar8_t* const other, u32 length=0xffffffff)
1401 // Determine if the string is long enough for a BOM.
1403 const uchar8_t* p = other;
1407 } while (*p++ && len < unicode::BOM_ENCODE_UTF8_LEN);
1410 unicode::EUTF_ENCODE c_bom = unicode::EUTFE_NONE;
1411 if (len == unicode::BOM_ENCODE_UTF8_LEN)
1413 if (memcmp(other, unicode::BOM_ENCODE_UTF8, unicode::BOM_ENCODE_UTF8_LEN) == 0)
1414 c_bom = unicode::EUTFE_UTF8;
1417 // If a BOM was found, don't include it in the string.
1418 const uchar8_t* c2 = other;
1419 if (c_bom != unicode::EUTFE_NONE)
1421 c2 = other + unicode::BOM_UTF8_LEN;
1422 length -= unicode::BOM_UTF8_LEN;
1425 // Calculate the size of the string to read in.
1431 } while(*p++ && len < length);
1435 // If we need to grow the array, do it now.
1436 if (used + len >= allocated)
1437 reallocate(used + (len * 2));
1440 // Convert UTF-8 to UTF-16.
1442 for (u32 l = 0; l<len;)
1445 if (((c2[l] >> 6) & 0x03) == 0x02)
1446 { // Invalid continuation byte.
1447 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1450 else if (c2[l] == 0xC0 || c2[l] == 0xC1)
1451 { // Invalid byte - overlong encoding.
1452 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1455 else if ((c2[l] & 0xF8) == 0xF0)
1456 { // 4 bytes UTF-8, 2 bytes UTF-16.
1457 // Check for a full string.
1460 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1468 if (valid && (((c2[l+1] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1469 if (valid && (((c2[l+2] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1470 if (valid && (((c2[l+3] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1473 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1479 uchar8_t b1 = ((c2[l] & 0x7) << 2) | ((c2[l+1] >> 4) & 0x3);
1480 uchar8_t b2 = ((c2[l+1] & 0xF) << 4) | ((c2[l+2] >> 2) & 0xF);
1481 uchar8_t b3 = ((c2[l+2] & 0x3) << 6) | (c2[l+3] & 0x3F);
1482 uchar32_t v = b3 | ((uchar32_t)b2 << 8) | ((uchar32_t)b1 << 16);
1484 // Split v up into a surrogate pair.
1485 uchar16_t x = static_cast<uchar16_t>(v);
1486 uchar16_t vh = UTF16_HI_SURROGATE | ((((v >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1487 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1492 ++used; // Using two shorts this time, so increase used by 1.
1494 else if ((c2[l] & 0xF0) == 0xE0)
1495 { // 3 bytes UTF-8, 1 byte UTF-16.
1496 // Check for a full string.
1499 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1507 if (valid && (((c2[l+1] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1508 if (valid && (((c2[l+2] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1511 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1517 uchar8_t b1 = ((c2[l] & 0xF) << 4) | ((c2[l+1] >> 2) & 0xF);
1518 uchar8_t b2 = ((c2[l+1] & 0x3) << 6) | (c2[l+2] & 0x3F);
1519 uchar16_t ch = b2 | ((uchar16_t)b1 << 8);
1523 else if ((c2[l] & 0xE0) == 0xC0)
1524 { // 2 bytes UTF-8, 1 byte UTF-16.
1525 // Check for a full string.
1528 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1534 if (((c2[l+1] >> 6) & 0x03) != 0x02)
1536 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1542 uchar8_t b1 = (c2[l] >> 2) & 0x7;
1543 uchar8_t b2 = ((c2[l] & 0x3) << 6) | (c2[l+1] & 0x3F);
1544 uchar16_t ch = b2 | ((uchar16_t)b1 << 8);
1549 { // 1 byte UTF-8, 1 byte UTF-16.
1552 { // Values above 0xF4 are restricted and aren't used. By now, anything above 0x7F is invalid.
1553 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1555 else array[pos++] = static_cast<uchar16_t>(c2[l]);
1561 // Validate our new UTF-16 string.
1568 //! Appends a UTF-16 string to this ustring16
1569 //! \param other The UTF-16 string to append.
1570 //! \param length The length of the string to append.
1571 //! \return A reference to our current string.
1572 ustring16<TAlloc>& append(const uchar16_t* const other, u32 length=0xffffffff)
1577 // Determine if the string is long enough for a BOM.
1579 const uchar16_t* p = other;
1583 } while (*p++ && len < unicode::BOM_ENCODE_UTF16_LEN);
1585 // Check for the BOM to determine the string's endianness.
1586 unicode::EUTF_ENDIAN c_end = unicode::EUTFEE_NATIVE;
1587 if (memcmp(other, unicode::BOM_ENCODE_UTF16_LE, unicode::BOM_ENCODE_UTF16_LEN) == 0)
1588 c_end = unicode::EUTFEE_LITTLE;
1589 else if (memcmp(other, unicode::BOM_ENCODE_UTF16_BE, unicode::BOM_ENCODE_UTF16_LEN) == 0)
1590 c_end = unicode::EUTFEE_BIG;
1592 // If a BOM was found, don't include it in the string.
1593 const uchar16_t* c2 = other;
1594 if (c_end != unicode::EUTFEE_NATIVE)
1596 c2 = other + unicode::BOM_UTF16_LEN;
1597 length -= unicode::BOM_UTF16_LEN;
1600 // Calculate the size of the string to read in.
1606 } while(*p++ && len < length);
1610 // If we need to grow the size of the array, do it now.
1611 if (used + len >= allocated)
1612 reallocate(used + (len * 2));
1616 // Copy the string now.
1617 unicode::EUTF_ENDIAN m_end = getEndianness();
1618 for (u32 l = start; l < start + len; ++l)
1620 array[l] = (uchar16_t)c2[l];
1621 if (c_end != unicode::EUTFEE_NATIVE && c_end != m_end)
1622 array[l] = unicode::swapEndian16(array[l]);
1627 // Validate our new UTF-16 string.
1633 //! Appends a UTF-32 string to this ustring16
1634 //! \param other The UTF-32 string to append.
1635 //! \param length The length of the string to append.
1636 //! \return A reference to our current string.
1637 ustring16<TAlloc>& append(const uchar32_t* const other, u32 length=0xffffffff)
1642 // Check for the BOM to determine the string's endianness.
1643 unicode::EUTF_ENDIAN c_end = unicode::EUTFEE_NATIVE;
1644 if (memcmp(other, unicode::BOM_ENCODE_UTF32_LE, unicode::BOM_ENCODE_UTF32_LEN) == 0)
1645 c_end = unicode::EUTFEE_LITTLE;
1646 else if (memcmp(other, unicode::BOM_ENCODE_UTF32_BE, unicode::BOM_ENCODE_UTF32_LEN) == 0)
1647 c_end = unicode::EUTFEE_BIG;
1649 // If a BOM was found, don't include it in the string.
1650 const uchar32_t* c2 = other;
1651 if (c_end != unicode::EUTFEE_NATIVE)
1653 c2 = other + unicode::BOM_UTF32_LEN;
1654 length -= unicode::BOM_UTF32_LEN;
1657 // Calculate the size of the string to read in.
1659 const uchar32_t* p = c2;
1663 } while(*p++ && len < length);
1667 // If we need to grow the size of the array, do it now.
1668 // In case all of the UTF-32 string is split into surrogate pairs, do len * 2.
1669 if (used + (len * 2) >= allocated)
1670 reallocate(used + ((len * 2) * 2));
1673 // Convert UTF-32 to UTF-16.
1674 unicode::EUTF_ENDIAN m_end = getEndianness();
1676 for (u32 l = 0; l<len; ++l)
1680 uchar32_t ch = c2[l];
1681 if (c_end != unicode::EUTFEE_NATIVE && c_end != m_end)
1682 ch = unicode::swapEndian32(ch);
1686 // Split ch up into a surrogate pair as it is over 16 bits long.
1687 uchar16_t x = static_cast<uchar16_t>(ch);
1688 uchar16_t vh = UTF16_HI_SURROGATE | ((((ch >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1689 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1692 ++used; // Using two shorts, so increased used again.
1694 else if (ch >= 0xD800 && ch <= 0xDFFF)
1696 // Between possible UTF-16 surrogates (invalid!)
1697 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
1699 else array[pos++] = static_cast<uchar16_t>(ch);
1703 // Validate our new UTF-16 string.
1710 //! Appends a ustring16 to this ustring16
1711 //! \param other The string to append to this one.
1712 //! \return A reference to our current string.
1713 ustring16<TAlloc>& append(const ustring16<TAlloc>& other)
1715 const uchar16_t* oa = other.c_str();
1717 u32 len = other.size_raw();
1719 if (used + len >= allocated)
1720 reallocate(used + len);
1722 for (u32 l=0; l<len; ++l)
1723 array[used+l] = oa[l];
1732 //! Appends a certain amount of characters of a ustring16 to this ustring16.
1733 //! \param other The string to append to this one.
1734 //! \param length How many characters of the other string to add to this one.
1735 //! \return A reference to our current string.
1736 ustring16<TAlloc>& append(const ustring16<TAlloc>& other, u32 length)
1738 if (other.size() == 0)
1741 if (other.size() < length)
1747 if (used + length * 2 >= allocated)
1748 reallocate(used + length * 2);
1750 const_iterator iter(other, 0);
1752 while (!iter.atEnd() && l)
1754 uchar32_t c = *iter;
1764 //! Reserves some memory.
1765 //! \param count The amount of characters to reserve.
1766 void reserve(u32 count)
1768 if (count < allocated)
1775 //! Finds first occurrence of character.
1776 //! \param c The character to search for.
1777 //! \return Position where the character has been found, or -1 if not found.
1778 s32 findFirst(uchar32_t c) const
1780 const_iterator i(*this, 0);
1795 //! Finds first occurrence of a character of a list.
1796 //! \param c A list of characters to find. For example if the method should find the first occurrence of 'a' or 'b', this parameter should be "ab".
1797 //! \param count The amount of characters in the list. Usually, this should be strlen(c).
1798 //! \return Position where one of the characters has been found, or -1 if not found.
1799 s32 findFirstChar(const uchar32_t* const c, u32 count=1) const
1804 const_iterator i(*this, 0);
1810 for (u32 j=0; j<count; ++j)
1821 //! Finds first position of a character not in a given list.
1822 //! \param c A list of characters to NOT find. For example if the method should find the first occurrence of a character not 'a' or 'b', this parameter should be "ab".
1823 //! \param count The amount of characters in the list. Usually, this should be strlen(c).
1824 //! \return Position where the character has been found, or -1 if not found.
1825 s32 findFirstCharNotInList(const uchar32_t* const c, u32 count=1) const
1830 const_iterator i(*this, 0);
1837 for (j=0; j<count; ++j)
1850 //! Finds last position of a character not in a given list.
1851 //! \param c A list of characters to NOT find. For example if the method should find the last occurrence of a character not 'a' or 'b', this parameter should be "ab".
1852 //! \param count The amount of characters in the list. Usually, this should be strlen(c).
1853 //! \return Position where the character has been found, or -1 if not found.
1854 s32 findLastCharNotInList(const uchar32_t* const c, u32 count=1) const
1859 const_iterator i(end());
1862 s32 pos = size() - 1;
1863 while (!i.atStart())
1867 for (j=0; j<count; ++j)
1880 //! Finds next occurrence of character.
1881 //! \param c The character to search for.
1882 //! \param startPos The position in the string to start searching.
1883 //! \return Position where the character has been found, or -1 if not found.
1884 s32 findNext(uchar32_t c, u32 startPos) const
1886 const_iterator i(*this, startPos);
1902 //! Finds last occurrence of character.
1903 //! \param c The character to search for.
1904 //! \param start The start position of the reverse search ( default = -1, on end ).
1905 //! \return Position where the character has been found, or -1 if not found.
1906 s32 findLast(uchar32_t c, s32 start = -1) const
1909 start = core::clamp ( start < 0 ? (s32)s : start, 0, (s32)s ) - 1;
1911 const_iterator i(*this, start);
1913 while (!i.atStart())
1925 //! Finds last occurrence of a character in a list.
1926 //! \param c A list of characters to find. For example if the method should find the last occurrence of 'a' or 'b', this parameter should be "ab".
1927 //! \param count The amount of characters in the list. Usually, this should be strlen(c).
1928 //! \return Position where one of the characters has been found, or -1 if not found.
1929 s32 findLastChar(const uchar32_t* const c, u32 count=1) const
1934 const_iterator i(end());
1938 while (!i.atStart())
1941 for (u32 j=0; j<count; ++j)
1952 //! Finds another ustring16 in this ustring16.
1953 //! \param str The string to find.
1954 //! \param start The start position of the search.
1955 //! \return Position where the ustring16 has been found, or -1 if not found.
1956 s32 find(const ustring16<TAlloc>& str, const u32 start = 0) const
1958 u32 my_size = size();
1959 u32 their_size = str.size();
1961 if (their_size == 0 || my_size - start < their_size)
1964 const_iterator i(*this, start);
1969 const_iterator i2(i);
1970 const_iterator j(str, 0);
1971 uchar32_t t1 = (uchar32_t)*i2;
1972 uchar32_t t2 = (uchar32_t)*j;
1979 t1 = (uchar32_t)*i2;
1990 //! Finds another ustring16 in this ustring16 by comparing raw UTF-16 code units.
1991 //! \param str The string to find.
1992 //! \param start The start position of the search, as an index into the raw UTF-16 array.
1993 //! \return Position where the string has been found, or -1 if not found.
1994 s32 find_raw(const ustring16<TAlloc>& str, const u32 start = 0) const
1996 const uchar16_t* data = str.c_str();
2007 for (u32 i=start; i<=used-len; ++i)
2011 while(data[j] && array[i+j] == data[j])
2023 //! Returns a substring.
2024 //! \param begin: Start of substring.
2025 //! \param length: Length of substring.
2026 //! \return The requested substring, returned by value as a new ustring16.
2027 ustring16<TAlloc> subString(u32 begin, s32 length) const
2030 // if start after ustring16
2031 // or no proper substring length
2032 if ((length <= 0) || (begin>=len))
2033 return ustring16<TAlloc>("");
2034 // clamp length to maximal value
2035 if ((length+begin) > len)
2038 ustring16<TAlloc> o;
2039 o.reserve((length+1) * 2);
2041 const_iterator i(*this, begin);
2042 while (!i.atEnd() && length)
2053 //! Appends a character to this ustring16.
2054 //! \param c Character to append.
2055 //! \return A reference to our current string.
2056 ustring16<TAlloc>& operator += (char c)
2058 append((uchar32_t)c);
2063 //! Appends a character to this ustring16.
2064 //! \param c Character to append.
2065 //! \return A reference to our current string.
2066 ustring16<TAlloc>& operator += (uchar32_t c)
2073 //! Appends a number to this ustring16.
2074 //! \param c Number to append.
2075 //! \return A reference to our current string.
2076 ustring16<TAlloc>& operator += (short c)
2078 append(core::stringc(c));
2083 //! Appends a number to this ustring16.
2084 //! \param c Number to append.
2085 //! \return A reference to our current string.
2086 ustring16<TAlloc>& operator += (unsigned short c)
2088 append(core::stringc(c));
2093 #ifdef USTRING_CPP0X_NEWLITERALS
2094 //! Appends a number to this ustring16.
2095 //! \param c Number to append.
2096 //! \return A reference to our current string.
2097 ustring16<TAlloc>& operator += (int c)
2099 append(core::stringc(c));
2104 //! Appends a number to this ustring16.
2105 //! \param c Number to append.
2106 //! \return A reference to our current string.
2107 ustring16<TAlloc>& operator += (unsigned int c)
2109 append(core::stringc(c));
2115 //! Appends a number to this ustring16.
2116 //! \param c Number to append.
2117 //! \return A reference to our current string.
2118 ustring16<TAlloc>& operator += (long c)
2120 append(core::stringc(c));
2125 //! Appends a number to this ustring16.
2126 //! \param c Number to append.
2127 //! \return A reference to our current string.
2128 ustring16<TAlloc>& operator += (unsigned long c)
2130 append(core::stringc(c));
2135 //! Appends a number to this ustring16.
2136 //! \param c Number to append.
2137 //! \return A reference to our current string.
2138 ustring16<TAlloc>& operator += (double c)
2140 append(core::stringc(c));
2145 //! Appends a UTF-16 string to this ustring16.
2146 //! \param c UTF-16 string to append.
2147 //! \return A reference to our current string.
2148 ustring16<TAlloc>& operator += (const uchar16_t* const c)
2155 //! Appends a ustring16 to this ustring16.
2156 //! \param other ustring16 to append.
2157 //! \return A reference to our current string.
2158 ustring16<TAlloc>& operator += (const ustring16<TAlloc>& other)
2165 //! Replaces all characters of a given type with another one.
2166 //! \param toReplace Character to replace.
2167 //! \param replaceWith Character replacing the old one.
2168 //! \return A reference to our current string.
2169 ustring16<TAlloc>& replace(uchar32_t toReplace, uchar32_t replaceWith)
2171 iterator i(*this, 0);
2174 typename ustring16<TAlloc>::access a = *i;
2175 if ((uchar32_t)a == toReplace)
2183 //! Replaces all instances of a string with another one.
2184 //! \param toReplace The string to replace.
2185 //! \param replaceWith The string replacing the old one.
2186 //! \return A reference to our current string.
2187 ustring16<TAlloc>& replace(const ustring16<TAlloc>& toReplace, const ustring16<TAlloc>& replaceWith)
2189 if (toReplace.size() == 0)
2192 const uchar16_t* other = toReplace.c_str();
2193 const uchar16_t* replace = replaceWith.c_str();
2194 const u32 other_size = toReplace.size_raw();
2195 const u32 replace_size = replaceWith.size_raw();
2197 // Determine the delta. The algorithm will change depending on the delta.
2198 s32 delta = replace_size - other_size;
2200 // A character for character replace. The string will not shrink or grow.
2204 while ((pos = find_raw(other, pos)) != -1)
2206 for (u32 i = 0; i < replace_size; ++i)
2207 array[pos + i] = replace[i];
2213 // We are going to be removing some characters. The string will shrink.
2217 for (u32 pos = 0; pos <= used; ++i, ++pos)
2219 // Is this potentially a match?
2220 if (array[pos] == *other)
2222 // Check to see if we have a match.
2224 for (j = 0; j < other_size; ++j)
2226 if (array[pos + j] != other[j])
2230 // If we have a match, replace characters.
2231 if (j == other_size)
2233 for (j = 0; j < replace_size; ++j)
2234 array[i + j] = replace[j];
2235 i += replace_size - 1;
2236 pos += other_size - 1;
2241 // No match found, just copy characters.
2242 array[i - 1] = array[pos];
2250 // We are going to be adding characters, so the string size will increase.
2251 // Count the number of times toReplace exists in the string so we can allocate the new size.
2254 while ((pos = find_raw(other, pos)) != -1)
2260 // Re-allocate the string now, if needed.
2261 u32 len = delta * find_count;
2262 if (used + len >= allocated)
2263 reallocate(used + len);
2267 while ((pos = find_raw(other, pos)) != -1)
2269 uchar16_t* start = array + pos + other_size - 1;
2270 uchar16_t* ptr = array + used;
2271 uchar16_t* end = array + used + delta;
2273 // Shift characters to make room for the string.
2274 while (ptr != start)
2281 // Add the new string now.
2282 for (u32 i = 0; i < replace_size; ++i)
2283 array[pos + i] = replace[i];
2285 pos += replace_size;
2289 // Terminate the string and return ourself.
2295 //! Removes characters from a ustring16.
2296 //! \param c The character to remove.
2297 //! \return A reference to our current string.
2298 ustring16<TAlloc>& remove(uchar32_t c)
2302 u32 len = (c > 0xFFFF ? 2 : 1); // Remove characters equal to the size of c as a UTF-16 character.
2303 for (u32 i=0; i<=used; ++i)
2306 if (!UTF16_IS_SURROGATE_HI(array[i]))
2308 else if (i + 1 <= used)
2310 // Convert the surrogate pair into a single UTF-32 character.
2311 uc32 = unicode::toUTF32(array[i], array[i + 1]);
2313 u32 len2 = (uc32 > 0xFFFF ? 2 : 1);
2321 array[pos++] = array[i];
2323 array[pos++] = array[++i];
2331 //! Removes a ustring16 from the ustring16.
2332 //! \param toRemove The string to remove.
2333 //! \return A reference to our current string.
2334 ustring16<TAlloc>& remove(const ustring16<TAlloc>& toRemove)
2336 u32 size = toRemove.size_raw();
2337 if (size == 0) return *this;
2339 const uchar16_t* tra = toRemove.c_str();
2342 for (u32 i=0; i<=used; ++i)
2347 if (array[i + j] != tra[j])
2358 array[pos++] = array[i];
2366 //! Removes characters from the ustring16.
2367 //! \param characters The characters to remove.
2368 //! \return A reference to our current string.
2369 ustring16<TAlloc>& removeChars(const ustring16<TAlloc>& characters)
2371 if (characters.size_raw() == 0)
2376 const_iterator iter(characters);
2377 for (u32 i=0; i<=used; ++i)
2380 if (!UTF16_IS_SURROGATE_HI(array[i]))
2382 else if (i + 1 <= used)
2384 // Convert the surrogate pair into a single UTF-32 character.
2385 uc32 = unicode::toUTF32(array[i], array[i+1]);
2387 u32 len2 = (uc32 > 0xFFFF ? 2 : 1);
2391 while (!iter.atEnd())
2393 uchar32_t c = *iter;
2396 found += (c > 0xFFFF ? 2 : 1); // Remove characters equal to the size of c as a UTF-16 character.
2405 array[pos++] = array[i];
2407 array[pos++] = array[++i];
2415 //! Trims the ustring16.
2416 //! Removes the specified characters (by default, Latin-1 whitespace) from the beginning and the end of the ustring16.
2417 //! \param whitespace The characters that are to be considered as whitespace.
2418 //! \return A reference to our current string.
2419 ustring16<TAlloc>& trim(const ustring16<TAlloc>& whitespace = " \t\n\r")
2421 core::array<uchar32_t> utf32white = whitespace.toUTF32();
2423 // find start and end of the substring without the specified characters
2424 const s32 begin = findFirstCharNotInList(utf32white.const_pointer(), whitespace.used + 1);
2428 const s32 end = findLastCharNotInList(utf32white.const_pointer(), whitespace.used + 1);
2430 return (*this = subString(begin, (end +1) - begin));
2434 //! Erases a character from the ustring16.
2435 //! May be slow, because all elements following after the erased element have to be copied.
2436 //! \param index Index of element to be erased.
2437 //! \return A reference to our current string.
2438 ustring16<TAlloc>& erase(u32 index)
2440 _IRR_DEBUG_BREAK_IF(index>used) // access violation
2442 iterator i(*this, index);
2445 u32 len = (t > 0xFFFF ? 2 : 1);
2447 for (u32 j = static_cast<u32>(i.getPos()) + len; j <= used; ++j)
2448 array[j - len] = array[j];
2457 //! Validate the existing ustring16, checking for valid surrogate pairs and checking for proper termination.
2458 //! \return A reference to our current string.
2459 ustring16<TAlloc>& validate()
2461 // Validate all unicode characters.
2462 for (u32 i=0; i<allocated; ++i)
2464 // Terminate on existing null.
2470 if (UTF16_IS_SURROGATE(array[i]))
2472 if (((i+1) >= allocated) || UTF16_IS_SURROGATE_LO(array[i]))
2473 array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
2474 else if (UTF16_IS_SURROGATE_HI(array[i]) && !UTF16_IS_SURROGATE_LO(array[i+1]))
2475 array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
2478 if (array[i] >= 0xFDD0 && array[i] <= 0xFDEF)
2479 array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
2486 used = allocated - 1;
2493 //! Gets the last char of the ustring16, or 0.
2494 //! \return The last char of the ustring16, or 0.
2495 uchar32_t lastChar() const
2500 if (UTF16_IS_SURROGATE_LO(array[used-1]))
2502 // Make sure we have a paired surrogate.
2506 // Check for an invalid surrogate.
2507 if (!UTF16_IS_SURROGATE_HI(array[used-2]))
2510 // Convert the surrogate pair into a single UTF-32 character.
2511 return unicode::toUTF32(array[used-2], array[used-1]);
2515 return array[used-1];
2520 //! Split the ustring16 into parts.
2521 /** This method will split a ustring16 at certain delimiter characters
2522 into the container passed in as reference. The type of the container
2523 has to be given as template parameter. It must provide a push_back and
2525 \param ret The result container
2526 \param c C-style ustring16 of delimiter characters
2527 \param count Number of delimiter characters
2528 \param ignoreEmptyTokens Flag to avoid empty substrings in the result
2529 container. If two delimiters occur without a character in between, an
2530 empty substring would be placed in the result. If this flag is set,
2531 only non-empty strings are stored.
2532 \param keepSeparators Flag which allows to add the separator to the
2533 result ustring16. If this flag is true, the concatenation of the
2534 substrings results in the original ustring16. Otherwise, only the
2535 characters between the delimiters are returned.
2536 \return The number of resulting substrings
2538 template<class container>
2539 u32 split(container& ret, const uchar32_t* const c, u32 count=1, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
2544 const_iterator i(*this);
2545 const u32 oldSize=ret.size();
2549 bool lastWasSeparator = false;
2553 bool foundSeparator = false;
2554 for (u32 j=0; j<count; ++j)
2558 if ((!ignoreEmptyTokens || pos - lastpos != 0) &&
2560 ret.push_back(ustring16<TAlloc>(&array[lastpospos], pos - lastpos));
2561 foundSeparator = true;
2562 lastpos = (keepSeparators ? pos : pos + 1);
2563 lastpospos = (keepSeparators ? i.getPos() : i.getPos() + 1);
2567 lastWasSeparator = foundSeparator;
2573 ret.push_back(ustring16<TAlloc>(&array[lastpospos], s - lastpos));
2574 return ret.size()-oldSize;
2578 //! Split the ustring16 into parts.
2579 /** This method will split a ustring16 at certain delimiter characters
2580 into the container passed in as reference. The type of the container
2581 has to be given as template parameter. It must provide a push_back and
2583 \param ret The result container
2584 \param c A unicode string of delimiter characters
2585 \param ignoreEmptyTokens Flag to avoid empty substrings in the result
2586 container. If two delimiters occur without a character in between, an
2587 empty substring would be placed in the result. If this flag is set,
2588 only non-empty strings are stored.
2589 \param keepSeparators Flag which allows to add the separator to the
2590 result ustring16. If this flag is true, the concatenation of the
2591 substrings results in the original ustring16. Otherwise, only the
2592 characters between the delimiters are returned.
2593 \return The number of resulting substrings
2595 template<class container>
2596 u32 split(container& ret, const ustring16<TAlloc>& c, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
2598 core::array<uchar32_t> v = c.toUTF32();
2599 return split(ret, v.pointer(), v.size(), ignoreEmptyTokens, keepSeparators);
2603 //! Gets the size of the allocated memory buffer for the string.
2604 //! \return The size of the allocated memory buffer.
2605 u32 capacity() const
2611 //! Returns the raw number of UTF-16 code units in the string, counting each surrogate individually.
2612 //! \return The raw number of UTF-16 code units, excluding the trailing NUL.
2613 u32 size_raw() const
2619 //! Inserts a character into the string.
2620 //! \param c The character to insert.
2621 //! \param pos The position to insert the character.
2622 //! \return A reference to our current string.
2623 ustring16<TAlloc>& insert(uchar32_t c, u32 pos)
2625 u8 len = (c > 0xFFFF ? 2 : 1);
2627 if (used + len >= allocated)
2628 reallocate(used + len);
2632 iterator iter(*this, pos);
2633 for (u32 i = used - 2; i > iter.getPos(); --i)
2634 array[i] = array[i - len];
2638 // c will be multibyte, so split it up into a surrogate pair.
2639 uchar16_t x = static_cast<uchar16_t>(c);
2640 uchar16_t vh = UTF16_HI_SURROGATE | ((((c >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
2641 uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
2642 array[iter.getPos()] = vh;
2643 array[iter.getPos()+1] = vl;
2647 array[iter.getPos()] = static_cast<uchar16_t>(c);
2654 //! Inserts a string into the string.
2655 //! \param c The string to insert.
2656 //! \param pos The position to insert the string.
2657 //! \return A reference to our current string.
2658 ustring16<TAlloc>& insert(const ustring16<TAlloc>& c, u32 pos)
2660 u32 len = c.size_raw();
2661 if (len == 0) return *this;
2663 if (used + len >= allocated)
2664 reallocate(used + len);
2668 iterator iter(*this, pos);
2669 for (u32 i = used - 2; i > iter.getPos() + len; --i)
2670 array[i] = array[i - len];
2672 const uchar16_t* s = c.c_str();
2673 for (u32 i = 0; i < len; ++i)
2684 //! Inserts a raw UTF-16 code unit into the string.
2685 //! \param c The UTF-16 code unit to insert.
2686 //! \param pos The position in the raw UTF-16 array at which to insert the code unit.
2687 //! \return A reference to our current string.
2688 ustring16<TAlloc>& insert_raw(uchar16_t c, u32 pos)
2690 if (used + 1 >= allocated)
2691 reallocate(used + 1);
2695 for (u32 i = used - 1; i > pos; --i)
2696 array[i] = array[i - 1];
2704 //! Removes a raw UTF-16 code unit from the string.
2705 //! \param pos Position of the code unit to remove, as an index into the raw UTF-16 array.
2706 //! \return A reference to our current string.
2707 ustring16<TAlloc>& erase_raw(u32 pos)
2709 for (u32 i=pos; i<=used; ++i)
2711 array[i] = array[i + 1];
2719 //! Replaces a character in the string.
2720 //! \param c The new character.
2721 //! \param pos The position of the character to replace.
2722 //! \return A reference to our current string.
2723 ustring16<TAlloc>& replace_raw(uchar16_t c, u32 pos)
2730 //! Returns an iterator to the beginning of the string.
2731 //! \return An iterator to the beginning of the string.
2734 iterator i(*this, 0);
2739 //! Returns an iterator to the beginning of the string.
2740 //! \return An iterator to the beginning of the string.
2741 const_iterator begin() const
2743 const_iterator i(*this, 0);
2748 //! Returns an iterator to the beginning of the string.
2749 //! \return An iterator to the beginning of the string.
2750 const_iterator cbegin() const
2752 const_iterator i(*this, 0);
2757 //! Returns an iterator to the end of the string.
2758 //! \return An iterator to the end of the string.
2761 iterator i(*this, 0);
2767 //! Returns an iterator to the end of the string.
2768 //! \return An iterator to the end of the string.
2769 const_iterator end() const
2771 const_iterator i(*this, 0);
2777 //! Returns an iterator to the end of the string.
2778 //! \return An iterator to the end of the string.
2779 const_iterator cend() const
2781 const_iterator i(*this, 0);
2787 //! Converts the string to a UTF-8 encoded string.
2788 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2789 //! \return A string containing the UTF-8 encoded string.
2790 core::string<uchar8_t> toUTF8_s(const bool addBOM = false) const
2792 core::string<uchar8_t> ret;
2793 ret.reserve(used * 4 + (addBOM ? unicode::BOM_UTF8_LEN : 0) + 1);
2794 const_iterator iter(*this, 0);
2796 // Add the byte order mark if the user wants it.
2799 ret.append(unicode::BOM_ENCODE_UTF8[0]);
2800 ret.append(unicode::BOM_ENCODE_UTF8[1]);
2801 ret.append(unicode::BOM_ENCODE_UTF8[2]);
2804 while (!iter.atEnd())
2806 uchar32_t c = *iter;
2809 uchar8_t b1 = (0x1E << 3) | ((c >> 18) & 0x7);
2810 uchar8_t b2 = (0x2 << 6) | ((c >> 12) & 0x3F);
2811 uchar8_t b3 = (0x2 << 6) | ((c >> 6) & 0x3F);
2812 uchar8_t b4 = (0x2 << 6) | (c & 0x3F);
2820 uchar8_t b1 = (0xE << 4) | ((c >> 12) & 0xF);
2821 uchar8_t b2 = (0x2 << 6) | ((c >> 6) & 0x3F);
2822 uchar8_t b3 = (0x2 << 6) | (c & 0x3F);
2829 uchar8_t b1 = (0x6 << 5) | ((c >> 6) & 0x1F);
2830 uchar8_t b2 = (0x2 << 6) | (c & 0x3F);
2836 ret.append(static_cast<uchar8_t>(c));
2844 //! Converts the string to a UTF-8 encoded string array.
2845 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2846 //! \return An array containing the UTF-8 encoded string.
2847 core::array<uchar8_t> toUTF8(const bool addBOM = false) const
2849 core::array<uchar8_t> ret(used * 4 + (addBOM ? unicode::BOM_UTF8_LEN : 0) + 1);
2850 const_iterator iter(*this, 0);
2852 // Add the byte order mark if the user wants it.
2855 ret.push_back(unicode::BOM_ENCODE_UTF8[0]);
2856 ret.push_back(unicode::BOM_ENCODE_UTF8[1]);
2857 ret.push_back(unicode::BOM_ENCODE_UTF8[2]);
2860 while (!iter.atEnd())
2862 uchar32_t c = *iter;
2865 uchar8_t b1 = (0x1E << 3) | ((c >> 18) & 0x7);
2866 uchar8_t b2 = (0x2 << 6) | ((c >> 12) & 0x3F);
2867 uchar8_t b3 = (0x2 << 6) | ((c >> 6) & 0x3F);
2868 uchar8_t b4 = (0x2 << 6) | (c & 0x3F);
2876 uchar8_t b1 = (0xE << 4) | ((c >> 12) & 0xF);
2877 uchar8_t b2 = (0x2 << 6) | ((c >> 6) & 0x3F);
2878 uchar8_t b3 = (0x2 << 6) | (c & 0x3F);
2885 uchar8_t b1 = (0x6 << 5) | ((c >> 6) & 0x1F);
2886 uchar8_t b2 = (0x2 << 6) | (c & 0x3F);
2892 ret.push_back(static_cast<uchar8_t>(c));
2901 #ifdef USTRING_CPP0X_NEWLITERALS // C++0x
2902 //! Converts the string to a UTF-16 encoded string.
2903 //! \param endian The desired endianness of the string.
2904 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2905 //! \return A string containing the UTF-16 encoded string.
2906 core::string<char16_t> toUTF16_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2908 core::string<char16_t> ret;
2909 ret.reserve(used + (addBOM ? unicode::BOM_UTF16_LEN : 0) + 1);
2911 // Add the BOM if specified.
2914 if (endian == unicode::EUTFEE_NATIVE)
2915 ret[0] = unicode::BOM;
2916 else if (endian == unicode::EUTFEE_LITTLE)
2918 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ret.c_str());
2919 *ptr8++ = unicode::BOM_ENCODE_UTF16_LE[0];
2920 *ptr8 = unicode::BOM_ENCODE_UTF16_LE[1];
2924 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ret.c_str());
2925 *ptr8++ = unicode::BOM_ENCODE_UTF16_BE[0];
2926 *ptr8 = unicode::BOM_ENCODE_UTF16_BE[1];
2931 if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
2933 char16_t* ptr = ret.c_str();
2934 for (u32 i = 0; i < ret.size(); ++i)
2935 *ptr++ = unicode::swapEndian16(*ptr);
2942 //! Converts the string to a UTF-16 encoded string array.
2943 //! Unfortunately, a toUTF16_s() version is only provided when USTRING_CPP0X_NEWLITERALS is defined, due to limitations with Irrlicht's string class.
2944 //! \param endian The desired endianness of the string.
2945 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
2946 //! \return An array containing the UTF-16 encoded string.
2947 core::array<uchar16_t> toUTF16(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2949 core::array<uchar16_t> ret(used + (addBOM ? unicode::BOM_UTF16_LEN : 0) + 1);
2950 uchar16_t* ptr = ret.pointer();
2952 // Add the BOM if specified.
2955 if (endian == unicode::EUTFEE_NATIVE)
2956 *ptr = unicode::BOM;
2957 else if (endian == unicode::EUTFEE_LITTLE)
2959 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ptr);
2960 *ptr8++ = unicode::BOM_ENCODE_UTF16_LE[0];
2961 *ptr8 = unicode::BOM_ENCODE_UTF16_LE[1];
2965 uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ptr);
2966 *ptr8++ = unicode::BOM_ENCODE_UTF16_BE[0];
2967 *ptr8 = unicode::BOM_ENCODE_UTF16_BE[1];
2972 memcpy((void*)ptr, (void*)array, used * sizeof(uchar16_t));
2973 if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
2975 for (u32 i = 0; i <= used; ++i)
2976 ptr[i] = unicode::swapEndian16(ptr[i]);
2978 ret.set_used(used + (addBOM ? unicode::BOM_UTF16_LEN : 0));
#ifdef USTRING_CPP0X_NEWLITERALS	// C++0x
//! Builds a UTF-32 copy of this string as an Irrlicht string of char32_t.
//! \param endian Byte order the code points (and the BOM) are stored in.
//! \param addBOM When true, a byte-order mark is placed in front of the text.
//! \return A core::string<char32_t> holding the UTF-32 encoded text.
core::string<char32_t> toUTF32_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
{
	core::string<char32_t> ret;
	ret.reserve(size() + 1 + (addBOM ? unicode::BOM_UTF32_LEN : 0));
	const_iterator iter(*this, 0);

	if (addBOM)
	{
		if (endian == unicode::EUTFEE_NATIVE)
		{
			ret.append(unicode::BOM);
		}
		else
		{
			// Assemble the mark byte by byte so its in-memory order is the requested one.
			union
			{
				uchar32_t full;
				u8 chunk[4];
			} t;

			const bool little = (endian == unicode::EUTFEE_LITTLE);
			for (u32 b = 0; b < 4; ++b)
				t.chunk[b] = little ? unicode::BOM_ENCODE_UTF32_LE[b] : unicode::BOM_ENCODE_UTF32_BE[b];
			ret.append(t.full);
		}
	}

	// A swap is needed only for an explicit byte order that differs from ours.
	const bool swapBytes = (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian);
	for (; !iter.atEnd(); ++iter)
	{
		uchar32_t c = *iter;
		if (swapBytes)
			c = unicode::swapEndian32(c);
		ret.append(c);
	}
	return ret;
}
#endif
3039 //! Converts the string to a UTF-32 encoded string array.
3040 //! Unfortunately, no toUTF32_s() version exists due to limitations with Irrlicht's string class.
3041 //! \param endian The desired endianness of the string.
3042 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3043 //! \return An array containing the UTF-32 encoded string.
3044 core::array<uchar32_t> toUTF32(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3046 core::array<uchar32_t> ret(size() + (addBOM ? unicode::BOM_UTF32_LEN : 0) + 1);
3047 const_iterator iter(*this, 0);
3049 // Add the BOM if specified.
3052 if (endian == unicode::EUTFEE_NATIVE)
3053 ret.push_back(unicode::BOM);
3062 if (endian == unicode::EUTFEE_LITTLE)
3064 t.chunk[0] = unicode::BOM_ENCODE_UTF32_LE[0];
3065 t.chunk[1] = unicode::BOM_ENCODE_UTF32_LE[1];
3066 t.chunk[2] = unicode::BOM_ENCODE_UTF32_LE[2];
3067 t.chunk[3] = unicode::BOM_ENCODE_UTF32_LE[3];
3071 t.chunk[0] = unicode::BOM_ENCODE_UTF32_BE[0];
3072 t.chunk[1] = unicode::BOM_ENCODE_UTF32_BE[1];
3073 t.chunk[2] = unicode::BOM_ENCODE_UTF32_BE[2];
3074 t.chunk[3] = unicode::BOM_ENCODE_UTF32_BE[3];
3076 ret.push_back(t.full);
3081 while (!iter.atEnd())
3083 uchar32_t c = *iter;
3084 if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
3085 c = unicode::swapEndian32(c);
3093 //! Converts the string to a wchar_t encoded string.
3094 /** The size of a wchar_t changes depending on the platform. This function will store a
3095 correct UTF-8, -16, or -32 encoded string depending on the size of a wchar_t. **/
3096 //! \param endian The desired endianness of the string.
3097 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3098 //! \return A string containing the wchar_t encoded string.
3099 core::string<wchar_t> toWCHAR_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3101 if (sizeof(wchar_t) == 4)
3103 core::array<uchar32_t> a(toUTF32(endian, addBOM));
3104 core::stringw ret(a.pointer());
3107 else if (sizeof(wchar_t) == 2)
3109 if (endian == unicode::EUTFEE_NATIVE && addBOM == false)
3111 core::stringw ret(array);
3116 core::array<uchar16_t> a(toUTF16(endian, addBOM));
3117 core::stringw ret(a.pointer());
3121 else if (sizeof(wchar_t) == 1)
3123 core::array<uchar8_t> a(toUTF8(addBOM));
3124 core::stringw ret(a.pointer());
3128 // Shouldn't happen.
3129 return core::stringw();
3133 //! Converts the string to a wchar_t encoded string array.
3134 /** The size of a wchar_t changes depending on the platform. This function will store a
3135 correct UTF-8, -16, or -32 encoded string depending on the size of a wchar_t. **/
3136 //! \param endian The desired endianness of the string.
3137 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3138 //! \return An array containing the wchar_t encoded string.
3139 core::array<wchar_t> toWCHAR(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3141 if (sizeof(wchar_t) == 4)
3143 core::array<uchar32_t> a(toUTF32(endian, addBOM));
3144 core::array<wchar_t> ret(a.size());
3145 ret.set_used(a.size());
3146 memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar32_t));
3149 if (sizeof(wchar_t) == 2)
3151 if (endian == unicode::EUTFEE_NATIVE && addBOM == false)
3153 core::array<wchar_t> ret(used);
3155 memcpy((void*)ret.pointer(), (void*)array, used * sizeof(uchar16_t));
3160 core::array<uchar16_t> a(toUTF16(endian, addBOM));
3161 core::array<wchar_t> ret(a.size());
3162 ret.set_used(a.size());
3163 memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar16_t));
3167 if (sizeof(wchar_t) == 1)
3169 core::array<uchar8_t> a(toUTF8(addBOM));
3170 core::array<wchar_t> ret(a.size());
3171 ret.set_used(a.size());
3172 memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar8_t));
3176 // Shouldn't happen.
3177 return core::array<wchar_t>();
3180 //! Converts the string to a properly encoded io::path string.
3181 //! \param endian The desired endianness of the string.
3182 //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
3183 //! \return An io::path string containing the properly encoded string.
3184 io::path toPATH_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3186 #if defined(_IRR_WCHAR_FILESYSTEM)
3187 return toWCHAR_s(endian, addBOM);
3189 return toUTF8_s(addBOM);
3193 //! Loads an unknown stream of data.
3194 //! Will attempt to determine if the stream is unicode data. Useful for loading from files.
3195 //! \param data The data stream to load from.
3196 //! \param data_size The length of the data string.
3197 //! \return A reference to our current string.
3198 ustring16<TAlloc>& loadDataStream(const char* data, size_t data_size)
3200 // Clear our string.
3205 unicode::EUTF_ENCODE e = unicode::determineUnicodeBOM(data);
3209 case unicode::EUTFE_UTF8:
3210 append((uchar8_t*)data, data_size);
3213 case unicode::EUTFE_UTF16:
3214 case unicode::EUTFE_UTF16_BE:
3215 case unicode::EUTFE_UTF16_LE:
3216 append((uchar16_t*)data, data_size / 2);
3219 case unicode::EUTFE_UTF32:
3220 case unicode::EUTFE_UTF32_BE:
3221 case unicode::EUTFE_UTF32_LE:
3222 append((uchar32_t*)data, data_size / 4);
3229 //! Gets the encoding of the Unicode string this class contains.
3230 //! \return An enum describing the current encoding of this string.
3231 const unicode::EUTF_ENCODE getEncoding() const
3236 //! Gets the endianness of the Unicode string this class contains.
3237 //! \return An enum describing the endianness of this string.
3238 const unicode::EUTF_ENDIAN getEndianness() const
3240 if (encoding == unicode::EUTFE_UTF16_LE ||
3241 encoding == unicode::EUTFE_UTF32_LE)
3242 return unicode::EUTFEE_LITTLE;
3243 else return unicode::EUTFEE_BIG;
3248 //! Reallocate the string, making it bigger or smaller.
3249 //! \param new_size The new size of the string.
3250 void reallocate(u32 new_size)
3252 uchar16_t* old_array = array;
3254 array = allocator.allocate(new_size + 1); //new u16[new_size];
3255 allocated = new_size + 1;
3256 if (old_array == 0) return;
3258 u32 amount = used < new_size ? used : new_size;
3259 for (u32 i=0; i<=amount; ++i)
3260 array[i] = old_array[i];
3262 if (allocated <= used)
3263 used = allocated - 1;
3267 allocator.deallocate(old_array); // delete [] old_array;
3270 //--- member variables
3273 unicode::EUTF_ENCODE encoding;
3277 //irrAllocator<uchar16_t> allocator;
3280 typedef ustring16<irrAllocator<uchar16_t> > ustring;
3283 //! Appends two ustring16s.
3284 template <typename TAlloc>
3285 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const ustring16<TAlloc>& right)
3287 ustring16<TAlloc> ret(left);
3293 //! Appends a ustring16 and a null-terminated unicode string.
3294 template <typename TAlloc, class B>
3295 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const B* const right)
3297 ustring16<TAlloc> ret(left);
3303 //! Appends a ustring16 and a null-terminated unicode string.
3304 template <class B, typename TAlloc>
3305 inline ustring16<TAlloc> operator+(const B* const left, const ustring16<TAlloc>& right)
3307 ustring16<TAlloc> ret(left);
3313 //! Appends a ustring16 and an Irrlicht string.
3314 template <typename TAlloc, typename B, typename BAlloc>
3315 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const string<B, BAlloc>& right)
3317 ustring16<TAlloc> ret(left);
3323 //! Appends a ustring16 and an Irrlicht string.
3324 template <typename TAlloc, typename B, typename BAlloc>
3325 inline ustring16<TAlloc> operator+(const string<B, BAlloc>& left, const ustring16<TAlloc>& right)
3327 ustring16<TAlloc> ret(left);
3333 //! Appends a ustring16 and a std::basic_string.
3334 template <typename TAlloc, typename B, typename A, typename BAlloc>
3335 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const std::basic_string<B, A, BAlloc>& right)
3337 ustring16<TAlloc> ret(left);
3343 //! Appends a ustring16 and a std::basic_string.
3344 template <typename TAlloc, typename B, typename A, typename BAlloc>
3345 inline ustring16<TAlloc> operator+(const std::basic_string<B, A, BAlloc>& left, const ustring16<TAlloc>& right)
3347 ustring16<TAlloc> ret(left);
3353 //! Appends a ustring16 and a char.
3354 template <typename TAlloc>
3355 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const char right)
3357 ustring16<TAlloc> ret(left);
3363 //! Appends a ustring16 and a char.
3364 template <typename TAlloc>
3365 inline ustring16<TAlloc> operator+(const char left, const ustring16<TAlloc>& right)
3367 ustring16<TAlloc> ret(left);
#ifdef USTRING_CPP0X_NEWLITERALS
//! Concatenates a ustring16 with a single uchar32_t code point.
//! \return A new string holding \p left followed by \p right.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const uchar32_t right)
{
	ustring16<TAlloc> joined(left);
	joined += right;
	return joined;
}


//! Concatenates a single uchar32_t code point with a ustring16.
//! \return A new string holding \p left followed by \p right.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(const uchar32_t left, const ustring16<TAlloc>& right)
{
	ustring16<TAlloc> joined(left);
	joined += right;
	return joined;
}
#endif
3395 //! Appends a ustring16 and a short.
3396 template <typename TAlloc>
3397 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const short right)
3399 ustring16<TAlloc> ret(left);
3400 ret += core::stringc(right);
3405 //! Appends a ustring16 and a short.
3406 template <typename TAlloc>
3407 inline ustring16<TAlloc> operator+(const short left, const ustring16<TAlloc>& right)
3409 ustring16<TAlloc> ret((core::stringc(left)));
3415 //! Appends a ustring16 and an unsigned short.
3416 template <typename TAlloc>
3417 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned short right)
3419 ustring16<TAlloc> ret(left);
3420 ret += core::stringc(right);
3425 //! Appends a ustring16 and an unsigned short.
3426 template <typename TAlloc>
3427 inline ustring16<TAlloc> operator+(const unsigned short left, const ustring16<TAlloc>& right)
3429 ustring16<TAlloc> ret((core::stringc(left)));
3435 //! Appends a ustring16 and an int.
3436 template <typename TAlloc>
3437 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const int right)
3439 ustring16<TAlloc> ret(left);
3440 ret += core::stringc(right);
3445 //! Appends a ustring16 and an int.
3446 template <typename TAlloc>
3447 inline ustring16<TAlloc> operator+(const int left, const ustring16<TAlloc>& right)
3449 ustring16<TAlloc> ret((core::stringc(left)));
3455 //! Appends a ustring16 and an unsigned int.
3456 template <typename TAlloc>
3457 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned int right)
3459 ustring16<TAlloc> ret(left);
3460 ret += core::stringc(right);
3465 //! Appends a ustring16 and an unsigned int.
3466 template <typename TAlloc>
3467 inline ustring16<TAlloc> operator+(const unsigned int left, const ustring16<TAlloc>& right)
3469 ustring16<TAlloc> ret((core::stringc(left)));
3475 //! Appends a ustring16 and a long.
3476 template <typename TAlloc>
3477 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const long right)
3479 ustring16<TAlloc> ret(left);
3480 ret += core::stringc(right);
3485 //! Appends a ustring16 and a long.
3486 template <typename TAlloc>
3487 inline ustring16<TAlloc> operator+(const long left, const ustring16<TAlloc>& right)
3489 ustring16<TAlloc> ret((core::stringc(left)));
3495 //! Appends a ustring16 and an unsigned long.
3496 template <typename TAlloc>
3497 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned long right)
3499 ustring16<TAlloc> ret(left);
3500 ret += core::stringc(right);
3505 //! Appends a ustring16 and an unsigned long.
3506 template <typename TAlloc>
3507 inline ustring16<TAlloc> operator+(const unsigned long left, const ustring16<TAlloc>& right)
3509 ustring16<TAlloc> ret((core::stringc(left)));
3515 //! Appends a ustring16 and a float.
3516 template <typename TAlloc>
3517 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const float right)
3519 ustring16<TAlloc> ret(left);
3520 ret += core::stringc(right);
3525 //! Appends a ustring16 and a float.
3526 template <typename TAlloc>
3527 inline ustring16<TAlloc> operator+(const float left, const ustring16<TAlloc>& right)
3529 ustring16<TAlloc> ret((core::stringc(left)));
3535 //! Appends a ustring16 and a double.
3536 template <typename TAlloc>
3537 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const double right)
3539 ustring16<TAlloc> ret(left);
3540 ret += core::stringc(right);
3545 //! Appends a ustring16 and a double.
3546 template <typename TAlloc>
3547 inline ustring16<TAlloc> operator+(const double left, const ustring16<TAlloc>& right)
3549 ustring16<TAlloc> ret((core::stringc(left)));
3555 #ifdef USTRING_CPP0X
3556 //! Appends two ustring16s.
3557 template <typename TAlloc>
3558 inline ustring16<TAlloc>&& operator+(const ustring16<TAlloc>& left, ustring16<TAlloc>&& right)
3560 //std::cout << "MOVE operator+(&, &&)" << std::endl;
3561 right.insert(left, 0);
3562 return std::move(right);
3566 //! Appends two ustring16s.
3567 template <typename TAlloc>
3568 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const ustring16<TAlloc>& right)
3570 //std::cout << "MOVE operator+(&&, &)" << std::endl;
3572 return std::move(left);
3576 //! Appends two ustring16s.
3577 template <typename TAlloc>
3578 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, ustring16<TAlloc>&& right)
3580 //std::cout << "MOVE operator+(&&, &&)" << std::endl;
3581 if ((right.size_raw() <= left.capacity() - left.size_raw()) ||
3582 (right.capacity() - right.size_raw() < left.size_raw()))
3585 return std::move(left);
3589 right.insert(left, 0);
3590 return std::move(right);
3595 //! Appends a ustring16 and a null-terminated unicode string.
3596 template <typename TAlloc, class B>
3597 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const B* const right)
3599 //std::cout << "MOVE operator+(&&, B*)" << std::endl;
3601 return std::move(left);
3605 //! Appends a ustring16 and a null-terminated unicode string.
3606 template <class B, typename TAlloc>
3607 inline ustring16<TAlloc>&& operator+(const B* const left, ustring16<TAlloc>&& right)
3609 //std::cout << "MOVE operator+(B*, &&)" << std::endl;
3610 right.insert(left, 0);
3611 return std::move(right);
3615 //! Appends a ustring16 and an Irrlicht string.
3616 template <typename TAlloc, typename B, typename BAlloc>
3617 inline ustring16<TAlloc>&& operator+(const string<B, BAlloc>& left, ustring16<TAlloc>&& right)
3619 //std::cout << "MOVE operator+(&, &&)" << std::endl;
3620 right.insert(left, 0);
3621 return std::move(right);
3625 //! Appends a ustring16 and an Irrlicht string.
3626 template <typename TAlloc, typename B, typename BAlloc>
3627 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const string<B, BAlloc>& right)
3629 //std::cout << "MOVE operator+(&&, &)" << std::endl;
3631 return std::move(left);
3635 //! Appends a ustring16 and a std::basic_string.
3636 template <typename TAlloc, typename B, typename A, typename BAlloc>
3637 inline ustring16<TAlloc>&& operator+(const std::basic_string<B, A, BAlloc>& left, ustring16<TAlloc>&& right)
3639 //std::cout << "MOVE operator+(&, &&)" << std::endl;
3640 right.insert(core::ustring16<TAlloc>(left), 0);
3641 return std::move(right);
3645 //! Appends a ustring16 and a std::basic_string.
3646 template <typename TAlloc, typename B, typename A, typename BAlloc>
3647 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const std::basic_string<B, A, BAlloc>& right)
3649 //std::cout << "MOVE operator+(&&, &)" << std::endl;
3651 return std::move(left);
3655 //! Appends a ustring16 and a char.
3656 template <typename TAlloc>
3657 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const char right)
3659 left.append((uchar32_t)right);
3660 return std::move(left);
3664 //! Appends a ustring16 and a char.
3665 template <typename TAlloc>
3666 inline ustring16<TAlloc> operator+(const char left, ustring16<TAlloc>&& right)
3668 right.insert((uchar32_t)left, 0);
3669 return std::move(right);
#ifdef USTRING_CPP0X_NEWLITERALS
//! Concatenates an expiring ustring16 with a single uchar32_t code point.
//! \return \p left, moved into the result, with \p right appended.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const uchar32_t right)
{
	ustring16<TAlloc>& target = left;
	target.append(right);
	return std::move(target);
}


//! Concatenates a single uchar32_t code point with an expiring ustring16.
//! \return \p right, moved into the result, with \p left inserted at position 0.
template <typename TAlloc>
inline ustring16<TAlloc> operator+(const uchar32_t left, ustring16<TAlloc>&& right)
{
	ustring16<TAlloc>& target = right;
	target.insert(left, 0);
	return std::move(target);
}
#endif
3693 //! Appends a ustring16 and a short.
3694 template <typename TAlloc>
3695 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const short right)
3697 left.append(core::stringc(right));
3698 return std::move(left);
3702 //! Appends a ustring16 and a short.
3703 template <typename TAlloc>
3704 inline ustring16<TAlloc> operator+(const short left, ustring16<TAlloc>&& right)
3706 right.insert(core::stringc(left), 0);
3707 return std::move(right);
3711 //! Appends a ustring16 and an unsigned short.
3712 template <typename TAlloc>
3713 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned short right)
3715 left.append(core::stringc(right));
3716 return std::move(left);
3720 //! Appends a ustring16 and an unsigned short.
3721 template <typename TAlloc>
3722 inline ustring16<TAlloc> operator+(const unsigned short left, ustring16<TAlloc>&& right)
3724 right.insert(core::stringc(left), 0);
3725 return std::move(right);
3729 //! Appends a ustring16 and an int.
3730 template <typename TAlloc>
3731 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const int right)
3733 left.append(core::stringc(right));
3734 return std::move(left);
3738 //! Appends a ustring16 and an int.
3739 template <typename TAlloc>
3740 inline ustring16<TAlloc> operator+(const int left, ustring16<TAlloc>&& right)
3742 right.insert(core::stringc(left), 0);
3743 return std::move(right);
3747 //! Appends a ustring16 and an unsigned int.
3748 template <typename TAlloc>
3749 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned int right)
3751 left.append(core::stringc(right));
3752 return std::move(left);
3756 //! Appends a ustring16 and an unsigned int.
3757 template <typename TAlloc>
3758 inline ustring16<TAlloc> operator+(const unsigned int left, ustring16<TAlloc>&& right)
3760 right.insert(core::stringc(left), 0);
3761 return std::move(right);
3765 //! Appends a ustring16 and a long.
3766 template <typename TAlloc>
3767 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const long right)
3769 left.append(core::stringc(right));
3770 return std::move(left);
3774 //! Appends a ustring16 and a long.
3775 template <typename TAlloc>
3776 inline ustring16<TAlloc> operator+(const long left, ustring16<TAlloc>&& right)
3778 right.insert(core::stringc(left), 0);
3779 return std::move(right);
3783 //! Appends a ustring16 and an unsigned long.
3784 template <typename TAlloc>
3785 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned long right)
3787 left.append(core::stringc(right));
3788 return std::move(left);
3792 //! Appends a ustring16 and an unsigned long.
3793 template <typename TAlloc>
3794 inline ustring16<TAlloc> operator+(const unsigned long left, ustring16<TAlloc>&& right)
3796 right.insert(core::stringc(left), 0);
3797 return std::move(right);
3801 //! Appends a ustring16 and a float.
3802 template <typename TAlloc>
3803 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const float right)
3805 left.append(core::stringc(right));
3806 return std::move(left);
3810 //! Appends a ustring16 and a float.
3811 template <typename TAlloc>
3812 inline ustring16<TAlloc> operator+(const float left, ustring16<TAlloc>&& right)
3814 right.insert(core::stringc(left), 0);
3815 return std::move(right);
3819 //! Appends a ustring16 and a double.
3820 template <typename TAlloc>
3821 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const double right)
3823 left.append(core::stringc(right));
3824 return std::move(left);
3828 //! Appends a ustring16 and a double.
3829 template <typename TAlloc>
3830 inline ustring16<TAlloc> operator+(const double left, ustring16<TAlloc>&& right)
3832 right.insert(core::stringc(left), 0);
3833 return std::move(right);
3838 #ifndef USTRING_NO_STL
3839 //! Writes a ustring16 to an ostream.
3840 template <typename TAlloc>
3841 inline std::ostream& operator<<(std::ostream& out, const ustring16<TAlloc>& in)
3843 out << in.toUTF8_s().c_str();
3847 //! Writes a ustring16 to a wostream.
3848 template <typename TAlloc>
3849 inline std::wostream& operator<<(std::wostream& out, const ustring16<TAlloc>& in)
3851 out << in.toWCHAR_s().c_str();
3857 #ifndef USTRING_NO_STL
3862 //! Hashing algorithm for hashing a ustring. Used for things like unordered_maps.
3863 //! Algorithm taken from std::hash<std::string>.
3864 class hash : public std::unary_function<core::ustring, size_t>
3867 size_t operator()(const core::ustring& s) const
3869 size_t ret = 2166136261U;
3871 size_t stride = 1 + s.size_raw() / 10;
3873 core::ustring::const_iterator i = s.begin();
3874 while (i != s.end())
3876 // TODO: Don't force u32 on an x64 OS. Make it agnostic.
3877 ret = 16777619U * ret ^ (size_t)s[(u32)index];
3885 } // end namespace unicode
3889 } // end namespace core
3890 } // end namespace irr