MyGUI 3.4.2
MyGUI_UString.cpp
Go to the documentation of this file.
1/*
2 * This source file is part of MyGUI. For the latest info, see http://mygui.info/
3 * Distributed under the MIT License
4 * (See accompanying file COPYING.MIT or copy at http://opensource.org/licenses/MIT)
5 */
6
7#include "MyGUI_Precompiled.h"
8#include "MyGUI_UString.h"
9
10namespace MyGUI
11{
12
13 //--------------------------------------------------------------------------
15 {
16 mString = nullptr;
17 }
18 //--------------------------------------------------------------------------
20 {
21 mIter += c;
22 }
23 //--------------------------------------------------------------------------
25 {
26 mIter -= c;
27 }
28 //--------------------------------------------------------------------------
30 {
31 mIter = i.mIter;
32 mString = i.mString;
33 }
34 //--------------------------------------------------------------------------
36 {
37 return mIter == mString->mData.begin();
38 }
39 //--------------------------------------------------------------------------
41 {
42 return mIter == mString->mData.end();
43 }
44 //--------------------------------------------------------------------------
46 {
47 return mIter - mString->mData.begin();
48 }
49 //--------------------------------------------------------------------------
51 {
52 mIter = mString->mData.begin() + index;
53 }
54 //--------------------------------------------------------------------------
56 {
57 size_type current_index = _get_index();
58 return mString->getChar( current_index );
59 }
60 //--------------------------------------------------------------------------
62 {
63 size_type current_index = _get_index();
64 int change = mString->setChar( current_index, uc );
65 _jump_to( current_index );
66 return change;
67 }
68 //--------------------------------------------------------------------------
70 {
71 _seekFwd( 1 ); // move 1 code point forward
72 if ( _test_end() ) return; // exit if we hit the end
73 if ( _utf16_surrogate_follow( mIter[0] ) ) {
74 // landing on a follow code point means we might be part of a bigger character
75 // so we test for that
77 //NB: we can't possibly be at the beginning here, so no need to test
78 lead_half = mIter[-1]; // check the previous code point to see if we're part of a surrogate pair
80 _seekFwd( 1 ); // if so, then advance 1 more code point
81 }
82 }
83 }
84 //--------------------------------------------------------------------------
86 {
87 _seekRev( 1 ); // move 1 code point backwards
88 if ( _test_begin() ) return; // exit if we hit the beginning
89 if ( _utf16_surrogate_follow( mIter[0] ) ) {
90 // landing on a follow code point means we might be part of a bigger character
91 // so we test for that
93 lead_half = mIter[-1]; // check the previous character to see if we're part of a surrogate pair
95 _seekRev( 1 ); // if so, then rewind 1 more code point
96 }
97 }
98 }
99 //--------------------------------------------------------------------------
100 //--------------------------------------------------------------------------
101 //--------------------------------------------------------------------------
102 //--------------------------------------------------------------------------
104 //--------------------------------------------------------------------------
106 {
107 _become( i );
108 }
109 //--------------------------------------------------------------------------
111 {
112 _become( i );
113 return *this;
114 }
115 //--------------------------------------------------------------------------
117 {
118 _seekFwd( 1 );
119 return *this;
120 }
121 //--------------------------------------------------------------------------
123 {
124 _fwd_iterator tmp( *this );
125 _seekFwd( 1 );
126 return tmp;
127 }
128 //--------------------------------------------------------------------------
130 {
131 _seekRev( 1 );
132 return *this;
133 }
134 //--------------------------------------------------------------------------
136 {
137 _fwd_iterator tmp( *this );
138 _seekRev( 1 );
139 return tmp;
140 }
141 //--------------------------------------------------------------------------
143 {
144 _fwd_iterator tmp( *this );
145 if ( n < 0 )
146 tmp._seekRev( -n );
147 else
148 tmp._seekFwd( n );
149 return tmp;
150 }
151 //--------------------------------------------------------------------------
153 {
154 _fwd_iterator tmp( *this );
155 if ( n < 0 )
156 tmp._seekFwd( -n );
157 else
158 tmp._seekRev( n );
159 return tmp;
160 }
161 //--------------------------------------------------------------------------
163 {
164 if ( n < 0 )
165 _seekRev( -n );
166 else
167 _seekFwd( n );
168 return *this;
169 }
170 //--------------------------------------------------------------------------
172 {
173 if ( n < 0 )
174 _seekFwd( -n );
175 else
176 _seekRev( n );
177 return *this;
178 }
179 //--------------------------------------------------------------------------
181 {
182 return *mIter;
183 }
184 //--------------------------------------------------------------------------
186 {
187 _fwd_iterator tmp( *this );
188 tmp += n;
189 return *tmp;
190 }
191 //--------------------------------------------------------------------------
193 {
194 _moveNext();
195 return *this;
196 }
197 //--------------------------------------------------------------------------
199 {
200 _movePrev();
201 return *this;
202 }
203 //--------------------------------------------------------------------------
205 {
206 return _getCharacter();
207 }
208 //--------------------------------------------------------------------------
210 {
211 return _setCharacter( uc );
212 }
213 //--------------------------------------------------------------------------
214 //--------------------------------------------------------------------------
215 //--------------------------------------------------------------------------
216 //--------------------------------------------------------------------------
218 //--------------------------------------------------------------------------
220 {
221 _become( i );
222 }
223 //--------------------------------------------------------------------------
225 {
226 _become( i );
227 return *this;
228 }
229 //--------------------------------------------------------------------------
231 {
232 _become( i );
233 }
234 //--------------------------------------------------------------------------
236 {
237 _seekFwd( 1 );
238 return *this;
239 }
240 //--------------------------------------------------------------------------
242 {
243 _const_fwd_iterator tmp( *this );
244 _seekFwd( 1 );
245 return tmp;
246 }
247 //--------------------------------------------------------------------------
249 {
250 _seekRev( 1 );
251 return *this;
252 }
253 //--------------------------------------------------------------------------
255 {
256 _const_fwd_iterator tmp( *this );
257 _seekRev( 1 );
258 return tmp;
259 }
260 //--------------------------------------------------------------------------
262 {
263 _const_fwd_iterator tmp( *this );
264 if ( n < 0 )
265 tmp._seekRev( -n );
266 else
267 tmp._seekFwd( n );
268 return tmp;
269 }
270 //--------------------------------------------------------------------------
272 {
273 _const_fwd_iterator tmp( *this );
274 if ( n < 0 )
275 tmp._seekFwd( -n );
276 else
277 tmp._seekRev( n );
278 return tmp;
279 }
280 //--------------------------------------------------------------------------
282 {
283 if ( n < 0 )
284 _seekRev( -n );
285 else
286 _seekFwd( n );
287 return *this;
288 }
289 //--------------------------------------------------------------------------
291 {
292 if ( n < 0 )
293 _seekFwd( -n );
294 else
295 _seekRev( n );
296 return *this;
297 }
298 //--------------------------------------------------------------------------
300 {
301 return *mIter;
302 }
303 //--------------------------------------------------------------------------
305 {
306 _const_fwd_iterator tmp( *this );
307 tmp += n;
308 return *tmp;
309 }
310 //--------------------------------------------------------------------------
312 {
313 _moveNext();
314 return *this;
315 }
316 //--------------------------------------------------------------------------
318 {
319 _movePrev();
320 return *this;
321 }
322 //--------------------------------------------------------------------------
324 {
325 return _getCharacter();
326 }
327 //--------------------------------------------------------------------------
328 //--------------------------------------------------------------------------
329 //--------------------------------------------------------------------------
330 //--------------------------------------------------------------------------
332 //--------------------------------------------------------------------------
334 {
335 _become( i );
336 }
337 //--------------------------------------------------------------------------
339 {
340 _seekRev( 1 );
341 return *this;
342 }
343 //--------------------------------------------------------------------------
345 {
346 _rev_iterator tmp( *this );
347 _seekRev( 1 );
348 return tmp;
349 }
350 //--------------------------------------------------------------------------
352 {
353 _seekFwd( 1 );
354 return *this;
355 }
356 //--------------------------------------------------------------------------
358 {
359 _rev_iterator tmp( *this );
360 _seekFwd( 1 );
361 return tmp;
362 }
363 //--------------------------------------------------------------------------
365 {
366 _rev_iterator tmp( *this );
367 if ( n < 0 )
368 tmp._seekFwd( -n );
369 else
370 tmp._seekRev( n );
371 return tmp;
372 }
373 //--------------------------------------------------------------------------
375 {
376 _rev_iterator tmp( *this );
377 if ( n < 0 )
378 tmp._seekRev( -n );
379 else
380 tmp._seekFwd( n );
381 return tmp;
382 }
383 //--------------------------------------------------------------------------
385 {
386 if ( n < 0 )
387 _seekFwd( -n );
388 else
389 _seekRev( n );
390 return *this;
391 }
392 //--------------------------------------------------------------------------
394 {
395 if ( n < 0 )
396 _seekRev( -n );
397 else
398 _seekFwd( n );
399 return *this;
400 }
401 //--------------------------------------------------------------------------
403 {
404 return mIter[-1];
405 }
406 //--------------------------------------------------------------------------
408 {
409 _rev_iterator tmp( *this );
410 tmp -= n;
411 return *tmp;
412 }
413 //--------------------------------------------------------------------------
414 //--------------------------------------------------------------------------
415 //--------------------------------------------------------------------------
416 //--------------------------------------------------------------------------
418 //--------------------------------------------------------------------------
420 {
421 _become( i );
422 }
423 //--------------------------------------------------------------------------
425 {
426 _become( i );
427 }
428 //--------------------------------------------------------------------------
430 {
431 _seekRev( 1 );
432 return *this;
433 }
434 //--------------------------------------------------------------------------
436 {
437 _const_rev_iterator tmp( *this );
438 _seekRev( 1 );
439 return tmp;
440 }
441 //--------------------------------------------------------------------------
443 {
444 _seekFwd( 1 );
445 return *this;
446 }
447 //--------------------------------------------------------------------------
449 {
450 _const_rev_iterator tmp( *this );
451 _seekFwd( 1 );
452 return tmp;
453 }
454 //--------------------------------------------------------------------------
456 {
457 _const_rev_iterator tmp( *this );
458 if ( n < 0 )
459 tmp._seekFwd( -n );
460 else
461 tmp._seekRev( n );
462 return tmp;
463 }
464 //--------------------------------------------------------------------------
466 {
467 _const_rev_iterator tmp( *this );
468 if ( n < 0 )
469 tmp._seekRev( -n );
470 else
471 tmp._seekFwd( n );
472 return tmp;
473 }
474 //--------------------------------------------------------------------------
476 {
477 if ( n < 0 )
478 _seekFwd( -n );
479 else
480 _seekRev( n );
481 return *this;
482 }
483 //--------------------------------------------------------------------------
485 {
486 if ( n < 0 )
487 _seekRev( -n );
488 else
489 _seekFwd( n );
490 return *this;
491 }
492 //--------------------------------------------------------------------------
494 {
495 return mIter[-1];
496 }
497 //--------------------------------------------------------------------------
499 {
500 _const_rev_iterator tmp( *this );
501 tmp -= n;
502 return *tmp;
503 }
504 //--------------------------------------------------------------------------
505 //--------------------------------------------------------------------------
506 //--------------------------------------------------------------------------
507 //--------------------------------------------------------------------------
509 {
510 _init();
511 }
512 //--------------------------------------------------------------------------
514 {
515 _init();
516 mData = copy.mData;
517 }
518 //--------------------------------------------------------------------------
520 {
521 _init();
522 assign( length, ch );
523 }
524 //--------------------------------------------------------------------------
526 {
527 _init();
528 assign( str );
529 }
530 //--------------------------------------------------------------------------
532 {
533 _init();
534 assign( str, length );
535 }
536 //--------------------------------------------------------------------------
538 {
539 _init();
540 assign( str, index, length );
541 }
542 //--------------------------------------------------------------------------
543#if MYGUI_IS_NATIVE_WCHAR_T
544 UString::UString( const wchar_t* w_str )
545 {
546 _init();
547 assign( w_str );
548 }
549 //--------------------------------------------------------------------------
550 UString::UString( const wchar_t* w_str, size_type length )
551 {
552 _init();
553 assign( w_str, length );
554 }
555#endif
556 //--------------------------------------------------------------------------
557 UString::UString( const std::wstring& wstr )
558 {
559 _init();
560 assign( wstr );
561 }
562 //--------------------------------------------------------------------------
563 UString::UString( const char* c_str )
564 {
565 _init();
566 assign( c_str );
567 }
568 //--------------------------------------------------------------------------
570 {
571 _init();
572 assign( c_str, length );
573 }
574 //--------------------------------------------------------------------------
575 UString::UString( const std::string& str )
576 {
577 _init();
578 assign( str );
579 }
580 //--------------------------------------------------------------------------
582 {
583 _init();
584 assign( str );
585 }
586 //--------------------------------------------------------------------------
588 {
589 _cleanBuffer();
590 }
591 //--------------------------------------------------------------------------
593 {
594 return mData.size();
595 }
596 //--------------------------------------------------------------------------
598 {
599 return size();
600 }
601 //--------------------------------------------------------------------------
603 {
604 const_iterator i = begin(), ie = end();
605 size_type c = 0;
606 while ( i != ie ) {
607 i.moveNext();
608 ++c;
609 }
610 return c;
611 }
612 //--------------------------------------------------------------------------
614 {
615 return mData.max_size();
616 }
617 //--------------------------------------------------------------------------
619 {
620 mData.reserve( size );
621 }
622 //--------------------------------------------------------------------------
623 void UString::resize( size_type num, const code_point& val /*= 0 */ )
624 {
625 mData.resize( num, val );
626 }
627 //--------------------------------------------------------------------------
629 {
630 mData.swap( from.mData );
631 }
632 //--------------------------------------------------------------------------
633 bool UString::empty() const
634 {
635 return mData.empty();
636 }
637 //--------------------------------------------------------------------------
639 {
640 return mData.c_str();
641 }
642 //--------------------------------------------------------------------------
644 {
645 return c_str();
646 }
647 //--------------------------------------------------------------------------
649 {
650 return mData.capacity();
651 }
652 //--------------------------------------------------------------------------
654 {
655 mData.clear();
656 }
657 //--------------------------------------------------------------------------
658 UString UString::substr( size_type index, size_type num /*= npos */ ) const
659 {
660 // this could avoid the extra copy if we used a private specialty constructor
661 dstring data = mData.substr( index, num );
662 UString tmp;
663 tmp.mData.swap( data );
664 return tmp;
665 }
666 //--------------------------------------------------------------------------
668 {
669 code_point cp[2];
670 size_t c = _utf32_to_utf16( val, cp );
671 if ( c > 0 ) push_back( cp[0] );
672 if ( c > 1 ) push_back( cp[1] );
673 }
674 //--------------------------------------------------------------------------
675#if MYGUI_IS_NATIVE_WCHAR_T
676 void UString::push_back( wchar_t val )
677 {
678 // we do this because the Unicode method still preserves UTF-16 code points
679 mData.push_back( static_cast<code_point>( val ) );
680 }
681#endif
682 //--------------------------------------------------------------------------
684 {
685 mData.push_back( val );
686 }
687
689 {
690 mData.push_back( static_cast<code_point>( val ) );
691 }
692
694 {
695 const_iterator i, ie = end();
696 for ( i = begin(); i != ie; i.moveNext() ) {
697 if ( i.getCharacter() == ch )
698 return true;
699 }
700 return false;
701 }
702
703 const std::string& UString::asUTF8() const
704 {
705 _load_buffer_UTF8();
706 return *m_buffer.mStrBuffer;
707 }
708
709 const char* UString::asUTF8_c_str() const
710 {
711 _load_buffer_UTF8();
712 return m_buffer.mStrBuffer->c_str();
713 }
714
716 {
717 _load_buffer_UTF32();
718 return *m_buffer.mUTF32StrBuffer;
719 }
720
722 {
723 _load_buffer_UTF32();
724 return m_buffer.mUTF32StrBuffer->c_str();
725 }
726
727 const std::wstring& UString::asWStr() const
728 {
729 _load_buffer_WStr();
730 return *m_buffer.mWStrBuffer;
731 }
732
733 const wchar_t* UString::asWStr_c_str() const
734 {
735 _load_buffer_WStr();
736 return m_buffer.mWStrBuffer->c_str();
737 }
738
740 {
741 return mData.at( loc );
742 }
743
745 {
746 return mData.at( loc );
747 }
748
750 {
751 const code_point* ptr = c_str();
753 size_t l = _utf16_char_length( ptr[loc] );
754 code_point cp[2] = { /* blame the code beautifier */
755 0, 0
756 };
757 cp[0] = ptr[loc];
758
759 if ( l == 2 && ( loc + 1 ) < mData.length() ) {
760 cp[1] = ptr[loc+1];
761 }
763 return uc;
764 }
765
767 {
768 code_point cp[2] = { /* blame the code beautifier */
769 0, 0
770 };
771 size_t l = _utf32_to_utf16( ch, cp );
774 size_t newSize = _utf16_char_length( ch );
775
776 if ( newSize > existingSize ) {
777 at( loc ) = cp[0];
778 insert( loc + 1, 1, cp[1] );
779 return 1;
780 }
781 if ( newSize < existingSize ) {
782 erase( loc, 1 );
783 at( loc ) = cp[0];
784 return -1;
785 }
786
787 // newSize == existingSize
788 at( loc ) = cp[0];
789 if ( l == 2 ) at( loc + 1 ) = cp[1];
790 return 0;
791 }
792
794 {
795 iterator i;
796 i.mIter = mData.begin();
797 i.mString = this;
798 return i;
799 }
800
802 {
804 i.mIter = const_cast<UString*>( this )->mData.begin();
805 i.mString = const_cast<UString*>( this );
806 return i;
807 }
808
810 {
811 iterator i;
812 i.mIter = mData.end();
813 i.mString = this;
814 return i;
815 }
816
818 {
820 i.mIter = const_cast<UString*>( this )->mData.end();
821 i.mString = const_cast<UString*>( this );
822 return i;
823 }
824
826 {
828 i.mIter = mData.end();
829 i.mString = this;
830 return i;
831 }
832
834 {
836 i.mIter = const_cast<UString*>( this )->mData.end();
837 i.mString = const_cast<UString*>( this );
838 return i;
839 }
840
842 {
844 i.mIter = mData.begin();
845 i.mString = this;
846 return i;
847 }
848
850 {
852 i.mIter = const_cast<UString*>( this )->mData.begin();
853 i.mString = const_cast<UString*>( this );
854 return i;
855 }
856
858 {
859 mData.assign( start.mIter, end.mIter );
860 return *this;
861 }
862
864 {
865 mData.assign( str.mData );
866 return *this;
867 }
868
870 {
871 mData.assign( str );
872 return *this;
873 }
874
876 {
877 mData.assign( str, num );
878 return *this;
879 }
880
882 {
883 mData.assign( str.mData, index, len );
884 return *this;
885 }
886
888 {
889 mData.assign( num, ch );
890 return *this;
891 }
892
893 UString& UString::assign( const std::wstring& wstr )
894 {
895 mData.clear();
896 mData.reserve( wstr.length() ); // best guess bulk allocate
897#ifdef WCHAR_UTF16 // if we're already working in UTF-16, this is easy
899 std::wstring::const_iterator i, ie = wstr.end();
900 for ( i = wstr.begin(); i != ie; i++ ) {
901 tmp = static_cast<code_point>( *i );
902 mData.push_back( tmp );
903 }
904#else // otherwise we do it the safe way (which is still 100% safe to pass UTF-16 through, just slower)
905 code_point cp[3] = {0, 0, 0};
907 std::wstring::const_iterator i, ie = wstr.end();
908 for ( i = wstr.begin(); i != ie; i++ ) {
909 tmp = static_cast<unicode_char>( *i );
910 size_t l = _utf32_to_utf16( tmp, cp );
911 if ( l > 0 ) mData.push_back( cp[0] );
912 if ( l > 1 ) mData.push_back( cp[1] );
913 }
914#endif
915 return *this;
916 }
917
918#if MYGUI_IS_NATIVE_WCHAR_T
919 UString& UString::assign( const wchar_t* w_str )
920 {
921 std::wstring tmp;
922 tmp.assign( w_str );
923 return assign( tmp );
924 }
925
926 UString& UString::assign( const wchar_t* w_str, size_type num )
927 {
928 std::wstring tmp;
929 tmp.assign( w_str, num );
930 return assign( tmp );
931 }
932#endif
933
934 UString& UString::assign( const std::string& str )
935 {
936 return assign(str.data(), str.size());
937 }
938
940 {
941 for (const auto& character : str)
942 {
944 }
945 return *this;
946 }
947
949 {
950 return assign( c_str, std::strlen(c_str) );
951 }
952
954 {
956 clear(); // empty our contents, if there are any
957 reserve( len ); // best guess bulk capacity growth
958
959 // This is a 3 step process, converting each byte in the UTF-8 stream to UTF-32,
960 // then converting it to UTF-16, then finally appending the data buffer
961
962 unicode_char uc; // temporary Unicode character buffer
963 unsigned char utf8buf[7]; // temporary UTF-8 buffer
964 utf8buf[6] = 0;
965 size_t utf8len; // UTF-8 length
966 code_point utf16buff[3]; // temporary UTF-16 buffer
967 utf16buff[2] = 0;
968 size_t utf16len; // UTF-16 length
969
970 for ( size_type i = 0; i < num; ++i ) {
971 utf8len = std::min( _utf8_char_length( static_cast<unsigned char>( c_str[i] ) ), num - i ); // estimate bytes to load
972 for ( size_t j = 0; j < utf8len; j++ ) { // load the needed UTF-8 bytes
973 utf8buf[j] = ( static_cast<unsigned char>( c_str[i + j] ) ); // we don't increment 'i' here just in case the estimate is wrong (shouldn't happen, but we're being careful)
974 }
975 utf8buf[utf8len] = 0; // nul terminate so we throw an exception before running off the end of the buffer
976 utf8len = _utf8_to_utf32( utf8buf, uc ); // do the UTF-8 -> UTF-32 conversion
977 i += utf8len - 1; // we subtract 1 for the increment of the 'for' loop
978
979 utf16len = _utf32_to_utf16( uc, utf16buff ); // UTF-32 -> UTF-16 conversion
980 append( utf16buff, utf16len ); // append the characters to the string
981 }
982 return *this;
983 }
984
986 {
987 mData.append( str.mData );
988 return *this;
989 }
990
992 {
993 mData.append( str );
994 return *this;
995 }
996
998 {
999 mData.append( str.mData, index, len );
1000 return *this;
1001 }
1002
1004 {
1005 mData.append( str, num );
1006 return *this;
1007 }
1008
1010 {
1011 mData.append( num, ch );
1012 return *this;
1013 }
1014
1016 {
1017 mData.append( start.mIter, end.mIter );
1018 return *this;
1019 }
1020
1021#if MYGUI_IS_NATIVE_WCHAR_T
1022 UString& UString::append( const wchar_t* w_str, size_type num )
1023 {
1024 std::wstring tmp( w_str, num );
1025 return append( tmp );
1026 }
1027
1028 UString& UString::append( size_type num, wchar_t ch )
1029 {
1030 return append( num, static_cast<unicode_char>( ch ) );
1031 }
1032#endif
1034 {
1035 UString tmp( c_str, num );
1036 append( tmp );
1037 return *this;
1038 }
1039
1041 {
1042 append( num, static_cast<code_point>( ch ) );
1043 return *this;
1044 }
1045
1047 {
1048 code_point cp[2] = {0, 0};
1049 if ( _utf32_to_utf16( ch, cp ) == 2 ) {
1050 for ( size_type i = 0; i < num; i++ ) {
1051 append( 1, cp[0] );
1052 append( 1, cp[1] );
1053 }
1054 } else {
1055 for ( size_type i = 0; i < num; i++ ) {
1056 append( 1, cp[0] );
1057 }
1058 }
1059 return *this;
1060 }
1061
1063 {
1064 iterator ret;
1065 ret.mIter = mData.insert( i.mIter, ch );
1066 ret.mString = this;
1067 return ret;
1068 }
1069
1071 {
1072 mData.insert( index, str.mData );
1073 return *this;
1074 }
1075
1077 {
1078 mData.insert( index1, str.mData, index2, num );
1079 return *this;
1080 }
1081
1083 {
1084 mData.insert( i.mIter, start.mIter, end.mIter );
1085 }
1086
1088 {
1089 mData.insert( index, str, num );
1090 return *this;
1091 }
1092
1093#if MYGUI_IS_NATIVE_WCHAR_T
1094 UString& UString::insert( size_type index, const wchar_t* w_str, size_type num )
1095 {
1096 UString tmp( w_str, num );
1097 insert( index, tmp );
1098 return *this;
1099 }
1100#endif
1101
1103 {
1104 UString tmp( c_str, num );
1105 insert( index, tmp );
1106 return *this;
1107 }
1108
1110 {
1111 mData.insert( index, num, ch );
1112 return *this;
1113 }
1114
1115#if MYGUI_IS_NATIVE_WCHAR_T
1116 UString& UString::insert( size_type index, size_type num, wchar_t ch )
1117 {
1118 insert( index, num, static_cast<unicode_char>( ch ) );
1119 return *this;
1120 }
1121#endif
1122
1124 {
1125 insert( index, num, static_cast<code_point>( ch ) );
1126 return *this;
1127 }
1128
1130 {
1131 code_point cp[3] = {0, 0, 0};
1132 size_t l = _utf32_to_utf16( ch, cp );
1133 if ( l == 1 ) {
1134 return insert( index, num, cp[0] );
1135 }
1136 for ( size_type c = 0; c < num; c++ ) {
1137 // insert in reverse order to preserve ordering after insert
1138 insert( index, 1, cp[1] );
1139 insert( index, 1, cp[0] );
1140 }
1141 return *this;
1142 }
1143
1145 {
1146 mData.insert( i.mIter, num, ch );
1147 }
1148#if MYGUI_IS_NATIVE_WCHAR_T
1149 void UString::insert( iterator i, size_type num, const wchar_t& ch )
1150 {
1151 insert( i, num, static_cast<unicode_char>( ch ) );
1152 }
1153#endif
1154
1156 {
1157 insert( i, num, static_cast<code_point>( ch ) );
1158 }
1159
1161 {
1162 code_point cp[3] = {0, 0, 0};
1163 size_t l = _utf32_to_utf16( ch, cp );
1164 if ( l == 1 ) {
1165 insert( i, num, cp[0] );
1166 } else {
1167 for ( size_type c = 0; c < num; c++ ) {
1168 // insert in reverse order to preserve ordering after insert
1169 insert( i, 1, cp[1] );
1170 insert( i, 1, cp[0] );
1171 }
1172 }
1173 }
1174
1176 {
1177 iterator ret;
1178 ret.mIter = mData.erase( loc.mIter );
1179 ret.mString = this;
1180 return ret;
1181 }
1182
1184 {
1185 iterator ret;
1186 ret.mIter = mData.erase( start.mIter, end.mIter );
1187 ret.mString = this;
1188 return ret;
1189 }
1190
1191 UString& UString::erase( size_type index /*= 0*/, size_type num /*= npos */ )
1192 {
1193 if ( num == npos )
1194 mData.erase( index );
1195 else
1196 mData.erase( index, num );
1197 return *this;
1198 }
1199
1201 {
1202 mData.replace( index1, num1, str.mData, 0, npos );
1203 return *this;
1204 }
1205
1207 {
1208 mData.replace( index1, num1, str.mData, 0, num2 );
1209 return *this;
1210 }
1211
1213 {
1214 mData.replace( index1, num1, str.mData, index2, num2 );
1215 return *this;
1216 }
1217
1219 {
1220 _const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload
1221
1222 size_type index1 = begin() - st;
1223 size_type num1 = end - st;
1224 return replace( index1, num1, str, 0, num );
1225 }
1226
1228 {
1229 mData.replace( index, num1, num2, ch );
1230 return *this;
1231 }
1232
1234 {
1235 _const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload
1236
1237 size_type index1 = begin() - st;
1238 size_type num1 = end - st;
1239 return replace( index1, num1, num, ch );
1240 }
1241
1242 int UString::compare( const UString& str ) const
1243 {
1244 return mData.compare( str.mData );
1245 }
1246
1247 int UString::compare( const code_point* str ) const
1248 {
1249 return mData.compare( str );
1250 }
1251
1253 {
1254 return mData.compare( index, length, str.mData );
1255 }
1256
1258 {
1259 return mData.compare( index, length, str.mData, index2, length2 );
1260 }
1261
1263 {
1264 return mData.compare( index, length, str, length2 );
1265 }
1266
1267#if MYGUI_IS_NATIVE_WCHAR_T
1268 int UString::compare( size_type index, size_type length, const wchar_t* w_str, size_type length2 ) const
1269 {
1271 return compare( index, length, tmp );
1272 }
1273#endif
1274
1276 {
1278 return compare( index, length, tmp );
1279 }
1280
1281 UString::size_type UString::find( const UString& str, size_type index /*= 0 */ ) const
1282 {
1283 return mData.find( str.c_str(), index );
1284 }
1285
1287 {
1288 UString tmp( cp_str );
1289 return mData.find( tmp.c_str(), index, length );
1290 }
1291
1293 {
1294 UString tmp( c_str );
1295 return mData.find( tmp.c_str(), index, length );
1296 }
1297
1298#if MYGUI_IS_NATIVE_WCHAR_T
1299 UString::size_type UString::find( const wchar_t* w_str, size_type index, size_type length ) const
1300 {
1301 UString tmp( w_str );
1302 return mData.find( tmp.c_str(), index, length );
1303 }
1304#endif
1305
1306 UString::size_type UString::find( char ch, size_type index /*= 0 */ ) const
1307 {
1308 return find( static_cast<code_point>( ch ), index );
1309 }
1310
1312 {
1313 return mData.find( ch, index );
1314 }
1315
1316#if MYGUI_IS_NATIVE_WCHAR_T
1317 UString::size_type UString::find( wchar_t ch, size_type index /*= 0 */ ) const
1318 {
1319 return find( static_cast<unicode_char>( ch ), index );
1320 }
1321#endif
1322
1324 {
1325 code_point cp[3] = {0, 0, 0};
1326 size_t l = _utf32_to_utf16( ch, cp );
1327 return find( UString( cp, l ), index );
1328 }
1329
1331 {
1332 return mData.rfind( str.c_str(), index );
1333 }
1334
1336 {
1337 UString tmp( cp_str );
1338 return mData.rfind( tmp.c_str(), index, num );
1339 }
1340
1342 {
1343 UString tmp( c_str );
1344 return mData.rfind( tmp.c_str(), index, num );
1345 }
1346
1347#if MYGUI_IS_NATIVE_WCHAR_T
1348 UString::size_type UString::rfind( const wchar_t* w_str, size_type index, size_type num ) const
1349 {
1350 UString tmp( w_str );
1351 return mData.rfind( tmp.c_str(), index, num );
1352 }
1353#endif
1354
1355 UString::size_type UString::rfind( char ch, size_type index /*= 0 */ ) const
1356 {
1357 return rfind( static_cast<code_point>( ch ), index );
1358 }
1359
1361 {
1362 return mData.rfind( ch, index );
1363 }
1364
1365#if MYGUI_IS_NATIVE_WCHAR_T
1366 UString::size_type UString::rfind( wchar_t ch, size_type index /*= 0 */ ) const
1367 {
1368 return rfind( static_cast<unicode_char>( ch ), index );
1369 }
1370#endif
1371
1373 {
1374 code_point cp[3] = {0, 0, 0};
1375 size_t l = _utf32_to_utf16( ch, cp );
1376 return rfind( UString( cp, l ), index );
1377 }
1378
1379 UString::size_type UString::find_first_of( const UString &str, size_type index /*= 0*/, size_type num /*= npos */ ) const
1380 {
1381 size_type i = 0;
1382 const size_type len = length();
1383 while ( i < num && ( index + i ) < len ) {
1384 unicode_char ch = getChar( index + i );
1385 if ( str.inString( ch ) )
1386 return index + i;
1387 i += _utf16_char_length( ch ); // increment by the Unicode character length
1388 }
1389 return npos;
1390 }
1391
1393 {
1394 UString tmp;
1395 tmp.assign( 1, ch );
1396 return find_first_of( tmp, index );
1397 }
1398
1400 {
1401 return find_first_of( static_cast<code_point>( ch ), index );
1402 }
1403
1404#if MYGUI_IS_NATIVE_WCHAR_T
1405 UString::size_type UString::find_first_of( wchar_t ch, size_type index /*= 0 */ ) const
1406 {
1407 return find_first_of( static_cast<unicode_char>( ch ), index );
1408 }
1409#endif
1410
1412 {
1413 code_point cp[3] = {0, 0, 0};
1414 size_t l = _utf32_to_utf16( ch, cp );
1415 return find_first_of( UString( cp, l ), index );
1416 }
1417
1419 {
1420 size_type i = 0;
1421 const size_type len = length();
1422 while ( i < num && ( index + i ) < len ) {
1423 unicode_char ch = getChar( index + i );
1424 if ( !str.inString( ch ) )
1425 return index + i;
1426 i += _utf16_char_length( ch ); // increment by the Unicode character length
1427 }
1428 return npos;
1429 }
1430
1432 {
1433 UString tmp;
1434 tmp.assign( 1, ch );
1435 return find_first_not_of( tmp, index );
1436 }
1437
1439 {
1440 return find_first_not_of( static_cast<code_point>( ch ), index );
1441 }
1442
1443#if MYGUI_IS_NATIVE_WCHAR_T
1444 UString::size_type UString::find_first_not_of( wchar_t ch, size_type index /*= 0 */ ) const
1445 {
1446 return find_first_not_of( static_cast<unicode_char>( ch ), index );
1447 }
1448#endif
1449
1451 {
1452 code_point cp[3] = {0, 0, 0};
1453 size_t l = _utf32_to_utf16( ch, cp );
1454 return find_first_not_of( UString( cp, l ), index );
1455 }
1456
1457 UString::size_type UString::find_last_of( const UString& str, size_type index /*= npos*/, size_type num /*= npos */ ) const
1458 {
1459 size_type i = 0;
1460 const size_type len = length();
1461 if ( index > len ) index = len - 1;
1462
1463 while ( i < num && ( index - i ) != npos ) {
1464 size_type j = index - i;
1465 // careful to step full Unicode characters
1466 if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) ) {
1467 j = index - ++i;
1468 }
1469 // and back to the usual dull test
1470 unicode_char ch = getChar( j );
1471 if ( str.inString( ch ) )
1472 return j;
1473 i++;
1474 }
1475 return npos;
1476 }
1477
1479 {
1480 UString tmp;
1481 tmp.assign( 1, ch );
1482 return find_last_of( tmp, index );
1483 }
1484
1485#if MYGUI_IS_NATIVE_WCHAR_T
1486 UString::size_type UString::find_last_of( wchar_t ch, size_type index /*= npos */ ) const
1487 {
1488 return find_last_of( static_cast<unicode_char>( ch ), index );
1489 }
1490#endif
1491
1493 {
1494 code_point cp[3] = {0, 0, 0};
1495 size_t l = _utf32_to_utf16( ch, cp );
1496 return find_last_of( UString( cp, l ), index );
1497 }
1498
1499 UString::size_type UString::find_last_not_of( const UString& str, size_type index /*= npos*/, size_type num /*= npos */ ) const
1500 {
1501 size_type i = 0;
1502 const size_type len = length();
1503 if ( index > len ) index = len - 1;
1504
1505 while ( i < num && ( index - i ) != npos ) {
1506 size_type j = index - i;
1507 // careful to step full Unicode characters
1508 if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) ) {
1509 j = index - ++i;
1510 }
1511 // and back to the usual dull test
1512 unicode_char ch = getChar( j );
1513 if ( !str.inString( ch ) )
1514 return j;
1515 i++;
1516 }
1517 return npos;
1518 }
1519
1521 {
1522 UString tmp;
1523 tmp.assign( 1, ch );
1524 return find_last_not_of( tmp, index );
1525 }
1526
1528 {
1529 return find_last_not_of( static_cast<code_point>( ch ), index );
1530 }
1531
1532#if MYGUI_IS_NATIVE_WCHAR_T
1533 UString::size_type UString::find_last_not_of( wchar_t ch, size_type index /*= npos */ ) const
1534 {
1535 return find_last_not_of( static_cast<unicode_char>( ch ), index );
1536 }
1537#endif
1538
1540 {
1541 code_point cp[3] = {0, 0, 0};
1542 size_t l = _utf32_to_utf16( ch, cp );
1543 return find_last_not_of( UString( cp, l ), index );
1544 }
1545
1546 bool UString::operator<( const UString& right ) const
1547 {
1548 return compare( right ) < 0;
1549 }
1550
1551 bool UString::operator<=( const UString& right ) const
1552 {
1553 return compare( right ) <= 0;
1554 }
1555
1557 {
1558 return assign( s );
1559 }
1560
1562 {
1563 clear();
1564 return append( 1, ch );
1565 }
1566
1568 {
1569 clear();
1570 return append( 1, ch );
1571 }
1572
1573#if MYGUI_IS_NATIVE_WCHAR_T
1574 UString& UString::operator=( wchar_t ch )
1575 {
1576 clear();
1577 return append( 1, ch );
1578 }
1579#endif
1580
1582 {
1583 clear();
1584 return append( 1, ch );
1585 }
1586
1587 bool UString::operator>( const UString& right ) const
1588 {
1589 return compare( right ) > 0;
1590 }
1591
1592 bool UString::operator>=( const UString& right ) const
1593 {
1594 return compare( right ) >= 0;
1595 }
1596
1597 bool UString::operator==( const UString& right ) const
1598 {
1599 return compare( right ) == 0;
1600 }
1601
1602 bool UString::operator!=( const UString& right ) const
1603 {
1604 return !operator==( right );
1605 }
1606
1608 {
1609 return at( index );
1610 }
1611
1613 {
1614 return at( index );
1615 }
1616
1617 UString::operator std::string() const
1618 {
1619 return std::string( asUTF8() );
1620 }
1621
1623 UString::operator std::wstring() const
1624 {
1625 return std::wstring( asWStr() );
1626 }
1627
1628
1630 {
1631 if ( 0xD800 <= cp && cp <= 0xDFFF ) // tests if the cp is within the surrogate pair range
1632 return false; // it matches a surrogate pair signature
1633 return true; // everything else is a standalone code point
1634 }
1635
1637 {
1638 if ( 0xD800 <= cp && cp <= 0xDBFF ) // tests if the cp is within the 2nd word of a surrogate pair
1639 return true; // it is a 1st word
1640 return false; // it isn't
1641 }
1642
1644 {
1645 if ( 0xDC00 <= cp && cp <= 0xDFFF ) // tests if the cp is within the 2nd word of a surrogate pair
1646 return true; // it is a 2nd word
1647 return false; // everything else isn't
1648 }
1649
1651 {
1652 if ( 0xD800 <= cp && cp <= 0xDBFF ) // test if cp is the beginning of a surrogate pair
1653 return 2; // if it is, then we are 2 words long
1654 return 1; // otherwise we are only 1 word long
1655 }
1656
1658 {
1659 if ( uc > 0xFFFF ) // test if uc is greater than the single word maximum
1660 return 2; // if so, we need a surrogate pair
1661 return 1; // otherwise we can stuff it into a single word
1662 }
1663
1665 {
1666 const code_point& cp1 = in_cp[0];
1667 const code_point& cp2 = in_cp[1];
1668 bool wordPair = false;
1669
1670 // does it look like a surrogate pair?
1671 if ( 0xD800 <= cp1 && cp1 <= 0xDBFF ) {
1672 // looks like one, but does the other half match the algorithm as well?
1673 if ( 0xDC00 <= cp2 && cp2 <= 0xDFFF )
1674 wordPair = true; // yep!
1675 }
1676
1677 if ( !wordPair ) { // if we aren't a 100% authentic surrogate pair, then just copy the value
1678 out_uc = cp1;
1679 return 1;
1680 }
1681
1682 unsigned short cU = cp1, cL = cp2; // copy upper and lower words of surrogate pair to writable buffers
1683 cU -= 0xD800; // remove the encoding markers
1684 cL -= 0xDC00;
1685
1686 out_uc = ( cU & 0x03FF ) << 10; // grab the 10 upper bits and set them in their proper location
1687 out_uc |= ( cL & 0x03FF ); // combine in the lower 10 bits
1688 out_uc += 0x10000; // add back in the value offset
1689
1690 return 2; // this whole operation takes to words, so that's what we'll return
1691 }
1692
1694 {
1695 if ( in_uc <= 0xFFFF ) { // we blindly preserve sentinel values because our decoder understands them
1696 out_cp[0] = static_cast<code_point>(in_uc);
1697 return 1;
1698 }
1699 unicode_char uc = in_uc; // copy to writable buffer
1700 unsigned short tmp; // single code point buffer
1701 uc -= 0x10000; // subtract value offset
1702
1703 //process upper word
1704 tmp = static_cast<unsigned short>(( uc >> 10 ) & 0x03FF); // grab the upper 10 bits
1705 tmp += 0xD800; // add encoding offset
1706 out_cp[0] = tmp; // write
1707
1708 // process lower word
1709 tmp = static_cast<unsigned short>(uc & 0x03FF); // grab the lower 10 bits
1710 tmp += 0xDC00; // add encoding offset
1711 out_cp[1] = tmp; // write
1712
1713 return 2; // return used word count (2 for surrogate pairs)
1714 }
1715
1716 bool UString::_utf8_start_char( unsigned char cp )
1717 {
1718 return ( cp & ~_cont_mask ) != _cont;
1719 }
1720
1721 size_t UString::_utf8_char_length( unsigned char cp )
1722 {
1723 if ( !( cp & 0x80 ) ) return 1;
1724 if (( cp & ~_lead1_mask ) == _lead1 ) return 2;
1725 if (( cp & ~_lead2_mask ) == _lead2 ) return 3;
1726 if (( cp & ~_lead3_mask ) == _lead3 ) return 4;
1727 if (( cp & ~_lead4_mask ) == _lead4 ) return 5;
1728 if (( cp & ~_lead5_mask ) == _lead5 ) return 6;
1729
1730 return 1;
1731 //throw invalid_data( "invalid UTF-8 sequence header value" );
1732 }
1733
1735 {
1736 /*
1737 7 bit: U-00000000 - U-0000007F: 0xxxxxxx
1738 11 bit: U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
1739 16 bit: U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
1740 21 bit: U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
1741 26 bit: U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
1742 31 bit: U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
1743 */
1744 if ( !( uc & ~0x0000007F ) ) return 1;
1745 if ( !( uc & ~0x000007FF ) ) return 2;
1746 if ( !( uc & ~0x0000FFFF ) ) return 3;
1747 if ( !( uc & ~0x001FFFFF ) ) return 4;
1748 if ( !( uc & ~0x03FFFFFF ) ) return 5;
1749 if ( !( uc & ~0x7FFFFFFF ) ) return 6;
1750
1751 return 1;
1752 //throw invalid_data( "invalid UTF-32 value" );
1753 }
1754
1755 size_t UString::_utf8_to_utf32( const unsigned char in_cp[6], unicode_char& out_uc )
1756 {
1757 size_t len = _utf8_char_length( in_cp[0] );
1758 if ( len == 1 ) { // if we are only 1 byte long, then just grab it and exit
1759 out_uc = in_cp[0];
1760 return 1;
1761 }
1762
1763 unicode_char c = 0; // temporary buffer
1764 size_t i = 0;
1765 switch ( len ) { // load header byte
1766 case 6:
1767 c = in_cp[i] & _lead5_mask;
1768 break;
1769 case 5:
1770 c = in_cp[i] & _lead4_mask;
1771 break;
1772 case 4:
1773 c = in_cp[i] & _lead3_mask;
1774 break;
1775 case 3:
1776 c = in_cp[i] & _lead2_mask;
1777 break;
1778 case 2:
1779 c = in_cp[i] & _lead1_mask;
1780 break;
1781 }
1782
1783 // load each continuation byte
1784 for ( ++i; i < len; i++ )
1785 {
1786 if (( in_cp[i] & ~_cont_mask ) != _cont )
1787 {
1788 //throw invalid_data( "bad UTF-8 continuation byte" );
1789 out_uc = in_cp[0];
1790 return 1;
1791 }
1792 c <<= 6;
1793 c |= ( in_cp[i] & _cont_mask );
1794 }
1795
1796 out_uc = c; // write the final value and return the used byte length
1797 return len;
1798 }
1799
1800 size_t UString::_utf32_to_utf8( const unicode_char& in_uc, unsigned char out_cp[6] )
1801 {
1802 size_t len = _utf8_char_length( in_uc ); // predict byte length of sequence
1803 unicode_char c = in_uc; // copy to temp buffer
1804
1805 //stuff all of the lower bits
1806 for ( size_t i = len - 1; i > 0; i-- ) {
1807 out_cp[i] = static_cast<unsigned char>((( c ) & _cont_mask ) | _cont);
1808 c >>= 6;
1809 }
1810
1811 //now write the header byte
1812 switch ( len ) {
1813 case 6:
1814 out_cp[0] = static_cast<unsigned char>((( c ) & _lead5_mask ) | _lead5);
1815 break;
1816 case 5:
1817 out_cp[0] = static_cast<unsigned char>((( c ) & _lead4_mask ) | _lead4);
1818 break;
1819 case 4:
1820 out_cp[0] = static_cast<unsigned char>((( c ) & _lead3_mask ) | _lead3);
1821 break;
1822 case 3:
1823 out_cp[0] = static_cast<unsigned char>((( c ) & _lead2_mask ) | _lead2);
1824 break;
1825 case 2:
1826 out_cp[0] = static_cast<unsigned char>((( c ) & _lead1_mask ) | _lead1);
1827 break;
1828 case 1:
1829 default:
1830 out_cp[0] = static_cast<unsigned char>(( c ) & 0x7F);
1831 break;
1832 }
1833
1834 // return the byte length of the sequence
1835 return len;
1836 }
1837
1839 {
1840 std::string tmp( reinterpret_cast<const char*>( c_str ) );
1841 return _verifyUTF8( tmp );
1842 }
1843
1845 {
1846 return _verifyUTF8(str.data(), str.size());
1847 }
1848
1850 {
1851 size_type length = 0;
1852
1853 for ( size_type i = 0; i < num; ++i ) {
1854 // characters pass until we find an extended sequence
1855 if (c_str[i] & 0x80 ) {
1856 if ( i + 1 >= num ) // invalid extended sequence
1857 return num;
1858
1859 unsigned char c = c_str[i];
1860 size_t contBytes = 0;
1861
1862 // get continuation byte count and test for overlong sequences
1863 if (( c & ~_lead1_mask ) == _lead1 ) { // 1 additional byte
1864 if ( c == _lead1 )
1865 {
1866 //throw invalid_data( "overlong UTF-8 sequence" );
1867 return num;
1868 }
1869 contBytes = 1;
1870
1871 } else if (( c & ~_lead2_mask ) == _lead2 ) { // 2 additional bytes
1872 contBytes = 2;
1873 if ( c == _lead2 ) { // possible overlong UTF-8 sequence
1874 c = c_str[i + 1]; // look ahead to next byte in sequence
1875 if (( c & _lead2 ) == _cont )
1876 {
1877 //throw invalid_data( "overlong UTF-8 sequence" );
1878 return num;
1879 }
1880 }
1881
1882 } else if (( c & ~_lead3_mask ) == _lead3 ) { // 3 additional bytes
1883 contBytes = 3;
1884 if ( c == _lead3 ) { // possible overlong UTF-8 sequence
1885 c = c_str[i + 1]; // look ahead to next byte in sequence
1886 if (( c & _lead3 ) == _cont )
1887 {
1888 //throw invalid_data( "overlong UTF-8 sequence" );
1889 return num;
1890 }
1891 }
1892
1893 } else if (( c & ~_lead4_mask ) == _lead4 ) { // 4 additional bytes
1894 contBytes = 4;
1895 if ( c == _lead4 ) { // possible overlong UTF-8 sequence
1896 c = c_str[i + 1]; // look ahead to next byte in sequence
1897 if (( c & _lead4 ) == _cont )
1898 {
1899 //throw invalid_data( "overlong UTF-8 sequence" );
1900 return num;
1901 }
1902 }
1903
1904 } else if (( c & ~_lead5_mask ) == _lead5 ) { // 5 additional bytes
1905 contBytes = 5;
1906 if ( c == _lead5 ) { // possible overlong UTF-8 sequence
1907 c = c_str[i + 1]; // look ahead to next byte in sequence
1908 if (( c & _lead5 ) == _cont )
1909 {
1910 //throw invalid_data( "overlong UTF-8 sequence" );
1911 return num;
1912 }
1913 }
1914 }
1915 if ( i + contBytes >= num ) // invalid extended sequence
1916 return num;
1917 // check remaining continuation bytes for
1918 while ( contBytes-- ) {
1919 c = c_str[++i]; // get next byte in sequence
1920 if (( c & ~_cont_mask ) != _cont )
1921 {
1922 //throw invalid_data( "bad UTF-8 continuation byte" );
1923 return num;
1924 }
1925 }
1926 }
1927 length++;
1928 }
1929 return length;
1930 }
1931
1932 void UString::_init()
1933 {
1934 m_buffer.mVoidBuffer = nullptr;
1935 m_bufferType = bt_none;
1936 m_bufferSize = 0;
1937 }
1938
1939 void UString::_cleanBuffer() const
1940 {
1941 if ( m_buffer.mVoidBuffer != nullptr ) {
1942 switch ( m_bufferType ) {
1943 case bt_string:
1944 delete m_buffer.mStrBuffer;
1945 break;
1946 case bt_wstring:
1947 delete m_buffer.mWStrBuffer;
1948 break;
1949 case bt_utf32string:
1950 delete m_buffer.mUTF32StrBuffer;
1951 break;
1952 case bt_none: // under the worse of circumstances, this is all we can do, and hope it works out
1953 //delete m_buffer.mVoidBuffer;
1954 // delete void* is undefined, don't do that
1955 assert("This should never happen - mVoidBuffer should never contain something if we "
1956 "don't know the type");
1957 break;
1958 }
1959 m_buffer.mVoidBuffer = nullptr;
1960 m_bufferSize = 0;
1961 m_bufferType = bt_none;
1962 }
1963 }
1964
1965 void UString::_getBufferStr() const
1966 {
1967 if ( m_bufferType != bt_string ) {
1968 _cleanBuffer();
1969 m_buffer.mStrBuffer = new std::string();
1970 m_bufferType = bt_string;
1971 }
1972 m_buffer.mStrBuffer->clear();
1973 }
1974
1975 void UString::_getBufferWStr() const
1976 {
1977 if ( m_bufferType != bt_wstring ) {
1978 _cleanBuffer();
1979 m_buffer.mWStrBuffer = new std::wstring();
1980 m_bufferType = bt_wstring;
1981 }
1982 m_buffer.mWStrBuffer->clear();
1983 }
1984
1985 void UString::_getBufferUTF32Str() const
1986 {
1987 if ( m_bufferType != bt_utf32string ) {
1988 _cleanBuffer();
1989 m_buffer.mUTF32StrBuffer = new utf32string();
1990 m_bufferType = bt_utf32string;
1991 }
1992 m_buffer.mUTF32StrBuffer->clear();
1993 }
1994
1995 void UString::_load_buffer_UTF8() const
1996 {
1997 _getBufferStr();
1998 std::string& buffer = ( *m_buffer.mStrBuffer );
1999 buffer.reserve( length() );
2000
2001 unsigned char utf8buf[6];
2002 char* charbuf = ( char* )utf8buf;
2003 unicode_char c;
2004 size_t len;
2005
2006 const_iterator i, ie = end();
2007 for ( i = begin(); i != ie; i.moveNext() ) {
2008 c = i.getCharacter();
2009 len = _utf32_to_utf8( c, utf8buf );
2010 size_t j = 0;
2011 while ( j < len )
2012 buffer.push_back( charbuf[j++] );
2013 }
2014 }
2015
2016 void UString::_load_buffer_WStr() const
2017 {
2018 _getBufferWStr();
2019 std::wstring& buffer = ( *m_buffer.mWStrBuffer );
2020 buffer.reserve( length() ); // may over reserve, but should be close enough
2021#ifdef WCHAR_UTF16 // wchar_t matches UTF-16
2022 const_iterator i, ie = end();
2023 for ( i = begin(); i != ie; ++i ) {
2024 buffer.push_back(( wchar_t )( *i ) );
2025 }
2026#else // wchar_t fits UTF-32
2027 unicode_char c;
2028 const_iterator i, ie = end();
2029 for ( i = begin(); i != ie; i.moveNext() ) {
2030 c = i.getCharacter();
2031 buffer.push_back(( wchar_t )c );
2032 }
2033#endif
2034 }
2035
2036 void UString::_load_buffer_UTF32() const
2037 {
2038 _getBufferUTF32Str();
2039 utf32string& buffer = ( *m_buffer.mUTF32StrBuffer );
2040 buffer.reserve( length() ); // may over reserve, but should be close enough
2041
2042 unicode_char c;
2043
2044 const_iterator i, ie = end();
2045 for ( i = begin(); i != ie; i.moveNext() ) {
2046 c = i.getCharacter();
2047 buffer.push_back( c );
2048 }
2049 }
2050
2051} // namespace MyGUI
base iterator class for UString
int _setCharacter(unicode_char uc)
void _become(const _base_iterator &i)
void _jump_to(size_type index)
unicode_char _getCharacter() const
const forward iterator for UString
friend size_type operator-(const _const_fwd_iterator &left, const _const_fwd_iterator &right)
difference operator
_const_fwd_iterator & operator=(const _const_fwd_iterator &i)
_const_fwd_iterator & operator+=(difference_type n)
addition assignment operator
_const_fwd_iterator & operator++()
pre-increment
_const_fwd_iterator & operator-=(difference_type n)
subtraction assignment operator
_const_fwd_iterator & moveNext()
advances to the next Unicode character, honoring surrogate pairs in the UTF-16 stream
const value_type & operator[](difference_type n) const
dereference at offset operator
_const_fwd_iterator & operator--()
pre-decrement
_const_fwd_iterator operator+(difference_type n)
addition operator
_const_fwd_iterator & movePrev()
rewinds to the previous Unicode character, honoring surrogate pairs in the UTF-16 stream
unicode_char getCharacter() const
Returns the Unicode value of the character at the current position (decodes surrogate pairs if needed...
const value_type & operator*() const
dereference operator
const reverse iterator for UString
_const_rev_iterator operator+(difference_type n)
addition operator
_const_rev_iterator & operator++()
pre-increment
_const_rev_iterator & operator+=(difference_type n)
addition assignment operator
const value_type & operator[](difference_type n) const
dereference at offset operator
_const_rev_iterator & operator--()
pre-decrement
friend size_type operator-(const _const_rev_iterator &left, const _const_rev_iterator &right)
difference operator
const value_type & operator*() const
dereference operator
_const_rev_iterator & operator-=(difference_type n)
subtraction assignment operator
forward iterator for UString
unicode_char getCharacter() const
Returns the Unicode value of the character at the current position (decodes surrogate pairs if needed...
_fwd_iterator & operator++()
pre-increment
_fwd_iterator operator-(difference_type n)
subtraction operator
_fwd_iterator & operator=(const _fwd_iterator &i)
int setCharacter(unicode_char uc)
Sets the Unicode value of the character at the current position (adding a surrogate pair if needed); ...
_fwd_iterator & moveNext()
advances to the next Unicode character, honoring surrogate pairs in the UTF-16 stream
_fwd_iterator & movePrev()
rewinds to the previous Unicode character, honoring surrogate pairs in the UTF-16 stream
_fwd_iterator & operator+=(difference_type n)
addition assignment operator
_fwd_iterator & operator-=(difference_type n)
subtraction assignment operator
_fwd_iterator operator+(difference_type n)
addition operator
value_type & operator*() const
dereference operator
_fwd_iterator & operator--()
pre-decrement
value_type & operator[](difference_type n) const
dereference at offset operator
reverse iterator for UString
_rev_iterator & operator+=(difference_type n)
addition assignment operator
_rev_iterator & operator--()
pre-decrement
value_type & operator*() const
dereference operator
_rev_iterator & operator++()
pre-increment
_rev_iterator operator-(difference_type n)
subtraction operator
_rev_iterator & operator-=(difference_type n)
subtraction assignment operator
value_type & operator[](difference_type n) const
dereference at offset operator
_rev_iterator operator+(difference_type n)
addition operator
A UTF-16 string with implicit conversion to/from std::string and std::wstring.
reverse_iterator rend()
returns a reverse iterator just past the beginning of the string
static size_type _verifyUTF8(const unsigned char *c_str)
verifies a UTF-8 stream, returning the total number of Unicode characters found
size_type length() const
Returns the number of code points in the current string.
iterator insert(iterator i, const code_point &ch)
inserts ch before the code point denoted by i
const wchar_t * asWStr_c_str() const
returns the current string in the native form of a nul-terminated wchar_t array
bool operator>(const UString &right) const
greater than operator
size_type size() const
Returns the number of code points in the current string.
static size_t _utf32_to_utf8(const unicode_char &in_uc, unsigned char out_cp[6])
writes the given UTF-32 uc_in to the buffer location out_cp using UTF-8 encoding, returns the number ...
const code_point * data() const
returns a pointer to the first character in the current string
UString()
default constructor, creates an empty string
static size_t _utf8_to_utf32(const unsigned char in_cp[6], unicode_char &out_uc)
converts the given UTF-8 character buffer to a single UTF-32 Unicode character, returns the number of...
const char * asUTF8_c_str() const
returns the current string in UTF-8 form as a nul-terminated char array
bool operator==(const UString &right) const
equality operator
bool operator!=(const UString &right) const
inequality operator
const unicode_char * asUTF32_c_str() const
returns the current string in UTF-32 form as a nul-terminated unicode_char array
size_type find(const UString &str, size_type index=0) const
returns the index of the first occurrence of str within the current string, starting at index; return...
bool operator>=(const UString &right) const
greater than or equal operator
size_type rfind(const UString &str, size_type index=0) const
returns the location of the first occurrence of str in the current string, doing a reverse search fro...
void reserve(size_type size)
sets the capacity of the string to at least size code points
static size_t _utf32_to_utf16(const unicode_char &in_uc, code_point out_cp[2])
writes the given UTF-32 uc_in to the buffer location out_cp using UTF-16 encoding,...
const utf32string & asUTF32() const
returns the current string in UTF-32 form within a utf32string
static size_t _utf16_to_utf32(const code_point in_cp[2], unicode_char &out_uc)
converts the given UTF-16 character buffer in_cp to a single UTF-32 Unicode character out_uc,...
void clear()
deletes all of the elements in the string
int setChar(size_type loc, unicode_char ch)
sets the value of the character at loc to the Unicode value ch (UTF-32)
~UString()
destructor
UString & assign(iterator start, iterator end)
gives the current string the values from start to end
int compare(const UString &str) const
compare str to the current string
bool operator<=(const UString &right) const
less than or equal operator
std::basic_string< unicode_char > utf32string
string type used for returning UTF-32 formatted data
static bool _utf16_surrogate_follow(code_point cp)
returns true if cp matches the signature of a surrogate pair following character
size_type find_first_of(const UString &str, size_type index=0, size_type num=npos) const
Returns the index of the first character within the current string that matches any character in str,...
static size_t _utf16_char_length(code_point cp)
estimates the number of UTF-16 code points in the sequence starting with cp
iterator erase(iterator loc)
removes the code point pointed to by loc, returning an iterator to the next character
std::basic_string< code_point > dstring
bool operator<(const UString &right) const
less than operator
static bool _utf8_start_char(unsigned char cp)
returns true if cp is the beginning of a UTF-8 sequence
uint16 code_point
a single UTF-16 code point
size_type find_last_of(const UString &str, size_type index=npos, size_type num=npos) const
returns the index of the first character within the current string that matches any character in str,...
static bool _utf16_surrogate_lead(code_point cp)
returns true if cp matches the signature of a surrogate pair lead character
iterator end()
returns an iterator just past the end of the string
unicode_char getChar(size_type loc) const
returns the data point loc evaluated as a UTF-32 value
static bool _utf16_independent_char(code_point cp)
returns true if cp does not match the signature for the lead or follow code point of a surrogate pair...
static const size_type npos
the usual constant representing: not found, no limit, etc
uint32 unicode_char
a single 32-bit Unicode character
UString & operator=(const UString &s)
assignment operator, implicitly casts all compatible types
_fwd_iterator iterator
iterator
const std::wstring & asWStr() const
returns the current string in the native form of std::wstring
bool inString(unicode_char ch) const
returns true if the given Unicode character ch is in this string
code_point & operator[](size_type index)
code point dereference operator
size_type find_first_not_of(const UString &str, size_type index=0, size_type num=npos) const
returns the index of the first character within the current string that does not match any character ...
UString & append(const UString &str)
appends str on to the end of the current string
const code_point * c_str() const
returns a pointer to the first character in the current string
code_point & at(size_type loc)
returns a reference to the element in the string at index loc
void resize(size_type num, const code_point &val=0)
changes the size of the string to size, filling in any new area with val
_const_fwd_iterator const_iterator
const iterator
reverse_iterator rbegin()
returns a reverse iterator to the last element of the string
size_t size_type
size type used to indicate string size and character positions within the string
UString & replace(size_type index1, size_type num1, const UString &str)
replaces up to num1 code points of the current string (starting at index1) with str
const std::string & asUTF8() const
returns the current string in UTF-8 form within a std::string
static size_t _utf8_char_length(unsigned char cp)
estimates the number of UTF-8 code points in the sequence starting with cp
size_type length_Characters() const
Returns the number of Unicode characters in the string.
void push_back(unicode_char val)
appends val to the end of the string
iterator begin()
returns an iterator to the first element of the string
size_type find_last_not_of(const UString &str, size_type index=npos, size_type num=npos) const
returns the index of the last character within the current string that does not match any character i...
size_type max_size() const
returns the maximum number of UTF-16 code points that the string can hold
UString substr(size_type index, size_type num=npos) const
returns a substring of the current string, starting at index, and num characters long.
void swap(UString &from)
exchanges the elements of the current string with those of from
size_type capacity() const
returns the number of elements that the string can hold before it will need to allocate more space
bool empty() const
returns true if the string has no elements, false otherwise
float len(float x, float y)