MyGUI 3.4.1
MyGUI_UString.cpp
Go to the documentation of this file.
1/*
2 * This source file is part of MyGUI. For the latest info, see http://mygui.info/
3 * Distributed under the MIT License
4 * (See accompanying file COPYING.MIT or copy at http://opensource.org/licenses/MIT)
5 */
6
7#include "MyGUI_Precompiled.h"
8#include "MyGUI_UString.h"
9
10namespace MyGUI
11{
12
13 //--------------------------------------------------------------------------
15 {
16 mString = nullptr;
17 }
18 //--------------------------------------------------------------------------
20 {
21 mIter += c;
22 }
23 //--------------------------------------------------------------------------
25 {
26 mIter -= c;
27 }
28 //--------------------------------------------------------------------------
30 {
31 mIter = i.mIter;
32 mString = i.mString;
33 }
34 //--------------------------------------------------------------------------
36 {
37 return mIter == mString->mData.begin();
38 }
39 //--------------------------------------------------------------------------
41 {
42 return mIter == mString->mData.end();
43 }
44 //--------------------------------------------------------------------------
46 {
47 return mIter - mString->mData.begin();
48 }
49 //--------------------------------------------------------------------------
51 {
52 mIter = mString->mData.begin() + index;
53 }
54 //--------------------------------------------------------------------------
56 {
57 size_type current_index = _get_index();
58 return mString->getChar( current_index );
59 }
60 //--------------------------------------------------------------------------
62 {
63 size_type current_index = _get_index();
64 int change = mString->setChar( current_index, uc );
65 _jump_to( current_index );
66 return change;
67 }
68 //--------------------------------------------------------------------------
70 {
71 _seekFwd( 1 ); // move 1 code point forward
72 if ( _test_end() ) return; // exit if we hit the end
73 if ( _utf16_surrogate_follow( mIter[0] ) ) {
74 // landing on a follow code point means we might be part of a bigger character
75 // so we test for that
76 code_point lead_half = 0;
77 //NB: we can't possibly be at the beginning here, so no need to test
78 lead_half = mIter[-1]; // check the previous code point to see if we're part of a surrogate pair
79 if ( _utf16_surrogate_lead( lead_half ) ) {
80 _seekFwd( 1 ); // if so, then advance 1 more code point
81 }
82 }
83 }
84 //--------------------------------------------------------------------------
86 {
87 _seekRev( 1 ); // move 1 code point backwards
88 if ( _test_begin() ) return; // exit if we hit the beginning
89 if ( _utf16_surrogate_follow( mIter[0] ) ) {
90 // landing on a follow code point means we might be part of a bigger character
91 // so we test for that
92 code_point lead_half = 0;
93 lead_half = mIter[-1]; // check the previous character to see if we're part of a surrogate pair
94 if ( _utf16_surrogate_lead( lead_half ) ) {
95 _seekRev( 1 ); // if so, then rewind 1 more code point
96 }
97 }
98 }
99 //--------------------------------------------------------------------------
100 //--------------------------------------------------------------------------
101 //--------------------------------------------------------------------------
102 //--------------------------------------------------------------------------
104 //--------------------------------------------------------------------------
106 {
107 _become( i );
108 }
109 //--------------------------------------------------------------------------
111 {
112 _become( i );
113 return *this;
114 }
115 //--------------------------------------------------------------------------
117 {
118 _seekFwd( 1 );
119 return *this;
120 }
121 //--------------------------------------------------------------------------
123 {
124 _fwd_iterator tmp( *this );
125 _seekFwd( 1 );
126 return tmp;
127 }
128 //--------------------------------------------------------------------------
130 {
131 _seekRev( 1 );
132 return *this;
133 }
134 //--------------------------------------------------------------------------
136 {
137 _fwd_iterator tmp( *this );
138 _seekRev( 1 );
139 return tmp;
140 }
141 //--------------------------------------------------------------------------
143 {
144 _fwd_iterator tmp( *this );
145 if ( n < 0 )
146 tmp._seekRev( -n );
147 else
148 tmp._seekFwd( n );
149 return tmp;
150 }
151 //--------------------------------------------------------------------------
153 {
154 _fwd_iterator tmp( *this );
155 if ( n < 0 )
156 tmp._seekFwd( -n );
157 else
158 tmp._seekRev( n );
159 return tmp;
160 }
161 //--------------------------------------------------------------------------
163 {
164 if ( n < 0 )
165 _seekRev( -n );
166 else
167 _seekFwd( n );
168 return *this;
169 }
170 //--------------------------------------------------------------------------
172 {
173 if ( n < 0 )
174 _seekFwd( -n );
175 else
176 _seekRev( n );
177 return *this;
178 }
179 //--------------------------------------------------------------------------
181 {
182 return *mIter;
183 }
184 //--------------------------------------------------------------------------
186 {
187 _fwd_iterator tmp( *this );
188 tmp += n;
189 return *tmp;
190 }
191 //--------------------------------------------------------------------------
193 {
194 _moveNext();
195 return *this;
196 }
197 //--------------------------------------------------------------------------
199 {
200 _movePrev();
201 return *this;
202 }
203 //--------------------------------------------------------------------------
205 {
206 return _getCharacter();
207 }
208 //--------------------------------------------------------------------------
210 {
211 return _setCharacter( uc );
212 }
213 //--------------------------------------------------------------------------
214 //--------------------------------------------------------------------------
215 //--------------------------------------------------------------------------
216 //--------------------------------------------------------------------------
218 //--------------------------------------------------------------------------
220 {
221 _become( i );
222 }
223 //--------------------------------------------------------------------------
225 {
226 _become( i );
227 return *this;
228 }
229 //--------------------------------------------------------------------------
231 {
232 _become( i );
233 }
234 //--------------------------------------------------------------------------
236 {
237 _seekFwd( 1 );
238 return *this;
239 }
240 //--------------------------------------------------------------------------
242 {
243 _const_fwd_iterator tmp( *this );
244 _seekFwd( 1 );
245 return tmp;
246 }
247 //--------------------------------------------------------------------------
249 {
250 _seekRev( 1 );
251 return *this;
252 }
253 //--------------------------------------------------------------------------
255 {
256 _const_fwd_iterator tmp( *this );
257 _seekRev( 1 );
258 return tmp;
259 }
260 //--------------------------------------------------------------------------
262 {
263 _const_fwd_iterator tmp( *this );
264 if ( n < 0 )
265 tmp._seekRev( -n );
266 else
267 tmp._seekFwd( n );
268 return tmp;
269 }
270 //--------------------------------------------------------------------------
272 {
273 _const_fwd_iterator tmp( *this );
274 if ( n < 0 )
275 tmp._seekFwd( -n );
276 else
277 tmp._seekRev( n );
278 return tmp;
279 }
280 //--------------------------------------------------------------------------
282 {
283 if ( n < 0 )
284 _seekRev( -n );
285 else
286 _seekFwd( n );
287 return *this;
288 }
289 //--------------------------------------------------------------------------
291 {
292 if ( n < 0 )
293 _seekFwd( -n );
294 else
295 _seekRev( n );
296 return *this;
297 }
298 //--------------------------------------------------------------------------
300 {
301 return *mIter;
302 }
303 //--------------------------------------------------------------------------
305 {
306 _const_fwd_iterator tmp( *this );
307 tmp += n;
308 return *tmp;
309 }
310 //--------------------------------------------------------------------------
312 {
313 _moveNext();
314 return *this;
315 }
316 //--------------------------------------------------------------------------
318 {
319 _movePrev();
320 return *this;
321 }
322 //--------------------------------------------------------------------------
324 {
325 return _getCharacter();
326 }
327 //--------------------------------------------------------------------------
328 //--------------------------------------------------------------------------
329 //--------------------------------------------------------------------------
330 //--------------------------------------------------------------------------
332 //--------------------------------------------------------------------------
334 {
335 _become( i );
336 }
337 //--------------------------------------------------------------------------
339 {
340 _seekRev( 1 );
341 return *this;
342 }
343 //--------------------------------------------------------------------------
345 {
346 _rev_iterator tmp( *this );
347 _seekRev( 1 );
348 return tmp;
349 }
350 //--------------------------------------------------------------------------
352 {
353 _seekFwd( 1 );
354 return *this;
355 }
356 //--------------------------------------------------------------------------
358 {
359 _rev_iterator tmp( *this );
360 _seekFwd( 1 );
361 return tmp;
362 }
363 //--------------------------------------------------------------------------
365 {
366 _rev_iterator tmp( *this );
367 if ( n < 0 )
368 tmp._seekFwd( -n );
369 else
370 tmp._seekRev( n );
371 return tmp;
372 }
373 //--------------------------------------------------------------------------
375 {
376 _rev_iterator tmp( *this );
377 if ( n < 0 )
378 tmp._seekRev( -n );
379 else
380 tmp._seekFwd( n );
381 return tmp;
382 }
383 //--------------------------------------------------------------------------
385 {
386 if ( n < 0 )
387 _seekFwd( -n );
388 else
389 _seekRev( n );
390 return *this;
391 }
392 //--------------------------------------------------------------------------
394 {
395 if ( n < 0 )
396 _seekRev( -n );
397 else
398 _seekFwd( n );
399 return *this;
400 }
401 //--------------------------------------------------------------------------
403 {
404 return mIter[-1];
405 }
406 //--------------------------------------------------------------------------
408 {
409 _rev_iterator tmp( *this );
410 tmp -= n;
411 return *tmp;
412 }
413 //--------------------------------------------------------------------------
414 //--------------------------------------------------------------------------
415 //--------------------------------------------------------------------------
416 //--------------------------------------------------------------------------
418 //--------------------------------------------------------------------------
420 {
421 _become( i );
422 }
423 //--------------------------------------------------------------------------
425 {
426 _become( i );
427 }
428 //--------------------------------------------------------------------------
430 {
431 _seekRev( 1 );
432 return *this;
433 }
434 //--------------------------------------------------------------------------
436 {
437 _const_rev_iterator tmp( *this );
438 _seekRev( 1 );
439 return tmp;
440 }
441 //--------------------------------------------------------------------------
443 {
444 _seekFwd( 1 );
445 return *this;
446 }
447 //--------------------------------------------------------------------------
449 {
450 _const_rev_iterator tmp( *this );
451 _seekFwd( 1 );
452 return tmp;
453 }
454 //--------------------------------------------------------------------------
456 {
457 _const_rev_iterator tmp( *this );
458 if ( n < 0 )
459 tmp._seekFwd( -n );
460 else
461 tmp._seekRev( n );
462 return tmp;
463 }
464 //--------------------------------------------------------------------------
466 {
467 _const_rev_iterator tmp( *this );
468 if ( n < 0 )
469 tmp._seekRev( -n );
470 else
471 tmp._seekFwd( n );
472 return tmp;
473 }
474 //--------------------------------------------------------------------------
476 {
477 if ( n < 0 )
478 _seekFwd( -n );
479 else
480 _seekRev( n );
481 return *this;
482 }
483 //--------------------------------------------------------------------------
485 {
486 if ( n < 0 )
487 _seekRev( -n );
488 else
489 _seekFwd( n );
490 return *this;
491 }
492 //--------------------------------------------------------------------------
494 {
495 return mIter[-1];
496 }
497 //--------------------------------------------------------------------------
499 {
500 _const_rev_iterator tmp( *this );
501 tmp -= n;
502 return *tmp;
503 }
504 //--------------------------------------------------------------------------
505 //--------------------------------------------------------------------------
506 //--------------------------------------------------------------------------
507 //--------------------------------------------------------------------------
509 {
510 _init();
511 }
512 //--------------------------------------------------------------------------
514 {
515 _init();
516 mData = copy.mData;
517 }
518 //--------------------------------------------------------------------------
520 {
521 _init();
522 assign( length, ch );
523 }
524 //--------------------------------------------------------------------------
526 {
527 _init();
528 assign( str );
529 }
530 //--------------------------------------------------------------------------
532 {
533 _init();
534 assign( str, length );
535 }
536 //--------------------------------------------------------------------------
538 {
539 _init();
540 assign( str, index, length );
541 }
542 //--------------------------------------------------------------------------
543#if MYGUI_IS_NATIVE_WCHAR_T
544 UString::UString( const wchar_t* w_str )
545 {
546 _init();
547 assign( w_str );
548 }
549 //--------------------------------------------------------------------------
550 UString::UString( const wchar_t* w_str, size_type length )
551 {
552 _init();
553 assign( w_str, length );
554 }
555#endif
556 //--------------------------------------------------------------------------
557 UString::UString( const std::wstring& wstr )
558 {
559 _init();
560 assign( wstr );
561 }
562 //--------------------------------------------------------------------------
563 UString::UString( const char* c_str )
564 {
565 _init();
566 assign( c_str );
567 }
568 //--------------------------------------------------------------------------
570 {
571 _init();
572 assign( c_str, length );
573 }
574 //--------------------------------------------------------------------------
575 UString::UString( const std::string& str )
576 {
577 _init();
578 assign( str );
579 }
580 //--------------------------------------------------------------------------
582 {
583 _init();
584 assign( str );
585 }
586 //--------------------------------------------------------------------------
588 {
589 _cleanBuffer();
590 }
591 //--------------------------------------------------------------------------
593 {
594 return mData.size();
595 }
596 //--------------------------------------------------------------------------
598 {
599 return size();
600 }
601 //--------------------------------------------------------------------------
603 {
604 const_iterator i = begin(), ie = end();
605 size_type c = 0;
606 while ( i != ie ) {
607 i.moveNext();
608 ++c;
609 }
610 return c;
611 }
612 //--------------------------------------------------------------------------
614 {
615 return mData.max_size();
616 }
617 //--------------------------------------------------------------------------
619 {
620 mData.reserve( size );
621 }
622 //--------------------------------------------------------------------------
623 void UString::resize( size_type num, const code_point& val /*= 0 */ )
624 {
625 mData.resize( num, val );
626 }
627 //--------------------------------------------------------------------------
628 void UString::swap( UString& from )
629 {
630 mData.swap( from.mData );
631 }
632 //--------------------------------------------------------------------------
633 bool UString::empty() const
634 {
635 return mData.empty();
636 }
637 //--------------------------------------------------------------------------
639 {
640 return mData.c_str();
641 }
642 //--------------------------------------------------------------------------
644 {
645 return c_str();
646 }
647 //--------------------------------------------------------------------------
649 {
650 return mData.capacity();
651 }
652 //--------------------------------------------------------------------------
654 {
655 mData.clear();
656 }
657 //--------------------------------------------------------------------------
658 UString UString::substr( size_type index, size_type num /*= npos */ ) const
659 {
660 // this could avoid the extra copy if we used a private specialty constructor
661 dstring data = mData.substr( index, num );
662 UString tmp;
663 tmp.mData.swap( data );
664 return tmp;
665 }
666 //--------------------------------------------------------------------------
668 {
669 code_point cp[2];
670 size_t c = _utf32_to_utf16( val, cp );
671 if ( c > 0 ) push_back( cp[0] );
672 if ( c > 1 ) push_back( cp[1] );
673 }
674 //--------------------------------------------------------------------------
675#if MYGUI_IS_NATIVE_WCHAR_T
676 void UString::push_back( wchar_t val )
677 {
678 // we do this because the Unicode method still preserves UTF-16 code points
679 mData.push_back( static_cast<code_point>( val ) );
680 }
681#endif
682 //--------------------------------------------------------------------------
684 {
685 mData.push_back( val );
686 }
687
688 void UString::push_back( char val )
689 {
690 mData.push_back( static_cast<code_point>( val ) );
691 }
692
694 {
695 const_iterator i, ie = end();
696 for ( i = begin(); i != ie; i.moveNext() ) {
697 if ( i.getCharacter() == ch )
698 return true;
699 }
700 return false;
701 }
702
703 const std::string& UString::asUTF8() const
704 {
705 _load_buffer_UTF8();
706 return *m_buffer.mStrBuffer;
707 }
708
709 const char* UString::asUTF8_c_str() const
710 {
711 _load_buffer_UTF8();
712 return m_buffer.mStrBuffer->c_str();
713 }
714
716 {
717 _load_buffer_UTF32();
718 return *m_buffer.mUTF32StrBuffer;
719 }
720
722 {
723 _load_buffer_UTF32();
724 return m_buffer.mUTF32StrBuffer->c_str();
725 }
726
727 const std::wstring& UString::asWStr() const
728 {
729 _load_buffer_WStr();
730 return *m_buffer.mWStrBuffer;
731 }
732
733 const wchar_t* UString::asWStr_c_str() const
734 {
735 _load_buffer_WStr();
736 return m_buffer.mWStrBuffer->c_str();
737 }
738
740 {
741 return mData.at( loc );
742 }
743
745 {
746 return mData.at( loc );
747 }
748
750 {
751 const code_point* ptr = c_str();
752 unicode_char uc;
753 size_t l = _utf16_char_length( ptr[loc] );
754 code_point cp[2] = { /* blame the code beautifier */
755 0, 0
756 };
757 cp[0] = ptr[loc];
758
759 if ( l == 2 && ( loc + 1 ) < mData.length() ) {
760 cp[1] = ptr[loc+1];
761 }
762 _utf16_to_utf32( cp, uc );
763 return uc;
764 }
765
767 {
768 code_point cp[2] = { /* blame the code beautifier */
769 0, 0
770 };
771 size_t l = _utf32_to_utf16( ch, cp );
772 unicode_char existingChar = getChar( loc );
773 size_t existingSize = _utf16_char_length( existingChar );
774 size_t newSize = _utf16_char_length( ch );
775
776 if ( newSize > existingSize ) {
777 at( loc ) = cp[0];
778 insert( loc + 1, 1, cp[1] );
779 return 1;
780 }
781 if ( newSize < existingSize ) {
782 erase( loc, 1 );
783 at( loc ) = cp[0];
784 return -1;
785 }
786
787 // newSize == existingSize
788 at( loc ) = cp[0];
789 if ( l == 2 ) at( loc + 1 ) = cp[1];
790 return 0;
791 }
792
794 {
795 iterator i;
796 i.mIter = mData.begin();
797 i.mString = this;
798 return i;
799 }
800
802 {
804 i.mIter = const_cast<UString*>( this )->mData.begin();
805 i.mString = const_cast<UString*>( this );
806 return i;
807 }
808
810 {
811 iterator i;
812 i.mIter = mData.end();
813 i.mString = this;
814 return i;
815 }
816
818 {
820 i.mIter = const_cast<UString*>( this )->mData.end();
821 i.mString = const_cast<UString*>( this );
822 return i;
823 }
824
826 {
828 i.mIter = mData.end();
829 i.mString = this;
830 return i;
831 }
832
834 {
836 i.mIter = const_cast<UString*>( this )->mData.end();
837 i.mString = const_cast<UString*>( this );
838 return i;
839 }
840
842 {
844 i.mIter = mData.begin();
845 i.mString = this;
846 return i;
847 }
848
850 {
852 i.mIter = const_cast<UString*>( this )->mData.begin();
853 i.mString = const_cast<UString*>( this );
854 return i;
855 }
856
858 {
859 mData.assign( start.mIter, end.mIter );
860 return *this;
861 }
862
864 {
865 mData.assign( str.mData );
866 return *this;
867 }
868
870 {
871 mData.assign( str );
872 return *this;
873 }
874
876 {
877 mData.assign( str, num );
878 return *this;
879 }
880
882 {
883 mData.assign( str.mData, index, len );
884 return *this;
885 }
886
888 {
889 mData.assign( num, ch );
890 return *this;
891 }
892
893 UString& UString::assign( const std::wstring& wstr )
894 {
895 mData.clear();
896 mData.reserve( wstr.length() ); // best guess bulk allocate
897#ifdef WCHAR_UTF16 // if we're already working in UTF-16, this is easy
898 code_point tmp;
899 std::wstring::const_iterator i, ie = wstr.end();
900 for ( i = wstr.begin(); i != ie; i++ ) {
901 tmp = static_cast<code_point>( *i );
902 mData.push_back( tmp );
903 }
904#else // otherwise we do it the safe way (which is still 100% safe to pass UTF-16 through, just slower)
905 code_point cp[3] = {0, 0, 0};
906 unicode_char tmp;
907 std::wstring::const_iterator i, ie = wstr.end();
908 for ( i = wstr.begin(); i != ie; i++ ) {
909 tmp = static_cast<unicode_char>( *i );
910 size_t l = _utf32_to_utf16( tmp, cp );
911 if ( l > 0 ) mData.push_back( cp[0] );
912 if ( l > 1 ) mData.push_back( cp[1] );
913 }
914#endif
915 return *this;
916 }
917
918#if MYGUI_IS_NATIVE_WCHAR_T
919 UString& UString::assign( const wchar_t* w_str )
920 {
921 std::wstring tmp;
922 tmp.assign( w_str );
923 return assign( tmp );
924 }
925
926 UString& UString::assign( const wchar_t* w_str, size_type num )
927 {
928 std::wstring tmp;
929 tmp.assign( w_str, num );
930 return assign( tmp );
931 }
932#endif
933
934 UString& UString::assign( const std::string& str )
935 {
936 size_type len = _verifyUTF8( str );
937 clear(); // empty our contents, if there are any
938 reserve( len ); // best guess bulk capacity growth
939
940 // This is a 3 step process, converting each byte in the UTF-8 stream to UTF-32,
941 // then converting it to UTF-16, then finally appending the data buffer
942
943 unicode_char uc; // temporary Unicode character buffer
944 unsigned char utf8buf[7]; // temporary UTF-8 buffer
945 utf8buf[6] = 0;
946 size_t utf8len; // UTF-8 length
947 code_point utf16buff[3]; // temporary UTF-16 buffer
948 utf16buff[2] = 0;
949 size_t utf16len; // UTF-16 length
950
951 std::string::const_iterator i, ie = str.end();
952 for ( i = str.begin(); i != ie; i++ ) {
953 utf8len = _utf8_char_length( static_cast<unsigned char>( *i ) ); // estimate bytes to load
954 for ( size_t j = 0; j < utf8len; j++ ) { // load the needed UTF-8 bytes
955 utf8buf[j] = ( static_cast<unsigned char>( *( i + j ) ) ); // we don't increment 'i' here just in case the estimate is wrong (shouldn't happen, but we're being careful)
956 }
957 utf8buf[utf8len] = 0; // nul terminate so we throw an exception before running off the end of the buffer
958 utf8len = _utf8_to_utf32( utf8buf, uc ); // do the UTF-8 -> UTF-32 conversion
959 i += utf8len - 1; // we subtract 1 for the increment of the 'for' loop
960
961 utf16len = _utf32_to_utf16( uc, utf16buff ); // UTF-32 -> UTF-16 conversion
962 append( utf16buff, utf16len ); // append the characters to the string
963 }
964 return *this;
965 }
966
968 {
969 for (const auto& character : str)
970 {
971 push_back(character);
972 }
973 return *this;
974 }
975
977 {
978 std::string tmp( c_str );
979 return assign( tmp );
980 }
981
983 {
984 std::string tmp;
985 tmp.assign( c_str, num );
986 return assign( tmp );
987 }
988
990 {
991 mData.append( str.mData );
992 return *this;
993 }
994
996 {
997 mData.append( str );
998 return *this;
999 }
1000
1002 {
1003 mData.append( str.mData, index, len );
1004 return *this;
1005 }
1006
1008 {
1009 mData.append( str, num );
1010 return *this;
1011 }
1012
1014 {
1015 mData.append( num, ch );
1016 return *this;
1017 }
1018
1020 {
1021 mData.append( start.mIter, end.mIter );
1022 return *this;
1023 }
1024
1025#if MYGUI_IS_NATIVE_WCHAR_T
1026 UString& UString::append( const wchar_t* w_str, size_type num )
1027 {
1028 std::wstring tmp( w_str, num );
1029 return append( tmp );
1030 }
1031
1032 UString& UString::append( size_type num, wchar_t ch )
1033 {
1034 return append( num, static_cast<unicode_char>( ch ) );
1035 }
1036#endif
1038 {
1039 UString tmp( c_str, num );
1040 append( tmp );
1041 return *this;
1042 }
1043
1045 {
1046 append( num, static_cast<code_point>( ch ) );
1047 return *this;
1048 }
1049
1051 {
1052 code_point cp[2] = {0, 0};
1053 if ( _utf32_to_utf16( ch, cp ) == 2 ) {
1054 for ( size_type i = 0; i < num; i++ ) {
1055 append( 1, cp[0] );
1056 append( 1, cp[1] );
1057 }
1058 } else {
1059 for ( size_type i = 0; i < num; i++ ) {
1060 append( 1, cp[0] );
1061 }
1062 }
1063 return *this;
1064 }
1065
1067 {
1068 iterator ret;
1069 ret.mIter = mData.insert( i.mIter, ch );
1070 ret.mString = this;
1071 return ret;
1072 }
1073
1075 {
1076 mData.insert( index, str.mData );
1077 return *this;
1078 }
1079
1080 UString& UString::insert( size_type index1, const UString& str, size_type index2, size_type num )
1081 {
1082 mData.insert( index1, str.mData, index2, num );
1083 return *this;
1084 }
1085
1087 {
1088 mData.insert( i.mIter, start.mIter, end.mIter );
1089 }
1090
1092 {
1093 mData.insert( index, str, num );
1094 return *this;
1095 }
1096
1097#if MYGUI_IS_NATIVE_WCHAR_T
1098 UString& UString::insert( size_type index, const wchar_t* w_str, size_type num )
1099 {
1100 UString tmp( w_str, num );
1101 insert( index, tmp );
1102 return *this;
1103 }
1104#endif
1105
1107 {
1108 UString tmp( c_str, num );
1109 insert( index, tmp );
1110 return *this;
1111 }
1112
1114 {
1115 mData.insert( index, num, ch );
1116 return *this;
1117 }
1118
1119#if MYGUI_IS_NATIVE_WCHAR_T
1120 UString& UString::insert( size_type index, size_type num, wchar_t ch )
1121 {
1122 insert( index, num, static_cast<unicode_char>( ch ) );
1123 return *this;
1124 }
1125#endif
1126
1128 {
1129 insert( index, num, static_cast<code_point>( ch ) );
1130 return *this;
1131 }
1132
1134 {
1135 code_point cp[3] = {0, 0, 0};
1136 size_t l = _utf32_to_utf16( ch, cp );
1137 if ( l == 1 ) {
1138 return insert( index, num, cp[0] );
1139 }
1140 for ( size_type c = 0; c < num; c++ ) {
1141 // insert in reverse order to preserve ordering after insert
1142 insert( index, 1, cp[1] );
1143 insert( index, 1, cp[0] );
1144 }
1145 return *this;
1146 }
1147
1149 {
1150 mData.insert( i.mIter, num, ch );
1151 }
1152#if MYGUI_IS_NATIVE_WCHAR_T
1153 void UString::insert( iterator i, size_type num, const wchar_t& ch )
1154 {
1155 insert( i, num, static_cast<unicode_char>( ch ) );
1156 }
1157#endif
1158
1159 void UString::insert( iterator i, size_type num, const char& ch )
1160 {
1161 insert( i, num, static_cast<code_point>( ch ) );
1162 }
1163
1165 {
1166 code_point cp[3] = {0, 0, 0};
1167 size_t l = _utf32_to_utf16( ch, cp );
1168 if ( l == 1 ) {
1169 insert( i, num, cp[0] );
1170 } else {
1171 for ( size_type c = 0; c < num; c++ ) {
1172 // insert in reverse order to preserve ordering after insert
1173 insert( i, 1, cp[1] );
1174 insert( i, 1, cp[0] );
1175 }
1176 }
1177 }
1178
1180 {
1181 iterator ret;
1182 ret.mIter = mData.erase( loc.mIter );
1183 ret.mString = this;
1184 return ret;
1185 }
1186
1188 {
1189 iterator ret;
1190 ret.mIter = mData.erase( start.mIter, end.mIter );
1191 ret.mString = this;
1192 return ret;
1193 }
1194
1195 UString& UString::erase( size_type index /*= 0*/, size_type num /*= npos */ )
1196 {
1197 if ( num == npos )
1198 mData.erase( index );
1199 else
1200 mData.erase( index, num );
1201 return *this;
1202 }
1203
1205 {
1206 mData.replace( index1, num1, str.mData, 0, npos );
1207 return *this;
1208 }
1209
1211 {
1212 mData.replace( index1, num1, str.mData, 0, num2 );
1213 return *this;
1214 }
1215
1216 UString& UString::replace( size_type index1, size_type num1, const UString& str, size_type index2, size_type num2 )
1217 {
1218 mData.replace( index1, num1, str.mData, index2, num2 );
1219 return *this;
1220 }
1221
1222 UString& UString::replace( iterator start, iterator end, const UString& str, size_type num /*= npos */ )
1223 {
1224 _const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload
1225
1226 size_type index1 = begin() - st;
1227 size_type num1 = end - st;
1228 return replace( index1, num1, str, 0, num );
1229 }
1230
1232 {
1233 mData.replace( index, num1, num2, ch );
1234 return *this;
1235 }
1236
1238 {
1239 _const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload
1240
1241 size_type index1 = begin() - st;
1242 size_type num1 = end - st;
1243 return replace( index1, num1, num, ch );
1244 }
1245
1246 int UString::compare( const UString& str ) const
1247 {
1248 return mData.compare( str.mData );
1249 }
1250
1251 int UString::compare( const code_point* str ) const
1252 {
1253 return mData.compare( str );
1254 }
1255
1256 int UString::compare( size_type index, size_type length, const UString& str ) const
1257 {
1258 return mData.compare( index, length, str.mData );
1259 }
1260
1261 int UString::compare( size_type index, size_type length, const UString& str, size_type index2, size_type length2 ) const
1262 {
1263 return mData.compare( index, length, str.mData, index2, length2 );
1264 }
1265
1266 int UString::compare( size_type index, size_type length, const code_point* str, size_type length2 ) const
1267 {
1268 return mData.compare( index, length, str, length2 );
1269 }
1270
1271#if MYGUI_IS_NATIVE_WCHAR_T
1272 int UString::compare( size_type index, size_type length, const wchar_t* w_str, size_type length2 ) const
1273 {
1274 UString tmp( w_str, length2 );
1275 return compare( index, length, tmp );
1276 }
1277#endif
1278
1279 int UString::compare( size_type index, size_type length, const char* c_str, size_type length2 ) const
1280 {
1281 UString tmp( c_str, length2 );
1282 return compare( index, length, tmp );
1283 }
1284
1285 UString::size_type UString::find( const UString& str, size_type index /*= 0 */ ) const
1286 {
1287 return mData.find( str.c_str(), index );
1288 }
1289
1291 {
1292 UString tmp( cp_str );
1293 return mData.find( tmp.c_str(), index, length );
1294 }
1295
1297 {
1298 UString tmp( c_str );
1299 return mData.find( tmp.c_str(), index, length );
1300 }
1301
1302#if MYGUI_IS_NATIVE_WCHAR_T
1303 UString::size_type UString::find( const wchar_t* w_str, size_type index, size_type length ) const
1304 {
1305 UString tmp( w_str );
1306 return mData.find( tmp.c_str(), index, length );
1307 }
1308#endif
1309
1310 UString::size_type UString::find( char ch, size_type index /*= 0 */ ) const
1311 {
1312 return find( static_cast<code_point>( ch ), index );
1313 }
1314
1316 {
1317 return mData.find( ch, index );
1318 }
1319
1320#if MYGUI_IS_NATIVE_WCHAR_T
1321 UString::size_type UString::find( wchar_t ch, size_type index /*= 0 */ ) const
1322 {
1323 return find( static_cast<unicode_char>( ch ), index );
1324 }
1325#endif
1326
1328 {
1329 code_point cp[3] = {0, 0, 0};
1330 size_t l = _utf32_to_utf16( ch, cp );
1331 return find( UString( cp, l ), index );
1332 }
1333
1334 UString::size_type UString::rfind( const UString& str, size_type index /*= 0 */ ) const
1335 {
1336 return mData.rfind( str.c_str(), index );
1337 }
1338
1340 {
1341 UString tmp( cp_str );
1342 return mData.rfind( tmp.c_str(), index, num );
1343 }
1344
1346 {
1347 UString tmp( c_str );
1348 return mData.rfind( tmp.c_str(), index, num );
1349 }
1350
1351#if MYGUI_IS_NATIVE_WCHAR_T
1352 UString::size_type UString::rfind( const wchar_t* w_str, size_type index, size_type num ) const
1353 {
1354 UString tmp( w_str );
1355 return mData.rfind( tmp.c_str(), index, num );
1356 }
1357#endif
1358
1359 UString::size_type UString::rfind( char ch, size_type index /*= 0 */ ) const
1360 {
1361 return rfind( static_cast<code_point>( ch ), index );
1362 }
1363
1365 {
1366 return mData.rfind( ch, index );
1367 }
1368
1369#if MYGUI_IS_NATIVE_WCHAR_T
1370 UString::size_type UString::rfind( wchar_t ch, size_type index /*= 0 */ ) const
1371 {
1372 return rfind( static_cast<unicode_char>( ch ), index );
1373 }
1374#endif
1375
1377 {
1378 code_point cp[3] = {0, 0, 0};
1379 size_t l = _utf32_to_utf16( ch, cp );
1380 return rfind( UString( cp, l ), index );
1381 }
1382
1383 UString::size_type UString::find_first_of( const UString &str, size_type index /*= 0*/, size_type num /*= npos */ ) const
1384 {
1385 size_type i = 0;
1386 const size_type len = length();
1387 while ( i < num && ( index + i ) < len ) {
1388 unicode_char ch = getChar( index + i );
1389 if ( str.inString( ch ) )
1390 return index + i;
1391 i += _utf16_char_length( ch ); // increment by the Unicode character length
1392 }
1393 return npos;
1394 }
1395
1397 {
1398 UString tmp;
1399 tmp.assign( 1, ch );
1400 return find_first_of( tmp, index );
1401 }
1402
1403 UString::size_type UString::find_first_of( char ch, size_type index /*= 0 */ ) const
1404 {
1405 return find_first_of( static_cast<code_point>( ch ), index );
1406 }
1407
1408#if MYGUI_IS_NATIVE_WCHAR_T
1409 UString::size_type UString::find_first_of( wchar_t ch, size_type index /*= 0 */ ) const
1410 {
1411 return find_first_of( static_cast<unicode_char>( ch ), index );
1412 }
1413#endif
1414
1416 {
1417 code_point cp[3] = {0, 0, 0};
1418 size_t l = _utf32_to_utf16( ch, cp );
1419 return find_first_of( UString( cp, l ), index );
1420 }
1421
1422 UString::size_type UString::find_first_not_of( const UString& str, size_type index /*= 0*/, size_type num /*= npos */ ) const
1423 {
1424 size_type i = 0;
1425 const size_type len = length();
1426 while ( i < num && ( index + i ) < len ) {
1427 unicode_char ch = getChar( index + i );
1428 if ( !str.inString( ch ) )
1429 return index + i;
1430 i += _utf16_char_length( ch ); // increment by the Unicode character length
1431 }
1432 return npos;
1433 }
1434
1436 {
1437 UString tmp;
1438 tmp.assign( 1, ch );
1439 return find_first_not_of( tmp, index );
1440 }
1441
1443 {
1444 return find_first_not_of( static_cast<code_point>( ch ), index );
1445 }
1446
1447#if MYGUI_IS_NATIVE_WCHAR_T
1448 UString::size_type UString::find_first_not_of( wchar_t ch, size_type index /*= 0 */ ) const
1449 {
1450 return find_first_not_of( static_cast<unicode_char>( ch ), index );
1451 }
1452#endif
1453
1455 {
1456 code_point cp[3] = {0, 0, 0};
1457 size_t l = _utf32_to_utf16( ch, cp );
1458 return find_first_not_of( UString( cp, l ), index );
1459 }
1460
1461 UString::size_type UString::find_last_of( const UString& str, size_type index /*= npos*/, size_type num /*= npos */ ) const
1462 {
1463 size_type i = 0;
1464 const size_type len = length();
1465 if ( index > len ) index = len - 1;
1466
1467 while ( i < num && ( index - i ) != npos ) {
1468 size_type j = index - i;
1469 // careful to step full Unicode characters
1470 if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) ) {
1471 j = index - ++i;
1472 }
1473 // and back to the usual dull test
1474 unicode_char ch = getChar( j );
1475 if ( str.inString( ch ) )
1476 return j;
1477 i++;
1478 }
1479 return npos;
1480 }
1481
1483 {
1484 UString tmp;
1485 tmp.assign( 1, ch );
1486 return find_last_of( tmp, index );
1487 }
1488
1489#if MYGUI_IS_NATIVE_WCHAR_T
1490 UString::size_type UString::find_last_of( wchar_t ch, size_type index /*= npos */ ) const
1491 {
1492 return find_last_of( static_cast<unicode_char>( ch ), index );
1493 }
1494#endif
1495
1497 {
1498 code_point cp[3] = {0, 0, 0};
1499 size_t l = _utf32_to_utf16( ch, cp );
1500 return find_last_of( UString( cp, l ), index );
1501 }
1502
1503 UString::size_type UString::find_last_not_of( const UString& str, size_type index /*= npos*/, size_type num /*= npos */ ) const
1504 {
1505 size_type i = 0;
1506 const size_type len = length();
1507 if ( index > len ) index = len - 1;
1508
1509 while ( i < num && ( index - i ) != npos ) {
1510 size_type j = index - i;
1511 // careful to step full Unicode characters
1512 if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) ) {
1513 j = index - ++i;
1514 }
1515 // and back to the usual dull test
1516 unicode_char ch = getChar( j );
1517 if ( !str.inString( ch ) )
1518 return j;
1519 i++;
1520 }
1521 return npos;
1522 }
1523
1525 {
1526 UString tmp;
1527 tmp.assign( 1, ch );
1528 return find_last_not_of( tmp, index );
1529 }
1530
1531 UString::size_type UString::find_last_not_of( char ch, size_type index /*= npos */ ) const
1532 {
1533 return find_last_not_of( static_cast<code_point>( ch ), index );
1534 }
1535
1536#if MYGUI_IS_NATIVE_WCHAR_T
1537 UString::size_type UString::find_last_not_of( wchar_t ch, size_type index /*= npos */ ) const
1538 {
1539 return find_last_not_of( static_cast<unicode_char>( ch ), index );
1540 }
1541#endif
1542
1544 {
1545 code_point cp[3] = {0, 0, 0};
1546 size_t l = _utf32_to_utf16( ch, cp );
1547 return find_last_not_of( UString( cp, l ), index );
1548 }
1549
1550 bool UString::operator<( const UString& right ) const
1551 {
1552 return compare( right ) < 0;
1553 }
1554
1555 bool UString::operator<=( const UString& right ) const
1556 {
1557 return compare( right ) <= 0;
1558 }
1559
1561 {
1562 return assign( s );
1563 }
1564
1566 {
1567 clear();
1568 return append( 1, ch );
1569 }
1570
1572 {
1573 clear();
1574 return append( 1, ch );
1575 }
1576
1577#if MYGUI_IS_NATIVE_WCHAR_T
1578 UString& UString::operator=( wchar_t ch )
1579 {
1580 clear();
1581 return append( 1, ch );
1582 }
1583#endif
1584
1586 {
1587 clear();
1588 return append( 1, ch );
1589 }
1590
1591 bool UString::operator>( const UString& right ) const
1592 {
1593 return compare( right ) > 0;
1594 }
1595
1596 bool UString::operator>=( const UString& right ) const
1597 {
1598 return compare( right ) >= 0;
1599 }
1600
1601 bool UString::operator==( const UString& right ) const
1602 {
1603 return compare( right ) == 0;
1604 }
1605
1606 bool UString::operator!=( const UString& right ) const
1607 {
1608 return !operator==( right );
1609 }
1610
1612 {
1613 return at( index );
1614 }
1615
1617 {
1618 return at( index );
1619 }
1620
1621 UString::operator std::string() const
1622 {
1623 return std::string( asUTF8() );
1624 }
1625
1627 UString::operator std::wstring() const
1628 {
1629 return std::wstring( asWStr() );
1630 }
1631
1632
1634 {
1635 if ( 0xD800 <= cp && cp <= 0xDFFF ) // tests if the cp is within the surrogate pair range
1636 return false; // it matches a surrogate pair signature
1637 return true; // everything else is a standalone code point
1638 }
1639
1641 {
1642 if ( 0xD800 <= cp && cp <= 0xDBFF ) // tests if the cp is within the 2nd word of a surrogate pair
1643 return true; // it is a 1st word
1644 return false; // it isn't
1645 }
1646
1648 {
1649 if ( 0xDC00 <= cp && cp <= 0xDFFF ) // tests if the cp is within the 2nd word of a surrogate pair
1650 return true; // it is a 2nd word
1651 return false; // everything else isn't
1652 }
1653
1655 {
1656 if ( 0xD800 <= cp && cp <= 0xDBFF ) // test if cp is the beginning of a surrogate pair
1657 return 2; // if it is, then we are 2 words long
1658 return 1; // otherwise we are only 1 word long
1659 }
1660
1662 {
1663 if ( uc > 0xFFFF ) // test if uc is greater than the single word maximum
1664 return 2; // if so, we need a surrogate pair
1665 return 1; // otherwise we can stuff it into a single word
1666 }
1667
1668 size_t UString::_utf16_to_utf32( const code_point in_cp[2], unicode_char& out_uc )
1669 {
1670 const code_point& cp1 = in_cp[0];
1671 const code_point& cp2 = in_cp[1];
1672 bool wordPair = false;
1673
1674 // does it look like a surrogate pair?
1675 if ( 0xD800 <= cp1 && cp1 <= 0xDBFF ) {
1676 // looks like one, but does the other half match the algorithm as well?
1677 if ( 0xDC00 <= cp2 && cp2 <= 0xDFFF )
1678 wordPair = true; // yep!
1679 }
1680
1681 if ( !wordPair ) { // if we aren't a 100% authentic surrogate pair, then just copy the value
1682 out_uc = cp1;
1683 return 1;
1684 }
1685
1686 unsigned short cU = cp1, cL = cp2; // copy upper and lower words of surrogate pair to writable buffers
1687 cU -= 0xD800; // remove the encoding markers
1688 cL -= 0xDC00;
1689
1690 out_uc = ( cU & 0x03FF ) << 10; // grab the 10 upper bits and set them in their proper location
1691 out_uc |= ( cL & 0x03FF ); // combine in the lower 10 bits
1692 out_uc += 0x10000; // add back in the value offset
1693
1694 return 2; // this whole operation takes to words, so that's what we'll return
1695 }
1696
1697 size_t UString::_utf32_to_utf16( const unicode_char& in_uc, code_point out_cp[2] )
1698 {
1699 if ( in_uc <= 0xFFFF ) { // we blindly preserve sentinel values because our decoder understands them
1700 out_cp[0] = static_cast<code_point>(in_uc);
1701 return 1;
1702 }
1703 unicode_char uc = in_uc; // copy to writable buffer
1704 unsigned short tmp; // single code point buffer
1705 uc -= 0x10000; // subtract value offset
1706
1707 //process upper word
1708 tmp = static_cast<unsigned short>(( uc >> 10 ) & 0x03FF); // grab the upper 10 bits
1709 tmp += 0xD800; // add encoding offset
1710 out_cp[0] = tmp; // write
1711
1712 // process lower word
1713 tmp = static_cast<unsigned short>(uc & 0x03FF); // grab the lower 10 bits
1714 tmp += 0xDC00; // add encoding offset
1715 out_cp[1] = tmp; // write
1716
1717 return 2; // return used word count (2 for surrogate pairs)
1718 }
1719
1720 bool UString::_utf8_start_char( unsigned char cp )
1721 {
1722 return ( cp & ~_cont_mask ) != _cont;
1723 }
1724
1725 size_t UString::_utf8_char_length( unsigned char cp )
1726 {
1727 if ( !( cp & 0x80 ) ) return 1;
1728 if (( cp & ~_lead1_mask ) == _lead1 ) return 2;
1729 if (( cp & ~_lead2_mask ) == _lead2 ) return 3;
1730 if (( cp & ~_lead3_mask ) == _lead3 ) return 4;
1731 if (( cp & ~_lead4_mask ) == _lead4 ) return 5;
1732 if (( cp & ~_lead5_mask ) == _lead5 ) return 6;
1733
1734 return 1;
1735 //throw invalid_data( "invalid UTF-8 sequence header value" );
1736 }
1737
1739 {
1740 /*
1741 7 bit: U-00000000 - U-0000007F: 0xxxxxxx
1742 11 bit: U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
1743 16 bit: U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
1744 21 bit: U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
1745 26 bit: U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
1746 31 bit: U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
1747 */
1748 if ( !( uc & ~0x0000007F ) ) return 1;
1749 if ( !( uc & ~0x000007FF ) ) return 2;
1750 if ( !( uc & ~0x0000FFFF ) ) return 3;
1751 if ( !( uc & ~0x001FFFFF ) ) return 4;
1752 if ( !( uc & ~0x03FFFFFF ) ) return 5;
1753 if ( !( uc & ~0x7FFFFFFF ) ) return 6;
1754
1755 return 1;
1756 //throw invalid_data( "invalid UTF-32 value" );
1757 }
1758
1759 size_t UString::_utf8_to_utf32( const unsigned char in_cp[6], unicode_char& out_uc )
1760 {
1761 size_t len = _utf8_char_length( in_cp[0] );
1762 if ( len == 1 ) { // if we are only 1 byte long, then just grab it and exit
1763 out_uc = in_cp[0];
1764 return 1;
1765 }
1766
1767 unicode_char c = 0; // temporary buffer
1768 size_t i = 0;
1769 switch ( len ) { // load header byte
1770 case 6:
1771 c = in_cp[i] & _lead5_mask;
1772 break;
1773 case 5:
1774 c = in_cp[i] & _lead4_mask;
1775 break;
1776 case 4:
1777 c = in_cp[i] & _lead3_mask;
1778 break;
1779 case 3:
1780 c = in_cp[i] & _lead2_mask;
1781 break;
1782 case 2:
1783 c = in_cp[i] & _lead1_mask;
1784 break;
1785 }
1786
1787 // load each continuation byte
1788 for ( ++i; i < len; i++ )
1789 {
1790 if (( in_cp[i] & ~_cont_mask ) != _cont )
1791 {
1792 //throw invalid_data( "bad UTF-8 continuation byte" );
1793 out_uc = in_cp[0];
1794 return 1;
1795 }
1796 c <<= 6;
1797 c |= ( in_cp[i] & _cont_mask );
1798 }
1799
1800 out_uc = c; // write the final value and return the used byte length
1801 return len;
1802 }
1803
1804 size_t UString::_utf32_to_utf8( const unicode_char& in_uc, unsigned char out_cp[6] )
1805 {
1806 size_t len = _utf8_char_length( in_uc ); // predict byte length of sequence
1807 unicode_char c = in_uc; // copy to temp buffer
1808
1809 //stuff all of the lower bits
1810 for ( size_t i = len - 1; i > 0; i-- ) {
1811 out_cp[i] = static_cast<unsigned char>((( c ) & _cont_mask ) | _cont);
1812 c >>= 6;
1813 }
1814
1815 //now write the header byte
1816 switch ( len ) {
1817 case 6:
1818 out_cp[0] = static_cast<unsigned char>((( c ) & _lead5_mask ) | _lead5);
1819 break;
1820 case 5:
1821 out_cp[0] = static_cast<unsigned char>((( c ) & _lead4_mask ) | _lead4);
1822 break;
1823 case 4:
1824 out_cp[0] = static_cast<unsigned char>((( c ) & _lead3_mask ) | _lead3);
1825 break;
1826 case 3:
1827 out_cp[0] = static_cast<unsigned char>((( c ) & _lead2_mask ) | _lead2);
1828 break;
1829 case 2:
1830 out_cp[0] = static_cast<unsigned char>((( c ) & _lead1_mask ) | _lead1);
1831 break;
1832 case 1:
1833 default:
1834 out_cp[0] = static_cast<unsigned char>(( c ) & 0x7F);
1835 break;
1836 }
1837
1838 // return the byte length of the sequence
1839 return len;
1840 }
1841
1843 {
1844 std::string tmp( reinterpret_cast<const char*>( c_str ) );
1845 return _verifyUTF8( tmp );
1846 }
1847
1849 {
1850 std::string::const_iterator i, ie = str.end();
1851 i = str.begin();
1852 size_type length = 0;
1853
1854 while ( i != ie ) {
1855 // characters pass until we find an extended sequence
1856 if (( *i ) & 0x80 ) {
1857 unsigned char c = ( *i );
1858 size_t contBytes = 0;
1859
1860 // get continuation byte count and test for overlong sequences
1861 if (( c & ~_lead1_mask ) == _lead1 ) { // 1 additional byte
1862 if ( c == _lead1 )
1863 {
1864 //throw invalid_data( "overlong UTF-8 sequence" );
1865 return str.size();
1866 }
1867 contBytes = 1;
1868
1869 } else if (( c & ~_lead2_mask ) == _lead2 ) { // 2 additional bytes
1870 contBytes = 2;
1871 if ( c == _lead2 ) { // possible overlong UTF-8 sequence
1872 c = ( *( i + 1 ) ); // look ahead to next byte in sequence
1873 if (( c & _lead2 ) == _cont )
1874 {
1875 //throw invalid_data( "overlong UTF-8 sequence" );
1876 return str.size();
1877 }
1878 }
1879
1880 } else if (( c & ~_lead3_mask ) == _lead3 ) { // 3 additional bytes
1881 contBytes = 3;
1882 if ( c == _lead3 ) { // possible overlong UTF-8 sequence
1883 c = ( *( i + 1 ) ); // look ahead to next byte in sequence
1884 if (( c & _lead3 ) == _cont )
1885 {
1886 //throw invalid_data( "overlong UTF-8 sequence" );
1887 return str.size();
1888 }
1889 }
1890
1891 } else if (( c & ~_lead4_mask ) == _lead4 ) { // 4 additional bytes
1892 contBytes = 4;
1893 if ( c == _lead4 ) { // possible overlong UTF-8 sequence
1894 c = ( *( i + 1 ) ); // look ahead to next byte in sequence
1895 if (( c & _lead4 ) == _cont )
1896 {
1897 //throw invalid_data( "overlong UTF-8 sequence" );
1898 return str.size();
1899 }
1900 }
1901
1902 } else if (( c & ~_lead5_mask ) == _lead5 ) { // 5 additional bytes
1903 contBytes = 5;
1904 if ( c == _lead5 ) { // possible overlong UTF-8 sequence
1905 c = ( *( i + 1 ) ); // look ahead to next byte in sequence
1906 if (( c & _lead5 ) == _cont )
1907 {
1908 //throw invalid_data( "overlong UTF-8 sequence" );
1909 return str.size();
1910 }
1911 }
1912 }
1913
1914 // check remaining continuation bytes for
1915 while ( contBytes-- ) {
1916 c = ( *( ++i ) ); // get next byte in sequence
1917 if (( c & ~_cont_mask ) != _cont )
1918 {
1919 //throw invalid_data( "bad UTF-8 continuation byte" );
1920 return str.size();
1921 }
1922 }
1923 }
1924 length++;
1925 i++;
1926 }
1927 return length;
1928 }
1929
1930 void UString::_init()
1931 {
1932 m_buffer.mVoidBuffer = nullptr;
1933 m_bufferType = bt_none;
1934 m_bufferSize = 0;
1935 }
1936
1937 void UString::_cleanBuffer() const
1938 {
1939 if ( m_buffer.mVoidBuffer != nullptr ) {
1940 switch ( m_bufferType ) {
1941 case bt_string:
1942 delete m_buffer.mStrBuffer;
1943 break;
1944 case bt_wstring:
1945 delete m_buffer.mWStrBuffer;
1946 break;
1947 case bt_utf32string:
1948 delete m_buffer.mUTF32StrBuffer;
1949 break;
1950 case bt_none: // under the worse of circumstances, this is all we can do, and hope it works out
1951 //delete m_buffer.mVoidBuffer;
1952 // delete void* is undefined, don't do that
1953 assert("This should never happen - mVoidBuffer should never contain something if we "
1954 "don't know the type");
1955 break;
1956 }
1957 m_buffer.mVoidBuffer = nullptr;
1958 m_bufferSize = 0;
1959 m_bufferType = bt_none;
1960 }
1961 }
1962
1963 void UString::_getBufferStr() const
1964 {
1965 if ( m_bufferType != bt_string ) {
1966 _cleanBuffer();
1967 m_buffer.mStrBuffer = new std::string();
1968 m_bufferType = bt_string;
1969 }
1970 m_buffer.mStrBuffer->clear();
1971 }
1972
1973 void UString::_getBufferWStr() const
1974 {
1975 if ( m_bufferType != bt_wstring ) {
1976 _cleanBuffer();
1977 m_buffer.mWStrBuffer = new std::wstring();
1978 m_bufferType = bt_wstring;
1979 }
1980 m_buffer.mWStrBuffer->clear();
1981 }
1982
1983 void UString::_getBufferUTF32Str() const
1984 {
1985 if ( m_bufferType != bt_utf32string ) {
1986 _cleanBuffer();
1987 m_buffer.mUTF32StrBuffer = new utf32string();
1988 m_bufferType = bt_utf32string;
1989 }
1990 m_buffer.mUTF32StrBuffer->clear();
1991 }
1992
1993 void UString::_load_buffer_UTF8() const
1994 {
1995 _getBufferStr();
1996 std::string& buffer = ( *m_buffer.mStrBuffer );
1997 buffer.reserve( length() );
1998
1999 unsigned char utf8buf[6];
2000 char* charbuf = ( char* )utf8buf;
2001 unicode_char c;
2002 size_t len;
2003
2004 const_iterator i, ie = end();
2005 for ( i = begin(); i != ie; i.moveNext() ) {
2006 c = i.getCharacter();
2007 len = _utf32_to_utf8( c, utf8buf );
2008 size_t j = 0;
2009 while ( j < len )
2010 buffer.push_back( charbuf[j++] );
2011 }
2012 }
2013
2014 void UString::_load_buffer_WStr() const
2015 {
2016 _getBufferWStr();
2017 std::wstring& buffer = ( *m_buffer.mWStrBuffer );
2018 buffer.reserve( length() ); // may over reserve, but should be close enough
2019#ifdef WCHAR_UTF16 // wchar_t matches UTF-16
2020 const_iterator i, ie = end();
2021 for ( i = begin(); i != ie; ++i ) {
2022 buffer.push_back(( wchar_t )( *i ) );
2023 }
2024#else // wchar_t fits UTF-32
2025 unicode_char c;
2026 const_iterator i, ie = end();
2027 for ( i = begin(); i != ie; i.moveNext() ) {
2028 c = i.getCharacter();
2029 buffer.push_back(( wchar_t )c );
2030 }
2031#endif
2032 }
2033
2034 void UString::_load_buffer_UTF32() const
2035 {
2036 _getBufferUTF32Str();
2037 utf32string& buffer = ( *m_buffer.mUTF32StrBuffer );
2038 buffer.reserve( length() ); // may over reserve, but should be close enough
2039
2040 unicode_char c;
2041
2042 const_iterator i, ie = end();
2043 for ( i = begin(); i != ie; i.moveNext() ) {
2044 c = i.getCharacter();
2045 buffer.push_back( c );
2046 }
2047 }
2048
2049} // namespace MyGUI
base iterator class for UString
int _setCharacter(unicode_char uc)
void _become(const _base_iterator &i)
void _jump_to(size_type index)
unicode_char _getCharacter() const
const forward iterator for UString
_const_fwd_iterator & operator=(const _const_fwd_iterator &i)
_const_fwd_iterator & operator+=(difference_type n)
addition assignment operator
_const_fwd_iterator & operator++()
pre-increment
_const_fwd_iterator operator-(difference_type n)
subtraction operator
_const_fwd_iterator & operator-=(difference_type n)
subtraction assignment operator
_const_fwd_iterator & moveNext()
advances to the next Unicode character, honoring surrogate pairs in the UTF-16 stream
const value_type & operator[](difference_type n) const
dereference at offset operator
_const_fwd_iterator & operator--()
pre-decrement
_const_fwd_iterator operator+(difference_type n)
addition operator
_const_fwd_iterator & movePrev()
rewinds to the previous Unicode character, honoring surrogate pairs in the UTF-16 stream
unicode_char getCharacter() const
Returns the Unicode value of the character at the current position (decodes surrogate pairs if needed...
const value_type & operator*() const
dereference operator
const reverse iterator for UString
_const_rev_iterator operator-(difference_type n)
subtraction operator
_const_rev_iterator operator+(difference_type n)
addition operator
_const_rev_iterator & operator++()
pre-increment
_const_rev_iterator & operator+=(difference_type n)
addition assignment operator
const value_type & operator[](difference_type n) const
dereference at offset operator
_const_rev_iterator & operator--()
pre-decrement
const value_type & operator*() const
dereference operator
_const_rev_iterator & operator-=(difference_type n)
subtraction assignment operator
forward iterator for UString
unicode_char getCharacter() const
Returns the Unicode value of the character at the current position (decodes surrogate pairs if needed...
_fwd_iterator & operator++()
pre-increment
_fwd_iterator operator-(difference_type n)
subtraction operator
_fwd_iterator & operator=(const _fwd_iterator &i)
int setCharacter(unicode_char uc)
Sets the Unicode value of the character at the current position (adding a surrogate pair if needed); ...
_fwd_iterator & moveNext()
advances to the next Unicode character, honoring surrogate pairs in the UTF-16 stream
_fwd_iterator & movePrev()
rewinds to the previous Unicode character, honoring surrogate pairs in the UTF-16 stream
_fwd_iterator & operator+=(difference_type n)
addition assignment operator
_fwd_iterator & operator-=(difference_type n)
subtraction assignment operator
_fwd_iterator operator+(difference_type n)
addition operator
value_type & operator*() const
dereference operator
_fwd_iterator & operator--()
pre-decrement
value_type & operator[](difference_type n) const
dereference at offset operator
reverse iterator for UString
_rev_iterator & operator+=(difference_type n)
addition assignment operator
_rev_iterator & operator--()
pre-decrement
value_type & operator*() const
dereference operator
_rev_iterator & operator++()
pre-increment
_rev_iterator operator-(difference_type n)
subtraction operator
_rev_iterator & operator-=(difference_type n)
subtraction assignment operator
value_type & operator[](difference_type n) const
dereference at offset operator
_rev_iterator operator+(difference_type n)
addition operator
A UTF-16 string with implicit conversion to/from std::string and std::wstring.
reverse_iterator rend()
returns a reverse iterator just past the beginning of the string
static size_type _verifyUTF8(const unsigned char *c_str)
verifies a UTF-8 stream, returning the total number of Unicode characters found
size_type length() const
Returns the number of code points in the current string.
iterator insert(iterator i, const code_point &ch)
inserts ch before the code point denoted by i
const wchar_t * asWStr_c_str() const
returns the current string in the native form of a nul-terminated wchar_t array
bool operator>(const UString &right) const
greater than operator
size_type size() const
Returns the number of code points in the current string.
static size_t _utf32_to_utf8(const unicode_char &in_uc, unsigned char out_cp[6])
writes the given UTF-32 uc_in to the buffer location out_cp using UTF-8 encoding, returns the number ...
const code_point * data() const
returns a pointer to the first character in the current string
UString()
default constructor, creates an empty string
static size_t _utf8_to_utf32(const unsigned char in_cp[6], unicode_char &out_uc)
converts the given UTF-8 character buffer to a single UTF-32 Unicode character, returns the number of...
const char * asUTF8_c_str() const
returns the current string in UTF-8 form as a nul-terminated char array
bool operator==(const UString &right) const
equality operator
bool operator!=(const UString &right) const
inequality operator
const unicode_char * asUTF32_c_str() const
returns the current string in UTF-32 form as a nul-terminated unicode_char array
size_type find(const UString &str, size_type index=0) const
returns the index of the first occurrence of str within the current string, starting at index; return...
bool operator>=(const UString &right) const
greater than or equal operator
size_type rfind(const UString &str, size_type index=0) const
returns the location of the first occurrence of str in the current string, doing a reverse search fro...
void reserve(size_type size)
sets the capacity of the string to at least size code points
static size_t _utf32_to_utf16(const unicode_char &in_uc, code_point out_cp[2])
writes the given UTF-32 uc_in to the buffer location out_cp using UTF-16 encoding,...
const utf32string & asUTF32() const
returns the current string in UTF-32 form within a utf32string
static size_t _utf16_to_utf32(const code_point in_cp[2], unicode_char &out_uc)
converts the given UTF-16 character buffer in_cp to a single UTF-32 Unicode character out_uc,...
void clear()
deletes all of the elements in the string
int setChar(size_type loc, unicode_char ch)
sets the value of the character at loc to the Unicode value ch (UTF-32)
~UString()
destructor
UString & assign(iterator start, iterator end)
gives the current string the values from start to end
int compare(const UString &str) const
compare str to the current string
code_point value_type
value type typedef for use in iterators
bool operator<=(const UString &right) const
less than or equal operator
std::basic_string< unicode_char > utf32string
string type used for returning UTF-32 formatted data
static bool _utf16_surrogate_follow(code_point cp)
returns true if cp matches the signature of a surrogate pair following character
size_type find_first_of(const UString &str, size_type index=0, size_type num=npos) const
Returns the index of the first character within the current string that matches any character in str,...
static size_t _utf16_char_length(code_point cp)
estimates the number of UTF-16 code points in the sequence starting with cp
iterator erase(iterator loc)
removes the code point pointed to by loc, returning an iterator to the next character
std::basic_string< code_point > dstring
bool operator<(const UString &right) const
less than operator
static bool _utf8_start_char(unsigned char cp)
returns true if cp is the beginning of a UTF-8 sequence
uint16 code_point
a single UTF-16 code point
size_type find_last_of(const UString &str, size_type index=npos, size_type num=npos) const
returns the index of the first character within the current string that matches any character in str,...
static bool _utf16_surrogate_lead(code_point cp)
returns true if cp matches the signature of a surrogate pair lead character
iterator end()
returns an iterator just past the end of the string
unicode_char getChar(size_type loc) const
returns the data point loc evaluated as a UTF-32 value
static bool _utf16_independent_char(code_point cp)
returns true if cp does not match the signature for the lead of follow code point of a surrogate pair...
static const size_type npos
the usual constant representing: not found, no limit, etc
uint32 unicode_char
a single 32-bit Unicode character
UString & operator=(const UString &s)
assignment operator, implicitly casts all compatible types
_fwd_iterator iterator
iterator
const std::wstring & asWStr() const
returns the current string in the native form of std::wstring
bool inString(unicode_char ch) const
returns true if the given Unicode character ch is in this string
code_point & operator[](size_type index)
code point dereference operator
size_type find_first_not_of(const UString &str, size_type index=0, size_type num=npos) const
returns the index of the first character within the current string that does not match any character ...
UString & append(const UString &str)
appends str on to the end of the current string
const code_point * c_str() const
returns a pointer to the first character in the current string
code_point & at(size_type loc)
returns a reference to the element in the string at index loc
void resize(size_type num, const code_point &val=0)
changes the size of the string to size, filling in any new area with val
_const_fwd_iterator const_iterator
const iterator
reverse_iterator rbegin()
returns a reverse iterator to the last element of the string
size_t size_type
size type used to indicate string size and character positions within the string
UString & replace(size_type index1, size_type num1, const UString &str)
replaces up to num1 code points of the current string (starting at index1) with str
const std::string & asUTF8() const
returns the current string in UTF-8 form within a std::string
static size_t _utf8_char_length(unsigned char cp)
estimates the number of UTF-8 code points in the sequence starting with cp
size_type length_Characters() const
Returns the number of Unicode characters in the string.
void push_back(unicode_char val)
appends val to the end of the string
iterator begin()
returns an iterator to the first element of the string
size_type find_last_not_of(const UString &str, size_type index=npos, size_type num=npos) const
returns the index of the last character within the current string that does not match any character i...
size_type max_size() const
returns the maximum number of UTF-16 code points that the string can hold
UString substr(size_type index, size_type num=npos) const
returns a substring of the current string, starting at index, and num characters long.
void swap(UString &from)
exchanges the elements of the current string with those of from
size_type capacity() const
returns the number of elements that the string can hold before it will need to allocate more space
bool empty() const
returns true if the string has no elements, false otherwise
float len(float x, float y)