Gnash  0.8.11dev
utf8.h
Go to the documentation of this file.
1 // utf8.h: utilities for converting to and from UTF-8
2 //
3 // Copyright (C) 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
4 //
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation; either version 3 of the License, or
8 // (at your option) any later version.
9 //
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 //
19 // Based on the public domain work of Thatcher Ulrich <tu@tulrich.com> 2004
20 
21 #ifndef UTF8_H
22 #define UTF8_H
23 
24 #include <string>
25 #include <cstdint> // for C99 int types
26 #include <vector>
27 
28 #include "dsodefs.h" // For DSOEXPORT
29 
30 // Android doesn't have any support for wide characters at all, so when __ANDROID__ is defined std::wstring is declared by hand below as basic_string<wchar_t>.
31 #ifdef __ANDROID__
32 namespace std {
33 typedef basic_string
34  <wchar_t
35  ,std::char_traits<wchar_t>
36  ,std::allocator<wchar_t> >
37 wstring;
38 }
39 #endif
40 
41 namespace gnash {
42 
44 //
66 //
70 namespace utf8 {
71 
73  // Converts a std::string with multibyte characters into a std::wstring.
77  // NOTE(review): how version affects decoding is not visible in this listing; see the definition in utf8.cpp.
80  DSOEXPORT std::wstring decodeCanonicalString(const std::string& str, int version);
81 
83  // Converts a std::wstring into a canonical std::string. NOTE(review): the role of version is not shown here; see utf8.cpp.
93  DSOEXPORT std::string encodeCanonicalString(const std::wstring& wstr, int version);
94 
96  // Returns the next Unicode character in the UTF-8 encoded string. it is taken by non-const reference; e is presumably the end of the input (TODO confirm in utf8.cpp).
101  DSOEXPORT std::uint32_t decodeNextUnicodeCharacter(std::string::const_iterator& it,
102  const std::string::const_iterator& e);
103 
106  DSOEXPORT std::string encodeUnicodeCharacter(std::uint32_t ucs_character); // Encodes the given wide character into a canonical string, theoretically up to 6 chars in length.
107 
109  // Encodes the given wide character into a character of at least 8 bits, returned as a std::string.
112  DSOEXPORT std::string encodeLatin1Character(std::uint32_t ucsCharacter);
113 
125  };
126 
128  // Interprets (and skips) a Byte Order Mark in the input stream. size and encoding are non-const references; presumably both are updated to reflect the BOM found (TODO confirm in utf8.cpp).
151  DSOEXPORT const char* stripBOM(const char* in, size_t& size,
152  TextEncoding& encoding);
153 
155  DSOEXPORT const char* textEncodingName(TextEncoding enc); // Returns the name of a text encoding.
156 
161  };
162 
164  // Common code for guessing at the encoding of random text, between Shift-JIS, UTF-8, and other.
165  // Puts the character count in length, and the offsets to the characters in offsets.
166  // The original note says that offsets, if not NULL, should be at least s.length().
167  // offsets are not accurate if the return value is GUESSENC_OTHER
168  // NOTE(review): offsets is declared as a reference below, so the "not NULL" wording looks stale; confirm the sizing contract in utf8.cpp.
171  DSOEXPORT EncodingGuess guessEncoding(const std::string& s, int& length,
172  std::vector<int>& offsets);
173 
174 
175 } // namespace utf8
176 } // namespace gnash
177 
178 #endif // UTF8_H
179 
180 
181 // Local Variables:
182 // mode: C++
183 // c-basic-offset: 8
184 // tab-width: 8
185 // indent-tabs-mode: t
186 // End:
EncodingGuess guessEncoding(const std::string &str, int &length, std::vector< int > &offsets)
Common code for guessing at the encoding of random text, between Shift-JIS, UTF-8, and other.
Definition: utf8.cpp:281
std::string encodeLatin1Character(std::uint32_t ucsCharacter)
Encodes the given wide character into a character of at least 8 bits.
Definition: utf8.cpp:84
Definition: utf8.h:158
std::string encodeUnicodeCharacter(std::uint32_t ucs_character)
Encodes the given wide character into a canonical string, theoretically up to 6 chars in length...
Definition: utf8.cpp:165
Definition: utf8.h:121
Definition: utf8.h:118
Definition: utf8.h:123
Anonymous namespace for callbacks, local functions, event handlers etc.
Definition: dbus_ext.cpp:40
Definition: utf8.h:117
Definition: utf8.h:115
Definition: utf8.h:119
const char * stripBOM(const char *in, size_t &size, TextEncoding &encoding)
Interpret (and skip) Byte Order Mark in input stream.
Definition: utf8.cpp:208
std::uint32_t decodeNextUnicodeCharacter(std::string::const_iterator &it, const std::string::const_iterator &e)
Return the next Unicode character in the UTF-8 encoded string.
Definition: utf8.cpp:93
Definition: utf8.h:159
Definition: utf8.h:122
Definition: utf8.h:160
EncodingGuess
Definition: utf8.h:157
Definition: klash_part.cpp:329
#define DSOEXPORT
Definition: dsodefs.h:55
Definition: utf8.h:116
Definition: GnashKey.h:151
Definition: utf8.h:124
std::string encodeCanonicalString(const std::wstring &wstr, int version)
Converts a std::wstring into canonical std::string.
Definition: utf8.cpp:67
std::wstring decodeCanonicalString(const std::string &str, int version)
Converts a std::string with multibyte characters into a std::wstring.
Definition: utf8.cpp:39
Definition: GnashKey.h:165
TextEncoding
Definition: utf8.h:114
const char * textEncodingName(TextEncoding enc)
Return name of a text encoding.
Definition: utf8.cpp:262
Definition: utf8.h:120