1:
37:
38: package ;
39:
40: import ;
41: import ;
42:
43:
/**
 * Wraps a {@link Reader} and exposes its content as Unicode code points
 * instead of UTF-16 code units: every high/low surrogate pair delivered by
 * the underlying reader is combined into a single supplementary code point.
 * A surrogate that is not part of a well-formed pair is reported with an
 * {@link IOException}.
 */
public class UnicodeReader
{

  /** The underlying reader supplying UTF-16 code units. */
  final Reader in;

  /**
   * Creates a code point reader on top of the given character reader.
   *
   * @param in the reader to pull UTF-16 code units from
   */
  UnicodeReader(Reader in)
  {
    this.in = in;
  }

  /**
   * Marks the current position in the underlying reader.
   *
   * @param limit the number of code points that may be read while still
   *              preserving the mark
   * @throws IOException if the underlying reader fails to set the mark
   */
  public void mark(int limit)
    throws IOException
  {
    // A code point may occupy two chars (a surrogate pair) in the
    // underlying reader, so ask it to remember twice as many chars.
    in.mark(limit * 2);
  }

  /**
   * Resets the underlying reader to its most recent mark.
   *
   * @throws IOException if the underlying reader cannot be reset
   */
  public void reset()
    throws IOException
  {
    in.reset();
  }

  /**
   * Reads a single code point.
   *
   * @return the code point read, or -1 if the underlying reader is at its end
   * @throws IOException if the underlying reader fails, or if an unpaired
   *                     surrogate is encountered
   */
  public int read()
    throws IOException
  {
    int ret = in.read();
    if (ret == -1)
      return -1;
    char c = (char) ret;
    if (Character.isHighSurrogate(c))
      {
        // A high surrogate must be followed immediately by a low surrogate.
        int low = in.read();
        if (low != -1 && Character.isLowSurrogate((char) low))
          return Character.toCodePoint(c, (char) low);
        throw unpairedSurrogate(c);
      }
    if (Character.isLowSurrogate(c))
      throw unpairedSurrogate(c);
    return ret;
  }

  /**
   * Reads code points into a portion of an array. At most {@code len} chars
   * are requested from the underlying reader in one bulk read; if that chunk
   * ends with a high surrogate, one further char is read to complete the
   * pair.
   *
   * @param buf the destination array
   * @param off the index in {@code buf} at which to store the first code point
   * @param len the maximum number of code points to read
   * @return the number of code points stored in {@code buf}, 0 if
   *         {@code len} is 0, or the non-positive value returned by the
   *         underlying reader (-1 at end of stream)
   * @throws IOException if the underlying reader fails, or if an unpaired
   *                     surrogate is encountered
   */
  public int read(int[] buf, int off, int len)
    throws IOException
  {
    if (len == 0)
      return 0;
    char[] chars = new char[len];
    int count = in.read(chars, 0, len);
    if (count <= 0)
      return count;
    int j = off;
    int i = 0;
    while (i < count)
      {
        char c = chars[i++];
        if (Character.isHighSurrogate(c))
          {
            int low;
            if (i < count)
              low = chars[i++];
            else
              low = in.read(); // the pair straddles the end of this chunk
            if (low == -1 || !Character.isLowSurrogate((char) low))
              throw unpairedSurrogate(c);
            buf[j++] = Character.toCodePoint(c, (char) low);
          }
        else if (Character.isLowSurrogate(c))
          throw unpairedSurrogate(c);
        else
          buf[j++] = c;
      }
    // Report how many code points were stored, not the end index in buf.
    return j - off;
  }

  /**
   * Closes the underlying reader.
   *
   * @throws IOException if the underlying reader fails to close
   */
  public void close()
    throws IOException
  {
    in.close();
  }

  /**
   * Converts a string to an array of Unicode code points. Surrogate pairs
   * are combined, so the result may be shorter than the string; it is always
   * exactly as long as the number of code points in the text.
   *
   * @param text the text to convert
   * @return the code points of {@code text}, in order
   * @throws IOException if the text contains an unpaired surrogate, in any
   *                     position including the last
   */
  public static int[] toCodePointArray(String text)
    throws IOException
  {
    char[] chars = text.toCharArray();
    int[] buf = new int[chars.length];
    int j = 0;
    for (int i = 0; i < chars.length; i++)
      {
        char c = chars[i];
        if (Character.isHighSurrogate(c))
          {
            if (i + 1 >= chars.length
                || !Character.isLowSurrogate(chars[i + 1]))
              throw unpairedSurrogate(c);
            buf[j++] = Character.toCodePoint(c, chars[++i]);
          }
        else if (Character.isLowSurrogate(c))
          throw unpairedSurrogate(c);
        else
          buf[j++] = c;
      }
    // Trim whenever pairs were merged, wherever in the text they occurred.
    if (j < buf.length)
      {
        int[] trimmed = new int[j];
        System.arraycopy(buf, 0, trimmed, 0, j);
        buf = trimmed;
      }
    return buf;
  }

  /**
   * Builds the exception used to report a surrogate that is not part of a
   * well-formed pair.
   */
  private static IOException unpairedSurrogate(char c)
  {
    return new IOException("unpaired surrogate: U+" +
                           Integer.toHexString(c));
  }

}