/* NumericShaper.java
   Copyright (C) 2003 Free Software Foundation, Inc.

This file is part of GNU Classpath.

GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING.  If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.

Linking this library statically or dynamically with other modules is
making a combined work based on this library.  Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.

As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module.  An independent module is a module which is not derived from
or based on this library.  If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so.  If you do not wish to do so, delete this
exception statement from your version. */


package java.awt.font;

import java.io.Serializable;
import java.lang.Character.UnicodeBlock;

/**
 * This class handles numeric shaping.  A shaper can either be contextual
 * or not.  A non-contextual shaper will always translate ASCII digits
 * in its input into the target Unicode range.  A contextual shaper will
 * change the target Unicode range depending on the characters it has
 * previously processed.
 *
 * @author Michael Koch
 * @author Tom Tromey
 *
 * @since 1.4
 * @specnote This class does not handle LIMBU or OSMANYA.
 * @specnote The JDK does not seem to properly handle ranges without a
 * digit zero, such as TAMIL.  This implementation does.
 */
public final class NumericShaper implements Serializable
{
  private static final long serialVersionUID = -8022764705923730308L;

  /** Convenience constant representing all the valid Unicode ranges.  */
  public static final int ALL_RANGES  = 524287;

  /**
   * Constant representing the Unicode ARABIC range.  Shaping done
   * using this range will translate to the arabic decimal characters.
   * Use EASTERN_ARABIC if you want to shape to the eastern arabic
   * (also known as the extended arabic) decimal characters.
   */
  public static final int ARABIC  = 2;

  /** Constant representing the Unicode BENGALI range.  */
  public static final int BENGALI  = 16;

  /** Constant representing the Unicode DEVANAGARI range.  */
  public static final int DEVANAGARI  = 8;

  /**
   * Constant representing the Unicode extended arabic range.
   * In Unicode there are two different sets of arabic digits;
   * this selects the extended or eastern set.
   */
  public static final int EASTERN_ARABIC  = 4;

  /**
   * Constant representing the Unicode ETHIOPIC range.  Note that
   * there is no digit zero in this range; an ASCII digit zero
   * is left unchanged when shaping to this range.
   */
  public static final int ETHIOPIC  = 65536;

  /**
   * Constant representing the Unicode EUROPEAN range.  For
   * contextual shaping purposes, characters in the various
   * extended Latin character blocks are recognized as EUROPEAN.
   */
  public static final int EUROPEAN  = 1;

  /** Constant representing the Unicode GUJARATI range.  */
  public static final int GUJARATI  = 64;

  /** Constant representing the Unicode GURMUKHI range.  */
  public static final int GURMUKHI  = 32;

  /** Constant representing the Unicode KANNADA range.  */
  public static final int KANNADA  = 1024;

  /** Constant representing the Unicode KHMER range.  */
  public static final int KHMER  = 131072;

  /** Constant representing the Unicode LAO range.  */
  public static final int LAO  = 8192;

  /** Constant representing the Unicode MALAYALAM range.  */
  public static final int MALAYALAM  = 2048;

  /** Constant representing the Unicode MONGOLIAN range.  */
  public static final int MONGOLIAN  = 262144;

  /** Constant representing the Unicode MYANMAR range.  */
  public static final int MYANMAR  = 32768;

  /** Constant representing the Unicode ORIYA range.  */
  public static final int ORIYA  = 128;

  /**
   * Constant representing the Unicode TAMIL range.  Note that
   * there is no digit zero in this range; an ASCII digit zero
   * is left unchanged when shaping to this range.
   */
  public static final int TAMIL  = 256;

  /** Constant representing the Unicode TELUGU range.  */
  public static final int TELUGU  = 512;

  /** Constant representing the Unicode THAI range.  */
  public static final int THAI  = 4096;

  /** Constant representing the Unicode TIBETAN range.  */
  public static final int TIBETAN  = 16384;

  /**
   * This table holds the zero digits for each language.  This is hard-coded
   * because the values will not change and the table layout is tied to the
   * other constants in this class in any case.  In the two places where a
   * language does not have a zero digit, the character immediately preceding
   * the one digit is used instead.  These languages are special-cased in
   * the shaping code.  The table is indexed by the bit position of the
   * corresponding range constant.
   */
  private static final char[] zeroDigits =
  {
    '0',      // EUROPEAN
    '\u0660', // ARABIC
    '\u06f0', // EASTERN_ARABIC
    '\u0966', // DEVANAGARI
    '\u09e6', // BENGALI
    '\u0a66', // GURMUKHI
    '\u0ae6', // GUJARATI
    '\u0b66', // ORIYA
    '\u0be6', // TAMIL - special case as there is no digit zero
    '\u0c66', // TELUGU
    '\u0ce6', // KANNADA
    '\u0d66', // MALAYALAM
    '\u0e50', // THAI
    '\u0ed0', // LAO
    '\u0f20', // TIBETAN
    '\u1040', // MYANMAR
    '\u1368', // ETHIOPIC - special case as there is no digit zero
    '\u17e0', // KHMER
    '\u1810'  // MONGOLIAN
  };

  /**
   * The default initial context for this shaper, specified as
   * an integer from 0 to 18.
   */
  private int key;

  /**
   * The target ranges handled by this shaper.  If the shaper
   * is not contextual, the high bit of this field will be set.
   * @specnote This was discovered by reading the serialization spec
   */
  private int mask;

  /**
   * Create a new numeric shaper.  The key given is a constant from
   * this class, the constructor turns it into its internal form.
   * @param key the key to use, as one of the manifest constants
   * @param mask a mask of languages to shape for
   */
  private NumericShaper (int key, int mask)
  {
    // This internal form is a bit goofy, but it is specified by
    // the serialization spec.
    this.key = Integer.numberOfTrailingZeros(key);
    this.mask = mask;
  }

  /**
   * Return an integer representing all the languages for which this
   * shaper will shape.  The result is taken by "or"ing together
   * the constants representing the various languages.
   */
  public int getRanges ()
  {
    return mask & ALL_RANGES;
  }

  /**
   * Return true if this shaper is contextual, false if it is not.
   */
  public boolean isContextual ()
  {
    // A non-contextual shaper is marked by the high (sign) bit of the
    // mask.  Test the sign rather than "mask > 0" so that a contextual
    // shaper created with an empty range mask is still reported as
    // contextual.
    return mask >= 0;
  }

  /**
   * Shape the text in the given array.  The starting context will
   * be the context passed to the shaper at creation time.
   * @param text the text to shape
   * @param start the index of the starting character of the array
   * @param count the number of characters in the array
   * @throws NullPointerException if text is null
   * @throws IndexOutOfBoundsException if start or count is negative,
   * or if start + count exceeds the length of the array
   */
  public void shape (char[] text, int start, int count)
  {
    shape (text, start, count, 1 << key);
  }

  /**
   * Given a unicode block object, return corresponding language constant.
   * If the block is not recognized, returns zero.  Note that as there
   * is no separate ARABIC block in Character, this case must
   * be specially handled by the caller; EASTERN_ARABIC is preferred when
   * both are specified.
   * @param b the unicode block to classify
   * @return the language constant, or zero if not recognized
   */
  private static int classify(UnicodeBlock b)
  {
    if (b == null)
      return 0;
    // ARABIC is handled by the caller; from testing we know
    // that EASTERN_ARABIC takes precedence.
    if (b == UnicodeBlock.ARABIC)
      return EASTERN_ARABIC;
    if (b == UnicodeBlock.BENGALI)
      return BENGALI;
    if (b == UnicodeBlock.DEVANAGARI)
      return DEVANAGARI;
    if (b == UnicodeBlock.ETHIOPIC)
      return ETHIOPIC;
    if (b == UnicodeBlock.BASIC_LATIN
        || b == UnicodeBlock.LATIN_1_SUPPLEMENT
        || b == UnicodeBlock.LATIN_EXTENDED_A
        || b == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL
        || b == UnicodeBlock.LATIN_EXTENDED_B)
      return EUROPEAN;
    if (b == UnicodeBlock.GUJARATI)
      return GUJARATI;
    if (b == UnicodeBlock.GURMUKHI)
      return GURMUKHI;
    if (b == UnicodeBlock.KANNADA)
      return KANNADA;
    if (b == UnicodeBlock.KHMER)
      return KHMER;
    if (b == UnicodeBlock.LAO)
      return LAO;
    if (b == UnicodeBlock.MALAYALAM)
      return MALAYALAM;
    if (b == UnicodeBlock.MONGOLIAN)
      return MONGOLIAN;
    if (b == UnicodeBlock.MYANMAR)
      return MYANMAR;
    if (b == UnicodeBlock.ORIYA)
      return ORIYA;
    if (b == UnicodeBlock.TAMIL)
      return TAMIL;
    if (b == UnicodeBlock.TELUGU)
      return TELUGU;
    if (b == UnicodeBlock.THAI)
      return THAI;
    if (b == UnicodeBlock.TIBETAN)
      return TIBETAN;
    return 0;
  }

  /**
   * Verify that the given region lies entirely inside the array, so
   * that shaping never fails after part of the text has been rewritten.
   * @param text the text about to be shaped
   * @param start the index of the first character to shape
   * @param count the number of characters to shape
   * @throws NullPointerException if text is null
   * @throws ArrayIndexOutOfBoundsException if the region is not valid
   */
  private static void checkRegion (char[] text, int start, int count)
  {
    if (text == null)
      throw new NullPointerException("text is null");
    // Written as a subtraction so that start + count cannot overflow.
    if (start < 0 || count < 0 || start > text.length - count)
      throw new ArrayIndexOutOfBoundsException("start=" + start
                                               + ", count=" + count
                                               + ", length=" + text.length);
  }

  /**
   * Shape the given text, using the indicated initial context.
   * If this shaper is not a contextual shaper, then the given context
   * will be ignored.
   * @param text the text to shape
   * @param start the index of the first character of the text to shape
   * @param count the number of characters to shape in the text
   * @param context the initial context
   * @throws IllegalArgumentException if the initial context is invalid
   * @throws NullPointerException if text is null
   * @throws IndexOutOfBoundsException if start or count is negative,
   * or if start + count exceeds the length of the array
   */
  public void shape (char[] text, int start, int count, int context)
  {
    int currentContext;
    if (isContextual())
      {
        if (Integer.bitCount(context) != 1 || (context & ~ALL_RANGES) != 0)
          throw new IllegalArgumentException("invalid context argument");
        // If the indicated context is not one we are handling, reset it.
        if ((context & mask) == 0)
          currentContext = -1;
        else
          currentContext = Integer.numberOfTrailingZeros(context);
      }
    else
      currentContext = key;

    // Reject a bad region before touching the array, so a failure
    // cannot leave the text half shaped.
    checkRegion(text, start, count);

    for (int i = 0; i < count; ++i)
      {
        char c = text[start + i];
        if (c >= '0' && c <= '9')
          {
            if (currentContext >= 0)
              {
                // Shape into the current context.
                if (c == '0'
                    && ((1 << currentContext) == TAMIL
                        || (1 << currentContext) == ETHIOPIC))
                  {
                    // No digit 0 in this context; do nothing.
                  }
                else
                  text[start + i]
                    = (char) (zeroDigits[currentContext] + c - '0');
              }
          }
        else if (isContextual())
          {
            // If c is in a group selected by this shaper, switch the
            // current context to it; otherwise the context is left
            // as it was.
            int group = classify(UnicodeBlock.of(c));
            // Specially handle ARABIC.
            if (group == EASTERN_ARABIC && (mask & EASTERN_ARABIC) == 0
                && (mask & ARABIC) != 0)
              group = ARABIC;
            if ((mask & group) != 0)
              {
                // The character was classified as being in a group
                // we recognize, and it was selected by the shaper.
                // So, change the context.
                currentContext = Integer.numberOfTrailingZeros(group);
              }
          }
      }
  }

  /**
   * Compare this shaper with another object.  Two shapers are equal
   * when they have the same default context and the same mask.
   * @param obj the object to compare against
   * @return true if obj is an equivalent NumericShaper
   */
  @Override
  public boolean equals (Object obj)
  {
    if (! (obj instanceof NumericShaper))
      return false;
    NumericShaper tmp = (NumericShaper) obj;
    return key == tmp.key && mask == tmp.mask;
  }

  /**
   * Return a hash code derived from the default context and the mask.
   */
  @Override
  public int hashCode ()
  {
    return key ^ mask;
  }

  /**
   * Return a string showing the internal key and mask of this shaper.
   */
  @Override
  public String toString ()
  {
    // For debugging only.
    return "key=" + key + "; mask=" + mask;
  }

  /**
   * Return a non-contextual shaper which can shape to a single range.
   * All ASCII digits in the input text are translated to this language.
   * @param singleRange the target language
   * @return a non-contextual shaper for this language
   * @throws IllegalArgumentException if the argument does not name a
   * single language, as specified by the constants declared in this class
   */
  public static NumericShaper getShaper (int singleRange)
  {
    // This also rejects zero, where no bit at all is set.
    if (Integer.bitCount(singleRange) != 1)
      throw new IllegalArgumentException("argument must have exactly one bit set");
    if ((singleRange & ~ALL_RANGES) != 0)
      throw new IllegalArgumentException("argument out of range");
    // The high bit marks the shaper as non-contextual.
    return new NumericShaper(singleRange, Integer.MIN_VALUE | singleRange);
  }

  /**
   * Return a contextual shaper which can shape to any of the indicated
   * languages.  The default initial context for this shaper is EUROPEAN.
   * @param ranges the ranges to shape to
   * @return a contextual shaper which will target any of these ranges
   * @throws IllegalArgumentException if the argument specifies an
   * unrecognized range
   */
  public static NumericShaper getContextualShaper (int ranges)
  {
    if ((ranges & ~ALL_RANGES) != 0)
      throw new IllegalArgumentException("argument out of range");
    return new NumericShaper(EUROPEAN, ranges);
  }

  /**
   * Return a contextual shaper which can shape to any of the indicated
   * languages.  The default initial context for this shaper is given as
   * an argument.
   * @param ranges the ranges to shape to
   * @param defaultContext the default initial context
   * @return a contextual shaper which will target any of these ranges
   * @throws IllegalArgumentException if the ranges argument specifies an
   * unrecognized range, or if the defaultContext argument does not specify
   * a single valid range
   */
  public static NumericShaper getContextualShaper (int ranges,
                                                   int defaultContext)
  {
    // This also rejects zero, where no bit at all is set.
    if (Integer.bitCount(defaultContext) != 1)
      throw new IllegalArgumentException("context must have exactly one bit set");
    if ((ranges & ~ALL_RANGES) != 0 || (defaultContext & ~ALL_RANGES) != 0)
      throw new IllegalArgumentException("argument out of range");
    return new NumericShaper(defaultContext, ranges);
  }
}