Source for gnu.java.awt.font.opentype.CharGlyphMap

   1: /* CharGlyphMap.java -- Manages the 'cmap' table of TrueType fonts
   2:    Copyright (C) 2006 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: 
  39: package gnu.java.awt.font.opentype;
  40: 
  41: import java.nio.ByteBuffer;
  42: import java.nio.CharBuffer;
  43: import java.nio.ShortBuffer;
  44: import java.nio.IntBuffer;
  45: 
  46: 
  47: /**
  48:  * A mapping from Unicode codepoints to glyphs. This mapping
  49:  * does not perform any re-ordering or decomposition, so it
  50:  * is not everything that is needed to support Unicode.
  51:  *
  52:  * <p>This class manages the <code>cmap</code> table of
  53:  * OpenType and TrueType fonts.
  54:  *
  55:  * @see <a href="http://partners.adobe.com/asn/tech/type/opentype/cmap.jsp">
  56:  *      the <code>cmap</code> part of Adobe&#x2019;s OpenType Specification</a>
  57:  *
  58:  * @see <a href="http://developer.apple.com/fonts/TTRefMan/RM06/Chap6cmap.html">
  59:  *      the <code>cmap</code> section of Apple&#x2019;s TrueType Reference
  60:  *      Manual</a>
  61:  *
  62:  * @author Sascha Brawer (brawer@dandelis.ch)
  63:  */
  64: public abstract class CharGlyphMap
  65: {
  /* Platform identifiers, as read from the platform field of the
   * entries in the cmap subtable directory.
   */
  private static final int PLATFORM_UNICODE = 0;
  private static final int PLATFORM_MACINTOSH = 1;
  private static final int PLATFORM_MICROSOFT = 3;
  69: 
  70: 
  71:   /**
  72:    * Determines the glyph index for a given Unicode codepoint.  Users
  73:    * should be aware that the character-to-glyph mapping is not
  74:    * everything that is needed for full Unicode support.  For example,
  75:    * the <code>cmap</code> table is not able to synthesize accented
  76:    * glyphs from the canonical decomposition sequence, even if the
  77:    * font would contain a glyph for the composed form.
  78:    *
  79:    * @param ucs4 the Unicode codepoint in UCS-4 encoding. Surrogates
  80:    * (U+D800 to U+DFFF) cannot be passed, they must be mapped to
  81:    * UCS-4 first.
  82:    *
  83:    * @return the glyph index, or 0 if the font does not contain
  84:    * a glyph for this codepoint.
  85:    */
  86:   public abstract int getGlyph(int ucs4);
  87: 
  88: 
  89:   /**
  90:    * Reads a CharGlyphMap from an OpenType or TrueType <code>cmap</code>
  91:    * table. The current implementation works as follows:
  92:    *
  93:    * <p><ol><li>If the font has a type 4 cmap for the Unicode platform
  94:    * (encoding 0, 1, 2, 3 or 4), or a type 4 cmap for the Microsoft
  95:    * platform (encodings 1 or 10), that table is used to map Unicode
  96:    * codepoints to glyphs.  Most recent fonts, both for Macintosh and
  97:    * Windows, should provide such a table.</li>
  98:    *
  99:    * <li>Otherwise, if the font has any type 0 cmap for the Macintosh
 100:    * platform, a Unicode-to-glyph mapping is synthesized from certain
 101:    * type 0 cmaps. The current implementation collects mappings from
 102:    * Roman, Icelandic, Turkish, Croatian, Romanian, Eastern European,
 103:    * Cyrillic, Greek, Hebrew, Arabic and Farsi cmaps.</li></ol>
 104:    *
 105:    * @param buf a buffer whose position is right at the start
 106:    * of the entire <code>cmap</code> table, and whose limit
 107:    * is at its end.
 108:    *
 109:    * @return a concrete subclass of <code>CharGlyphMap</code>
 110:    * that performs the mapping.
 111:    *
 112:    * @see <a href=
 113:    * "http://partners.adobe.com/asn/tech/type/opentype/cmap.jsp"
 114:    * >the <code>cmap</code> part of Adobe&#x2019;s OpenType Specification</a>
 115:    *
 116:    * @see <a href=
 117:    * "http://developer.apple.com/fonts/TTRefMan/RM06/Chap6cmap.html"
 118:    * >the <code>cmap</code> section of Apple&#x2019;s TrueType Reference
 119:    * Manual</a>
 120:    */
 121:   public static CharGlyphMap forTable(ByteBuffer buf)
 122:   {
 123:     boolean hasType0 = false;
 124:     int start4 = -1, platform4 = 0, encoding4 = 0;
 125:     int start12 = -1, platform12 = 0, encoding12 = 0;
 126:     int version;
 127:     int numTables;
 128:     int tableStart = buf.position();
 129:     int limit = buf.limit();
 130:     int format, platform, language, encoding, length, offset;
 131: 
 132:     version = buf.getChar();
 133:     if (version != 0)
 134:       return null;
 135: 
 136:     numTables = buf.getChar();
 137:     for (int i = 0; i < numTables; i++)
 138:     {
 139:       buf.limit(limit).position(tableStart + 4 + i * 8);
 140:       platform = buf.getChar();
 141:       encoding = buf.getChar();
 142:       offset = tableStart + buf.getInt();
 143: 
 144:       buf.position(offset);
 145:       format = buf.getChar();
 146: 
 147:       switch (format)
 148:       {
 149:       case 0:
 150:         hasType0 = true;
 151:         break;
 152: 
 153:       case 4:
 154:         length = buf.getChar();
 155:         language = buf.getChar();
 156:         if ((start4 == -1)
 157:             && Type4.isSupported(platform, language, encoding))
 158:         {
 159:           start4 = offset;
 160:           platform4 = platform;
 161:           encoding4 = encoding;
 162:         }
 163:         break;
 164: 
 165:       case 12:
 166:         if ((start12 == -1) && Type12.isSupported(platform, encoding))
 167:         {
 168:           start12 = offset;
 169:           platform12 = platform;
 170:           encoding12 = encoding;
 171:         }
 172:         break;
 173:       }
 174:     }
 175: 
 176: 
 177:     if (start12 >= 0)
 178:     {
 179:       try
 180:       {
 181:         buf.limit(limit).position(start12);
 182:         return new Type12(buf, platform12, encoding12);
 183:       }
 184:       catch (Exception ex)
 185:       {
 186:         ex.printStackTrace();
 187:       }
 188:     }
 189: 
 190:     if (start4 >= 0)
 191:     {
 192:       try
 193:       {
 194:         buf.limit(limit).position(start4);
 195:         return Type4.readTable(buf, platform4, encoding4);
 196:       }
 197:       catch (Exception ex)
 198:       {
 199:       }
 200:     }
 201: 
 202:     if (hasType0)
 203:     {
 204:       try
 205:       {
 206:         buf.limit(limit).position(tableStart);
 207:         return new Type0(buf);
 208:       }
 209:       catch (Exception ex)
 210:       {
 211:       }
 212:     }
 213: 
 214:     return new Dummy();
 215:   }
 216: 
 217: 
 218:   /**
 219:    * A dummy mapping that maps anything to the undefined glyph.
 220:    * Used if no other cmap is understood in a font.
 221:    *
 222:    * @author Sascha Brawer (brawer@dandelis.ch)
 223:    */
 224:   private static final class Dummy
 225:     extends CharGlyphMap
 226:   {
 227:     public int getGlyph(int ucs4)
 228:     {
 229:       return 0;
 230:     }
 231:   }
 232: 
 233: 
 234:   /**
 235:    * A mapping from Unicode code points to glyph IDs through CMAP Type
 236:    * 0 tables. These tables have serious limitations: Only the first
 237:    * 256 glyphs can be addressed, and the source of the mapping is not
 238:    * Unicode, but an encoding used on the Macintosh.
 239:    *
 240:    * <p>However, some fonts have only a Type 0 cmap. In this case, we
 241:    * process all the Type 0 tables we understand, and establish
 242:    * a reversed glyph-to-Unicode mapping. When a glyph is requested
 243:    * for a given Unicode character, we perform a linear search on the
 244:    * reversed table to find the glyph which maps to the requested
 245:    * character. While not blazingly fast, this gives a reasonable
 246:    * fallback for old fonts.
 247:    *
 248:    * @author Sascha Brawer (brawer@dandelis.ch)
 249:    */
 250:   private static final class Type0
 251:     extends CharGlyphMap
 252:   {
 253:     /**
 254:      * An array whose <code>i</code>-th element indicates the
 255:      * Unicode code point of glyph <code>i</code> in the font.
 256:      */
 257:     private char[] glyphToUCS2 = new char[256];
 258: 
 259: 
 260:     /**
 261:      * A String whose <code>charAt(i)</code> is the Unicode character
 262:      * that corresponds to the codepoint <code>i + 126</code> in the
 263:      * MacOS Arabic encoding.
 264:      *
 265:      * @see <a href=
 266:      * "http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/ARABIC.TXT"
 267:      * >the Unicode mapping table for the MacOS Arabic encoding</a>
 268:      */
 269:     private static final String UPPER_ARABIC
 270:       = "\u007e\u0000\u00c4\u00a0\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1"
 271:       + "\u00e0\u00e2\u00e4\u06ba\u00ab\u00e7\u00e9\u00e8\u00ea\u00eb"
 272:       + "\u00ed\u2026\u00ee\u00ef\u00f1\u00f3\u00bb\u00f4\u00f6\u00f7"
 273:       + "\u00fa\u00f9\u00fb\u00fc\u0020\u0021\"\u0023\u0024\u066a"
 274:       + "\u0026\u0027\u0028\u0029\u002a\u002b\u060c\u002d\u002e\u002f"
 275:       + "\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669"
 276:       + "\u003a\u061b\u003c\u003d\u003e\u061f\u274a\u0621\u0622\u0623"
 277:       + "\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d"
 278:       + "\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637"
 279:       + "\u0638\u0639\u063a\u005b\\\u005d\u005e\u005f\u0640\u0641"
 280:       + "\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u064b"
 281:       + "\u064c\u064d\u064e\u064f\u0650\u0651\u0652\u067e\u0679\u0686"
 282:       + "\u06d5\u06a4\u06af\u0688\u0691\u007b\u007c\u007d\u0698\u06d2";
 283: 
 284: 
 285:     /**
 286:      * A String whose <code>charAt(i)</code> is the Unicode character
 287:      * that corresponds to the codepoint <code>i + 126</code> in the
 288:      * MacOS East European Roman encoding.
 289:      *
 290:      * @see <a href=
 291:      * "http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/CENTEURO.TXT"
 292:      * >the Unicode mapping table for the MacOS Central European
 293:      * encoding</a>
 294:      */
 295:     private static final String UPPER_EAST_EUROPEAN_ROMAN
 296:       = "\u007e\u0000\u00c4\u0100\u0101\u00c9\u0104\u00d6\u00dc\u00e1"
 297:       + "\u0105\u010c\u00e4\u010d\u0106\u0107\u00e9\u0179\u017a\u010e"
 298:       + "\u00ed\u010f\u0112\u0113\u0116\u00f3\u0117\u00f4\u00f6\u00f5"
 299:       + "\u00fa\u011a\u011b\u00fc\u2020\u00b0\u0118\u00a3\u00a7\u2022"
 300:       + "\u00b6\u00df\u00ae\u00a9\u2122\u0119\u00a8\u2260\u0123\u012e"
 301:       + "\u012f\u012a\u2264\u2265\u012b\u0136\u2202\u2211\u0142\u013b"
 302:       + "\u013c\u013d\u013e\u0139\u013a\u0145\u0146\u0143\u00ac\u221a"
 303:       + "\u0144\u0147\u2206\u00ab\u00bb\u2026\u00a0\u0148\u0150\u00d5"
 304:       + "\u0151\u014c\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u25ca"
 305:       + "\u014d\u0154\u0155\u0158\u2039\u203a\u0159\u0156\u0157\u0160"
 306:       + "\u201a\u201e\u0161\u015a\u015b\u00c1\u0164\u0165\u00cd\u017d"
 307:       + "\u017e\u016a\u00d3\u00d4\u016b\u016e\u00da\u016f\u0170\u0171"
 308:       + "\u0172\u0173\u00dd\u00fd\u0137\u017b\u0141\u017c\u0122\u02c7";
 309: 
 310: 
 311:     /**
 312:      * A String whose <code>charAt(i)</code> is the Unicode character
 313:      * that corresponds to the codepoint <code>i + 126</code> in the
 314:      * MacOS Roman encoding for the Croatian language.
 315:      *
 316:      * @see <a href=
 317:      * "http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/CROATIAN.TXT"
 318:      * >the Unicode mapping table for the MacOS Croatian encoding</a>
 319:      */
 320:     private static final String UPPER_CROATIAN
 321:       = "\u007e\u0000\u00c4\u00c5\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1"
 322:       + "\u00e0\u00e2\u00e4\u00e3\u00e5\u00e7\u00e9\u00e8\u00ea\u00eb"
 323:       + "\u00ed\u00ec\u00ee\u00ef\u00f1\u00f3\u00f2\u00f4\u00f6\u00f5"
 324:       + "\u00fa\u00f9\u00fb\u00fc\u2020\u00b0\u00a2\u00a3\u00a7\u2022"
 325:       + "\u00b6\u00df\u00ae\u0160\u2122\u00b4\u00a8\u2260\u017d\u00d8"
 326:       + "\u221e\u00b1\u2264\u2265\u2206\u00b5\u2202\u2211\u220f\u0161"
 327:       + "\u222b\u00aa\u00ba\u03a9\u017e\u00f8\u00bf\u00a1\u00ac\u221a"
 328:       + "\u0192\u2248\u0106\u00ab\u010c\u2026\u00a0\u00c0\u00c3\u00d5"
 329:       + "\u0152\u0153\u0110\u2014\u201c\u201d\u2018\u2019\u00f7\u25ca"
 330:       + "\uf8ff\u00a9\u2044\u20ac\u2039\u203a\u00c6\u00bb\u2013\u00b7"
 331:       + "\u201a\u201e\u2030\u00c2\u0107\u00c1\u010d\u00c8\u00cd\u00ce"
 332:       + "\u00cf\u00cc\u00d3\u00d4\u0111\u00d2\u00da\u00db\u00d9\u0131"
 333:       + "\u02c6\u02dc\u00af\u03c0\u00cb\u02da\u00b8\u00ca\u00e6\u02c7";
 334: 
 335: 
 336:     /**
 337:      * A String whose <code>charAt(i)</code> is the Unicode character
 338:      * that corresponds to the codepoint <code>i + 126</code> in the
 339:      * MacOS Cyrillic encoding.
 340:      *
 341:      * @see <a href=
 342:      * "http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/CYRILLIC.TXT"
 343:      * >the Unicode mapping table for the MacOS Cyrillic encoding</a>
 344:      */
 345:     private static final String UPPER_CYRILLIC
 346:       = "\u007e\u0000\u0410\u0411\u0412\u0413\u0414\u0415\u0416\u0417"
 347:       + "\u0418\u0419\u041a\u041b\u041c\u041d\u041e\u041f\u0420\u0421"
 348:       + "\u0422\u0423\u0424\u0425\u0426\u0427\u0428\u0429\u042a\u042b"
 349:       + "\u042c\u042d\u042e\u042f\u2020\u00b0\u0490\u00a3\u00a7\u2022"
 350:       + "\u00b6\u0406\u00ae\u00a9\u2122\u0402\u0452\u2260\u0403\u0453"
 351:       + "\u221e\u00b1\u2264\u2265\u0456\u00b5\u0491\u0408\u0404\u0454"
 352:       + "\u0407\u0457\u0409\u0459\u040a\u045a\u0458\u0405\u00ac\u221a"
 353:       + "\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u040b\u045b\u040c"
 354:       + "\u045c\u0455\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u201e"
 355:       + "\u040e\u045e\u040f\u045f\u2116\u0401\u0451\u044f\u0430\u0431"
 356:       + "\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u0439\u043a\u043b"
 357:       + "\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445"
 358:       + "\u0446\u0447\u0448\u0449\u044a\u044b\u044c\u044d\u044e\u20ac";
 359: 
 360: 
 361:     /**
 362:      * A String whose <code>charAt(i)</code> is the Unicode character
 363:      * that corresponds to the codepoint <code>i + 126</code> in the
 364:      * MacOS Arabic encoding with the Farsi language.
 365:      *
 366:      * @see <a href=
 367:      * "http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/FARSI.TXT"
 368:      * >the Unicode mapping table for the MacOS Farsi encoding</a>
 369:      */
 370:     private static final String UPPER_FARSI
 371:       = "\u007e\u0000\u00c4\u00a0\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1"
 372:       + "\u00e0\u00e2\u00e4\u06ba\u00ab\u00e7\u00e9\u00e8\u00ea\u00eb"
 373:       + "\u00ed\u2026\u00ee\u00ef\u00f1\u00f3\u00bb\u00f4\u00f6\u00f7"
 374:       + "\u00fa\u00f9\u00fb\u00fc\u0020\u0021\"\u0023\u0024\u066a"
 375:       + "\u0026\u0027\u0028\u0029\u002a\u002b\u060c\u002d\u002e\u002f"
 376:       + "\u06f0\u06f1\u06f2\u06f3\u06f4\u06f5\u06f6\u06f7\u06f8\u06f9"
 377:       + "\u003a\u061b\u003c\u003d\u003e\u061f\u274a\u0621\u0622\u0623"
 378:       + "\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d"
 379:       + "\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637"
 380:       + "\u0638\u0639\u063a\u005b\\\u005d\u005e\u005f\u0640\u0641"
 381:       + "\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u064b"
 382:       + "\u064c\u064d\u064e\u064f\u0650\u0651\u0652\u067e\u0679\u0686"
 383:       + "\u06d5\u06a4\u06af\u0688\u0691\u007b\u007c\u007d\u0698\u06d2";
 384: 
 385: 
 386:     /**
 387:      * A String whose <code>charAt(i)</code> is the Unicode character
 388:      * that corresponds to the codepoint <code>i + 126</code> in the
 389:      * MacOS Greek encoding.
 390:      *
 391:      * @see <a
 392:      * href="http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/GREEK.TXT"
 393:      * >the Unicode mapping table for the MacOS Greek encoding</a>
 394:      */
 395:     private static final String UPPER_GREEK
 396:       = "\u007e\u0000\u00c4\u00b9\u00b2\u00c9\u00b3\u00d6\u00dc\u0385"
 397:       + "\u00e0\u00e2\u00e4\u0384\u00a8\u00e7\u00e9\u00e8\u00ea\u00eb"
 398:       + "\u00a3\u2122\u00ee\u00ef\u2022\u00bd\u2030\u00f4\u00f6\u00a6"
 399:       + "\u20ac\u00f9\u00fb\u00fc\u2020\u0393\u0394\u0398\u039b\u039e"
 400:       + "\u03a0\u00df\u00ae\u00a9\u03a3\u03aa\u00a7\u2260\u00b0\u00b7"
 401:       + "\u0391\u00b1\u2264\u2265\u00a5\u0392\u0395\u0396\u0397\u0399"
 402:       + "\u039a\u039c\u03a6\u03ab\u03a8\u03a9\u03ac\u039d\u00ac\u039f"
 403:       + "\u03a1\u2248\u03a4\u00ab\u00bb\u2026\u00a0\u03a5\u03a7\u0386"
 404:       + "\u0388\u0153\u2013\u2015\u201c\u201d\u2018\u2019\u00f7\u0389"
 405:       + "\u038a\u038c\u038e\u03ad\u03ae\u03af\u03cc\u038f\u03cd\u03b1"
 406:       + "\u03b2\u03c8\u03b4\u03b5\u03c6\u03b3\u03b7\u03b9\u03be\u03ba"
 407:       + "\u03bb\u03bc\u03bd\u03bf\u03c0\u03ce\u03c1\u03c3\u03c4\u03b8"
 408:       + "\u03c9\u03c2\u03c7\u03c5\u03b6\u03ca\u03cb\u0390\u03b0\u00ad";
 409: 
 410: 
 411:     /**
 412:      * A String whose <code>charAt(i)</code> is the Unicode character
 413:      * that corresponds to the codepoint <code>i + 126</code> in the
 414:      * MacOS Hebrew encoding.
 415:      *
 416:      * <p>The codepoint 0x81 (HEBREW LIGATURE YIDDISH YOD YOD PATAH)
 417:      * has no composed Unicode equivalent, but is expressed as the
 418:      * sequence U+05F2 U+05B7 in Unicode. A similar situation exists
 419:      * with the codepoint 0xC0 (HEBREW LIGATURE LAMED HOLAM), which
 420:      * MacOS converts to U+F86A U+05DC U+05B9. To correctly deal
 421:      * with these sequences, we probably should synthesize a ligature
 422:      * table if a Hebrew font only provides a Type 0 CMAP.
 423:      *
 424:      * @see <a href=
 425:      * "http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/HEBREW.TXT"
 426:      * >the Unicode mapping table for the MacOS Hebrew encoding</a>
 427:      */
 428:     private static final String UPPER_HEBREW
 429:       = "\u007e\u0000\u00c4\u0000\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1"
 430:       + "\u00e0\u00e2\u00e4\u00e3\u00e5\u00e7\u00e9\u00e8\u00ea\u00eb"
 431:       + "\u00ed\u00ec\u00ee\u00ef\u00f1\u00f3\u00f2\u00f4\u00f6\u00f5"
 432:       + "\u00fa\u00f9\u00fb\u00fc\u0020\u0021\"\u0023\u0024\u0025"
 433:       + "\u20aa\u0027\u0029\u0028\u002a\u002b\u002c\u002d\u002e\u002f"
 434:       + "\u0030\u0031\u0032\u0033\u0034\u0035\u0036\u0037\u0038\u0039"
 435:       + "\u003a\u003b\u003c\u003d\u003e\u003f\u0000\u201e\uf89b\uf89c"
 436:       + "\uf89d\uf89e\u05bc\ufb4b\ufb35\u2026\u00a0\u05b8\u05b7\u05b5"
 437:       + "\u05b6\u05b4\u2013\u2014\u201c\u201d\u2018\u2019\ufb2a\ufb2b"
 438:       + "\u05bf\u05b0\u05b2\u05b1\u05bb\u05b9\u0000\u05b3\u05d0\u05d1"
 439:       + "\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05da\u05db"
 440:       + "\u05dc\u05dd\u05de\u05df\u05e0\u05e1\u05e2\u05e3\u05e4\u05e5"
 441:       + "\u05e6\u05e7\u05e8\u05e9\u05ea\u007d\u005d\u007b\u005b\u007c";
 442: 
 443: 
 444:     /**
 445:      * A String whose <code>charAt(i)</code> is the Unicode character
 446:      * that corresponds to the codepoint <code>i + 126</code> in the
 447:      * MacOS Roman encoding with the Icelandic language.
 448:      *
 449:      * @see <a href=
 450:      * "http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/ICELAND.TXT"
 451:      * >the Unicode mapping table for the MacOS Icelandic encoding</a>
 452:      */
 453:     private static final String UPPER_ICELANDIC
 454:       = "\u007e\u0000\u00c4\u00c5\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1"
 455:       + "\u00e0\u00e2\u00e4\u00e3\u00e5\u00e7\u00e9\u00e8\u00ea\u00eb"
 456:       + "\u00ed\u00ec\u00ee\u00ef\u00f1\u00f3\u00f2\u00f4\u00f6\u00f5"
 457:       + "\u00fa\u00f9\u00fb\u00fc\u00dd\u00b0\u00a2\u00a3\u00a7\u2022"
 458:       + "\u00b6\u00df\u00ae\u00a9\u2122\u00b4\u00a8\u2260\u00c6\u00d8"
 459:       + "\u221e\u00b1\u2264\u2265\u00a5\u00b5\u2202\u2211\u220f\u03c0"
 460:       + "\u222b\u00aa\u00ba\u03a9\u00e6\u00f8\u00bf\u00a1\u00ac\u221a"
 461:       + "\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u00c0\u00c3\u00d5"
 462:       + "\u0152\u0153\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u25ca"
 463:       + "\u00ff\u0178\u2044\u20ac\u00d0\u00f0\u00de\u00fe\u00fd\u00b7"
 464:       + "\u201a\u201e\u2030\u00c2\u00ca\u00c1\u00cb\u00c8\u00cd\u00ce"
 465:       + "\u00cf\u00cc\u00d3\u00d4\uf8ff\u00d2\u00da\u00db\u00d9\u0131"
 466:       + "\u02c6\u02dc\u00af\u02d8\u02d9\u02da\u00b8\u02dd\u02db\u02c7";
 467: 
 468: 
 469:     /**
 470:      * A String whose <code>charAt(i)</code> is the Unicode character
 471:      * that corresponds to the codepoint <code>i + 126</code> in the
 472:      * MacOS Roman encoding for most languages. Exceptions include
 473:      * Croatian, Icelandic, Romanian, and Turkish.
 474:      *
 475:      * @see <a
 476:      * href="http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/ROMAN.TXT"
 477:      * >the Unicode mapping table for the MacOS Roman encoding</a>
 478:      */
 479:     private static final String UPPER_ROMAN
 480:       = "\u007e\u0000\u00c4\u00c5\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1"
 481:       + "\u00e0\u00e2\u00e4\u00e3\u00e5\u00e7\u00e9\u00e8\u00ea\u00eb"
 482:       + "\u00ed\u00ec\u00ee\u00ef\u00f1\u00f3\u00f2\u00f4\u00f6\u00f5"
 483:       + "\u00fa\u00f9\u00fb\u00fc\u2020\u00b0\u00a2\u00a3\u00a7\u2022"
 484:       + "\u00b6\u00df\u00ae\u00a9\u2122\u00b4\u00a8\u2260\u00c6\u00d8"
 485:       + "\u221e\u00b1\u2264\u2265\u00a5\u00b5\u2202\u2211\u220f\u03c0"
 486:       + "\u222b\u00aa\u00ba\u03a9\u00e6\u00f8\u00bf\u00a1\u00ac\u221a"
 487:       + "\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u00c0\u00c3\u00d5"
 488:       + "\u0152\u0153\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u25ca"
 489:       + "\u00ff\u0178\u2044\u20ac\u2039\u203a\ufb01\ufb02\u2021\u00b7"
 490:       + "\u201a\u201e\u2030\u00c2\u00ca\u00c1\u00cb\u00c8\u00cd\u00ce"
 491:       + "\u00cf\u00cc\u00d3\u00d4\uf8ff\u00d2\u00da\u00db\u00d9\u0131"
 492:       + "\u02c6\u02dc\u00af\u02d8\u02d9\u02da\u00b8\u02dd\u02db\u02c7";
 493: 
 494: 
 495:     /**
 496:      * A String whose <code>charAt(i)</code> is the Unicode character
 497:      * that corresponds to the codepoint <code>i + 126</code> in the
 498:      * MacOS Roman encoding with the Romanian language.
 499:      *
 500:      * @see <a href=
 501:      * "http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/ROMANIAN.TXT"
 502:      * >the Unicode mapping table for the MacOS Romanian encoding</a>
 503:      */
 504:     private static final String UPPER_ROMANIAN
 505:       = "\u007e\u0000\u00c4\u00c5\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1"
 506:       + "\u00e0\u00e2\u00e4\u00e3\u00e5\u00e7\u00e9\u00e8\u00ea\u00eb"
 507:       + "\u00ed\u00ec\u00ee\u00ef\u00f1\u00f3\u00f2\u00f4\u00f6\u00f5"
 508:       + "\u00fa\u00f9\u00fb\u00fc\u2020\u00b0\u00a2\u00a3\u00a7\u2022"
 509:       + "\u00b6\u00df\u00ae\u00a9\u2122\u00b4\u00a8\u2260\u0102\u0218"
 510:       + "\u221e\u00b1\u2264\u2265\u00a5\u00b5\u2202\u2211\u220f\u03c0"
 511:       + "\u222b\u00aa\u00ba\u03a9\u0103\u0219\u00bf\u00a1\u00ac\u221a"
 512:       + "\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u00c0\u00c3\u00d5"
 513:       + "\u0152\u0153\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u25ca"
 514:       + "\u00ff\u0178\u2044\u20ac\u2039\u203a\u021a\u021b\u2021\u00b7"
 515:       + "\u201a\u201e\u2030\u00c2\u00ca\u00c1\u00cb\u00c8\u00cd\u00ce"
 516:       + "\u00cf\u00cc\u00d3\u00d4\uf8ff\u00d2\u00da\u00db\u00d9\u0131"
 517:       + "\u02c6\u02dc\u00af\u02d8\u02d9\u02da\u00b8\u02dd\u02db\u02c7";
 518: 
 519: 
 520:     /**
 521:      * A String whose <code>charAt(i)</code> is the Unicode character
 522:      * that corresponds to the codepoint <code>i + 126</code> in the
 523:      * MacOS Roman encoding with the Turkish language.
 524:      *
 525:      * @see <a href=
 526:      * "http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/TURKISH.TXT"
 527:      * >the Unicode mapping table for the MacOS Turkish encoding</a>
 528:      */
 529:     private static final String UPPER_TURKISH
 530:       = "\u007e\u0000\u00c4\u00c5\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1"
 531:       + "\u00e0\u00e2\u00e4\u00e3\u00e5\u00e7\u00e9\u00e8\u00ea\u00eb"
 532:       + "\u00ed\u00ec\u00ee\u00ef\u00f1\u00f3\u00f2\u00f4\u00f6\u00f5"
 533:       + "\u00fa\u00f9\u00fb\u00fc\u2020\u00b0\u00a2\u00a3\u00a7\u2022"
 534:       + "\u00b6\u00df\u00ae\u00a9\u2122\u00b4\u00a8\u2260\u00c6\u00d8"
 535:       + "\u221e\u00b1\u2264\u2265\u00a5\u00b5\u2202\u2211\u220f\u03c0"
 536:       + "\u222b\u00aa\u00ba\u03a9\u00e6\u00f8\u00bf\u00a1\u00ac\u221a"
 537:       + "\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u00c0\u00c3\u00d5"
 538:       + "\u0152\u0153\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u25ca"
 539:       + "\u00ff\u0178\u011e\u011f\u0130\u0131\u015e\u015f\u2021\u00b7"
 540:       + "\u201a\u201e\u2030\u00c2\u00ca\u00c1\u00cb\u00c8\u00cd\u00ce"
 541:       + "\u00cf\u00cc\u00d3\u00d4\uf8ff\u00d2\u00da\u00db\u00d9\uf8a0"
 542:       + "\u02c6\u02dc\u00af\u02d8\u02d9\u02da\u00b8\u02dd\u02db\u02c7";
 543: 
 544: 
 545:     /**
 546:      * Constructs a CharGlyphMap.Type0 from all type 0 cmaps provided
 547:      * by the font. The implementation is able to fuse multiple type
 548:      * 0 cmaps, such as the MacRoman, Turkish, Icelandic and Croatian
 549:      * encoding, into a single map from Unicode characters to glyph
 550:      * indices.
 551:      *
 552:      * @param buf a ByteBuffer whose position is right at the
 553:      * beginning of the entire cmap table of the font (<i>not</i>
 554:      * at some subtable).
 555:      */
 556:     public Type0(ByteBuffer buf)
 557:     {
 558:       int numTables;
 559:       int tableStart = buf.position();
 560:       int limit = buf.limit();
 561: 
 562:       /* The CMAP version must be 0. */
 563:       if (buf.getChar() != 0)
 564:         throw new IllegalStateException();
 565: 
 566:       numTables = buf.getChar();
 567:       for (int i = 0; i < numTables; i++)
 568:       {
 569:         buf.limit(limit).position(tableStart + 4 + i * 8);
 570:         int platform = buf.getChar();
 571:         int encoding = buf.getChar();
 572:         int offset = tableStart + buf.getInt();
 573: 
 574:         buf.position(offset);
 575:         int format = buf.getChar();
 576:         int length = buf.getChar();
 577:         buf.limit(offset + length);
 578:         int language = buf.getChar();
 579: 
 580:         if (format == 0)
 581:           readSingleTable(buf, platform, language, encoding);
 582:       }
 583:     }
 584: 
 585: 
 586:     /**
 587:      * Processes a CMAP Type 0 table whose platform, encoding and
 588:      * language are already known.
 589:      *
 590:      * @param buf the buffer to read the table from, positioned
 591:      *        right after the language tag.
 592:      */
 593:     private void readSingleTable(ByteBuffer buf,
 594:                                  int platform, int language,
 595:                                  int encoding)
 596:     {
 597:       String upper = getUpper129(platform, encoding, language);
 598:       if (upper == null)
 599:         return;
 600: 
 601:       /* Skip the MacOS codepoints [0 .. 31] because they do not
 602:        * correspond to any Unicode codepoint.
 603:        */
 604:       buf.position(buf.position() + 32);
 605: 
 606:       /* Irrespective of script and language, the MacOS codepoints
 607:        * [32 .. 126] correspond to the same Unicode codepoint.
 608:        */
 609:       for (int i = 32; i < 126; i++)
 610:         glyphToUCS2[buf.get() & 0xff] = (char) i;
 611: 
 612:       for (int i = 127; i < 256; i++)
 613:         glyphToUCS2[buf.get() & 0xff] = upper.charAt(i - 127);
 614: 
 615:       /* Glyph 0 is always the undefined character, which has
 616:        * no codepoint in Unicode.
 617:        */
 618:       glyphToUCS2[0] = 0;
 619:     }
 620: 
 621: 
 622:     /**
 623:      * Determines the glyph index for a given Unicode codepoint.
 624:      *
 625:      * @param ucs4 the Unicode codepoint in UCS-4 encoding.
 626:      *
 627:      * @return the glyph index, or 0 if the font does not contain
 628:      * a glyph for this codepoint.
 629:      */
 630:     public int getGlyph(int ucs4)
 631:     {
 632:       /* This linear search is not exactly super fast. However,
 633:        * only really ancient fonts have only a type 0 cmap,
 634:        * so it should not hurt in very many cases. If it shows
 635:        * to be a performance problem, one could do a binary search
 636:        * on a 256-entry table sorted by Unicode codepoint. The
 637:        * matching index of that table could then be used to look
 638:        * up the glyph ID at that position.
 639:        */
 640:       for (int i = 0; i < 256; i++)
 641:         if (glyphToUCS2[i] == ucs4)
 642:           return i;
 643:       return 0;
 644:     }
 645: 
 646: 
 647:     /**
 648:      * Returns a String whose <code>charAt(i)</code> is the Unicode
 649:      * character that corresponds to the codepoint <code>i +
 650:      * 126</code> in the encoding specified by the platform, script
 651:      * and language tag of a Type 0 CMAP.
 652:      *
 653:      * @param language the language tag in the cmap subtable.  For the
 654:      * Macintosh platform, this is 0 to indicate language-neutral
 655:      * encoding, or the MacOS language code <i>plus one.</i> The
 656:      * Apple documentation does not mention that one needs to be
 657:      * added, but the Adobe OpenType specification does.
 658:      *
 659:      * @return a String for mapping the top 129 characters to
 660:      * UCS-2. If <code>platform</code> is not <code>1</code>
 661:      * (indicating Macintosh), or if the combination of
 662:      * <code>script</code> and <code>language</code> is not
 663:      * recognized, <code>null</code> will be returned.
 664:      */
 665:     private static String getUpper129(int platform, int script, int language)
 666:     {
 667:       if (platform != PLATFORM_MACINTOSH)
 668:         return null;
 669: 
 670:       switch (script)
 671:       {
 672:       case 0: /* smRoman */
 673:         if (language == /* langIcelandic+1 */ 16)
 674:           return UPPER_ICELANDIC;
 675:         else if (language == /* langTurkish+1 */ 18)
 676:           return UPPER_TURKISH;
 677:         else if (language == /* langCroatian+1 */ 19)
 678:           return UPPER_CROATIAN;
 679:         else if (language == /* langRomanian+1 */ 38)
 680:           return UPPER_ROMANIAN;
 681:         else if (language == /* language-neutral */ 0)
 682:           return UPPER_ROMAN;
 683:         else
 684:           return null;
 685: 
 686:       case 4: /* smArabic */
 687:         if (language == /* langFarsi+1 */ 32)
 688:           return UPPER_FARSI;
 689:         else
 690:           return UPPER_ARABIC;
 691: 
 692:       case 5: /* smHebrew */
 693:         return UPPER_HEBREW;
 694: 
 695:       case 6: /* smGreek */
 696:         return UPPER_GREEK;
 697: 
 698:       case 7: /* smCyrillic */
 699:         return UPPER_CYRILLIC;
 700: 
 701:       case 29: /* smSlavic == smEastEurRoman */
 702:         return UPPER_EAST_EUROPEAN_ROMAN;
 703:       }
 704: 
 705:       return null;
 706:     }
 707:   }
 708: 
 709: 
 710:   /**
 711:    * A mapping from Unicode code points to glyph IDs through CMAP Type
 712:    * 4 tables. These tables are able to map two-byte encoded text
 713:    * to glyph IDs, such as Unicode Basic Multilingual Plane which
 714:    * contains U+0000 .. U+FFFE without surrogates.
 715:    *
 716:    * @author Sascha Brawer (brawer@dandelis.ch)
 717:    */
 718:   private static final class Type4
 719:     extends CharGlyphMap
 720:   {
 721:     /**
 722:      * Determines whether this implementation supports a combination
 723:      * of platform, language and encoding is supported for a type 4
 724:      * <code>cmap</code> table.
 725:      *
 726:      * <p>Currently, we support the following combinations:
 727:      *
 728:      * <ul><li>the Unicode platform in encodings 0, 1, 2, 3 and
 729:      * 4;</li>
 730:      *
 731:      * <li>the Microsoft platform in encodings 1 (Basic Multilingual
 732:      * Plane) and 10 (full Unicode).</li></ul>
 733:      *
 734:      * <p>Most recent Macintosh fonts provide a type 4
 735:      * <code>cmap</code> for Unicode. Microsoft recommends providing a
 736:      * type 4 <code>cmap</code> for encoding 1 of the Microsoft
 737:      * platform. The implementation of GNU Classpath supports both
 738:      * variants.
 739:      *
 740:      * <p>Not supported are ShiftJIS, Big5, Wansung, Johab, and other
 741:      * non-Unicode encodings. Text can easily be converted to Unicode
 742:      * using the java.nio.charset package.
 743:      */
 744:     static boolean isSupported(int platform, int language, int encoding)
 745:     {
 746:       switch (platform)
 747:       {
 748:       case PLATFORM_UNICODE:
 749:         return (encoding >= 0) && (encoding <= 4);
 750: 
 751:       case PLATFORM_MICROSOFT:
 752:         return (encoding == /* Basic Multilingual Plane */ 1)
 753:           || (encoding == /* Full Unicode */ 10);
 754:       }
 755: 
 756:       return false;
 757:     }
 758: 
 759: 
 760:     /**
 761:      * Processes a CMAP Type 4 table whose platform, encoding and
 762:      * language are already known. We understand the Unicode platform
 763:      * with encodings 0, 1, 2, 3 and 4, and the Microsoft platform
 764:      * with encodings 1 (Unicode BMP) and 10 (UCS-4).
 765:      *
 766:      * @param buf the buffer to read the table from, positioned at
 767:      * its beginning.
 768:      *
 769:      * @return a Type4 table, or <code>null</code> if the combination
 770:      * of platform and encoding is not understood.
 771:      */
 772:     static Type4 readTable(ByteBuffer buf,
 773:                            int platform, int encoding)
 774:     {
 775:       int tableStart = buf.position();
 776:       char format = buf.getChar();
 777:       int length = buf.getChar();
 778:       int language = buf.getChar();
 779: 
 780:       if ((format != 4) || !isSupported(platform, language, encoding))
 781:         throw new IllegalArgumentException();
 782: 
 783:       buf.limit(tableStart + length);
 784: 
 785:       int segCountX2 = buf.getChar();
 786:       int segCount = segCountX2 / 2;
 787:       int searchRange = buf.getChar();
 788:       int entrySelector = buf.getChar();
 789:       int rangeShift = buf.getChar();
 790: 
 791:       CharBuffer endCode, startCode, idRangeOffset_glyphID;
 792:       ShortBuffer idDelta;
 793: 
 794:       int pos = buf.position();
 795:       endCode = buf.asCharBuffer();
 796:       pos += segCountX2 + /* reservedPad */ 2;
 797: 
 798:       buf.position(pos);
 799:       startCode = buf.asCharBuffer();
 800:       pos += segCountX2;
 801: 
 802:       buf.position(pos);
 803:       idDelta = buf.asShortBuffer();
 804:       pos += segCountX2;
 805: 
 806:       buf.position(pos);
 807:       idRangeOffset_glyphID = buf.asCharBuffer();
 808: 
 809:       endCode.limit(segCount);
 810:       startCode.limit(segCount);
 811:       idDelta.limit(segCount);
 812:       idRangeOffset_glyphID.limit((buf.limit() - pos) / 2);
 813: 
 814:       return new Type4(segCount,
 815:                        endCode, startCode, idDelta,
 816:                        idRangeOffset_glyphID);
 817:     }
 818: 
 819: 
 820:     private CharBuffer lastChar;
 821:     private CharBuffer firstChar;
 822:     private ShortBuffer idDelta;
 823:     private CharBuffer rangeID;
 824:     private int numSegments;
 825: 
 826:     private Type4(int numSegments,
 827:                   CharBuffer lastChar, CharBuffer firstChar,
 828:                   ShortBuffer idDelta, CharBuffer rangeID)
 829:     {
 830:       this.numSegments = numSegments;
 831:       this.lastChar = lastChar;
 832:       this.firstChar = firstChar;
 833:       this.idDelta = idDelta;
 834:       this.rangeID = rangeID;
 835:     }
 836: 
 837: 
 838:     /**
 839:      * Determines the glyph index for a given Unicode codepoint.
 840:      *
 841:      * @param ucs4 the Unicode codepoint in UCS-4 encoding.
 842:      *
 843:      * @return the glyph index, or 0 if the font does not contain
 844:      * a glyph for this codepoint.
 845:      */
 846:     public int getGlyph(int ucs4)
 847:     {
 848:       char c, segStart;
 849:       int segment, idRangeOffset;
 850: 
 851:       if (ucs4 > 0xffff)
 852:         return 0;
 853: 
 854:       c = (char) ucs4;
 855:       segment = find(c);
 856:       segStart = firstChar.get(segment);
 857:       if ((c < segStart) || (c > lastChar.get(segment)))
 858:         return 0;
 859: 
 860:       /*
 861:        *      System.out.println("seg " + segment
 862:        *                 + ", range=" + (int) rangeID[segment]
 863:        *                 + ", delta=" + delta[segment]);
 864:        */
 865: 
 866:       idRangeOffset = rangeID.get(segment);
 867:       if (idRangeOffset == 0)
 868:         return (int) (char) (((int) c) + idDelta.get(segment));
 869:       int result = rangeID.get((idRangeOffset >> 1)
 870:                                + (c - segStart) + segment);
 871:       if (result == 0)
 872:         return 0;
 873:       return (int) (char) (result + idDelta.get(segment));
 874:     }
 875: 
 876: 
 877:     private int find(char c)
 878:     {
 879:       int min, max, mid;
 880: 
 881:       min = 0;
 882:       max = numSegments - 1;
 883:       mid = max >> 1;
 884: 
 885:       while (min < max)
 886:       {
 887:         // System.out.println("(" + min + "," + max + ") " + mid);
 888:         char val = lastChar.get(mid);
 889:         if (val == c)
 890:           break;
 891:         else if (val < c)
 892:           min = mid + 1;
 893:         else if (val > c)
 894:           max = mid;
 895:         mid = (min + max) >> 1;
 896:       }
 897: 
 898:       return mid;
 899:     }
 900:   }
 901: 
 902: 
 903:   /**
 904:    * A mapping from Unicode code points to glyph IDs through CMAP Type
 905:    * 12 tables. These tables are able to map four-byte encoded text
 906:    * to glyph IDs, such as Unicode UCS-4.
 907:    *
 908:    * @author Sascha Brawer (brawer@dandelis.ch)
 909:    */
 910:   private static final class Type12
 911:     extends CharGlyphMap
 912:   {
 913:     int numGroups;
 914:     IntBuffer data;
 915: 
 916: 
 917:     /**
 918:      * Determines whether this implementation supports a combination
 919:      * of platform and encoding for a type 12 <code>cmap</code> table.
 920:      *
 921:      * <p>Currently, we support the following combinations:
 922:      *
 923:      * <ul><li>the Unicode platform in encodings 0, 1, 2, 3 and
 924:      * 4;</li>
 925:      *
 926:      * <li>the Microsoft platform in encodings 1 (Basic Multilingual
 927:      * Plane) and 10 (full Unicode).</li></ul>
 928:      */
 929:     static boolean isSupported(int platform, int encoding)
 930:     {
 931:       switch (platform)
 932:       {
 933:       case PLATFORM_UNICODE:
 934:         return (encoding >= 0) && (encoding <= 4);
 935: 
 936:       case PLATFORM_MICROSOFT:
 937:         return (encoding == /* Basic Multilingual Plane */ 1)
 938:           || (encoding == /* Full Unicode */ 10);
 939:       }
 940: 
 941:       return false;
 942:     }
 943: 
 944: 
 945:     /**
 946:      * Constructs a <code>cmap</code> type 12 table whose platform and
 947:      * encoding are already known. We understand the Unicode platform
 948:      * with encodings 0, 1, 2, 3 and 4, and the Microsoft platform
 949:      * with encodings 1 (Unicode BMP) and 10 (UCS-4).
 950:      *
 951:      * @param buf the buffer to read the table from, positioned at
 952:      * its beginning.
 953:      */
 954:     Type12(ByteBuffer buf, int platform, int encoding)
 955:     {
 956:       int tableStart = buf.position();
 957:       int format = buf.getChar();
 958:       if ((format != 12) || !isSupported(platform, encoding))
 959:         throw new IllegalStateException();
 960: 
 961:       buf.getChar(); // skip reserved field
 962:       buf.limit(tableStart + buf.getInt());
 963:       int language = buf.getInt();
 964:       numGroups = buf.getInt();
 965:       data = buf.asIntBuffer();
 966:     }
 967: 
 968: 
 969:     /**
 970:      * Determines the glyph index for a given Unicode codepoint.  Users
 971:      * should be aware that the character-to-glyph mapping not not
 972:      * everything that is needed for full Unicode support.  For example,
 973:      * the <code>cmap</code> table is not able to synthesize accented
 974:      * glyphs from the canonical decomposition sequence, even if the
 975:      * font would contain a glyph for the composed form.
 976:      *
 977:      * @param ucs4 the Unicode codepoint in UCS-4 encoding. Surrogates
 978:      * (U+D800 to U+DFFF) cannot be passed, they must be mapped to
 979:      * UCS-4 first.
 980:      *
 981:      * @return the glyph index, or 0 if the font does not contain
 982:      * a glyph for this codepoint.
 983:      */
 984:     public int getGlyph(int ucs4)
 985:     {
 986:       int min, max, mid, startCharCode, endCharCode;
 987: 
 988:       min = 0;
 989:       max = numGroups - 1;
 990:       mid = max >> 1;
 991:       do
 992:       {
 993:         startCharCode = data.get(3 * mid);
 994:         endCharCode = data.get(3 * mid + 1);
 995: 
 996: 
 997:         /*
 998:         System.out.println("group " + mid + " (U+"
 999:         + Integer.toHexString(startCharCode)
1000:         + " .. U+" + Integer.toHexString(endCharCode)
1001:         + "): glyph " + (int) data.get(mid*3+2));
1002:         */
1003: 
1004:         if ((startCharCode <= ucs4)  && (ucs4 <= endCharCode))
1005:           return ucs4
1006:             - startCharCode
1007:             + /* startGlyphID */ data.get(mid * 3 + 2);
1008: 
1009:         if (endCharCode < ucs4)
1010:           min = mid + 1;
1011:         else
1012:           max = mid;
1013:         mid = (min + max) >> 1;
1014:       }
1015:       while (min < max);
1016: 
1017:       startCharCode = data.get(3 * mid);
1018:       endCharCode = data.get(3 * mid + 1);
1019:       if ((startCharCode <= ucs4)  && (ucs4 <= endCharCode))
1020:         return ucs4
1021:           - startCharCode
1022:           + /* startGlyphID */ data.get(mid * 3 + 2);
1023: 
1024:       return 0;
1025:     }
1026:   }
1027: }