001: /*
002: * Copyright 2003 by Paulo Soares.
003: *
004: * The contents of this file are subject to the Mozilla Public License Version 1.1
005: * (the "License"); you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at http://www.mozilla.org/MPL/
007: *
008: * Software distributed under the License is distributed on an "AS IS" basis,
009: * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
010: * for the specific language governing rights and limitations under the License.
011: *
012: * The Original Code is 'iText, a free JAVA-PDF library'.
013: *
014: * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
015: * the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
016: * All Rights Reserved.
017: * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
018: * are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
019: *
020: * Contributor(s): all the names of the contributors are added in the source code
021: * where applicable.
022: *
023: * Alternatively, the contents of this file may be used under the terms of the
024: * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
025: * provisions of LGPL are applicable instead of those above. If you wish to
026: * allow use of your version of this file only under the terms of the LGPL
027: * License and not to allow others to use your version of this file under
028: * the MPL, indicate your decision by deleting the provisions above and
029: * replace them with the notice and other provisions required by the LGPL.
030: * If you do not delete the provisions above, a recipient may use your version
031: * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
032: *
033: * This library is free software; you can redistribute it and/or modify it
034: * under the terms of the MPL as stated above or under the terms of the GNU
035: * Library General Public License as published by the Free Software Foundation;
036: * either version 2 of the License, or any later version.
037: *
038: * This library is distributed in the hope that it will be useful, but WITHOUT
039: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
040: * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
041: * details.
042: *
043: * If you didn't download this code from the following link, you should check if
044: * you aren't using an obsolete version:
045: * http://www.lowagie.com/iText/
046: */
047: package com.lowagie.text.pdf;
048:
049: /** Shape arabic characters. This code was converted from a C version
050: * at www.pango.org.
051: *
052: * @author Paulo Soares (psoares@consiste.pt)
053: */
054: public class ArabicLigaturizer {
055:
056: static boolean isVowel(char s) {
057: return ((s >= 0x064B) && (s <= 0x0655)) || (s == 0x0670);
058: }
059:
060: static char charshape(char s, int which)
061: /* which 0=isolated 1=final 2=initial 3=medial */
062: {
063: int l, r, m;
064: if ((s >= 0x0621) && (s <= 0x06D3)) {
065: l = 0;
066: r = chartable.length - 1;
067: while (l <= r) {
068: m = (l + r) / 2;
069: if (s == chartable[m][0]) {
070: return chartable[m][which + 1];
071: } else if (s < chartable[m][0]) {
072: r = m - 1;
073: } else {
074: l = m + 1;
075: }
076: }
077: } else if (s >= 0xfef5 && s <= 0xfefb)
078: return (char) (s + which);
079: return s;
080: }
081:
082: static int shapecount(char s) {
083: int l, r, m;
084: if ((s >= 0x0621) && (s <= 0x06D3) && !isVowel(s)) {
085: l = 0;
086: r = chartable.length - 1;
087: while (l <= r) {
088: m = (l + r) / 2;
089: if (s == chartable[m][0]) {
090: return chartable[m].length - 1;
091: } else if (s < chartable[m][0]) {
092: r = m - 1;
093: } else {
094: l = m + 1;
095: }
096: }
097: } else if (s == ZWJ) {
098: return 4;
099: }
100: return 1;
101: }
102:
103: static int ligature(char newchar, charstruct oldchar) {
104: /* 0 == no ligature possible; 1 == vowel; 2 == two chars; 3 == Lam+Alef */
105: int retval = 0;
106:
107: if (oldchar.basechar == 0)
108: return 0;
109: if (isVowel(newchar)) {
110: retval = 1;
111: if ((oldchar.vowel != 0) && (newchar != SHADDA)) {
112: retval = 2; /* we eliminate the old vowel .. */
113: }
114: switch (newchar) {
115: case SHADDA:
116: if (oldchar.mark1 == 0) {
117: oldchar.mark1 = SHADDA;
118: } else {
119: return 0; /* no ligature possible */
120: }
121: break;
122: case HAMZABELOW:
123: switch (oldchar.basechar) {
124: case ALEF:
125: oldchar.basechar = ALEFHAMZABELOW;
126: retval = 2;
127: break;
128: case LAM_ALEF:
129: oldchar.basechar = LAM_ALEFHAMZABELOW;
130: retval = 2;
131: break;
132: default:
133: oldchar.mark1 = HAMZABELOW;
134: break;
135: }
136: break;
137: case HAMZAABOVE:
138: switch (oldchar.basechar) {
139: case ALEF:
140: oldchar.basechar = ALEFHAMZA;
141: retval = 2;
142: break;
143: case LAM_ALEF:
144: oldchar.basechar = LAM_ALEFHAMZA;
145: retval = 2;
146: break;
147: case WAW:
148: oldchar.basechar = WAWHAMZA;
149: retval = 2;
150: break;
151: case YEH:
152: case ALEFMAKSURA:
153: case FARSIYEH:
154: oldchar.basechar = YEHHAMZA;
155: retval = 2;
156: break;
157: default: /* whatever sense this may make .. */
158: oldchar.mark1 = HAMZAABOVE;
159: break;
160: }
161: break;
162: case MADDA:
163: switch (oldchar.basechar) {
164: case ALEF:
165: oldchar.basechar = ALEFMADDA;
166: retval = 2;
167: break;
168: }
169: break;
170: default:
171: oldchar.vowel = newchar;
172: break;
173: }
174: if (retval == 1) {
175: oldchar.lignum++;
176: }
177: return retval;
178: }
179: if (oldchar.vowel != 0) { /* if we already joined a vowel, we can't join a Hamza */
180: return 0;
181: }
182:
183: switch (oldchar.basechar) {
184: case LAM:
185: switch (newchar) {
186: case ALEF:
187: oldchar.basechar = LAM_ALEF;
188: oldchar.numshapes = 2;
189: retval = 3;
190: break;
191: case ALEFHAMZA:
192: oldchar.basechar = LAM_ALEFHAMZA;
193: oldchar.numshapes = 2;
194: retval = 3;
195: break;
196: case ALEFHAMZABELOW:
197: oldchar.basechar = LAM_ALEFHAMZABELOW;
198: oldchar.numshapes = 2;
199: retval = 3;
200: break;
201: case ALEFMADDA:
202: oldchar.basechar = LAM_ALEFMADDA;
203: oldchar.numshapes = 2;
204: retval = 3;
205: break;
206: }
207: break;
208: case 0:
209: oldchar.basechar = newchar;
210: oldchar.numshapes = shapecount(newchar);
211: retval = 1;
212: break;
213: }
214: return retval;
215: }
216:
217: static void copycstostring(StringBuffer string, charstruct s,
218: int level) {
219: /* s is a shaped charstruct; i is the index into the string */
220: if (s.basechar == 0)
221: return;
222:
223: string.append(s.basechar);
224: s.lignum--;
225: if (s.mark1 != 0) {
226: if ((level & ar_novowel) == 0) {
227: string.append(s.mark1);
228: s.lignum--;
229: } else {
230: s.lignum--;
231: }
232: }
233: if (s.vowel != 0) {
234: if ((level & ar_novowel) == 0) {
235: string.append(s.vowel);
236: s.lignum--;
237: } else { /* vowel elimination */
238: s.lignum--;
239: }
240: }
241: // while (s.lignum > 0) { /* NULL-insertion for Langbox-font */
242: // string[i] = 0;
243: // i++;
244: // (s.lignum)--;
245: // }
246: // return i;
247: }
248:
249: // return len
250: static void doublelig(StringBuffer string, int level)
251: /* Ok. We have presentation ligatures in our font. */
252: {
253: int len;
254: int olen = len = string.length();
255: int j = 0, si = 1;
256: char lapresult;
257:
258: while (si < olen) {
259: lapresult = 0;
260: if ((level & ar_composedtashkeel) != 0) {
261: switch (string.charAt(j)) {
262: case SHADDA:
263: switch (string.charAt(si)) {
264: case KASRA:
265: lapresult = 0xFC62;
266: break;
267: case FATHA:
268: lapresult = 0xFC60;
269: break;
270: case DAMMA:
271: lapresult = 0xFC61;
272: break;
273: case 0x064C:
274: lapresult = 0xFC5E;
275: break;
276: case 0x064D:
277: lapresult = 0xFC5F;
278: break;
279: }
280: break;
281: case KASRA:
282: if (string.charAt(si) == SHADDA)
283: lapresult = 0xFC62;
284: break;
285: case FATHA:
286: if (string.charAt(si) == SHADDA)
287: lapresult = 0xFC60;
288: break;
289: case DAMMA:
290: if (string.charAt(si) == SHADDA)
291: lapresult = 0xFC61;
292: break;
293: }
294: }
295:
296: if ((level & ar_lig) != 0) {
297: switch (string.charAt(j)) {
298: case 0xFEDF: /* LAM initial */
299: switch (string.charAt(si)) {
300: case 0xFE9E:
301: lapresult = 0xFC3F;
302: break; /* JEEM final */
303: case 0xFEA0:
304: lapresult = 0xFCC9;
305: break; /* JEEM medial */
306: case 0xFEA2:
307: lapresult = 0xFC40;
308: break; /* HAH final */
309: case 0xFEA4:
310: lapresult = 0xFCCA;
311: break; /* HAH medial */
312: case 0xFEA6:
313: lapresult = 0xFC41;
314: break; /* KHAH final */
315: case 0xFEA8:
316: lapresult = 0xFCCB;
317: break; /* KHAH medial */
318: case 0xFEE2:
319: lapresult = 0xFC42;
320: break; /* MEEM final */
321: case 0xFEE4:
322: lapresult = 0xFCCC;
323: break; /* MEEM medial */
324: }
325: break;
326: case 0xFE97: /* TEH inital */
327: switch (string.charAt(si)) {
328: case 0xFEA0:
329: lapresult = 0xFCA1;
330: break; /* JEEM medial */
331: case 0xFEA4:
332: lapresult = 0xFCA2;
333: break; /* HAH medial */
334: case 0xFEA8:
335: lapresult = 0xFCA3;
336: break; /* KHAH medial */
337: }
338: break;
339: case 0xFE91: /* BEH inital */
340: switch (string.charAt(si)) {
341: case 0xFEA0:
342: lapresult = 0xFC9C;
343: break; /* JEEM medial */
344: case 0xFEA4:
345: lapresult = 0xFC9D;
346: break; /* HAH medial */
347: case 0xFEA8:
348: lapresult = 0xFC9E;
349: break; /* KHAH medial */
350: }
351: break;
352: case 0xFEE7: /* NOON inital */
353: switch (string.charAt(si)) {
354: case 0xFEA0:
355: lapresult = 0xFCD2;
356: break; /* JEEM initial */
357: case 0xFEA4:
358: lapresult = 0xFCD3;
359: break; /* HAH medial */
360: case 0xFEA8:
361: lapresult = 0xFCD4;
362: break; /* KHAH medial */
363: }
364: break;
365:
366: case 0xFEE8: /* NOON medial */
367: switch (string.charAt(si)) {
368: case 0xFEAE:
369: lapresult = 0xFC8A;
370: break; /* REH final */
371: case 0xFEB0:
372: lapresult = 0xFC8B;
373: break; /* ZAIN final */
374: }
375: break;
376: case 0xFEE3: /* MEEM initial */
377: switch (string.charAt(si)) {
378: case 0xFEA0:
379: lapresult = 0xFCCE;
380: break; /* JEEM medial */
381: case 0xFEA4:
382: lapresult = 0xFCCF;
383: break; /* HAH medial */
384: case 0xFEA8:
385: lapresult = 0xFCD0;
386: break; /* KHAH medial */
387: case 0xFEE4:
388: lapresult = 0xFCD1;
389: break; /* MEEM medial */
390: }
391: break;
392:
393: case 0xFED3: /* FEH initial */
394: switch (string.charAt(si)) {
395: case 0xFEF2:
396: lapresult = 0xFC32;
397: break; /* YEH final */
398: }
399: break;
400:
401: default:
402: break;
403: } /* end switch string[si] */
404: }
405: if (lapresult != 0) {
406: string.setCharAt(j, lapresult);
407: len--;
408: si++; /* jump over one character */
409: /* we'll have to change this, too. */
410: } else {
411: j++;
412: string.setCharAt(j, string.charAt(si));
413: si++;
414: }
415: }
416: string.setLength(len);
417: }
418:
419: static boolean connects_to_left(charstruct a) {
420: return a.numshapes > 2;
421: }
422:
423: static void shape(char text[], StringBuffer string, int level) {
424: /* string is assumed to be empty and big enough.
425: * text is the original text.
426: * This routine does the basic arabic reshaping.
427: * *len the number of non-null characters.
428: *
429: * Note: We have to unshape each character first!
430: */
431: int join;
432: int which;
433: char nextletter;
434:
435: int p = 0; /* initialize for output */
436: charstruct oldchar = new charstruct();
437: charstruct curchar = new charstruct();
438: while (p < text.length) {
439: nextletter = text[p++];
440: //nextletter = unshape (nextletter);
441:
442: join = ligature(nextletter, curchar);
443: if (join == 0) { /* shape curchar */
444: int nc = shapecount(nextletter);
445: //(*len)++;
446: if (nc == 1) {
447: which = 0; /* final or isolated */
448: } else {
449: which = 2; /* medial or initial */
450: }
451: if (connects_to_left(oldchar)) {
452: which++;
453: }
454:
455: which = which % (curchar.numshapes);
456: curchar.basechar = charshape(curchar.basechar, which);
457:
458: /* get rid of oldchar */
459: copycstostring(string, oldchar, level);
460: oldchar = curchar; /* new values in oldchar */
461:
462: /* init new curchar */
463: curchar = new charstruct();
464: curchar.basechar = nextletter;
465: curchar.numshapes = nc;
466: curchar.lignum++;
467: // (*len) += unligature (&curchar, level);
468: } else if (join == 1) {
469: }
470: // else
471: // {
472: // (*len) += unligature (&curchar, level);
473: // }
474: // p = g_utf8_next_char (p);
475: }
476:
477: /* Handle last char */
478: if (connects_to_left(oldchar))
479: which = 1;
480: else
481: which = 0;
482: which = which % (curchar.numshapes);
483: curchar.basechar = charshape(curchar.basechar, which);
484:
485: /* get rid of oldchar */
486: copycstostring(string, oldchar, level);
487: copycstostring(string, curchar, level);
488: }
489:
490: static int arabic_shape(char src[], int srcoffset, int srclength,
491: char dest[], int destoffset, int destlength, int level) {
492: char str[] = new char[srclength];
493: for (int k = srclength + srcoffset - 1; k >= srcoffset; --k)
494: str[k - srcoffset] = src[k];
495: StringBuffer string = new StringBuffer(srclength);
496: shape(str, string, level);
497: if ((level & (ar_composedtashkeel | ar_lig)) != 0)
498: doublelig(string, level);
499: // string.reverse();
500: System.arraycopy(string.toString().toCharArray(), 0, dest,
501: destoffset, string.length());
502: return string.length();
503: }
504:
505: static void processNumbers(char text[], int offset, int length,
506: int options) {
507: int limit = offset + length;
508: if ((options & DIGITS_MASK) != 0) {
509: char digitBase = '\u0030'; // European digits
510: switch (options & DIGIT_TYPE_MASK) {
511: case DIGIT_TYPE_AN:
512: digitBase = '\u0660'; // Arabic-Indic digits
513: break;
514:
515: case DIGIT_TYPE_AN_EXTENDED:
516: digitBase = '\u06f0'; // Eastern Arabic-Indic digits (Persian and Urdu)
517: break;
518:
519: default:
520: break;
521: }
522:
523: switch (options & DIGITS_MASK) {
524: case DIGITS_EN2AN: {
525: int digitDelta = digitBase - '\u0030';
526: for (int i = offset; i < limit; ++i) {
527: char ch = text[i];
528: if (ch <= '\u0039' && ch >= '\u0030') {
529: text[i] += digitDelta;
530: }
531: }
532: }
533: break;
534:
535: case DIGITS_AN2EN: {
536: char digitTop = (char) (digitBase + 9);
537: int digitDelta = '\u0030' - digitBase;
538: for (int i = offset; i < limit; ++i) {
539: char ch = text[i];
540: if (ch <= digitTop && ch >= digitBase) {
541: text[i] += digitDelta;
542: }
543: }
544: }
545: break;
546:
547: case DIGITS_EN2AN_INIT_LR:
548: shapeToArabicDigitsWithContext(text, 0, length,
549: digitBase, false);
550: break;
551:
552: case DIGITS_EN2AN_INIT_AL:
553: shapeToArabicDigitsWithContext(text, 0, length,
554: digitBase, true);
555: break;
556:
557: default:
558: break;
559: }
560: }
561: }
562:
563: static void shapeToArabicDigitsWithContext(char[] dest, int start,
564: int length, char digitBase, boolean lastStrongWasAL) {
565: digitBase -= '0'; // move common adjustment out of loop
566:
567: int limit = start + length;
568: for (int i = start; i < limit; ++i) {
569: char ch = dest[i];
570: switch (BidiOrder.getDirection(ch)) {
571: case BidiOrder.L:
572: case BidiOrder.R:
573: lastStrongWasAL = false;
574: break;
575: case BidiOrder.AL:
576: lastStrongWasAL = true;
577: break;
578: case BidiOrder.EN:
579: if (lastStrongWasAL && ch <= '\u0039') {
580: dest[i] = (char) (ch + digitBase);
581: }
582: break;
583: default:
584: break;
585: }
586: }
587: }
588:
589: private static final char ALEF = 0x0627;
590: private static final char ALEFHAMZA = 0x0623;
591: private static final char ALEFHAMZABELOW = 0x0625;
592: private static final char ALEFMADDA = 0x0622;
593: private static final char LAM = 0x0644;
594: private static final char HAMZA = 0x0621;
595: private static final char TATWEEL = 0x0640;
596: private static final char ZWJ = 0x200D;
597:
598: private static final char HAMZAABOVE = 0x0654;
599: private static final char HAMZABELOW = 0x0655;
600:
601: private static final char WAWHAMZA = 0x0624;
602: private static final char YEHHAMZA = 0x0626;
603: private static final char WAW = 0x0648;
604: private static final char ALEFMAKSURA = 0x0649;
605: private static final char YEH = 0x064A;
606: private static final char FARSIYEH = 0x06CC;
607:
608: private static final char SHADDA = 0x0651;
609: private static final char KASRA = 0x0650;
610: private static final char FATHA = 0x064E;
611: private static final char DAMMA = 0x064F;
612: private static final char MADDA = 0x0653;
613:
614: private static final char LAM_ALEF = 0xFEFB;
615: private static final char LAM_ALEFHAMZA = 0xFEF7;
616: private static final char LAM_ALEFHAMZABELOW = 0xFEF9;
617: private static final char LAM_ALEFMADDA = 0xFEF5;
618:
619: private static final char chartable[][] = { { 0x0621, 0xFE80 }, /* HAMZA */
620: { 0x0622, 0xFE81, 0xFE82 }, /* ALEF WITH MADDA ABOVE */
621: { 0x0623, 0xFE83, 0xFE84 }, /* ALEF WITH HAMZA ABOVE */
622: { 0x0624, 0xFE85, 0xFE86 }, /* WAW WITH HAMZA ABOVE */
623: { 0x0625, 0xFE87, 0xFE88 }, /* ALEF WITH HAMZA BELOW */
624: { 0x0626, 0xFE89, 0xFE8A, 0xFE8B, 0xFE8C }, /* YEH WITH HAMZA ABOVE */
625: { 0x0627, 0xFE8D, 0xFE8E }, /* ALEF */
626: { 0x0628, 0xFE8F, 0xFE90, 0xFE91, 0xFE92 }, /* BEH */
627: { 0x0629, 0xFE93, 0xFE94 }, /* TEH MARBUTA */
628: { 0x062A, 0xFE95, 0xFE96, 0xFE97, 0xFE98 }, /* TEH */
629: { 0x062B, 0xFE99, 0xFE9A, 0xFE9B, 0xFE9C }, /* THEH */
630: { 0x062C, 0xFE9D, 0xFE9E, 0xFE9F, 0xFEA0 }, /* JEEM */
631: { 0x062D, 0xFEA1, 0xFEA2, 0xFEA3, 0xFEA4 }, /* HAH */
632: { 0x062E, 0xFEA5, 0xFEA6, 0xFEA7, 0xFEA8 }, /* KHAH */
633: { 0x062F, 0xFEA9, 0xFEAA }, /* DAL */
634: { 0x0630, 0xFEAB, 0xFEAC }, /* THAL */
635: { 0x0631, 0xFEAD, 0xFEAE }, /* REH */
636: { 0x0632, 0xFEAF, 0xFEB0 }, /* ZAIN */
637: { 0x0633, 0xFEB1, 0xFEB2, 0xFEB3, 0xFEB4 }, /* SEEN */
638: { 0x0634, 0xFEB5, 0xFEB6, 0xFEB7, 0xFEB8 }, /* SHEEN */
639: { 0x0635, 0xFEB9, 0xFEBA, 0xFEBB, 0xFEBC }, /* SAD */
640: { 0x0636, 0xFEBD, 0xFEBE, 0xFEBF, 0xFEC0 }, /* DAD */
641: { 0x0637, 0xFEC1, 0xFEC2, 0xFEC3, 0xFEC4 }, /* TAH */
642: { 0x0638, 0xFEC5, 0xFEC6, 0xFEC7, 0xFEC8 }, /* ZAH */
643: { 0x0639, 0xFEC9, 0xFECA, 0xFECB, 0xFECC }, /* AIN */
644: { 0x063A, 0xFECD, 0xFECE, 0xFECF, 0xFED0 }, /* GHAIN */
645: { 0x0640, 0x0640, 0x0640, 0x0640, 0x0640 }, /* TATWEEL */
646: { 0x0641, 0xFED1, 0xFED2, 0xFED3, 0xFED4 }, /* FEH */
647: { 0x0642, 0xFED5, 0xFED6, 0xFED7, 0xFED8 }, /* QAF */
648: { 0x0643, 0xFED9, 0xFEDA, 0xFEDB, 0xFEDC }, /* KAF */
649: { 0x0644, 0xFEDD, 0xFEDE, 0xFEDF, 0xFEE0 }, /* LAM */
650: { 0x0645, 0xFEE1, 0xFEE2, 0xFEE3, 0xFEE4 }, /* MEEM */
651: { 0x0646, 0xFEE5, 0xFEE6, 0xFEE7, 0xFEE8 }, /* NOON */
652: { 0x0647, 0xFEE9, 0xFEEA, 0xFEEB, 0xFEEC }, /* HEH */
653: { 0x0648, 0xFEED, 0xFEEE }, /* WAW */
654: { 0x0649, 0xFEEF, 0xFEF0, 0xFBE8, 0xFBE9 }, /* ALEF MAKSURA */
655: { 0x064A, 0xFEF1, 0xFEF2, 0xFEF3, 0xFEF4 }, /* YEH */
656: { 0x0671, 0xFB50, 0xFB51 }, /* ALEF WASLA */
657: { 0x0679, 0xFB66, 0xFB67, 0xFB68, 0xFB69 }, /* TTEH */
658: { 0x067A, 0xFB5E, 0xFB5F, 0xFB60, 0xFB61 }, /* TTEHEH */
659: { 0x067B, 0xFB52, 0xFB53, 0xFB54, 0xFB55 }, /* BEEH */
660: { 0x067E, 0xFB56, 0xFB57, 0xFB58, 0xFB59 }, /* PEH */
661: { 0x067F, 0xFB62, 0xFB63, 0xFB64, 0xFB65 }, /* TEHEH */
662: { 0x0680, 0xFB5A, 0xFB5B, 0xFB5C, 0xFB5D }, /* BEHEH */
663: { 0x0683, 0xFB76, 0xFB77, 0xFB78, 0xFB79 }, /* NYEH */
664: { 0x0684, 0xFB72, 0xFB73, 0xFB74, 0xFB75 }, /* DYEH */
665: { 0x0686, 0xFB7A, 0xFB7B, 0xFB7C, 0xFB7D }, /* TCHEH */
666: { 0x0687, 0xFB7E, 0xFB7F, 0xFB80, 0xFB81 }, /* TCHEHEH */
667: { 0x0688, 0xFB88, 0xFB89 }, /* DDAL */
668: { 0x068C, 0xFB84, 0xFB85 }, /* DAHAL */
669: { 0x068D, 0xFB82, 0xFB83 }, /* DDAHAL */
670: { 0x068E, 0xFB86, 0xFB87 }, /* DUL */
671: { 0x0691, 0xFB8C, 0xFB8D }, /* RREH */
672: { 0x0698, 0xFB8A, 0xFB8B }, /* JEH */
673: { 0x06A4, 0xFB6A, 0xFB6B, 0xFB6C, 0xFB6D }, /* VEH */
674: { 0x06A6, 0xFB6E, 0xFB6F, 0xFB70, 0xFB71 }, /* PEHEH */
675: { 0x06A9, 0xFB8E, 0xFB8F, 0xFB90, 0xFB91 }, /* KEHEH */
676: { 0x06AD, 0xFBD3, 0xFBD4, 0xFBD5, 0xFBD6 }, /* NG */
677: { 0x06AF, 0xFB92, 0xFB93, 0xFB94, 0xFB95 }, /* GAF */
678: { 0x06B1, 0xFB9A, 0xFB9B, 0xFB9C, 0xFB9D }, /* NGOEH */
679: { 0x06B3, 0xFB96, 0xFB97, 0xFB98, 0xFB99 }, /* GUEH */
680: { 0x06BA, 0xFB9E, 0xFB9F }, /* NOON GHUNNA */
681: { 0x06BB, 0xFBA0, 0xFBA1, 0xFBA2, 0xFBA3 }, /* RNOON */
682: { 0x06BE, 0xFBAA, 0xFBAB, 0xFBAC, 0xFBAD }, /* HEH DOACHASHMEE */
683: { 0x06C0, 0xFBA4, 0xFBA5 }, /* HEH WITH YEH ABOVE */
684: { 0x06C1, 0xFBA6, 0xFBA7, 0xFBA8, 0xFBA9 }, /* HEH GOAL */
685: { 0x06C5, 0xFBE0, 0xFBE1 }, /* KIRGHIZ OE */
686: { 0x06C6, 0xFBD9, 0xFBDA }, /* OE */
687: { 0x06C7, 0xFBD7, 0xFBD8 }, /* U */
688: { 0x06C8, 0xFBDB, 0xFBDC }, /* YU */
689: { 0x06C9, 0xFBE2, 0xFBE3 }, /* KIRGHIZ YU */
690: { 0x06CB, 0xFBDE, 0xFBDF }, /* VE */
691: { 0x06CC, 0xFBFC, 0xFBFD, 0xFBFE, 0xFBFF }, /* FARSI YEH */
692: { 0x06D0, 0xFBE4, 0xFBE5, 0xFBE6, 0xFBE7 }, /* E */
693: { 0x06D2, 0xFBAE, 0xFBAF }, /* YEH BARREE */
694: { 0x06D3, 0xFBB0, 0xFBB1 } /* YEH BARREE WITH HAMZA ABOVE */
695: };
696:
697: public static final int ar_nothing = 0x0;
698: public static final int ar_novowel = 0x1;
699: public static final int ar_composedtashkeel = 0x4;
700: public static final int ar_lig = 0x8;
701: /**
702: * Digit shaping option: Replace European digits (U+0030...U+0039) by Arabic-Indic digits.
703: */
704: public static final int DIGITS_EN2AN = 0x20;
705:
706: /**
707: * Digit shaping option: Replace Arabic-Indic digits by European digits (U+0030...U+0039).
708: */
709: public static final int DIGITS_AN2EN = 0x40;
710:
711: /**
712: * Digit shaping option:
713: * Replace European digits (U+0030...U+0039) by Arabic-Indic digits
714: * if the most recent strongly directional character
715: * is an Arabic letter (its Bidi direction value is RIGHT_TO_LEFT_ARABIC).
716: * The initial state at the start of the text is assumed to be not an Arabic,
717: * letter, so European digits at the start of the text will not change.
718: * Compare to DIGITS_ALEN2AN_INIT_AL.
719: */
720: public static final int DIGITS_EN2AN_INIT_LR = 0x60;
721:
722: /**
723: * Digit shaping option:
724: * Replace European digits (U+0030...U+0039) by Arabic-Indic digits
725: * if the most recent strongly directional character
726: * is an Arabic letter (its Bidi direction value is RIGHT_TO_LEFT_ARABIC).
727: * The initial state at the start of the text is assumed to be an Arabic,
728: * letter, so European digits at the start of the text will change.
729: * Compare to DIGITS_ALEN2AN_INT_LR.
730: */
731: public static final int DIGITS_EN2AN_INIT_AL = 0x80;
732:
733: /** Not a valid option value. */
734: private static final int DIGITS_RESERVED = 0xa0;
735:
736: /**
737: * Bit mask for digit shaping options.
738: */
739: public static final int DIGITS_MASK = 0xe0;
740:
741: /**
742: * Digit type option: Use Arabic-Indic digits (U+0660...U+0669).
743: */
744: public static final int DIGIT_TYPE_AN = 0;
745:
746: /**
747: * Digit type option: Use Eastern (Extended) Arabic-Indic digits (U+06f0...U+06f9).
748: */
749: public static final int DIGIT_TYPE_AN_EXTENDED = 0x100;
750:
751: /**
752: * Bit mask for digit type options.
753: */
754: public static final int DIGIT_TYPE_MASK = 0x0100; // 0x3f00?
755:
756: static class charstruct {
757: char basechar;
758: char mark1; /* has to be initialized to zero */
759: char vowel;
760: int lignum; /* is a ligature with lignum aditional characters */
761: int numshapes = 1;
762: };
763:
764: }
|