001: /*
002: * Fast Infoset ver. 0.1 software ("Software")
003: *
004: * Copyright, 2004-2005 Sun Microsystems, Inc. All Rights Reserved.
005: *
006: * Software is licensed under the Apache License, Version 2.0 (the "License");
007: * you may not use this file except in compliance with the License. You may
008: * obtain a copy of the License at:
009: *
010: * http://www.apache.org/licenses/LICENSE-2.0
011: *
012: * Unless required by applicable law or agreed to in writing, software
013: * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
014: * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
015: * License for the specific language governing permissions and limitations.
016: *
017: * Sun supports and benefits from the global community of open source
018: * developers, and thanks the community for its important contributions and
019: * open standards-based technology, which Sun has adopted into many of its
020: * products.
021: *
022: * Please note that portions of Software may be provided with notices and
023: * open source licenses from such communities and third parties that govern the
024: * use of those portions, and any licenses granted hereunder do not alter any
025: * rights and obligations you may have under such open source licenses,
026: * however, the disclaimer of warranty and limitation of liability provisions
027: * in this License will apply to all Software in this distribution.
028: *
029: * You acknowledge that the Software is not designed, licensed or intended
030: * for use in the design, construction, operation or maintenance of any nuclear
031: * facility.
032: *
033: * Apache License
034: * Version 2.0, January 2004
035: * http://www.apache.org/licenses/
036: *
037: */
038:
039: package com.sun.xml.fastinfoset;
040:
/**
 * Octet-to-state lookup tables.
 *
 * <p>Each table is a 256-entry {@code int} array indexed by an unsigned octet
 * value ({@code 0x00}-{@code 0xFF}); the element is one of the state constants
 * declared next to that table, or {@link #STATE_ILLEGAL}. The tables are
 * populated once, in the static initializer at the bottom of this class, from
 * compact {@code *_RANGES} descriptions.
 *
 * <p>A {@code *_RANGES} array is a list of {@code {end, state}} pairs in
 * ascending {@code end} order. A pair assigns {@code state} to every octet from
 * one past the previous pair's {@code end} (or {@code 0x00} for the first pair)
 * up to and including its own {@code end}. The bit patterns in the comments
 * (written {@code %bbbbbbbb}) spell out the range each pair covers.
 *
 * <p>State numbering restarts for each family of tables, so constants that
 * belong to different tables may share a numeric value (for example
 * {@link #EII_NO_AIIS_INDEX_SMALL} and {@link #AII_INDEX_SMALL} are both 0).
 * Always interpret a looked-up value with the constants of the table it came
 * from.
 *
 * <p>The table arrays are ordinary mutable arrays; nothing in this class
 * prevents a caller from writing to them, so they must be treated as read-only.
 *
 * <p>NOTE(review): the tables are presumably indexed with the first octet of an
 * encoded item by the decoder; the callers are not visible in this file.
 */
public final class DecoderStateTables {
    /** Position of the inclusive upper bound inside a {@code {end, state}} pair. */
    private static final int RANGE_INDEX_END = 0;

    /** Position of the state value inside a {@code {end, state}} pair. */
    private static final int RANGE_INDEX_VALUE = 1;

    /** State stored for octets that no range of a table accepts. */
    public static final int STATE_ILLEGAL = 255;

    /** Declared alongside {@link #STATE_ILLEGAL}; no table in this class stores it. */
    public static final int STATE_UNSUPPORTED = 254;

    // EII child states (used by the DII and EII tables)
    public static final int EII_NO_AIIS_INDEX_SMALL = 0;
    public static final int EII_AIIS_INDEX_SMALL = 1;
    public static final int EII_INDEX_MEDIUM = 2;
    public static final int EII_INDEX_LARGE = 3;
    public static final int EII_NAMESPACES = 4;
    public static final int EII_LITERAL = 5;
    public static final int CII_UTF8_SMALL_LENGTH = 6;
    public static final int CII_UTF8_MEDIUM_LENGTH = 7;
    public static final int CII_UTF8_LARGE_LENGTH = 8;
    public static final int CII_UTF16_SMALL_LENGTH = 9;
    public static final int CII_UTF16_MEDIUM_LENGTH = 10;
    public static final int CII_UTF16_LARGE_LENGTH = 11;
    public static final int CII_RA = 12;
    public static final int CII_EA = 13;
    public static final int CII_INDEX_SMALL = 14;
    public static final int CII_INDEX_MEDIUM = 15;
    public static final int CII_INDEX_LARGE = 16;
    public static final int CII_INDEX_LARGE_LARGE = 17;
    public static final int COMMENT_II = 18;
    public static final int PROCESSING_INSTRUCTION_II = 19;
    public static final int DOCUMENT_TYPE_DECLARATION_II = 20;
    public static final int UNEXPANDED_ENTITY_REFERENCE_II = 21;
    public static final int TERMINATOR_SINGLE = 22;
    public static final int TERMINATOR_DOUBLE = 23;

    /** DII table: same element ranges as {@link #EII}, but no CII ranges and a document type declaration range. */
    public static final int[] DII = new int[256];

    private static final int[][] DII_RANGES = {
        // EII

        // %00000000 to %00011111 EII no attributes small index
        { 0x1F, EII_NO_AIIS_INDEX_SMALL },
        // %00100000 to %00100111 EII medium index
        { 0x27, EII_INDEX_MEDIUM },
        // %00101000 to %00101111 EII large index
        // %00110000 EII very large index
        // i.e. %00101000 to %00110000
        { 0x30, EII_INDEX_LARGE },
        // %00110001 to %00110111 ILLEGAL
        { 0x37, STATE_ILLEGAL },
        // %00111000 EII namespaces
        { 0x38, EII_NAMESPACES },
        // %00111001 to %00111011 ILLEGAL
        { 0x3B, STATE_ILLEGAL },
        // %00111100 EII literal (no prefix, no namespace)
        { 0x3C, EII_LITERAL },
        // %00111101 EII literal (no prefix, namespace)
        { 0x3D, EII_LITERAL },
        // %00111110 ILLEGAL
        { 0x3E, STATE_ILLEGAL },
        // %00111111 EII literal (prefix, namespace)
        { 0x3F, EII_LITERAL },

        // %01000000 to %01011111 EII attributes small index
        { 0x5F, EII_AIIS_INDEX_SMALL },
        // %01100000 to %01100111 EII attributes medium index
        { 0x67, EII_INDEX_MEDIUM },
        // %01101000 to %01101111 EII attributes large index
        // %01110000 EII attributes very large index
        // i.e. %01101000 to %01110000
        { 0x70, EII_INDEX_LARGE },
        // %01110001 to %01110111 ILLEGAL
        { 0x77, STATE_ILLEGAL },
        // %01111000 EII attributes namespaces
        { 0x78, EII_NAMESPACES },
        // %01111001 to %01111011 ILLEGAL
        { 0x7B, STATE_ILLEGAL },
        // %01111100 EII attributes literal (no prefix, no namespace)
        { 0x7C, EII_LITERAL },
        // %01111101 EII attributes literal (no prefix, namespace)
        { 0x7D, EII_LITERAL },
        // %01111110 ILLEGAL
        { 0x7E, STATE_ILLEGAL },
        // %01111111 EII attributes literal (prefix, namespace)
        { 0x7F, EII_LITERAL },

        // %10000000 to %11000011 ILLEGAL
        { 0xC3, STATE_ILLEGAL },
        // %11000100 to %11000111 document type declaration
        { 0xC7, DOCUMENT_TYPE_DECLARATION_II },
        // %11001000 to %11100000 ILLEGAL
        { 0xE0, STATE_ILLEGAL },
        // %11100001 processing instruction
        { 0xE1, PROCESSING_INSTRUCTION_II },
        // %11100010 comment
        { 0xE2, COMMENT_II },
        // %11100011 to %11101111 ILLEGAL
        { 0xEF, STATE_ILLEGAL },

        // Terminators

        // %11110000 single terminator
        { 0xF0, TERMINATOR_SINGLE },
        // %11110001 to %11111110 ILLEGAL
        { 0xFE, STATE_ILLEGAL },
        // %11111111 double terminator
        { 0xFF, TERMINATOR_DOUBLE } };

    /** EII table: element, character (CII), unexpanded entity reference, PI, comment and terminator states. */
    public static final int[] EII = new int[256];

    private static final int[][] EII_RANGES = {
        // EII

        // %00000000 to %00011111 EII no attributes small index
        { 0x1F, EII_NO_AIIS_INDEX_SMALL },
        // %00100000 to %00100111 EII medium index
        { 0x27, EII_INDEX_MEDIUM },
        // %00101000 to %00101111 EII large index
        // %00110000 EII very large index
        // i.e. %00101000 to %00110000
        { 0x30, EII_INDEX_LARGE },
        // %00110001 to %00110111 ILLEGAL
        { 0x37, STATE_ILLEGAL },
        // %00111000 EII namespaces
        { 0x38, EII_NAMESPACES },
        // %00111001 to %00111011 ILLEGAL
        { 0x3B, STATE_ILLEGAL },
        // %00111100 EII literal (no prefix, no namespace)
        { 0x3C, EII_LITERAL },
        // %00111101 EII literal (no prefix, namespace)
        { 0x3D, EII_LITERAL },
        // %00111110 ILLEGAL
        { 0x3E, STATE_ILLEGAL },
        // %00111111 EII literal (prefix, namespace)
        { 0x3F, EII_LITERAL },

        // %01000000 to %01011111 EII attributes small index
        { 0x5F, EII_AIIS_INDEX_SMALL },
        // %01100000 to %01100111 EII attributes medium index
        { 0x67, EII_INDEX_MEDIUM },
        // %01101000 to %01101111 EII attributes large index
        // %01110000 EII attributes very large index
        // i.e. %01101000 to %01110000
        { 0x70, EII_INDEX_LARGE },
        // %01110001 to %01110111 ILLEGAL
        { 0x77, STATE_ILLEGAL },
        // %01111000 EII attributes namespaces
        { 0x78, EII_NAMESPACES },
        // %01111001 to %01111011 ILLEGAL
        { 0x7B, STATE_ILLEGAL },
        // %01111100 EII attributes literal (no prefix, no namespace)
        { 0x7C, EII_LITERAL },
        // %01111101 EII attributes literal (no prefix, namespace)
        { 0x7D, EII_LITERAL },
        // %01111110 ILLEGAL
        { 0x7E, STATE_ILLEGAL },
        // %01111111 EII attributes literal (prefix, namespace)
        { 0x7F, EII_LITERAL },

        // CII

        // UTF-8 string

        // %10000000 to %10000001 CII UTF-8 no add to table small length
        { 0x81, CII_UTF8_SMALL_LENGTH },
        // %10000010 CII UTF-8 no add to table medium length
        { 0x82, CII_UTF8_MEDIUM_LENGTH },
        // %10000011 CII UTF-8 no add to table large length
        { 0x83, CII_UTF8_LARGE_LENGTH },

        // UTF-16 string

        // %10000100 to %10000101 CII UTF-16 no add to table small length
        { 0x85, CII_UTF16_SMALL_LENGTH },
        // %10000110 CII UTF-16 no add to table medium length
        { 0x86, CII_UTF16_MEDIUM_LENGTH },
        // %10000111 CII UTF-16 no add to table large length
        { 0x87, CII_UTF16_LARGE_LENGTH },

        // Restricted alphabet

        // %10001000 to %10001011 CII RA no add to table
        { 0x8B, CII_RA },

        // Encoding algorithm

        // %10001100 to %10001111 CII EA no add to table
        { 0x8F, CII_EA },

        // UTF-8 string, add to table

        // %10010000 to %10010001 CII UTF-8 add to table small length
        { 0x91, CII_UTF8_SMALL_LENGTH },
        // %10010010 CII UTF-8 add to table medium length
        { 0x92, CII_UTF8_MEDIUM_LENGTH },
        // %10010011 CII UTF-8 add to table large length
        { 0x93, CII_UTF8_LARGE_LENGTH },

        // UTF-16 string, add to table

        // %10010100 to %10010101 CII UTF-16 add to table small length
        { 0x95, CII_UTF16_SMALL_LENGTH },
        // %10010110 CII UTF-16 add to table medium length
        { 0x96, CII_UTF16_MEDIUM_LENGTH },
        // %10010111 CII UTF-16 add to table large length
        { 0x97, CII_UTF16_LARGE_LENGTH },

        // Restricted alphabet, add to table

        // %10011000 to %10011011 CII RA add to table
        { 0x9B, CII_RA },

        // Encoding algorithm, add to table

        // %10011100 to %10011111 CII EA add to table
        { 0x9F, CII_EA },

        // Index

        // %10100000 to %10101111 CII small index
        { 0xAF, CII_INDEX_SMALL },
        // %10110000 to %10110011 CII medium index
        { 0xB3, CII_INDEX_MEDIUM },
        // %10110100 to %10110111 CII large index
        { 0xB7, CII_INDEX_LARGE },
        // %10111000 CII very large index
        { 0xB8, CII_INDEX_LARGE_LARGE },

        // %10111001 to %11000111 ILLEGAL
        { 0xC7, STATE_ILLEGAL },
        // %11001000 to %11001011 unexpanded entity reference
        { 0xCB, UNEXPANDED_ENTITY_REFERENCE_II },
        // %11001100 to %11100000 ILLEGAL
        { 0xE0, STATE_ILLEGAL },
        // %11100001 processing instruction
        { 0xE1, PROCESSING_INSTRUCTION_II },
        // %11100010 comment
        { 0xE2, COMMENT_II },
        // %11100011 to %11101111 ILLEGAL
        { 0xEF, STATE_ILLEGAL },

        // Terminators

        // %11110000 single terminator
        { 0xF0, TERMINATOR_SINGLE },
        // %11110001 to %11111110 ILLEGAL
        { 0xFE, STATE_ILLEGAL },
        // %11111111 double terminator
        { 0xFF, TERMINATOR_DOUBLE } };

    // AII states
    public static final int AII_INDEX_SMALL = 0;
    public static final int AII_INDEX_MEDIUM = 1;
    public static final int AII_INDEX_LARGE = 2;
    public static final int AII_LITERAL = 3;
    public static final int AII_TERMINATOR_SINGLE = 4;
    public static final int AII_TERMINATOR_DOUBLE = 5;

    /** AII table: attribute name index/literal states and terminators. */
    public static final int[] AII = new int[256];

    private static final int[][] AII_RANGES = {
        // %00000000 to %00111111 AII small index
        { 0x3F, AII_INDEX_SMALL },
        // %01000000 to %01011111 AII medium index
        { 0x5F, AII_INDEX_MEDIUM },
        // %01100000 to %01101111 AII large index
        { 0x6F, AII_INDEX_LARGE },
        // %01110000 to %01110111 ILLEGAL
        { 0x77, STATE_ILLEGAL },
        // %01111000 AII literal (no prefix, no namespace)
        // %01111001 AII literal (no prefix, namespace)
        { 0x79, AII_LITERAL },
        // %01111010 ILLEGAL
        { 0x7A, STATE_ILLEGAL },
        // %01111011 AII literal (prefix, namespace)
        { 0x7B, AII_LITERAL },
        // %01111100 to %11101111 ILLEGAL
        { 0xEF, STATE_ILLEGAL },

        // Terminators

        // %11110000 single terminator
        { 0xF0, AII_TERMINATOR_SINGLE },
        // %11110001 to %11111110 ILLEGAL
        { 0xFE, STATE_ILLEGAL },
        // %11111111 double terminator
        { 0xFF, AII_TERMINATOR_DOUBLE } };

    // AII value states
    public static final int NISTRING_UTF8_SMALL_LENGTH = 0;
    public static final int NISTRING_UTF8_MEDIUM_LENGTH = 1;
    public static final int NISTRING_UTF8_LARGE_LENGTH = 2;
    public static final int NISTRING_UTF16_SMALL_LENGTH = 3;
    public static final int NISTRING_UTF16_MEDIUM_LENGTH = 4;
    public static final int NISTRING_UTF16_LARGE_LENGTH = 5;
    public static final int NISTRING_RA = 6;
    public static final int NISTRING_EA = 7;
    public static final int NISTRING_INDEX_SMALL = 8;
    public static final int NISTRING_INDEX_MEDIUM = 9;
    public static final int NISTRING_INDEX_LARGE = 10;
    public static final int NISTRING_EMPTY = 11;

    /** AII value table: string encoding/length, index and empty-value states. */
    public static final int[] NISTRING = new int[256];

    private static final int[][] NISTRING_RANGES = {
        // UTF-8 string

        // %00000000 to %00000111 UTF-8 no add to table small length
        { 0x07, NISTRING_UTF8_SMALL_LENGTH },
        // %00001000 UTF-8 no add to table medium length
        { 0x08, NISTRING_UTF8_MEDIUM_LENGTH },
        // %00001001 to %00001011 ILLEGAL
        { 0x0B, STATE_ILLEGAL },
        // %00001100 UTF-8 no add to table large length
        { 0x0C, NISTRING_UTF8_LARGE_LENGTH },
        // %00001101 to %00001111 ILLEGAL
        { 0x0F, STATE_ILLEGAL },

        // UTF-16 string

        // %00010000 to %00010111 UTF-16 no add to table small length
        { 0x17, NISTRING_UTF16_SMALL_LENGTH },
        // %00011000 UTF-16 no add to table medium length
        { 0x18, NISTRING_UTF16_MEDIUM_LENGTH },
        // %00011001 to %00011011 ILLEGAL
        { 0x1B, STATE_ILLEGAL },
        // %00011100 UTF-16 no add to table large length
        { 0x1C, NISTRING_UTF16_LARGE_LENGTH },
        // %00011101 to %00011111 ILLEGAL
        { 0x1F, STATE_ILLEGAL },

        // Restricted alphabet

        // %00100000 to %00101111 RA no add to table
        { 0x2F, NISTRING_RA },

        // Encoding algorithm

        // %00110000 to %00111111 EA no add to table
        { 0x3F, NISTRING_EA },

        // UTF-8 string, add to table

        // %01000000 to %01000111 UTF-8 add to table small length
        { 0x47, NISTRING_UTF8_SMALL_LENGTH },
        // %01001000 UTF-8 add to table medium length
        { 0x48, NISTRING_UTF8_MEDIUM_LENGTH },
        // %01001001 to %01001011 ILLEGAL
        { 0x4B, STATE_ILLEGAL },
        // %01001100 UTF-8 add to table large length
        { 0x4C, NISTRING_UTF8_LARGE_LENGTH },
        // %01001101 to %01001111 ILLEGAL
        { 0x4F, STATE_ILLEGAL },

        // UTF-16 string, add to table

        // %01010000 to %01010111 UTF-16 add to table small length
        { 0x57, NISTRING_UTF16_SMALL_LENGTH },
        // %01011000 UTF-16 add to table medium length
        { 0x58, NISTRING_UTF16_MEDIUM_LENGTH },
        // %01011001 to %01011011 ILLEGAL
        { 0x5B, STATE_ILLEGAL },
        // %01011100 UTF-16 add to table large length
        { 0x5C, NISTRING_UTF16_LARGE_LENGTH },
        // %01011101 to %01011111 ILLEGAL
        { 0x5F, STATE_ILLEGAL },

        // Restricted alphabet, add to table

        // %01100000 to %01101111 RA add to table
        { 0x6F, NISTRING_RA },

        // Encoding algorithm, add to table

        // %01110000 to %01111111 EA add to table
        { 0x7F, NISTRING_EA },

        // Index

        // %10000000 to %10111111 index small
        { 0xBF, NISTRING_INDEX_SMALL },
        // %11000000 to %11011111 index medium
        { 0xDF, NISTRING_INDEX_MEDIUM },
        // %11100000 to %11101111 index large
        { 0xEF, NISTRING_INDEX_LARGE },
        // %11110000 to %11111110 ILLEGAL
        { 0xFE, STATE_ILLEGAL },
        // %11111111 Empty value
        { 0xFF, NISTRING_EMPTY }, };

    // Identifying string states
    /* package */ static final int ISTRING_SMALL_LENGTH = 0;
    /* package */ static final int ISTRING_MEDIUM_LENGTH = 1;
    /* package */ static final int ISTRING_LARGE_LENGTH = 2;
    /* package */ static final int ISTRING_INDEX_SMALL = 3;
    /* package */ static final int ISTRING_INDEX_MEDIUM = 4;
    /* package */ static final int ISTRING_INDEX_LARGE = 5;

    /** Identifying string table: literal length class or index class. */
    /* package */ static final int[] ISTRING = new int[256];

    private static final int[][] ISTRING_RANGES = {
        // %00000000 to %00111111 small length
        { 0x3F, ISTRING_SMALL_LENGTH },
        // %01000000 medium length
        { 0x40, ISTRING_MEDIUM_LENGTH },
        // %01000001 to %01011111 ILLEGAL
        { 0x5F, STATE_ILLEGAL },
        // %01100000 large length
        { 0x60, ISTRING_LARGE_LENGTH },
        // %01100001 to %01111111 ILLEGAL
        { 0x7F, STATE_ILLEGAL },
        // %10000000 to %10111111 index small
        { 0xBF, ISTRING_INDEX_SMALL },
        // %11000000 to %11011111 index medium
        { 0xDF, ISTRING_INDEX_MEDIUM },
        // %11100000 to %11101111 index large
        { 0xEF, ISTRING_INDEX_LARGE },
        // %11110000 to %11111111 ILLEGAL
        { 0xFF, STATE_ILLEGAL }, };

    // Extra states used only by ISTRING_PREFIX_NAMESPACE; numbering continues
    // after the ISTRING_* states so both sets can be told apart in one table.
    // NOTE(review): lengths 3/5/29/36 presumably single out the reserved "xml"
    // and "xmlns" prefixes and their namespace names, and INDEX_ZERO a reserved
    // first table entry -- confirm against the decoder that consumes them.
    /* package */ static final int ISTRING_PREFIX_NAMESPACE_LENGTH_3 = 6;
    /* package */ static final int ISTRING_PREFIX_NAMESPACE_LENGTH_5 = 7;
    /* package */ static final int ISTRING_PREFIX_NAMESPACE_LENGTH_29 = 8;
    /* package */ static final int ISTRING_PREFIX_NAMESPACE_LENGTH_36 = 9;
    /* package */ static final int ISTRING_PREFIX_NAMESPACE_INDEX_ZERO = 10;

    /** Same layout as {@link #ISTRING}, with octets 0x02, 0x04, 0x1C, 0x23 and 0x80 given their own states. */
    /* package */ static final int[] ISTRING_PREFIX_NAMESPACE = new int[256];

    private static final int[][] ISTRING_PREFIX_NAMESPACE_RANGES = {
        // %00000000 to %00000001 small length
        { 0x01, ISTRING_SMALL_LENGTH },
        // %00000010 small length, ISTRING_PREFIX_NAMESPACE_LENGTH_3
        { 0x02, ISTRING_PREFIX_NAMESPACE_LENGTH_3 },
        // %00000011 small length
        { 0x03, ISTRING_SMALL_LENGTH },
        // %00000100 small length, ISTRING_PREFIX_NAMESPACE_LENGTH_5
        { 0x04, ISTRING_PREFIX_NAMESPACE_LENGTH_5 },
        // %00000101 to %00011011 small length
        { 0x1B, ISTRING_SMALL_LENGTH },
        // %00011100 small length, ISTRING_PREFIX_NAMESPACE_LENGTH_29
        { 0x1C, ISTRING_PREFIX_NAMESPACE_LENGTH_29 },
        // %00011101 to %00100010 small length
        { 0x22, ISTRING_SMALL_LENGTH },
        // %00100011 small length, ISTRING_PREFIX_NAMESPACE_LENGTH_36
        { 0x23, ISTRING_PREFIX_NAMESPACE_LENGTH_36 },
        // %00100100 to %00111111 small length
        { 0x3F, ISTRING_SMALL_LENGTH },
        // %01000000 medium length
        { 0x40, ISTRING_MEDIUM_LENGTH },
        // %01000001 to %01011111 ILLEGAL
        { 0x5F, STATE_ILLEGAL },
        // %01100000 large length
        { 0x60, ISTRING_LARGE_LENGTH },
        // %01100001 to %01111111 ILLEGAL
        { 0x7F, STATE_ILLEGAL },
        // %10000000 index small, 0
        { 0x80, ISTRING_PREFIX_NAMESPACE_INDEX_ZERO },
        // %10000001 to %10111111 index small
        { 0xBF, ISTRING_INDEX_SMALL },
        // %11000000 to %11011111 index medium
        { 0xDF, ISTRING_INDEX_MEDIUM },
        // %11100000 to %11101111 index large
        { 0xEF, ISTRING_INDEX_LARGE },
        // %11110000 to %11111111 ILLEGAL
        { 0xFF, STATE_ILLEGAL }, };

    // UTF-8 states
    /* package */ static final int UTF8_NCNAME_NCNAME = 0;
    /* package */ static final int UTF8_NCNAME_NCNAME_CHAR = 1;
    /* package */ static final int UTF8_TWO_BYTES = 2;
    /* package */ static final int UTF8_THREE_BYTES = 3;
    /* package */ static final int UTF8_FOUR_BYTES = 4;

    /** UTF-8 lead-octet table for names: ASCII name characters plus multi-byte sequence lengths. */
    /* package */ static final int[] UTF8_NCNAME = new int[256];

    private static final int[][] UTF8_NCNAME_RANGES = {

        // Basic Latin

        // %00000000 to %00101100 ILLEGAL
        { 0x2C, STATE_ILLEGAL },
        // '-' '.'
        // %00101101 to %00101110 [#x002D-#x002E]
        { 0x2E, UTF8_NCNAME_NCNAME_CHAR },
        // %00101111 ILLEGAL
        { 0x2F, STATE_ILLEGAL },
        // [0-9]
        // %00110000 to %00111001 [#x0030-#x0039]
        { 0x39, UTF8_NCNAME_NCNAME_CHAR },
        // %00111010 to %01000000 ILLEGAL
        { 0x40, STATE_ILLEGAL },
        // [A-Z]
        // %01000001 to %01011010 [#x0041-#x005A]
        { 0x5A, UTF8_NCNAME_NCNAME },
        // %01011011 to %01011110 ILLEGAL
        { 0x5E, STATE_ILLEGAL },
        // '_'
        // %01011111 [#x005F]
        { 0x5F, UTF8_NCNAME_NCNAME },
        // %01100000 ILLEGAL
        { 0x60, STATE_ILLEGAL },
        // [a-z]
        // %01100001 to %01111010 [#x0061-#x007A]
        { 0x7A, UTF8_NCNAME_NCNAME },
        // %01111011 to %01111111 ILLEGAL
        { 0x7F, STATE_ILLEGAL },

        // Two bytes

        // %10000000 to %11000001 ILLEGAL
        { 0xC1, STATE_ILLEGAL },
        // %11000010 to %11011111
        { 0xDF, UTF8_TWO_BYTES },

        // Three bytes

        // %11100000 to %11101111
        { 0xEF, UTF8_THREE_BYTES },

        // Four bytes

        // %11110000 to %11110111
        { 0xF7, UTF8_FOUR_BYTES },
        // %11111000 to %11111111 ILLEGAL
        { 0xFF, STATE_ILLEGAL } };

    // Used only by the UTF8 table. It has the same numeric value as
    // UTF8_NCNAME_NCNAME_CHAR; the multi-byte states are shared by both tables.
    /* package */ static final int UTF8_ONE_BYTE = 1;

    /** UTF-8 lead-octet table for character content: permitted single octets plus multi-byte sequence lengths. */
    /* package */ static final int[] UTF8 = new int[256];

    private static final int[][] UTF8_RANGES = {

        // Basic Latin

        // %00000000 to %00001000 ILLEGAL
        { 0x08, STATE_ILLEGAL },
        // CHARACTER TABULATION, LINE FEED
        // %00001001 to %00001010 [#x0009-#x000A]
        { 0x0A, UTF8_ONE_BYTE },
        // %00001011 to %00001100 ILLEGAL
        { 0x0C, STATE_ILLEGAL },
        // CARRIAGE RETURN
        // %00001101 [#x000D]
        { 0x0D, UTF8_ONE_BYTE },
        // %00001110 to %00011111 ILLEGAL
        { 0x1F, STATE_ILLEGAL },
        // %00100000 to %01111111
        { 0x7F, UTF8_ONE_BYTE },

        // Two bytes

        // %10000000 to %11000001 ILLEGAL
        { 0xC1, STATE_ILLEGAL },
        // %11000010 to %11011111
        { 0xDF, UTF8_TWO_BYTES },

        // Three bytes

        // %11100000 to %11101111
        { 0xEF, UTF8_THREE_BYTES },

        // Four bytes

        // %11110000 to %11110111
        { 0xF7, UTF8_FOUR_BYTES },
        // %11111000 to %11111111 ILLEGAL
        { 0xFF, STATE_ILLEGAL } };

    /**
     * Expands a range description into a lookup table.
     *
     * <p>Ranges are applied in array order. Each one writes its state into
     * every slot from the running start position up to and including its own
     * end bound, after which the start position moves to one past that bound.
     * Slots beyond the last range's end bound are left untouched.
     *
     * @param table  the array to fill; must be large enough to hold the
     *               largest end bound in {@code ranges}
     * @param ranges {@code {end, state}} pairs in ascending {@code end} order
     */
    private static void constructTable(int[] table, int[][] ranges) {
        int start = 0x00;
        for (int[] range : ranges) {
            final int end = range[RANGE_INDEX_END];
            final int value = range[RANGE_INDEX_VALUE];
            for (int i = start; i <= end; i++) {
                table[i] = value;
            }
            // The next range begins immediately after this one.
            start = end + 1;
        }
    }

    static {
        // DII
        constructTable(DII, DII_RANGES);

        // EII
        constructTable(EII, EII_RANGES);

        // AII
        constructTable(AII, AII_RANGES);

        // AII value
        constructTable(NISTRING, NISTRING_RANGES);

        // Identifying string
        constructTable(ISTRING, ISTRING_RANGES);

        // Identifying string, prefix/namespace variant
        constructTable(ISTRING_PREFIX_NAMESPACE,
                ISTRING_PREFIX_NAMESPACE_RANGES);

        // UTF-8 NCNAME states
        constructTable(UTF8_NCNAME, UTF8_NCNAME_RANGES);

        // UTF-8 states
        constructTable(UTF8, UTF8_RANGES);
    }

    /** Not instantiable: this class only holds static tables and constants. */
    private DecoderStateTables() {
    }
}
|