Source Code Cross Referenced for TurtleParser.java in  » RSS-RDF » sesame » org » openrdf » rio » turtle » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » RSS RDF » sesame » org.openrdf.rio.turtle 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


0001:        /*
0002:         * Copyright Aduna (http://www.aduna-software.com/) (c) 1997-2007.
0003:         *
0004:         * Licensed under the Aduna BSD-style license.
0005:         */
0006:        package org.openrdf.rio.turtle;
0007:
0008:        import java.io.IOException;
0009:        import java.io.InputStream;
0010:        import java.io.InputStreamReader;
0011:        import java.io.LineNumberReader;
0012:        import java.io.PushbackReader;
0013:        import java.io.Reader;
0014:        import java.io.UnsupportedEncodingException;
0015:
0016:        import info.aduna.text.ASCIIUtil;
0017:
0018:        import org.openrdf.model.BNode;
0019:        import org.openrdf.model.Literal;
0020:        import org.openrdf.model.Resource;
0021:        import org.openrdf.model.Statement;
0022:        import org.openrdf.model.URI;
0023:        import org.openrdf.model.Value;
0024:        import org.openrdf.model.ValueFactory;
0025:        import org.openrdf.model.impl.ValueFactoryImpl;
0026:        import org.openrdf.model.vocabulary.RDF;
0027:        import org.openrdf.model.vocabulary.XMLSchema;
0028:        import org.openrdf.rio.RDFFormat;
0029:        import org.openrdf.rio.RDFHandlerException;
0030:        import org.openrdf.rio.RDFParseException;
0031:        import org.openrdf.rio.helpers.RDFParserBase;
0032:
0033:        /**
0034:         * RDF parser for <a href="http://www.dajobe.org/2004/01/turtle/">Turtle</a>
0035:         * files. This parser is not thread-safe, therefore its public methods are
0036:         * synchronized.
0037:         * <p>
0038:         * This implementation is based on the 2006/01/02 version of the Turtle
0039:         * specification, with slight deviations:
0040:         * <ul>
0041:         * <li>Normalization of integer, floating point and boolean values is dependent
0042:         * on the specified datatype handling. According to the specification, integers
0043:         * and booleans should be normalized, but floats should not.</li>
0044:         * <li>Comments can be used anywhere in the document, and extend to the end of
0045:         * the line. The Turtle grammar doesn't allow comments to be used inside triple
0046:         * constructs that extend over multiple lines, but the author's own parser
0047:         * deviates from this too.</li>
0048:         * </ul>
0049:         * 
0050:         * @author Arjohn Kampman
0051:         */
0052:        public class TurtleParser extends RDFParserBase {
0053:
0054:            /*-----------*
0055:             * Variables *
0056:             *-----------*/
0057:
0058:            private LineNumberReader lineReader; // line-counting wrapper around the input; created in parse(Reader, String) with numbering starting at 1
0059:
0060:            private PushbackReader reader; // pushback reader (capacity 2) layered on top of lineReader; created in parse(Reader, String)
0061:
0062:            private Resource subject; // subject of the triple currently being parsed; reset to null at the end of parseTriples()
0063:
0064:            private URI predicate; // predicate of the triple currently being parsed; reset to null at the end of parseTriples()
0065:
0066:            private Value object; // most recently parsed object value; reset to null at the end of parseTriples()
0067:
0068:            /*--------------*
0069:             * Constructors *
0070:             *--------------*/
0071:
/**
 * Constructs a TurtleParser whose RDF model objects are created through a
 * {@link ValueFactoryImpl}.
 */
public TurtleParser() {
    super();
}
0079:
/**
 * Constructs a TurtleParser whose RDF model objects are created through the
 * given ValueFactory.
 *
 * @param valueFactory
 *        The ValueFactory handed to the base parser.
 */
public TurtleParser(ValueFactory valueFactory) {
    super(valueFactory);
}
0090:
0091:            /*---------*
0092:             * Methods *
0093:             *---------*/
0094:
0095:            public RDFFormat getRDFFormat() {
0096:                return RDFFormat.TURTLE;
0097:            }
0098:
0099:            /**
0100:             * Implementation of the <tt>parse(InputStream, String)</tt> method defined
0101:             * in the RDFParser interface.
0102:             * 
0103:             * @param in
0104:             *        The InputStream from which to read the data, must not be
0105:             *        <tt>null</tt>. The InputStream is supposed to contain UTF-8
0106:             *        encoded Unicode characters, as per the Turtle specification.
0107:             * @param baseURI
0108:             *        The URI associated with the data in the InputStream, must not be
0109:             *        <tt>null</tt>.
0110:             * @throws IOException
0111:             *         If an I/O error occurred while data was read from the InputStream.
0112:             * @throws RDFParseException
0113:             *         If the parser has found an unrecoverable parse error.
0114:             * @throws RDFHandlerException
0115:             *         If the configured statement handler encountered an unrecoverable
0116:             *         error.
0117:             * @throws IllegalArgumentException
0118:             *         If the supplied input stream or base URI is <tt>null</tt>.
0119:             */
0120:            public synchronized void parse(InputStream in, String baseURI)
0121:                    throws IOException, RDFParseException, RDFHandlerException {
0122:                if (in == null) {
0123:                    throw new IllegalArgumentException(
0124:                            "Input stream must not be 'null'");
0125:                }
0126:                // Note: baseURI will be checked in parse(Reader, String)
0127:
0128:                try {
0129:                    parse(new InputStreamReader(in, "UTF-8"), baseURI);
0130:                } catch (UnsupportedEncodingException e) {
0131:                    // Every platform should support the UTF-8 encoding...
0132:                    throw new RuntimeException(e);
0133:                }
0134:            }
0135:
0136:            /**
0137:             * Implementation of the <tt>parse(Reader, String)</tt> method defined in
0138:             * the RDFParser interface.
0139:             * 
0140:             * @param reader
0141:             *        The Reader from which to read the data, must not be <tt>null</tt>.
0142:             * @param baseURI
0143:             *        The URI associated with the data in the Reader, must not be
0144:             *        <tt>null</tt>.
0145:             * @throws IOException
0146:             *         If an I/O error occurred while data was read from the InputStream.
0147:             * @throws RDFParseException
0148:             *         If the parser has found an unrecoverable parse error.
0149:             * @throws RDFHandlerException
0150:             *         If the configured statement handler encountered an unrecoverable
0151:             *         error.
0152:             * @throws IllegalArgumentException
0153:             *         If the supplied reader or base URI is <tt>null</tt>.
0154:             */
0155:            public synchronized void parse(Reader reader, String baseURI)
0156:                    throws IOException, RDFParseException, RDFHandlerException {
0157:                if (reader == null) {
0158:                    throw new IllegalArgumentException(
0159:                            "Reader must not be 'null'");
0160:                }
0161:                if (baseURI == null) {
0162:                    throw new IllegalArgumentException(
0163:                            "base URI must not be 'null'");
0164:                }
0165:
0166:                rdfHandler.startRDF();
0167:
0168:                lineReader = new LineNumberReader(reader);
0169:                // Start counting lines at 1:
0170:                lineReader.setLineNumber(1);
0171:
0172:                // Allow at most 2 characters to be pushed back:
0173:                this .reader = new PushbackReader(lineReader, 2);
0174:
0175:                // Store normalized base URI
0176:                setBaseURI(baseURI);
0177:
0178:                reportLocation();
0179:
0180:                try {
0181:                    int c = skipWSC();
0182:
0183:                    while (c != -1) {
0184:                        parseStatement();
0185:                        c = skipWSC();
0186:                    }
0187:                } finally {
0188:                    clear();
0189:                }
0190:
0191:                rdfHandler.endRDF();
0192:            }
0193:
0194:            protected void parseStatement() throws IOException,
0195:                    RDFParseException, RDFHandlerException {
0196:                int c = peek();
0197:
0198:                if (c == '@') {
0199:                    parseDirective();
0200:                    skipWSC();
0201:                    verifyCharacter(read(), ".");
0202:                } else {
0203:                    parseTriples();
0204:                    skipWSC();
0205:                    verifyCharacter(read(), ".");
0206:                }
0207:            }
0208:
0209:            protected void parseDirective() throws IOException,
0210:                    RDFParseException, RDFHandlerException {
0211:                // Verify that the first characters form the string "prefix"
0212:                verifyCharacter(read(), "@");
0213:
0214:                StringBuilder sb = new StringBuilder(8);
0215:
0216:                int c = read();
0217:                while (c != -1 && !TurtleUtil.isWhitespace(c)) {
0218:                    sb.append((char) c);
0219:                    c = read();
0220:                }
0221:
0222:                String directive = sb.toString();
0223:                if (directive.equals("prefix")) {
0224:                    parsePrefixID();
0225:                } else if (directive.equals("base")) {
0226:                    parseBase();
0227:                } else if (directive.length() == 0) {
0228:                    reportFatalError("Directive name is missing, expected @prefix or @base");
0229:                } else {
0230:                    reportFatalError("Unknown directive \"@" + directive + "\"");
0231:                }
0232:            }
0233:
0234:            protected void parsePrefixID() throws IOException,
0235:                    RDFParseException, RDFHandlerException {
0236:                skipWSC();
0237:
0238:                // Read prefix ID (e.g. "rdf:" or ":")
0239:                StringBuilder prefixID = new StringBuilder(8);
0240:
0241:                while (true) {
0242:                    int c = read();
0243:
0244:                    if (c == ':') {
0245:                        unread(c);
0246:                        break;
0247:                    } else if (TurtleUtil.isWhitespace(c)) {
0248:                        break;
0249:                    } else if (c == -1) {
0250:                        throwEOFException();
0251:                    }
0252:
0253:                    prefixID.append((char) c);
0254:                }
0255:
0256:                skipWSC();
0257:
0258:                verifyCharacter(read(), ":");
0259:
0260:                skipWSC();
0261:
0262:                // Read the namespace URI
0263:                URI namespace = parseURI();
0264:
0265:                // Store and report this namespace mapping
0266:                String prefixStr = prefixID.toString();
0267:                String namespaceStr = namespace.toString();
0268:
0269:                setNamespace(prefixStr, namespaceStr);
0270:
0271:                rdfHandler.handleNamespace(prefixStr, namespaceStr);
0272:            }
0273:
0274:            protected void parseBase() throws IOException, RDFParseException,
0275:                    RDFHandlerException {
0276:                skipWSC();
0277:
0278:                URI baseURI = parseURI();
0279:
0280:                setBaseURI(baseURI.toString());
0281:            }
0282:
0283:            protected void parseTriples() throws IOException,
0284:                    RDFParseException, RDFHandlerException {
0285:                parseSubject();
0286:                skipWSC();
0287:                parsePredicateObjectList();
0288:
0289:                subject = null;
0290:                predicate = null;
0291:                object = null;
0292:            }
0293:
0294:            protected void parsePredicateObjectList() throws IOException,
0295:                    RDFParseException, RDFHandlerException {
0296:                predicate = parsePredicate();
0297:
0298:                skipWSC();
0299:
0300:                parseObjectList();
0301:
0302:                while (skipWSC() == ';') {
0303:                    read();
0304:
0305:                    int c = skipWSC();
0306:
0307:                    if (c == '.' || // end of triple
0308:                            c == ']') // end of predicateObjectList inside blank node
0309:                    {
0310:                        break;
0311:                    }
0312:
0313:                    predicate = parsePredicate();
0314:
0315:                    skipWSC();
0316:
0317:                    parseObjectList();
0318:                }
0319:            }
0320:
0321:            protected void parseObjectList() throws IOException,
0322:                    RDFParseException, RDFHandlerException {
0323:                parseObject();
0324:
0325:                while (skipWSC() == ',') {
0326:                    read();
0327:                    skipWSC();
0328:                    parseObject();
0329:                }
0330:            }
0331:
0332:            protected void parseSubject() throws IOException,
0333:                    RDFParseException, RDFHandlerException {
0334:                int c = peek();
0335:
0336:                if (c == '(') {
0337:                    subject = parseCollection();
0338:                } else if (c == '[') {
0339:                    subject = parseImplicitBlank();
0340:                } else {
0341:                    Value value = parseValue();
0342:
0343:                    if (value instanceof  Resource) {
0344:                        subject = (Resource) value;
0345:                    } else {
0346:                        reportFatalError("Illegal subject value: " + value);
0347:                    }
0348:                }
0349:            }
0350:
0351:            protected URI parsePredicate() throws IOException,
0352:                    RDFParseException {
0353:                // Check if the short-cut 'a' is used
0354:                int c1 = read();
0355:
0356:                if (c1 == 'a') {
0357:                    int c2 = read();
0358:
0359:                    if (TurtleUtil.isWhitespace(c2)) {
0360:                        // Short-cut is used, return the rdf:type URI
0361:                        return RDF.TYPE;
0362:                    }
0363:
0364:                    // Short-cut is not used, unread all characters
0365:                    unread(c2);
0366:                }
0367:                unread(c1);
0368:
0369:                // Predicate is a normal resource
0370:                Value predicate = parseValue();
0371:                if (predicate instanceof  URI) {
0372:                    return (URI) predicate;
0373:                } else {
0374:                    reportFatalError("Illegal predicate value: " + predicate);
0375:                    return null;
0376:                }
0377:            }
0378:
0379:            protected void parseObject() throws IOException, RDFParseException,
0380:                    RDFHandlerException {
0381:                int c = peek();
0382:
0383:                if (c == '(') {
0384:                    object = parseCollection();
0385:                } else if (c == '[') {
0386:                    object = parseImplicitBlank();
0387:                } else {
0388:                    object = parseValue();
0389:                }
0390:
0391:                reportStatement(subject, predicate, object);
0392:            }
0393:
0394:            /**
0395:             * Parses a collection, e.g. <tt>( item1 item2 item3 )</tt>.
0396:             */
0397:            protected Resource parseCollection() throws IOException,
0398:                    RDFParseException, RDFHandlerException {
0399:                verifyCharacter(read(), "(");
0400:
0401:                int c = skipWSC();
0402:
0403:                if (c == ')') {
0404:                    // Empty list
0405:                    read();
0406:                    return RDF.NIL;
0407:                } else {
0408:                    BNode listRoot = createBNode();
0409:
0410:                    // Remember current subject and predicate
0411:                    Resource oldSubject = subject;
0412:                    URI oldPredicate = predicate;
0413:
0414:                    // generated bNode becomes subject, predicate becomes rdf:first
0415:                    subject = listRoot;
0416:                    predicate = RDF.FIRST;
0417:
0418:                    parseObject();
0419:
0420:                    BNode bNode = listRoot;
0421:
0422:                    while (skipWSC() != ')') {
0423:                        // Create another list node and link it to the previous
0424:                        BNode newNode = createBNode();
0425:                        reportStatement(bNode, RDF.REST, newNode);
0426:
0427:                        // New node becomes the current
0428:                        subject = bNode = newNode;
0429:
0430:                        parseObject();
0431:                    }
0432:
0433:                    // Skip ')'
0434:                    read();
0435:
0436:                    // Close the list
0437:                    reportStatement(bNode, RDF.REST, RDF.NIL);
0438:
0439:                    // Restore previous subject and predicate
0440:                    subject = oldSubject;
0441:                    predicate = oldPredicate;
0442:
0443:                    return listRoot;
0444:                }
0445:            }
0446:
0447:            /**
0448:             * Parses an implicit blank node. This method parses the token <tt>[]</tt>
0449:             * and predicateObjectLists that are surrounded by square brackets.
0450:             */
0451:            protected Resource parseImplicitBlank() throws IOException,
0452:                    RDFParseException, RDFHandlerException {
0453:                verifyCharacter(read(), "[");
0454:
0455:                BNode bNode = createBNode();
0456:
0457:                int c = read();
0458:                if (c != ']') {
0459:                    unread(c);
0460:
0461:                    // Remember current subject and predicate
0462:                    Resource oldSubject = subject;
0463:                    URI oldPredicate = predicate;
0464:
0465:                    // generated bNode becomes subject
0466:                    subject = bNode;
0467:
0468:                    // Enter recursion with nested predicate-object list
0469:                    skipWSC();
0470:
0471:                    parsePredicateObjectList();
0472:
0473:                    skipWSC();
0474:
0475:                    // Read closing bracket
0476:                    verifyCharacter(read(), "]");
0477:
0478:                    // Restore previous subject and predicate
0479:                    subject = oldSubject;
0480:                    predicate = oldPredicate;
0481:                }
0482:
0483:                return bNode;
0484:            }
0485:
0486:            /**
0487:             * Parses an RDF value. This method parses uriref, qname, node ID, quoted
0488:             * literal, integer, double and boolean.
0489:             */
0490:            protected Value parseValue() throws IOException, RDFParseException {
0491:                int c = peek();
0492:
0493:                if (c == '<') {
0494:                    // uriref, e.g. <foo://bar>
0495:                    return parseURI();
0496:                } else if (c == ':' || TurtleUtil.isPrefixStartChar(c)) {
0497:                    // qname or boolean
0498:                    return parseQNameOrBoolean();
0499:                } else if (c == '_') {
0500:                    // node ID, e.g. _:n1
0501:                    return parseNodeID();
0502:                } else if (c == '"') {
0503:                    // quoted literal, e.g. "foo" or """foo"""
0504:                    return parseQuotedLiteral();
0505:                } else if (ASCIIUtil.isNumber(c) || c == '.' || c == '+'
0506:                        || c == '-') {
0507:                    // integer or double, e.g. 123 or 1.2e3
0508:                    return parseNumber();
0509:                } else if (c == -1) {
0510:                    throwEOFException();
0511:                    return null;
0512:                } else {
0513:                    reportFatalError("Expected an RDF value here, found '"
0514:                            + (char) c + "'");
0515:                    return null;
0516:                }
0517:            }
0518:
0519:            /**
0520:             * Parses a quoted string, optionally followed by a language tag or datatype.
0521:             */
0522:            protected Literal parseQuotedLiteral() throws IOException,
0523:                    RDFParseException {
0524:                String label = parseQuotedString();
0525:
0526:                // Check for presence of a language tag or datatype
0527:                int c = peek();
0528:
0529:                if (c == '@') {
0530:                    read();
0531:
0532:                    // Read language
0533:                    StringBuilder lang = new StringBuilder(8);
0534:
0535:                    c = read();
0536:                    if (c == -1) {
0537:                        throwEOFException();
0538:                    }
0539:                    if (!TurtleUtil.isLanguageStartChar(c)) {
0540:                        reportError("Expected a letter, found '" + (char) c
0541:                                + "'");
0542:                    }
0543:
0544:                    lang.append((char) c);
0545:
0546:                    c = read();
0547:                    while (TurtleUtil.isLanguageChar(c)) {
0548:                        lang.append((char) c);
0549:                        c = read();
0550:                    }
0551:
0552:                    unread(c);
0553:
0554:                    return createLiteral(label, lang.toString(), null);
0555:                } else if (c == '^') {
0556:                    read();
0557:
0558:                    // next character should be another '^'
0559:                    verifyCharacter(read(), "^");
0560:
0561:                    // Read datatype
0562:                    Value datatype = parseValue();
0563:                    if (datatype instanceof  URI) {
0564:                        return createLiteral(label, null, (URI) datatype);
0565:                    } else {
0566:                        reportFatalError("Illegal datatype value: " + datatype);
0567:                        return null;
0568:                    }
0569:                } else {
0570:                    return createLiteral(label, null, null);
0571:                }
0572:            }
0573:
0574:            /**
0575:             * Parses a quoted string, which is either a "normal string" or a """long
0576:             * string""".
0577:             */
0578:            protected String parseQuotedString() throws IOException,
0579:                    RDFParseException {
0580:                String result = null;
0581:
0582:                // First character should be '"'
0583:                verifyCharacter(read(), "\"");
0584:
0585:                // Check for long-string, which starts and ends with three double quotes
0586:                int c2 = read();
0587:                int c3 = read();
0588:
0589:                if (c2 == '"' && c3 == '"') {
0590:                    // Long string
0591:                    result = parseLongString();
0592:                } else {
0593:                    // Normal string
0594:                    unread(c3);
0595:                    unread(c2);
0596:
0597:                    result = parseString();
0598:                }
0599:
0600:                // Unescape any escape sequences
0601:                try {
0602:                    result = TurtleUtil.decodeString(result);
0603:                } catch (IllegalArgumentException e) {
0604:                    reportError(e.getMessage());
0605:                }
0606:
0607:                return result;
0608:            }
0609:
0610:            /**
0611:             * Parses a "normal string". This method assumes that the first double quote
0612:             * has already been parsed.
0613:             */
0614:            protected String parseString() throws IOException,
0615:                    RDFParseException {
0616:                StringBuilder sb = new StringBuilder(32);
0617:
0618:                while (true) {
0619:                    int c = read();
0620:
0621:                    if (c == '"') {
0622:                        break;
0623:                    } else if (c == -1) {
0624:                        throwEOFException();
0625:                    }
0626:
0627:                    sb.append((char) c);
0628:
0629:                    if (c == '\\') {
0630:                        // This escapes the next character, which might be a '"'
0631:                        c = read();
0632:                        if (c == -1) {
0633:                            throwEOFException();
0634:                        }
0635:                        sb.append((char) c);
0636:                    }
0637:                }
0638:
0639:                return sb.toString();
0640:            }
0641:
0642:            /**
0643:             * Parses a """long string""". This method assumes that the first three
0644:             * double quotes have already been parsed.
0645:             */
0646:            protected String parseLongString() throws IOException,
0647:                    RDFParseException {
0648:                StringBuilder sb = new StringBuilder(1024);
0649:
0650:                int doubleQuoteCount = 0;
0651:                int c;
0652:
0653:                while (doubleQuoteCount < 3) {
0654:                    c = read();
0655:
0656:                    if (c == -1) {
0657:                        throwEOFException();
0658:                    } else if (c == '"') {
0659:                        doubleQuoteCount++;
0660:                    } else {
0661:                        doubleQuoteCount = 0;
0662:                    }
0663:
0664:                    sb.append((char) c);
0665:
0666:                    if (c == '\\') {
0667:                        // This escapes the next character, which might be a '"'
0668:                        c = read();
0669:                        if (c == -1) {
0670:                            throwEOFException();
0671:                        }
0672:                        sb.append((char) c);
0673:                    }
0674:                }
0675:
0676:                return sb.substring(0, sb.length() - 3);
0677:            }
0678:
0679:            protected Literal parseNumber() throws IOException,
0680:                    RDFParseException {
0681:                StringBuilder value = new StringBuilder(8);
0682:                URI datatype = XMLSchema.INTEGER;
0683:
0684:                int c = read();
0685:
0686:                // read optional sign character
0687:                if (c == '+' || c == '-') {
0688:                    value.append((char) c);
0689:                    c = read();
0690:                }
0691:
0692:                while (ASCIIUtil.isNumber(c)) {
0693:                    value.append((char) c);
0694:                    c = read();
0695:                }
0696:
0697:                if (c == '.' || c == 'e' || c == 'E') {
0698:                    // We're parsing a decimal or a double
0699:                    datatype = XMLSchema.DECIMAL;
0700:
0701:                    // read optional fractional digits
0702:                    if (c == '.') {
0703:                        value.append((char) c);
0704:
0705:                        c = read();
0706:                        while (ASCIIUtil.isNumber(c)) {
0707:                            value.append((char) c);
0708:                            c = read();
0709:                        }
0710:
0711:                        if (value.length() == 1) {
0712:                            // We've only parsed a '.'
0713:                            reportFatalError("Object for statement missing");
0714:                        }
0715:                    } else {
0716:                        if (value.length() == 0) {
0717:                            // We've only parsed an 'e' or 'E'
0718:                            reportFatalError("Object for statement missing");
0719:                        }
0720:                    }
0721:
0722:                    // read optional exponent
0723:                    if (c == 'e' || c == 'E') {
0724:                        datatype = XMLSchema.DOUBLE;
0725:                        value.append((char) c);
0726:
0727:                        c = read();
0728:                        if (c == '+' || c == '-') {
0729:                            value.append((char) c);
0730:                            c = read();
0731:                        }
0732:
0733:                        if (!ASCIIUtil.isNumber(c)) {
0734:                            reportError("Exponent value missing");
0735:                        }
0736:
0737:                        value.append((char) c);
0738:
0739:                        c = read();
0740:                        while (ASCIIUtil.isNumber(c)) {
0741:                            value.append((char) c);
0742:                            c = read();
0743:                        }
0744:                    }
0745:                }
0746:
0747:                // Unread last character, it isn't part of the number
0748:                unread(c);
0749:
0750:                // String label = value.toString();
0751:                // if (datatype.equals(XMLSchema.INTEGER)) {
0752:                // try {
0753:                // label = XMLDatatypeUtil.normalizeInteger(label);
0754:                // }
0755:                // catch (IllegalArgumentException e) {
0756:                // // Note: this should never happen because of the parse constraints
0757:                // reportError("Illegal integer value: " + label);
0758:                // }
0759:                // }
0760:                // return createLiteral(label, null, datatype);
0761:
0762:                // Return result as a typed literal
0763:                return createLiteral(value.toString(), null, datatype);
0764:            }
0765:
0766:            protected URI parseURI() throws IOException, RDFParseException {
0767:                StringBuilder uriBuf = new StringBuilder(100);
0768:
0769:                // First character should be '<'
0770:                int c = read();
0771:                verifyCharacter(c, "<");
0772:
0773:                // Read up to the next '>' character
0774:                while (true) {
0775:                    c = read();
0776:
0777:                    if (c == '>') {
0778:                        break;
0779:                    } else if (c == -1) {
0780:                        throwEOFException();
0781:                    }
0782:
0783:                    uriBuf.append((char) c);
0784:
0785:                    if (c == '\\') {
0786:                        // This escapes the next character, which might be a '>'
0787:                        c = read();
0788:                        if (c == -1) {
0789:                            throwEOFException();
0790:                        }
0791:                        uriBuf.append((char) c);
0792:                    }
0793:                }
0794:
0795:                String uri = uriBuf.toString();
0796:
0797:                // Unescape any escape sequences
0798:                try {
0799:                    uri = TurtleUtil.decodeString(uri);
0800:                } catch (IllegalArgumentException e) {
0801:                    reportError(e.getMessage());
0802:                }
0803:
0804:                return super .resolveURI(uri);
0805:            }
0806:
0807:            /**
0808:             * Parses qnames and boolean values, which have equivalent starting
0809:             * characters.
0810:             */
0811:            protected Value parseQNameOrBoolean() throws IOException,
0812:                    RDFParseException {
0813:                // First character should be a ':' or a letter
0814:                int c = read();
0815:                if (c == -1) {
0816:                    throwEOFException();
0817:                }
0818:                if (c != ':' && !TurtleUtil.isPrefixStartChar(c)) {
0819:                    reportError("Expected a ':' or a letter, found '"
0820:                            + (char) c + "'");
0821:                }
0822:
0823:                String namespace = null;
0824:
0825:                if (c == ':') {
0826:                    // qname using default namespace
0827:                    namespace = getNamespace("");
0828:                    if (namespace == null) {
0829:                        reportError("Default namespace used but not defined");
0830:                    }
0831:                } else {
0832:                    // c is the first letter of the prefix
0833:                    StringBuilder prefix = new StringBuilder(8);
0834:                    prefix.append((char) c);
0835:
0836:                    c = read();
0837:                    while (TurtleUtil.isPrefixChar(c)) {
0838:                        prefix.append((char) c);
0839:                        c = read();
0840:                    }
0841:
0842:                    if (c != ':') {
0843:                        // prefix may actually be a boolean value
0844:                        String value = prefix.toString();
0845:
0846:                        if (value.equals("true") || value.equals("false")) {
0847:                            return createLiteral(value, null, XMLSchema.BOOLEAN);
0848:                        }
0849:                    }
0850:
0851:                    verifyCharacter(c, ":");
0852:
0853:                    namespace = getNamespace(prefix.toString());
0854:                    if (namespace == null) {
0855:                        reportError("Namespace prefix '" + prefix.toString()
0856:                                + "' used but not defined");
0857:                    }
0858:                }
0859:
0860:                // c == ':', read optional local name
0861:                StringBuilder localName = new StringBuilder(16);
0862:                c = read();
0863:                if (TurtleUtil.isNameStartChar(c)) {
0864:                    localName.append((char) c);
0865:
0866:                    c = read();
0867:                    while (TurtleUtil.isNameChar(c)) {
0868:                        localName.append((char) c);
0869:                        c = read();
0870:                    }
0871:                }
0872:
0873:                // Unread last character
0874:                unread(c);
0875:
0876:                // Note: namespace has already been resolved
0877:                return createURI(namespace + localName.toString());
0878:            }
0879:
0880:            /**
0881:             * Parses a blank node ID, e.g. <tt>_:node1</tt>.
0882:             */
0883:            protected BNode parseNodeID() throws IOException, RDFParseException {
0884:                // Node ID should start with "_:"
0885:                verifyCharacter(read(), "_");
0886:                verifyCharacter(read(), ":");
0887:
0888:                // Read the node ID
0889:                int c = read();
0890:                if (c == -1) {
0891:                    throwEOFException();
0892:                } else if (!TurtleUtil.isNameStartChar(c)) {
0893:                    reportError("Expected a letter, found '" + (char) c + "'");
0894:                }
0895:
0896:                StringBuilder name = new StringBuilder(32);
0897:                name.append((char) c);
0898:
0899:                // Read all following letter and numbers, they are part of the name
0900:                c = read();
0901:                while (TurtleUtil.isNameChar(c)) {
0902:                    name.append((char) c);
0903:                    c = read();
0904:                }
0905:
0906:                unread(c);
0907:
0908:                return createBNode(name.toString());
0909:            }
0910:
0911:            protected void reportStatement(Resource subj, URI pred, Value obj)
0912:                    throws RDFParseException, RDFHandlerException {
0913:                Statement st = createStatement(subj, pred, obj);
0914:                rdfHandler.handleStatement(st);
0915:            }
0916:
0917:            /**
0918:             * Verifies that the supplied character <tt>c</tt> is one of the expected
0919:             * characters specified in <tt>expected</tt>. This method will throw a
0920:             * <tt>ParseException</tt> if this is not the case.
0921:             */
0922:            protected void verifyCharacter(int c, String expected)
0923:                    throws RDFParseException {
0924:                if (c == -1) {
0925:                    throwEOFException();
0926:                } else if (expected.indexOf((char) c) == -1) {
0927:                    StringBuilder msg = new StringBuilder(32);
0928:                    msg.append("Expected ");
0929:                    for (int i = 0; i < expected.length(); i++) {
0930:                        if (i > 0) {
0931:                            msg.append(" or ");
0932:                        }
0933:                        msg.append('\'');
0934:                        msg.append(expected.charAt(i));
0935:                        msg.append('\'');
0936:                    }
0937:                    msg.append(", found '");
0938:                    msg.append((char) c);
0939:                    msg.append("'");
0940:
0941:                    reportError(msg.toString());
0942:                }
0943:            }
0944:
0945:            /**
0946:             * Consumes any white space characters (space, tab, line feed, newline) and
0947:             * comments (#-style) from <tt>reader</tt>. After this method has been
0948:             * called, the first character that is returned by <tt>reader</tt> is
0949:             * either a non-ignorable character, or EOF. For convenience, this character
0950:             * is also returned by this method.
0951:             * 
0952:             * @return The next character that will be returned by <tt>reader</tt>.
0953:             */
0954:            protected int skipWSC() throws IOException {
0955:                int c = read();
0956:                while (TurtleUtil.isWhitespace(c) || c == '#') {
0957:                    if (c == '#') {
0958:                        skipLine();
0959:                    }
0960:
0961:                    c = read();
0962:                }
0963:
0964:                unread(c);
0965:
0966:                return c;
0967:            }
0968:
0969:            /**
0970:             * Consumes characters from reader until the first EOL has been read.
0971:             */
0972:            protected void skipLine() throws IOException {
0973:                int c = read();
0974:                while (c != -1 && c != 0xD && c != 0xA) {
0975:                    c = read();
0976:                }
0977:
0978:                // c is equal to -1, \r or \n.
0979:                // In case c is equal to \r, we should also read a following \n.
0980:                if (c == 0xD) {
0981:                    c = read();
0982:
0983:                    if (c != 0xA) {
0984:                        unread(c);
0985:                    }
0986:                }
0987:
0988:                reportLocation();
0989:            }
0990:
0991:            protected int read() throws IOException {
0992:                return reader.read();
0993:            }
0994:
0995:            protected void unread(int c) throws IOException {
0996:                if (c != -1) {
0997:                    reader.unread(c);
0998:                }
0999:            }
1000:
1001:            protected int peek() throws IOException {
1002:                int result = read();
1003:                unread(result);
1004:                return result;
1005:            }
1006:
1007:            protected void reportLocation() {
1008:                reportLocation(lineReader.getLineNumber(), -1);
1009:            }
1010:
1011:            /**
1012:             * Overrides {@link RDFParserBase#reportWarning(String)}, adding line number
1013:             * information to the error.
1014:             */
1015:            @Override
1016:            protected void reportWarning(String msg) {
1017:                reportWarning(msg, lineReader.getLineNumber(), -1);
1018:            }
1019:
1020:            /**
1021:             * Overrides {@link RDFParserBase#reportError(String)}, adding line number
1022:             * information to the error.
1023:             */
1024:            @Override
1025:            protected void reportError(String msg) throws RDFParseException {
1026:                reportError(msg, lineReader.getLineNumber(), -1);
1027:            }
1028:
1029:            /**
1030:             * Overrides {@link RDFParserBase#reportFatalError(String)}, adding line
1031:             * number information to the error.
1032:             */
1033:            @Override
1034:            protected void reportFatalError(String msg)
1035:                    throws RDFParseException {
1036:                reportFatalError(msg, lineReader.getLineNumber(), -1);
1037:            }
1038:
1039:            /**
1040:             * Overrides {@link RDFParserBase#reportFatalError(Exception)}, adding line
1041:             * number information to the error.
1042:             */
1043:            @Override
1044:            protected void reportFatalError(Exception e)
1045:                    throws RDFParseException {
1046:                reportFatalError(e, lineReader.getLineNumber(), -1);
1047:            }
1048:
1049:            protected void throwEOFException() throws RDFParseException {
1050:                throw new RDFParseException("Unexpected end of file");
1051:            }
1052:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.