001: /* Copyright (c) 2006-2007, Vladimir Nikic
002: All rights reserved.
003:
004: Redistribution and use of this software in source and binary forms,
005: with or without modification, are permitted provided that the following
006: conditions are met:
007:
008: * Redistributions of source code must retain the above
009: copyright notice, this list of conditions and the
010: following disclaimer.
011:
012: * Redistributions in binary form must reproduce the above
013: copyright notice, this list of conditions and the
014: following disclaimer in the documentation and/or other
015: materials provided with the distribution.
016:
017: * The name of Web-Harvest may not be used to endorse or promote
018: products derived from this software without specific prior
019: written permission.
020:
021: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
022: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
023: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
024: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
025: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
026: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
027: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
028: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
029: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
030: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
031: POSSIBILITY OF SUCH DAMAGE.
032:
033: You can contact Vladimir Nikic by sending e-mail to
034: nikic_vladimir@yahoo.com. Please include the word "Web-Harvest" in the
035: subject line.
036: */
037: package org.webharvest.runtime.processors;
038:
039: import net.sf.saxon.Configuration;
040: import net.sf.saxon.om.Item;
041: import net.sf.saxon.om.SequenceIterator;
042: import net.sf.saxon.query.*;
043: import net.sf.saxon.trans.XPathException;
044: import org.webharvest.definition.XQueryDef;
045: import org.webharvest.definition.XQueryExternalParamDef;
046: import org.webharvest.definition.BaseElementDef;
047: import org.webharvest.exception.ScraperXQueryException;
048: import org.webharvest.runtime.Scraper;
049: import org.webharvest.runtime.ScraperContext;
050: import org.webharvest.runtime.variables.*;
051: import org.webharvest.utils.CommonUtil;
052:
053: import javax.xml.transform.stream.StreamSource;
054: import java.io.StringReader;
055: import java.util.*;
056:
057: /**
058: * XQuery processor.
059: */
060: public class XQueryProcessor extends BaseProcessor {
061:
062: public static Set ALLOWED_PARAM_TYPES = new TreeSet();
063: public static String DEFAULT_PARAM_TYPE = "node()";
064:
065: // initialize set of allowed parameter types
066: static {
067: ALLOWED_PARAM_TYPES.add("node()");
068: ALLOWED_PARAM_TYPES.add("node()*");
069: ALLOWED_PARAM_TYPES.add("integer");
070: ALLOWED_PARAM_TYPES.add("integer*");
071: ALLOWED_PARAM_TYPES.add("long");
072: ALLOWED_PARAM_TYPES.add("long*");
073: ALLOWED_PARAM_TYPES.add("float");
074: ALLOWED_PARAM_TYPES.add("float*");
075: ALLOWED_PARAM_TYPES.add("double");
076: ALLOWED_PARAM_TYPES.add("double*");
077: ALLOWED_PARAM_TYPES.add("boolean");
078: ALLOWED_PARAM_TYPES.add("boolean*");
079: ALLOWED_PARAM_TYPES.add("string");
080: ALLOWED_PARAM_TYPES.add("string*");
081: }
082:
083: private XQueryDef xqueryDef;
084:
085: public XQueryProcessor(XQueryDef xqueryDef) {
086: super (xqueryDef);
087: this .xqueryDef = xqueryDef;
088: }
089:
090: public IVariable execute(Scraper scraper, ScraperContext context) {
091: BaseElementDef xqueryElementDef = xqueryDef.getXqDef();
092: IVariable xq = getBodyTextContent(xqueryElementDef, scraper,
093: context);
094: debug(xqueryElementDef, scraper, xq);
095:
096: String xqExpression = xq.toString().trim();
097: XQueryExternalParamDef[] externalParamDefs = xqueryDef
098: .getExternalParamDefs();
099:
100: final Configuration config = new Configuration();
101: final StaticQueryContext sqc = new StaticQueryContext(config);
102:
103: try {
104: final XQueryExpression exp = sqc.compileQuery(xqExpression);
105: final DynamicQueryContext dynamicContext = new DynamicQueryContext(
106: config);
107:
108: // define external parameters
109: for (int i = 0; i < externalParamDefs.length; i++) {
110: XQueryExternalParamDef externalParamDef = externalParamDefs[i];
111: String externalParamType = externalParamDefs[i]
112: .getType();
113: if (externalParamType == null) {
114: externalParamType = DEFAULT_PARAM_TYPE;
115: }
116:
117: // check if param type is one of allowed
118: if (!ALLOWED_PARAM_TYPES.contains(externalParamType)) {
119: throw new ScraperXQueryException("Type "
120: + externalParamType
121: + " is not allowed. Use one of "
122: + ALLOWED_PARAM_TYPES.toString());
123: }
124:
125: if (externalParamType.endsWith("*")) {
126: ListVariable listVar = (ListVariable) getBodyListContent(
127: externalParamDef, scraper, context);
128: debug(externalParamDef, scraper, listVar);
129:
130: Iterator it = listVar.toList().iterator();
131: List paramList = new ArrayList();
132: while (it.hasNext()) {
133: IVariable currVar = (IVariable) it.next();
134: paramList.add(castSimpleValue(
135: externalParamType, currVar, sqc));
136: }
137:
138: dynamicContext.setParameter(externalParamDef
139: .getName(), paramList);
140: } else {
141: IVariable var = getBodyTextContent(
142: externalParamDef, scraper, context);
143:
144: debug(externalParamDef, scraper, var);
145:
146: Object value = castSimpleValue(externalParamType,
147: var, sqc);
148: dynamicContext.setParameter(externalParamDef
149: .getName(), value);
150: }
151: }
152:
153: final SequenceIterator iter = exp.iterator(dynamicContext);
154:
155: ListVariable listVariable = new ListVariable();
156: while (true) {
157: Item item = iter.next();
158: if (item == null) {
159: break;
160: }
161:
162: String value = CommonUtil.serializeItem(item, config);
163: listVariable.addVariable(new NodeVariable(value));
164: }
165:
166: return listVariable;
167: } catch (XPathException e) {
168: throw new ScraperXQueryException(
169: "Error executing XQuery expression (XQuery = ["
170: + xqExpression + "])!", e);
171: }
172: }
173:
174: /**
175: * For the specified type, value and static query context, returns proper Java typed value.
176: * @param type
177: * @param value
178: * @param sqc
179: * @return
180: * @throws XPathException
181: */
182: private Object castSimpleValue(String type, IVariable value,
183: StaticQueryContext sqc) throws XPathException {
184: type = type.toLowerCase();
185:
186: if (type.startsWith("node()")) {
187: StringReader reader = new StringReader(value.toString());
188: return sqc.buildDocument(new StreamSource(reader));
189: } else if (type.startsWith("integer")) {
190: return new Integer(value.toString());
191: } else if (type.startsWith("long")) {
192: return new Long(value.toString());
193: } else if (type.startsWith("float")) {
194: return new Float(value.toString());
195: } else if (type.startsWith("double")) {
196: return new Double(value.toString());
197: } else if (type.startsWith("boolean")) {
198: return CommonUtil.isBooleanTrue(value.toString()) ? Boolean.TRUE
199: : Boolean.FALSE;
200: } else {
201: return value.toString();
202: }
203: }
204:
205: }
|