001: // TidyPutFilter.java
002: // $Id: TidyPutFilter.java,v 1.15 2003/01/27 16:14:54 ylafon Exp $
003: // (c) COPYRIGHT MIT and INRIA, 2002.
004: // Please first read the full copyright statement in file COPYRIGHT.html
005:
006: package org.w3c.jigedit.filters;
007:
008: import java.io.IOException;
009: import java.io.InputStream;
010: import java.io.OutputStream;
011: import java.io.ByteArrayInputStream;
012: import java.io.ByteArrayOutputStream;
013: import java.io.Writer;
014: import java.io.PrintWriter;
015:
016: import org.w3c.tools.resources.Attribute;
017: import org.w3c.tools.resources.AttributeRegistry;
018: import org.w3c.tools.resources.FramedResource;
019: import org.w3c.tools.resources.InvalidResourceException;
020: import org.w3c.tools.resources.ProtocolException;
021: import org.w3c.tools.resources.ReplyInterface;
022: import org.w3c.tools.resources.RequestInterface;
023: import org.w3c.tools.resources.Resource;
024: import org.w3c.tools.resources.ResourceFilter;
025: import org.w3c.tools.resources.ResourceReference;
026: import org.w3c.tools.resources.BooleanAttribute;
027:
028: import org.w3c.www.mime.MimeType;
029:
030: import org.w3c.www.http.HTTP;
031: import org.w3c.www.http.HttpEntityMessage;
032: import org.w3c.www.http.HttpReplyMessage;
033: import org.w3c.www.http.HttpRequestMessage;
034: import org.w3c.www.http.HttpWarning;
035: import org.w3c.www.http.HttpFactory;
036:
037: import org.w3c.jigsaw.http.Client;
038: import org.w3c.jigsaw.http.Reply;
039: import org.w3c.jigsaw.http.Request;
040:
041: import org.w3c.jigsaw.html.HtmlGenerator;
042:
043: import org.w3c.jigsaw.frames.HTTPFrame;
044:
045: import org.w3c.tidy.Tidy;
046: import org.w3c.tidy.Configuration;
047:
048: public class TidyPutFilter extends ResourceFilter {
049:
050: /** attribute index */
051: protected static int ATTR_VALID_STRICT = -1;
052:
053: static {
054: Class c = null;
055: Attribute a = null;
056:
057: try {
058: c = Class.forName("org.w3c.jigedit.filters.TidyPutFilter");
059: } catch (Exception ex) {
060: ex.printStackTrace();
061: System.exit(1);
062: }
063:
064: a = new BooleanAttribute("valid-strict", Boolean.FALSE,
065: Attribute.EDITABLE);
066: ATTR_VALID_STRICT = AttributeRegistry.registerAttribute(c, a);
067: }
068:
069: HttpWarning hw = null;
070: MimeType xhtml_mt = null;
071:
072: boolean isXhtml = false;
073: int tidyCharEncoding = Configuration.LATIN1;
074:
075: protected boolean getValidStrict() {
076: return getBoolean(ATTR_VALID_STRICT, false);
077: }
078:
079: public ReplyInterface ingoingFilter(RequestInterface req) {
080: Request request = (Request) req;
081:
082: if (request.getMethod().equals("PUT")) {
083: try {
084: MimeType req_mt = request.getContentType();
085: if (xhtml_mt == null) {
086: xhtml_mt = new MimeType("application", "xhtml+xml");
087: }
088: if (req_mt.match(xhtml_mt) != MimeType.NO_MATCH) {
089: isXhtml = true;
090: } else {
091: isXhtml = false;
092: }
093: if (req_mt.match(MimeType.TEXT_HTML) == MimeType.NO_MATCH
094: && isXhtml == false) {
095: return null;
096: }
097: if (req_mt.hasParameter("charset")) {
098: String charset = req_mt
099: .getParameterValue("charset");
100: if (charset.equalsIgnoreCase("iso-2022-jp")) {
101: tidyCharEncoding = Configuration.ISO2022;
102: }
103: if (charset.equalsIgnoreCase("us-ascii")) {
104: tidyCharEncoding = Configuration.ASCII;
105: }
106: if (charset.equalsIgnoreCase("iso-8859-1")) {
107: tidyCharEncoding = Configuration.LATIN1;
108: }
109: if (charset.equalsIgnoreCase("utf8")) {
110: tidyCharEncoding = Configuration.UTF8;
111: }
112: if (charset.equalsIgnoreCase("macroman")) {
113: tidyCharEncoding = Configuration.MACROMAN;
114: }
115: }
116: } catch (NullPointerException ex) {
117: // no Content-Type sent! check anyway
118: }
119: InputStream in = null;
120: try {
121: in = request.getInputStream();
122: if (in == null) {
123: return null;
124: }
125: } catch (IOException ex) {
126: return null;
127: }
128:
129: // verify that the target resource is putable
130: ResourceReference rr = request.getTargetResource();
131: if (rr != null) {
132: try {
133: FramedResource target = (FramedResource) rr.lock();
134: HTTPFrame frame = null;
135: try {
136: frame = (HTTPFrame) target
137: .getFrame(Class
138: .forName("org.w3c.jigsaw.frames.HTTPFrame"));
139: } catch (ClassNotFoundException cex) {
140: cex.printStackTrace();
141: //big big problem ...
142: }
143: if (frame == null) // can't be putable
144: return null;
145: // now we can verify if the target resource is putable
146: if (!frame.getPutableFlag()) {
147: return null;
148: }
149: // and that the PUT can happen (taken from putFileResource
150: int cim = frame.checkIfMatch(request);
151: if ((cim == HTTPFrame.COND_FAILED)
152: || (cim == HTTPFrame.COND_WEAK)
153: || (frame.checkIfNoneMatch(request) == HTTPFrame.COND_FAILED)
154: || (frame.checkIfModifiedSince(request) == HTTPFrame.COND_FAILED)
155: || (frame.checkIfUnmodifiedSince(request) == HTTPFrame.COND_FAILED)) {
156:
157: Reply r = request
158: .makeReply(HTTP.PRECONDITION_FAILED);
159: r.setContent("Pre-condition failed.");
160: return r;
161: }
162: } catch (InvalidResourceException ex) {
163: ex.printStackTrace();
164: // problem ...
165: } finally {
166: rr.unlock();
167: }
168: }
169:
170: String expect = request.getExpect();
171: if (expect != null) {
172: if (expect.startsWith("100")) { // expect 100?
173: Client client = request.getClient();
174: if (client != null) {
175: try {
176: client.sendContinue();
177: } catch (java.io.IOException ex) {
178: return null;
179: }
180: }
181: }
182: }
183:
184: try {
185: ByteArrayOutputStream bout = new ByteArrayOutputStream();
186: Tidy tidy = new Tidy();
187: if (isXhtml) {
188: tidy.setXHTML(true);
189: } else {
190: tidy.setXHTML(false);
191: }
192: tidy.setErrout(new PrintWriter(System.err));
193: in.mark(65536000);
194:
195: // System.out.println(tidyCharEncoding);
196: tidy.setCharEncoding(tidyCharEncoding);
197: /* tidy.setIndentContent(false);
198: tidy.setIndentAttributes(false);
199: tidy.setSmartIndent(false);
200: */
201: tidy.parse(in, bout);
202:
203: if (tidy.getParseErrors() != 0) {
204: //System.out.println("too many errors, bailing out") ;
205: in.reset();
206: }
207:
208: // Need to call tidy in other thread, because of Piped streams.
209:
210: byte[] bufout = bout.toByteArray();
211:
212: ByteArrayInputStream tmpbin = new ByteArrayInputStream(
213: bufout);
214:
215: request.setContentLength(bufout.length);
216: if (bufout.length != 0) {
217: // tidy is happy let's apply transformation
218: request.setStream(tmpbin);
219: // add state to set warnings on the way back
220: request.setState("tidy", "ok");
221: } else {
222: if (getValidStrict() == true) {
223: // tidy failed and a file MUST validate -> refuse put
224: Reply reply = request.makeReply(HTTP.FORBIDDEN);
225: HtmlGenerator g = new HtmlGenerator(
226: "Not Acceptable");
227: g
228: .append("<p>This HTML code does not validate. Valid"
229: + "code is required here</p>"
230: + "<p>Warnings: "
231: + tidy.getParseWarnings()
232: + " Errors: "
233: + tidy.getParseErrors()
234: + "</p>");
235:
236: reply.setStream(g);
237: return reply;
238: } else {
239: return null;
240: }
241: }
242: } catch (Exception ex) {
243: ex.printStackTrace();
244: // problem
245: }
246: return null;
247: } else {
248: return null;
249: }
250: }
251:
252: /**
253: * @param request The original request.
254: * @param reply It's original reply.
255: * @return A Reply instance, or <strong>null</strong> if processing
256: * should continue normally.
257: * @exception ProtocolException If processing should be interrupted,
258: * because an abnormal situation occured.
259: */
260: public ReplyInterface outgoingFilter(RequestInterface req,
261: ReplyInterface rep) throws ProtocolException {
262: Request request = (Request) req;
263: Reply reply = (Reply) rep;
264:
265: if (request.hasState("tidy")) {
266: if (hw == null) {
267: hw = HttpFactory
268: .makeWarning(HttpWarning.TRANSFORMATION_APPLIED);
269: hw.setAgent("Jigsaw");
270: hw
271: .setText("Body modified for HTML conformance using JTidy");
272: reply.addWarning(hw);
273: }
274: }
275: return null;
276: }
277: }
|