Source Code Cross Referenced for MIMEAndEncodingParser.java in » Portal » Open-Portal » com » sun » portal » rproxy » rewriter » util » http » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation

1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI

Java

Java Tutorial

Illustrator Tutorials

GIMP Tutorials

C# / C Sharp

C# / CSharp Tutorial

C# / CSharp Open Source

SQL Server / T-SQL Tutorial

Oracle PL / SQL

Oracle PL/SQL Tutorial

Flash / Flex / ActionScript

VBA / Excel / Access / Word

XML

XML Tutorial

Microsoft Office PowerPoint 2007 Tutorial

Microsoft Office Excel 2007 Tutorial

Microsoft Office Word 2007 Tutorial

Java Source Code / Java Documentation » Portal » Open Portal » com.sun.portal.rproxy.rewriter.util.http

Source Cross Referenced Class Diagram Java Document (Java Doc)

001:        /*
002:         * Copyright 2001 Sun Microsystems, Inc.  All rights reserved.
003:         * PROPRIETARY/CONFIDENTIAL.  Use of this product is subject to license terms.
004:         */
005:        package com.sun.portal.rproxy.rewriter.util.http;
006:
007:        import com.sun.portal.rewriter.engines.LanguageConstants;
008:        import com.sun.portal.log.common.PortalLogger;
009:        import com.sun.portal.rewriter.util.StringHelper;
010:        import com.sun.portal.rewriter.util.xml.Tag;
011:        import com.sun.portal.rewriter.util.xml.TagParser;
012:
013:        import java.io.IOException;
014:
015:        public final class MIMEAndEncodingParser {
016:            private static final String CONTENT_TYPE_PATTERN = "Content-type:";
017:            private static final String CONTENT_PATTERN = "Content";
018:            private static final String CHARSET_PATTERN = "charset";
019:            private static final String CHARSET_EQUALS_PATTERN = CHARSET_PATTERN
020:                    + "=";
021:            private static boolean charsetDetectionEnabled = false;
022:
023:            static {
024:
025:                /* Check if charecter detection is enabled. It is optional since it is a performance issue.
026:                   Right now we will just check if the chardet.jar is in classpath. If it is there then
027:                   set the flag to true.
028:                 */
029:                try {
030:                    Class.forName("org.mozilla.intl.chardet.nsDetector");
031:                    charsetDetectionEnabled = true;
032:                } catch (ClassNotFoundException e) {
033:                    // Character detection jar file not found.
034:                }
035:
036:            }
037:
038:            /**
039:             * Content type can contain the encoding seperated with ';'
040:             * eg. text/html;charset=UTF-8
041:             */
042:            private static String parseMIME(final String aContentType,
043:                    final String aContentTypeLowerCase) {
044:                int i = aContentTypeLowerCase.indexOf(';');
045:                if (i == -1) {
046:                    return StringHelper.trimQuotes(aContentType);
047:                } else {
048:                    return StringHelper
049:                            .trimQuotes(aContentType.substring(0, i));
050:                }
051:            }//parseMIME()
052:
053:            /**
054:             * Content type can contain the encoding seperated with ';'
055:             * eg. text/html;charset=UTF-8
056:             */
057:            private static final String parseEncoding(
058:                    final String aContentType,
059:                    final String aContentTypeLowerCase)
060:
061:            {
062:                String lEncoding = null;
063:                int start = aContentTypeLowerCase
064:                        .indexOf(CHARSET_EQUALS_PATTERN);
065:
066:                if (start != -1) {
067:                    /*
068:                    BugNo:4711193
069:                    When the webserver encoding is set ISO-8859-1, and the page
070:                    had meta tag with charset=UTF-8, the headder sent was of
071:                    the below pattern:
072:                    Content-type: text/html;charset=UTF-8; charset=ISO-8859-1
073:                    Netscape and IE takes the first one as charset i.e UTF-8
074:                     */
075:                    int end = aContentTypeLowerCase.indexOf(';', start);
076:                    if (end == -1) {
077:                        end = aContentTypeLowerCase.length();
078:                    }
079:
080:                    if (end - start > CHARSET_EQUALS_PATTERN.length()) {
081:                        lEncoding = aContentType.substring(start
082:                                + CHARSET_EQUALS_PATTERN.length(), end);
083:                    }
084:                }
085:
086:                return validateEncoding(lEncoding);
087:            }//parseEncoding()
088:
089:            private static String validateEncoding(String aPrabableEncoding) {
090:                if (aPrabableEncoding == null) {
091:                    return null;
092:                }
093:
094:                if (aPrabableEncoding.length() > 0) {
095:                    //BugNo:4750041
096:                    aPrabableEncoding = StringHelper
097:                            .trimQuotes(aPrabableEncoding);
098:                    //Bug No:4865959
099:                    //We found one charset within a <meta> tag
100:                    //the charset value can only contain letter, digit,
101:                    //charcter '-' or '_'
102:                    //Another important thing is we need to elimated such kind of stmt
103:                    //written in DHTML..
104:                    char ch;
105:                    int endCE = -1;
106:                    while (++endCE < aPrabableEncoding.length()) {
107:                        ch = aPrabableEncoding.charAt(endCE);
108:                        if (!(Character.isLetterOrDigit(ch) || (ch == '-') || (ch == '_'))) {
109:                            break;
110:                        }
111:                    }//while loop
112:
113:                    if (endCE > 0 && endCE <= aPrabableEncoding.length()) {
114:                        return aPrabableEncoding.substring(0, endCE);
115:                    }
116:                }
117:                return null;
118:            }//validateEncoding()
119:
120:            private static void parseMIMEAndEncoding(
121:                    final ContentInfo aContentTypeInfo,
122:                    final String aContentType) {
123:                String lContentType = StringHelper.trimQuotes(aContentType);
124:
125:                String lContentTypeLowerCase = lContentType.toLowerCase();
126:
127:                //may mime is present in protocol headder and not in meta tag
128:                //or wise versa
129:                aContentTypeInfo.setMIME(parseMIME(lContentType,
130:                        lContentTypeLowerCase));
131:
132:                //may be encoding is present in protocol headder and not in meta tag
133:                //or wise versa
134:                aContentTypeInfo.setEncoding(parseEncoding(lContentType,
135:                        lContentTypeLowerCase));
136:
137:            }//parseMIMEAndEncoding()
138:
139:            /**
140:             * This method gets the character encoding from the response
141:             * hearer. If not found, it trys to go through the response
142:             * aContent to figure out the character encoding setting
143:             * possiblely specified in the html <meta> tag.
144:             */
145:            public static final ContentInfo parse(HTTPData aHTTPData)
146:                    throws IOException {
147:                parseHeadder(aHTTPData);
148:                if (!isNeeds2ScanBody(aHTTPData)) {
149:                    return aHTTPData.getContentInfo();
150:                }
151:
152:                /**
153:                 * The character encoding info was not found in the response
154:                 * header. We have to parse through the aContent portion to
155:                 * figure it out. It may be specified in the html <meta> tag
156:                 * as the following;
157:                 *
158:                 * <html>
159:                 * <head>
160:                 * <meta http-equiv="xyz" type="yes">
161:                 * <meta http-equiv="Content-Type" Content="text/html; charset=gb2312">
162:                 * </head>
163:                 *  ...
164:                 * Make sure your logic would take of multiple meta tags and one of
165:                 * the tag would have content type details
166:                 */
167:                //initialize the RawXXX vars
168:                String lContent = aHTTPData.getRawEncodedString();
169:                String lContentLowerCase = aHTTPData
170:                        .getRawEncodedStringLowerCase();
171:
172:                int bMetaTagStartIndex;
173:                int bMetaTagEndIndex;
174:                int cursorIndex = 0;
175:
176:                int bCharsetStartIndex;
177:                while (true) {
178:                    bMetaTagStartIndex = lContentLowerCase.indexOf("<meta",
179:                            cursorIndex);
180:
181:                    if (bMetaTagStartIndex == -1) {
182:                        //There are no meta tags found
183:                        break;
184:                    }
185:
186:                    bMetaTagEndIndex = lContentLowerCase.indexOf(">",
187:                            bMetaTagStartIndex);
188:                    if (bMetaTagEndIndex == -1) {
189:                        break;
190:                    }
191:
192:                    bCharsetStartIndex = lContentLowerCase.indexOf(
193:                            CHARSET_EQUALS_PATTERN, bMetaTagStartIndex);
194:                    if (bCharsetStartIndex == -1) {
195:                        break;
196:                    }
197:
198:                    if (bMetaTagEndIndex < bCharsetStartIndex) {
199:                        //charset index is out side end index so igore this meta tag
200:                        cursorIndex = bMetaTagEndIndex;
201:                        continue;
202:                    } else {
203:                        String bMetaTag = lContent.substring(
204:                                bMetaTagStartIndex, bMetaTagEndIndex - 1);
205:                        Tag bTag = TagParser.parse(bMetaTag);
206:                        String bContentValue = bTag.get(CONTENT_PATTERN);
207:                        parseMIMEAndEncoding(aHTTPData.getContentInfo(),
208:                                bContentValue);
209:                        if (aHTTPData.getContentInfo().isInvalidEncoding()) {
210:                            //Handle case of kind
211:                            // <meta http-equiv="content-type" content="text/html" charset="windows-1251" />
212:                            aHTTPData.getContentInfo()
213:                                    .setEncoding(
214:                                            validateEncoding(bTag
215:                                                    .get(CHARSET_PATTERN)));
216:                        }
217:                        break;
218:                    }
219:                }//while loop
220:
221:                // If mozilla character detection module is installed, use it to check for the encoding.
222:                if ((aHTTPData.getContentInfo().isInvalidEncoding())) {
223:                    if (charsetDetectionEnabled) {
224:                        CharsetDetector cdet = new CharsetDetector();
225:                        String aEncoding = cdet.detectCharset(aHTTPData);
226:                        aHTTPData.getContentInfo().setEncoding(aEncoding);
227:                    }
228:                }
229:
230:                return aHTTPData.getContentInfo();
231:            }//parse()
232:
233:            public static void parseHeadder(HTTPData aHTTPData) {
234:                String headder = aHTTPData.getContentType();
235:                /**
236:                 * At first, try the Content-Type header in the response
237:                 * header portion.
238:                 */
239:                if (headder != null && headder.length() > 0)
240:
241:                {
242:                    int index = headder.indexOf(CONTENT_TYPE_PATTERN);
243:                    String bContentType;
244:                    if (index == -1) {
245:                        bContentType = headder;
246:                    } else {
247:                        bContentType = headder.substring(index
248:                                + CONTENT_TYPE_PATTERN.length());
249:                    }
250:
251:                    parseMIMEAndEncoding(aHTTPData.getContentInfo(),
252:                            bContentType);
253:                }
254:            }//parseHeadder()
255:
256:            private static boolean isNeeds2ScanBody(HTTPData aHTTPData)
257:                    throws IOException {
258:                ContentInfo lContentInfo = aHTTPData.getContentInfo();
259:                //for images and other binary formats encoding has no meaning..
260:                if (lContentInfo.isInvalidMIME()
261:                        || (lContentInfo.isInvalidEncoding() && (lContentInfo
262:                                .getMIME().equalsIgnoreCase(
263:                                        LanguageConstants.HTML_MIME) || lContentInfo
264:                                .getMIME().equalsIgnoreCase("text/htm")))) {
265:                    if ((aHTTPData.getContentBytes() == null)
266:                            || (aHTTPData.getContentBytes().length == 0)) {
267:                        return false;
268:                    }
269:
270:                    return true;
271:                }
272:
273:                return false;
274:            }//isNeeds2ScanBody()
275:
276:        }//class MIMEAndEncodingParser

ww___w__.j__a_v__a__2___s___.__co_m | Contact Us

All other trademarks are property of their respective owners.