Source Code Cross Referenced for ParserComparator.java in  » HTML-Parser » Mozilla-Html-Parser » com » dappit » Dapper » parser » test » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » HTML Parser » Mozilla Html Parser » com.dappit.Dapper.parser.test 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /**
002:         * 
003:         */package com.dappit.Dapper.parser.test;
004:
005:        import java.io.ByteArrayOutputStream;
006:        import java.io.File;
007:        import java.io.FileInputStream;
008:        import java.io.FileNotFoundException;
009:        import java.io.IOException;
010:        import java.io.StringReader;
011:        import java.net.MalformedURLException;
012:        import java.util.Hashtable;
013:        import java.util.concurrent.ExecutorService;
014:        import java.util.concurrent.Executors;
015:        import java.util.concurrent.TimeUnit;
016:        import java.util.zip.ZipEntry;
017:        import java.util.zip.ZipInputStream;
018:
019:        import org.ccil.cowan.tagsoup.Parser;
020:        import org.dom4j.DocumentException;
021:        import org.dom4j.io.DOMWriter;
022:        import org.dom4j.io.SAXReader;
023:        import org.w3c.dom.Document;
024:
025:        import com.dappit.Dapper.parser.MozillaParser;
026:        import com.dappit.Dapper.parser.profiler.SimpleTimeProfiler;
027:        import com.dappit.Dapper.parser.test.util.ProgressLogger;
028:
029:        /**
030:         * @author Ohad Serfaty
031:         *
032:         */
033:        public class ParserComparator {
034:
035:            private static volatile double mozillaParsingTime;
036:            private static volatile double tagsoupParsingTime;
037:
038:            public static byte[] fileGetContentsInBytes(File file)
039:                    throws FileNotFoundException, IOException {
040:                FileInputStream fIS = new FileInputStream(file);
041:                ByteArrayOutputStream bIS = new ByteArrayOutputStream();
042:                byte[] temp = new byte[256];
043:                int bytesRead = 0;
044:                while ((bytesRead = fIS.read(temp)) != -1) {
045:                    bIS.write(temp, 0, bytesRead);
046:                }
047:                fIS.close();
048:                bIS.close();
049:
050:                return bIS.toByteArray();
051:            }
052:
053:            /**
054:             * @param youTubeContent
055:             * @throws DocumentException 
056:             * @throws NetworkErrorException 
057:             * @throws IOException 
058:             * @throws MalformedURLException 
059:             */
060:            private static void compareMozillaAndTagsoup(String content)
061:                    throws Exception {
062:
063:                SimpleTimeProfiler profiler = new SimpleTimeProfiler();
064:
065:                // profile mozilla :
066:                profiler.start();
067:                //		System.out.println("Parsing content : "+ content);
068:                MozillaParser parser = new MozillaParser();
069:                System.out.println("Mozilla Parsing...");
070:                parser.parse(content);
071:                mozillaParsingTime += profiler.report("Mozilla:");
072:
073:                profiler = new SimpleTimeProfiler();
074:                // profile tagsoup :
075:                System.out.println("Tagsoup Parsing...");
076:                profiler.start();
077:                tagSoupParse(content);
078:                tagsoupParsingTime += profiler.report("tagsoup:");
079:            }
080:
081:            private static Document tagSoupParse(String content) {
082:                Parser htmlParser = new Parser();
083:
084:                SAXReader saxReader = new SAXReader(htmlParser);
085:                saxReader.setMergeAdjacentText(true);
086:                DOMWriter domWriter = new DOMWriter();
087:                try {
088:                    return domWriter.write(saxReader.read(new StringReader(
089:                            content)));
090:                } catch (Exception e) {
091:                    e.printStackTrace();
092:                }
093:                return null;
094:            }
095:
096:            private static void testZippedContent() throws Exception {
097:                ZipInputStream zippedInputStream = new ZipInputStream(
098:                        new FileInputStream("./test.content.zip"));
099:                int counter = 0;
100:                int maxCount = 1000;
101:                ProgressLogger progressLogger = new ProgressLogger(maxCount);
102:                while (counter++ < maxCount) {
103:
104:                    ZipEntry nextZippedEntry = zippedInputStream.getNextEntry();
105:                    if (nextZippedEntry == null)
106:                        break;
107:                    ByteArrayOutputStream bos = new ByteArrayOutputStream();
108:                    System.out.println("Reading zipped file :"
109:                            + nextZippedEntry.getName());
110:                    byte[] buf = new byte[1024];
111:                    int len;
112:                    while ((len = zippedInputStream.read(buf)) > 0) {
113:                        bos.write(buf, 0, len);
114:                    }
115:                    String content = new String(bos.toByteArray());
116:                    //		        System.out.println("Content : "+ content);
117:                    bos.close();
118:                    compareMozillaAndTagsoup(content);
119:
120:                    progressLogger.incrementCount();
121:                }
122:                System.out.println("Mozilla Parsing time :"
123:                        + mozillaParsingTime + " sec");
124:                System.out.println("Tagsoup Parsing time :"
125:                        + tagsoupParsingTime + " sec");
126:
127:            }
128:
129:            public static class ZipFileReader {
130:
131:                private final String fileName;
132:                private ZipInputStream zippedInputStream;
133:
134:                public ZipFileReader(String fileName)
135:                        throws FileNotFoundException {
136:                    this .fileName = fileName;
137:                    zippedInputStream = new ZipInputStream(new FileInputStream(
138:                            this .fileName));
139:                }
140:
141:                public synchronized String nextContent() throws Exception {
142:                    ZipEntry nextZippedEntry = zippedInputStream.getNextEntry();
143:                    if (nextZippedEntry == null)
144:                        return null;
145:                    ByteArrayOutputStream bos = new ByteArrayOutputStream();
146:                    System.out.println("Reading zipped file :"
147:                            + nextZippedEntry.getName());
148:                    byte[] buf = new byte[1024];
149:                    int len;
150:                    while ((len = zippedInputStream.read(buf)) > 0) {
151:                        bos.write(buf, 0, len);
152:                    }
153:                    String content = new String(bos.toByteArray());
154:                    //	        System.out.println("Content : "+ content);
155:                    bos.close();
156:                    return content;
157:                }
158:
159:            }
160:
161:            private static void testZippedContentMultithreaded()
162:                    throws Exception {
163:                int maxThreads = 10;
164:                ExecutorService mozillThreadPool = Executors
165:                        .newFixedThreadPool(maxThreads);
166:                ExecutorService tagsoupThreadPool = Executors
167:                        .newFixedThreadPool(maxThreads);
168:
169:                ZipFileReader mozillaFileReader = new ZipFileReader(
170:                        "./test.content.zip");
171:                ZipFileReader tagsoupFileReader = new ZipFileReader(
172:                        "./test.content.zip");
173:                int counter = 0;
174:                int maxCount = 530;
175:
176:                SimpleTimeProfiler mozillaProfiler = new SimpleTimeProfiler();
177:                mozillaProfiler.start();
178:                // first have Mozilla : 
179:                while (counter++ < maxCount) {
180:                    mozillThreadPool.execute(new MozillaParsingThread(
181:                            mozillaFileReader));
182:                }
183:                mozillThreadPool.shutdown();
184:                mozillThreadPool.awaitTermination(10000, TimeUnit.SECONDS);
185:                double mozillaTime = mozillaProfiler
186:                        .report("Mozilla total time");
187:
188:                counter = 0;
189:                // then have tagsoup :
190:                SimpleTimeProfiler tagsoupProfiler = new SimpleTimeProfiler();
191:                tagsoupProfiler.start();
192:                while (counter++ < maxCount) {
193:                    tagsoupThreadPool.execute(new TagsoupParsingThread(
194:                            tagsoupFileReader));
195:                }
196:                tagsoupThreadPool.shutdown();
197:                tagsoupThreadPool.awaitTermination(10000, TimeUnit.SECONDS);
198:
199:                double tagsoupTime = tagsoupProfiler
200:                        .report("Tagsoup total time");
201:
202:                System.out.println("Mozilla Parsing multithreaded time :"
203:                        + mozillaParsingTime + " sec");
204:                System.out.println("Tagsoup Parsing multithreaded time :"
205:                        + tagsoupParsingTime + " sec");
206:
207:                System.out.println("Mozilla Parsing Total time :" + mozillaTime
208:                        + " sec");
209:                System.out.println("Tagsoup Parsing Total time :" + tagsoupTime
210:                        + " sec");
211:
212:            }
213:
214:            public static class MozillaParsingThread extends Thread {
215:
216:                private final ZipFileReader mozillaFileReader;
217:                private boolean synchronize;
218:                private static Object SynchronizationObject = new Object();
219:                private static Hashtable<String, Document> documentHashTable = new Hashtable<String, Document>();
220:
221:                /**
222:                 * @param tagsoupFileReader
223:                 */
224:                public MozillaParsingThread(ZipFileReader tagsoupFileReader) {
225:                    this (tagsoupFileReader, false);
226:                }
227:
228:                /**
229:                 * @param tagsoupFileReader2
230:                 * @param b
231:                 */
232:                public MozillaParsingThread(ZipFileReader tagsoupFileReader,
233:                        boolean synchronize) {
234:                    this .synchronize = synchronize;
235:                    this .mozillaFileReader = tagsoupFileReader;
236:                }
237:
238:                public void run() {
239:                    String content;
240:                    try {
241:                        content = mozillaFileReader.nextContent();
242:                        SimpleTimeProfiler profiler = new SimpleTimeProfiler();
243:                        profiler.start();
244:                        MozillaParser parser = new MozillaParser();
245:                        org.dom4j.Document document;
246:                        if (this .synchronize) {
247:                            synchronized (SynchronizationObject) {
248:                                document = (org.dom4j.Document) parser
249:                                        .parse(content);
250:                            }
251:                        } else {
252:                            document = (org.dom4j.Document) parser
253:                                    .parse(content);
254:                        }
255:
256:                        mozillaParsingTime += profiler.report("Mozilla");
257:                        //				org.dom4j.Document document2 = (org.dom4j.Document) parser.parse(content);
258:                        //				if (!document2.asXML().equals(document.asXML()))
259:                        //				{
260:                        //					System.err.println("------------------------->>> content not equals ????");
261:                        //				}
262:
263:                        documentHashTable.put(content.hashCode()
264:                                + Boolean.toString(synchronize),
265:                                (Document) document);
266:                    } catch (Exception e) {
267:                        e.printStackTrace();
268:                    }
269:
270:                }
271:
272:                public static Hashtable<String, Document> getDocumentsHashTable() {
273:                    return documentHashTable;
274:                }
275:
276:            }
277:
278:            public static class TagsoupParsingThread extends Thread {
279:
280:                private final ZipFileReader tagsoupFileReader;
281:                private final boolean synchronize;
282:                private static Object SynchronizationObject = new Object();
283:
284:                /**
285:                 * @param tagsoupFileReader
286:                 */
287:                public TagsoupParsingThread(ZipFileReader tagsoupFileReader) {
288:                    this (tagsoupFileReader, false);
289:                }
290:
291:                /**
292:                 * @param tagsoupFileReader2
293:                 * @param b
294:                 */
295:                public TagsoupParsingThread(ZipFileReader tagsoupFileReader,
296:                        boolean synchronize) {
297:                    this .synchronize = synchronize;
298:                    this .tagsoupFileReader = tagsoupFileReader;
299:                }
300:
301:                public void run() {
302:                    try {
303:                        String content = tagsoupFileReader.nextContent();
304:                        SimpleTimeProfiler profiler = new SimpleTimeProfiler();
305:                        profiler.start();
306:                        if (synchronize) {
307:                            synchronized (SynchronizationObject) {
308:                                tagSoupParse(content);
309:                            }
310:                        } else
311:                            tagSoupParse(content);
312:                        tagsoupParsingTime += profiler.report("Tagsoup");
313:
314:                    } catch (Exception e) {
315:                        // TODO Auto-generated catch block
316:                        e.printStackTrace();
317:                    }
318:
319:                }
320:
321:            }
322:
323:            public static void main(String[] args) throws Exception {
324:                TestMozillaParser.initTestingXPCOM();
325:
326:                // Scheme 1 :
327:                testZippedContentMultithreaded();
328:
329:                // Scheme 2 :
330:                //		testTagsoupSynchronizedParsing();
331:
332:                // Scheme 3 :
333:                //		testMozillaSynchronizedParsing();
334:                //		System.out.println( MozillaParsingThread.getDocumentsHashTable());
335:                //		Hashtable<String, Document> documentHashTable = MozillaParsingThread.getDocumentsHashTable();
336:                //		for (String contentType:documentHashTable.keySet())
337:                //		{
338:                //			if (contentType.endsWith("true"))
339:                //			{
340:                //				org.dom4j.Document synchronizedDocumentResult = (org.dom4j.Document) documentHashTable.get(contentType);
341:                //				System.out.println(contentType +"->" + synchronizedDocumentResult);
342:                //				
343:                //				String parralelScontent = contentType.replace("true", "false");
344:                //				org.dom4j.Document unsynchronizedDocumentResult = (org.dom4j.Document) documentHashTable.get(parralelScontent);
345:                //				System.out.println( parralelScontent+"->" +unsynchronizedDocumentResult );
346:                //				if (!unsynchronizedDocumentResult.asXML().equals(synchronizedDocumentResult.asXML()))
347:                //					System.err.println("Not Good : "  + contentType);
348:                //			}
349:                //		}
350:                //		
351:            }
352:
353:            /**
354:             * @throws Exception 
355:             * 
356:             */
357:            private static void testTagsoupSynchronizedParsing()
358:                    throws Exception {
359:                tagsoupMultithreadedParse(true, "Tagsoup Synchronized ");
360:                tagsoupMultithreadedParse(false, "Tagsoup Parallel ");
361:            }
362:
363:            /**
364:             * @throws Exception 
365:             * 
366:             */
367:            private static void testMozillaSynchronizedParsing()
368:                    throws Exception {
369:                mozillaMultithreadedParse(true, "Mozilla Synchronized ");
370:                mozillaMultithreadedParse(false, "Mozilla Parallel ");
371:            }
372:
373:            /**
374:             * @throws FileNotFoundException 
375:             * @throws Exception 
376:             * 
377:             */
378:            private static void mozillaMultithreadedParse(
379:                    final boolean synchronize, String reportString)
380:                    throws Exception {
381:                int maxThreads = 30;
382:                ExecutorService mozillaThreadPool = Executors
383:                        .newFixedThreadPool(maxThreads);
384:                mozillaParsingTime = 0;
385:                ZipFileReader tagsoupFileReader = new ZipFileReader(
386:                        "./test.content.zip");
387:                int counter = 0;
388:                int maxCount = 530;
389:
390:                // then have tagsoup :
391:                SimpleTimeProfiler mozillaProfiler = new SimpleTimeProfiler();
392:                mozillaProfiler.start();
393:                while (counter++ < maxCount) {
394:                    mozillaThreadPool.execute(new MozillaParsingThread(
395:                            tagsoupFileReader, synchronize));
396:                }
397:                mozillaThreadPool.shutdown();
398:                mozillaThreadPool.awaitTermination(10000, TimeUnit.SECONDS);
399:
400:                double mozillaTime = mozillaProfiler
401:                        .report("Tagsoup synchronized total time");
402:
403:                System.out.println(reportString + " time :"
404:                        + mozillaParsingTime + " sec");
405:                System.out.println(reportString + " Total time :" + mozillaTime
406:                        + " sec");
407:
408:            }
409:
410:            /**
411:             * @throws FileNotFoundException 
412:             * @throws Exception 
413:             * 
414:             */
415:            private static void tagsoupMultithreadedParse(
416:                    final boolean synchronize, String reportString)
417:                    throws Exception {
418:                int maxThreads = 10;
419:                ExecutorService tagsoupThreadPool = Executors
420:                        .newFixedThreadPool(maxThreads);
421:                tagsoupParsingTime = 0;
422:                ZipFileReader tagsoupFileReader = new ZipFileReader(
423:                        "./test.content.zip");
424:                int counter = 0;
425:                int maxCount = 530;
426:
427:                // then have tagsoup :
428:                SimpleTimeProfiler tagsoupProfiler = new SimpleTimeProfiler();
429:                tagsoupProfiler.start();
430:                while (counter++ < maxCount) {
431:                    tagsoupThreadPool.execute(new TagsoupParsingThread(
432:                            tagsoupFileReader, synchronize));
433:                }
434:                tagsoupThreadPool.shutdown();
435:                tagsoupThreadPool.awaitTermination(10000, TimeUnit.SECONDS);
436:
437:                double tagsoupTime = tagsoupProfiler
438:                        .report("Tagsoup synchronized total time");
439:
440:                System.out.println(reportString + " time :"
441:                        + tagsoupParsingTime + " sec");
442:                System.out.println(reportString + " Total time :" + tagsoupTime
443:                        + " sec");
444:
445:            }
446:
447:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.