Source Code Cross Referenced for BrokenLinkCheckerRun.java in  » Content-Management-System » hippo-cms » nl » hippo » cms » brokenlinkchecker » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Sevlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Content Management System » hippo cms » nl.hippo.cms.brokenlinkchecker 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


0001:        /*
0002:         * Copyright 2007 Hippo.
0003:         * 
0004:         * Licensed under the Apache License, Version 2.0 (the "License");
0005:         * you may not use this file except in compliance with the License.
0006:         * You may obtain a copy of the License at
0007:         * 
0008:         *      http://www.apache.org/licenses/LICENSE-2.0
0009:         * 
0010:         * Unless required by applicable law or agreed to in writing, software
0011:         * distributed under the License is distributed on an "AS IS" BASIS,
0012:         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0013:         * See the License for the specific language governing permissions and
0014:         * limitations under the License.
0015:         */
0016:        package nl.hippo.cms.brokenlinkchecker;
0017:
0018:        import java.io.BufferedReader;
0019:        import java.io.ByteArrayOutputStream;
0020:        import java.io.IOException;
0021:        import java.io.InputStream;
0022:        import java.io.InputStreamReader;
0023:        import java.io.OutputStream;
0024:        import java.io.OutputStreamWriter;
0025:        import java.io.UnsupportedEncodingException;
0026:        import java.net.MalformedURLException;
0027:        import java.net.URL;
0028:        import java.util.Collection;
0029:        import java.util.Enumeration;
0030:        import java.util.HashMap;
0031:        import java.util.HashSet;
0032:        import java.util.Iterator;
0033:        import java.util.Map;
0034:        import java.util.Set;
0035:        import java.util.StringTokenizer;
0036:        import nl.hippo.cms.brokenlinkchecker.log.BrokenLinkCheckerLog;
0037:        import nl.hippo.cms.brokenlinkchecker.util.MethodCleanup;
0038:        import nl.hippo.cms.brokenlinkchecker.util.ReaderCleanup;
0039:        import nl.hippo.cms.brokenlinkchecker.util.StreamCleanup;
0040:        import nl.hippo.cms.brokenlinkchecker.util.Validation;
0041:        import nl.hippo.cms.brokenlinkchecker.util.WriterCleanup;
0042:        import org.apache.commons.httpclient.Credentials;
0043:        import org.apache.commons.httpclient.HttpClient;
0044:        import org.apache.commons.httpclient.HttpState;
0045:        import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
0046:        import org.apache.commons.httpclient.UsernamePasswordCredentials;
0047:        import org.apache.webdav.lib.Property;
0048:        import org.apache.webdav.lib.methods.SearchMethod;
0049:
0050:        /**
0051:         * <p>
0052:         * This class searches a tree in a WebDAV repository for broken links. It does
0053:         * so by executing a DASL that finds documents that have property
0054:         * <code>links</code> in namespace <code>http://hippo.nl/cms/1.0</code>
0055:         * defined. The result of the check is stored in a document in the repository
0056:         * with the following structure:
0057:         * </p>
0058:         * 
0059:         * <pre>
0060:         * &lt;broken-links date=&quot;...&quot;&gt;
0061:         *   &lt;page url=&quot;...&quot;&gt;
0062:         *     &lt;link url=&quot;...&quot;&gt;[error message]&lt;/link&gt;
0063:         *   &lt;/page&gt;
0064:         * &lt;/broken-links&gt;
0065:         * </pre>
0066:         * 
0067:         * <p>
0068:         * The error message can be a simple text, or an element with namespace
0069:         * <code>http://apache.org/cocoon/i18n/2.1</code> and name <code>text</code>
0070:         * which contains a localization key.
0071:         * </p>
0072:         * 
0073:         * <p>
0074:         * If a link exists in multiple documents it is checked only once.
0075:         * </p>
0076:         * 
0077:         * <p>
0078:         * Checking links is a time-consuming task. It does not require a lot of
0079:         * resources though. That is why the checking of links is performed using
0080:         * multiple threads.
0081:         * </p>
0082:         * 
0083:         * <p>
0084:         * NOTE: this class assumes that the result document will be written to the
0085:         * WebDAV repository that is checked for broken links. Therefore only one set of
0086:         * credentials has to be given.
0087:         * </p>
0088:         */
0089:        public class BrokenLinkCheckerRun {
0090:            /**
0091:             * <p>
0092:             * The name of the resource containing the template for the DASL to find
0093:             * documents containing links.
0094:             * </p>
0095:             */
0096:            private static final String FIND_LINKS_DASL_TEMPLATE_RESOURCE_NAME = "findLinks.xml";
0097:
0098:            /**
0099:             * <p>
0100:             * The tag in the find links DASL template which marks the location at
0101:             * which to set the results offset.
0102:             * </p>
0103:             */
0104:            private static final String RESULTS_OFFSET_TAG = "@results-offset@";
0105:
0106:            /**
0107:             * <p>
0108:             * The tag in the find links DASL template which marks the location at
0109:             * which to set the number of results limit.
0110:             * </p>
0111:             */
0112:            private static final String NUMBER_OF_RESULTS_LIMIT_TAG = "@number-of-results-limit@";
0113:
0114:            /**
0115:             * <p>
0116:             * The URI of the namespace for Hippo CMS specific WebDAV properties.
0117:             * </p>
0118:             */
0119:            private static final String HIPPO_CMS_NAMESPACE_URI = "http://hippo.nl/cms/1.0";
0120:
0121:            /**
0122:             * <p>
0123:             * The name of the WebDAV property that contains a space-separated list
0124:             * of the links of the document.
0125:             * </p>
0126:             */
0127:            private static final String LINKS_PROPERTY_NAME = "links";
0128:
0129:            /**
0130:             * <p>
0131:             * The separator, a space, used to separate links in the WebDAV property
0132:             * that contains the links of the document.
0133:             * </p>
0134:             */
0135:            private static final String LINK_SEPARATORS = " ";
0136:
0137:            /**
0138:             * <p>
0139:             * The number of milliseconds in a second.
0140:             * </p>
0141:             */
0142:            private static final int NUMBER_OF_MILLIS_IN_A_SECOND = 1000;
0143:
0144:            /**
0145:             * <p>
0146:             * The minimum number of documents to retrieve in one request.
0147:             * </p>
0148:             */
0149:            public static final int MINIMUM_DOCUMENT_BATCH_SIZE = 1;
0150:
0151:            /**
0152:             * <p>
0153:             * The maximum number of documents to retrieve in one request.
0154:             * </p>
0155:             */
0156:            public static final int MAXIMUM_DOCUMENT_BATCH_SIZE = 1000;
0157:
0158:            /**
0159:             * <p>
0160:             * The minimum number of threads to use for checking for broken links.
0161:             * </p>
0162:             */
0163:            public static final int MINIMUM_NUMBER_OF_LINK_CHECKING_THREADS = 1;
0164:
0165:            /**
0166:             * <p>
0167:             * The minimum timeout for requests used to check for broken links.
0168:             * </p>
0169:             */
0170:            public static final int MINIMUM_LINK_CHECK_TIMEOUT_SECONDS = 1;
0171:
0172:            /**
0173:             * <p>
0174:             * The HTTP client timeout is specified in milliseconds as an integer.
0175:             * This limits the maximum value for the number of seconds for the
0176:             * timeout that can be specified in the configuration to this value.
0177:             * </p>
0178:             */
0179:            public static final int MAXIMUM_LINK_CHECK_TIMEOUT_SECONDS = Integer.MAX_VALUE
0180:                    / NUMBER_OF_MILLIS_IN_A_SECOND;
0181:
0182:            /**
0183:             * <p>
0184:             * The name of the UTF-8 character encoding.
0185:             * </p>
0186:             */
0187:            private static final String UTF_8_ENCODING_NAME = "UTF-8";
0188:
0189:            /**
0190:             * <p>
0191:             * The object holding the information and resources needed by this
0192:             * object.
0193:             * </p>
0194:             */
0195:            private BrokenLinkCheckerRunConfiguration configuration;
0196:
0197:            /**
0198:             * <p>
0199:             * This flag is used to check if an instance of this class is not used
0200:             * twice.
0201:             * </p>
0202:             */
0203:            private boolean hasBeenStarted;
0204:
0205:            /**
0206:             * <p>
0207:             * The set of links that should be checked by background checks.
0208:             * </p>
0209:             * 
0210:             * <p>
0211:             * <code>Set&lt;String&gt;</code>
0212:             * </p>
0213:             */
0214:            private Set linksToCheck = new HashSet();
0215:
0216:            /**
0217:             * <p>
0218:             * The set of links that have already been checked.
0219:             * </p>
0220:             * 
0221:             * <p>
0222:             * <code>Set&lt;String&gt;</code>
0223:             * </p>
0224:             */
0225:            private Set checkedLinks = new HashSet();
0226:
0227:            /**
0228:             * <p>
0229:             * The map containing the broken links. A broken link maps to the error
0230:             * message returned during the brokenness check.
0231:             * </p>
0232:             * 
0233:             * <p>
0234:             * <code>Map&lt;String, BrokenLinkErrorMessage&gt;</code>
0235:             * </p>
0236:             */
0237:            private Map brokenLinks = new HashMap();
0238:
0239:            /**
0240:             * <p>
0241:             * The map containing the URLs of the documents that have broken links.
0242:             * Each document maps to the set of links that are broken.
0243:             * </p>
0244:             * 
0245:             * <p>
0246:             * <code>Map&lt;String, Set&lt;String&gt;&gt;</code>
0247:             * </p>
0248:             */
0249:            private Map documentsWithBrokenLinks = new HashMap();
0250:
0251:            /**
0252:             * <p>
0253:             * The HTTP client that is used to communicate with the WebDAV
0254:             * repository.
0255:             * </p>
0256:             */
0257:            private HttpClient httpClient;
0258:
0259:            /**
0260:             * <p>
0261:             * The configuration to use for the tasks that check the links. This is
0262:             * created once before the actual checking of links is started because
0263:             * the same configuration can be used by all tasks.
0264:             * </p>
0265:             */
0266:            private BrokenLinkCheckerTaskConfiguration taskConfiguration;
0267:
0268:            /**
0269:             * <p>
0270:             * The currently active tasks that check if links are broken.
0271:             * </p>
0272:             */
0273:            private Set activeTasks = new HashSet();
0274:
0275:            /**
0276:             * <p>
0277:             * Check that the configuration is valid. If it is not an
0278:             * {@link IllegalArgumentException} is thrown.
0279:             * </p>
0280:             * <p>
0281:             * A configuration is valid if the following rules are followed:
0282:             * </p>
0283:             * <table>
0284:             * <tr>
0285:             * <th>Attribute</th>
0286:             * <th>Rules</th>
0287:             * </tr>
0288:             * <tr>
0289:             * <td><code>documentTreeToCheckRootUrl</code></td>
0290:             * <td>Cannot be <code>null</code>.</td>
0291:             * </tr>
0292:             * <tr>
0293:             * <td><code>documentsBaseUrl</code></td>
0294:             * <td>Cannot be <code>null</code>.</td>
0295:             * </tr>
0296:             * <tr>
0297:             * <td><code>internalLinksBaseUrl</code></td>
0298:             * <td>Cannot be <code>null</code>.</td>
0299:             * </tr>
0300:             * <tr>
0301:             * <td><code>repositoryUsername</code></td>
0302:             * <td>Cannot be <code>null</code>.</td>
0303:             * </tr>
0304:             * <tr>
0305:             * <td><code>repositoryPassword</code></td>
0306:             * <td>Cannot be <code>null</code>.</td>
0307:             * </tr>
0308:             * <tr>
0309:             * <td><code>resultDocumentUrl</code></td>
0310:             * <td>Cannot be <code>null</code>.</td>
0311:             * </tr>
0312:             * <tr>
0313:             * <td><code>documentBatchSize</code></td>
0314:             * <td>Greater than or equal to 1 and less than or equal to 1000.</td>
0315:             * </tr>
0316:             * <tr>
0317:             * <td><code>numberOfLinkCheckingThreads</code></td>
0318:             * <td>Greater than or equal to 1.</td>
0319:             * </tr>
0320:             * <tr>
0321:             * <td><code>linkCheckTimeoutSeconds</code></td>
0322:             * <td>Greater than or equal to 1 and less than or equal to 2147483.</td>
0323:             * </tr>
0324:             * <tr>
0325:             * <td><code>log</code></td>
0326:             * <td>Cannot be <code>null</code>.</td>
0327:             * </tr>
0328:             * </table>
0329:             * 
0330:             * @param configuration
0331:             *                the configuration to check.
0332:             */
0333:            public static void assertConfigurationIsValid(
0334:                    BrokenLinkCheckerRunConfiguration configuration) {
0335:                boolean isValid = true;
0336:                StringBuffer validationErrorMessage = new StringBuffer(1000);
0337:
0338:                isValid &= Validation
0339:                        .assertTrue(configuration
0340:                                .getDocumentTreeToCheckRootUrl() != null,
0341:                                validationErrorMessage,
0342:                                "The document tree to check root URL cannot be 'null'.");
0343:
0344:                isValid &= Validation.assertTrue(configuration
0345:                        .getDocumentsBaseUrl() != null, validationErrorMessage,
0346:                        "The documents base URL cannot be 'null'.");
0347:
0348:                isValid &= Validation.assertTrue(configuration
0349:                        .getInternalLinksBaseUrl() != null,
0350:                        validationErrorMessage,
0351:                        "The internal links base URL cannot be 'null'.");
0352:
0353:                isValid &= Validation.assertTrue(configuration
0354:                        .getRepositoryUsername() != null,
0355:                        validationErrorMessage,
0356:                        "The username cannot be 'null'.");
0357:
0358:                isValid &= Validation.assertTrue(configuration
0359:                        .getRepositoryPassword() != null,
0360:                        validationErrorMessage,
0361:                        "The password cannot be 'null'.");
0362:
0363:                isValid &= Validation.assertTrue(configuration
0364:                        .getResultDocumentUrl() != null,
0365:                        validationErrorMessage,
0366:                        "The result document URL cannot be 'null'.");
0367:
0368:                int documentBatchSize = configuration.getDocumentBatchSize();
0369:                isValid &= Validation
0370:                        .assertTrue(
0371:                                MINIMUM_DOCUMENT_BATCH_SIZE <= documentBatchSize
0372:                                        && documentBatchSize <= MAXIMUM_DOCUMENT_BATCH_SIZE,
0373:                                validationErrorMessage,
0374:                                "The number of documents in a batch must be greater than or equal to "
0375:                                        + MINIMUM_DOCUMENT_BATCH_SIZE
0376:                                        + " and less than or equal to "
0377:                                        + MAXIMUM_DOCUMENT_BATCH_SIZE + ".");
0378:
0379:                isValid &= Validation
0380:                        .assertTrue(
0381:                                MINIMUM_NUMBER_OF_LINK_CHECKING_THREADS <= configuration
0382:                                        .getNumberOfLinkCheckingThreads(),
0383:                                validationErrorMessage,
0384:                                "The number of link checking threads must be greater than or equal to "
0385:                                        + MINIMUM_NUMBER_OF_LINK_CHECKING_THREADS
0386:                                        + ".");
0387:
0388:                int linkCheckTimeoutSeconds = configuration
0389:                        .getLinkCheckTimeoutSeconds();
0390:                isValid &= Validation
0391:                        .assertTrue(
0392:                                MINIMUM_LINK_CHECK_TIMEOUT_SECONDS <= linkCheckTimeoutSeconds
0393:                                        && linkCheckTimeoutSeconds <= MAXIMUM_LINK_CHECK_TIMEOUT_SECONDS,
0394:                                validationErrorMessage,
0395:                                "The number of seconds to wait for a response must be greater than or equal to "
0396:                                        + MINIMUM_LINK_CHECK_TIMEOUT_SECONDS
0397:                                        + " and less than or equal to "
0398:                                        + MAXIMUM_LINK_CHECK_TIMEOUT_SECONDS
0399:                                        + ".");
0400:
0401:                isValid &= Validation.assertTrue(
0402:                        configuration.getLog() != null, validationErrorMessage,
0403:                        "The log cannot be 'null'.");
0404:
0405:                if (!isValid) {
0406:                    throw new IllegalArgumentException(validationErrorMessage
0407:                            .toString());
0408:                }
0409:            }
0410:
/**
 * <p>
 * Create a run that will use the given configuration for the
 * information and resources it needs.
 * </p>
 *
 * <p>
 * The configuration is validated with
 * {@link #assertConfigurationIsValid(BrokenLinkCheckerRunConfiguration)}
 * before it is stored. Constructing the object does not start the broken
 * link check; invoke {@link #execute()} for that.
 * </p>
 *
 * @param configuration
 *                the object holding the information and resources.
 * @throws IllegalArgumentException
 *                 if the configuration is rejected by the validation.
 */
public BrokenLinkCheckerRun(
        BrokenLinkCheckerRunConfiguration configuration) {
    // Validate first so that an instance never holds a configuration
    // that was rejected.
    assertConfigurationIsValid(configuration);
    this.configuration = configuration;
}
0433:
0434:            /**
0435:             * <p>
0436:             * Start the broken link checking process. This method can only be
0437:             * invoked once.
0438:             * </p>
0439:             */
0440:            public void execute() {
0441:                synchronized (this ) {
0442:                    assertHasNotBeenStarted();
0443:
0444:                    hasBeenStarted = true;
0445:                }
0446:
0447:                createHttpClient();
0448:
0449:                findBrokenLinks();
0450:
0451:                storeResult();
0452:            }
0453:
0454:            /**
0455:             * <p>
0456:             * Throw an {@link IllegalStateException} if this run has already been
0457:             * started.
0458:             * </p>
0459:             */
0460:            private void assertHasNotBeenStarted() {
0461:                if (hasBeenStarted) {
0462:                    throw new IllegalStateException(
0463:                            "This instance has already been started. Use a new instance for each run.");
0464:                }
0465:            }
0466:
0467:            /**
0468:             * <p>
0469:             * Find broken links by querying the repository for all documents with
0470:             * links, discarding links that should not be checked, and checking the
0471:             * remaining links.
0472:             * </p>
0473:             */
0474:            private void findBrokenLinks() {
0475:                taskConfiguration = createBrokenLinkCheckerTaskConfiguration();
0476:
0477:                int numberOfResults;
0478:                int offset = 0;
0479:                do {
0480:                    byte[] findLinksDasl = generateFindLinksDasl(offset);
0481:
0482:                    offset += getDocumentBatchSize();
0483:
0484:                    Map documentsWithLinks = new HashMap();
0485:                    numberOfResults = executeFindLinksDasl(findLinksDasl,
0486:                            documentsWithLinks);
0487:
0488:                    addLinksToLinksToCheck(documentsWithLinks.values());
0489:
0490:                    checkLinks();
0491:
0492:                    determineBrokenLinksOfDocuments(documentsWithLinks);
0493:                } while (!shouldStop(numberOfResults));
0494:            }
0495:
0496:            /**
0497:             * <p>
0498:             * Determine the broken links of documents and add the broken ones to
0499:             * the set of broken links of the document.
0500:             * </p>
0501:             * 
0502:             * @param documentsWithLinks
0503:             *                a set of URLs of documents mapping to the links
0504:             *                contained in a document.
0505:             */
0506:            private void determineBrokenLinksOfDocuments(Map documentsWithLinks) {
0507:                Iterator documentUrlsIterator = documentsWithLinks.keySet()
0508:                        .iterator();
0509:                while (documentUrlsIterator.hasNext()) {
0510:                    String documentUrl = (String) documentUrlsIterator.next();
0511:
0512:                    Set links = (Set) documentsWithLinks.get(documentUrl);
0513:                    determineBrokenLinksOfDocument(documentUrl, links);
0514:                }
0515:            }
0516:
0517:            /**
0518:             * <p>
0519:             * Determine which links of a document are broken and add the broken
0520:             * ones to the set of broken links of a document.
0521:             * </p>
0522:             * 
0523:             * @param documentUrl
0524:             *                the URL of the document containing the links.
0525:             * @param links
0526:             *                the links contained in the document.
0527:             */
0528:            private void determineBrokenLinksOfDocument(String documentUrl,
0529:                    Set links) {
0530:                Iterator linksIterator = links.iterator();
0531:                while (linksIterator.hasNext()) {
0532:                    String link = (String) linksIterator.next();
0533:
0534:                    if (isLinkBroken(link)) {
0535:                        addBrokenLinkToDocument(documentUrl, link);
0536:                    }
0537:                }
0538:            }
0539:
0540:            /**
0541:             * <p>
0542:             * Determine if a link was found to be broken.
0543:             * </p>
0544:             * 
0545:             * @param link
0546:             *                the link for which to determine whether or not it is
0547:             *                broken.
0548:             * @return <code>true</code> if the link is broken, <code>false</code>
0549:             *         otherwise.
0550:             */
0551:            private boolean isLinkBroken(String link) {
0552:                return brokenLinks.containsKey(link);
0553:            }
0554:
0555:            /**
0556:             * <p>
0557:             * Add a link to the set of broken links of a document.
0558:             * </p>
0559:             * 
0560:             * @param documentUrl
0561:             *                the URL of the document to which to add the broken
0562:             *                link.
0563:             * @param link
0564:             *                the link to add.
0565:             */
0566:            private void addBrokenLinkToDocument(String documentUrl, String link) {
0567:                Set brokenLinks = getBrokenLinksForDocument(documentUrl);
0568:
0569:                brokenLinks.add(link);
0570:            }
0571:
0572:            /**
0573:             * <p>
0574:             * Get the set of broken links for a specific document.
0575:             * </p>
0576:             * 
0577:             * @param documentUrl
0578:             *                the URL of the document for which to get the set of
0579:             *                broken links.
0580:             * @return the set of broken links. If there are no broken links for a
0581:             *         document an empty set is returned.
0582:             */
0583:            private Set getBrokenLinksForDocument(String documentUrl) {
0584:                Set result = (Set) documentsWithBrokenLinks.get(documentUrl);
0585:                if (result == null) {
0586:                    result = new HashSet();
0587:                    documentsWithBrokenLinks.put(documentUrl, result);
0588:                }
0589:
0590:                return result;
0591:            }
0592:
0593:            /**
0594:             * <p>
0595:             * Determine whether or not the finding of broken links should be
0596:             * stopped.
0597:             * </p>
0598:             * 
0599:             * @param numberOfResults
0600:             *                the number of results returned on the current page of
0601:             *                the find links DASL.
0602:             * @return <code>true</code> if the finding of broken links should be
0603:             *         stopped, <code>false</code> otherwise.
0604:             */
0605:            private boolean shouldStop(int numberOfResults) {
0606:                return isLastPageOfDaslResults(numberOfResults)
0607:                        || Thread.currentThread().isInterrupted();
0608:            }
0609:
0610:            /**
0611:             * <p>
0612:             * Determine whether the last page of results was retrieved using the
0613:             * find links DASL.
0614:             * </p>
0615:             * 
0616:             * @param numberOfResults
0617:             *                the number of results returned on the current page of
0618:             *                the find links DASL.
0619:             * @return <code>true</code> if the last page of the search results
0620:             *         has been reached, <code>false</code> otherwise.
0621:             */
0622:            private boolean isLastPageOfDaslResults(int numberOfResults) {
0623:                return numberOfResults != getDocumentBatchSize();
0624:            }
0625:
0626:            /**
0627:             * <p>
0628:             * Add the links to the links to check if the link has not been checked
0629:             * before.
0630:             * </p>
0631:             * 
0632:             * @param links
0633:             *                the link to add to the links to check (<code>Collection&lt;Set&lt;String&gt;&gt;</code>).
0634:             */
0635:            private void addLinksToLinksToCheck(Collection links) {
0636:                Iterator linksSetIterator = links.iterator();
0637:                while (linksSetIterator.hasNext()) {
0638:                    Set linksSet = (Set) linksSetIterator.next();
0639:
0640:                    Iterator linksIterator = linksSet.iterator();
0641:                    while (linksIterator.hasNext()) {
0642:                        String link = (String) linksIterator.next();
0643:
0644:                        if (!checkedLinks.contains(link)) {
0645:                            checkedLinks.add(link);
0646:                            linksToCheck.add(link);
0647:                        }
0648:                    }
0649:                }
0650:            }
0651:
0652:            /**
0653:             * <p>
0654:             * Check if the links that are currently in {@link #linksToCheck} are
0655:             * broken.
0656:             * </p>
0657:             */
0658:            private void checkLinks() {
0659:                try {
0660:                    startTasksToCheckLinks();
0661:
0662:                    waitForLinkCheckingToFinish();
0663:                } finally {
0664:                    stopActiveTasks();
0665:                }
0666:            }
0667:
0668:            /**
0669:             * <p>
0670:             * Start the tasks that will do the actual link checking.
0671:             * </p>
0672:             */
0673:            private synchronized void startTasksToCheckLinks() {
0674:                /*
0675:                 * Implementation note: a new thread is created for each task for each
0676:                 * DASL. This means that during one run lots of threads will be created.
0677:                 * Using a thread pool was considered, but the idea was rejected because
0678:                 * the broken links check runs infrequently (typically daily) and at a
0679:                 * time when the load on the system is low. Therefore the current
0680:                 * implementation should not have a noticeable effect on the system.
0681:                 */
0682:
0683:                int numberOfTasksToStart = getNumberOfLinkCheckingThreads();
0684:                for (int taskIndex = 0; taskIndex < numberOfTasksToStart; taskIndex++) {
0685:                    BrokenLinkCheckerTask task = new BrokenLinkCheckerTask(
0686:                            taskConfiguration);
0687:
0688:                    Thread thread = new Thread(task);
0689:                    thread.start();
0690:
0691:                    activeTasks.add(task);
0692:                }
0693:            }
0694:
0695:            /**
0696:             * <p>
0697:             * Create the configuration for a {@link BrokenLinkCheckerTask}.
0698:             * </p>
0699:             */
0700:            private BrokenLinkCheckerTaskConfiguration createBrokenLinkCheckerTaskConfiguration() {
0701:                BrokenLinkCheckerTaskConfigurationBean result = new BrokenLinkCheckerTaskConfigurationBean();
0702:
0703:                result.setBrokenLinkCheckerRun(this );
0704:                HttpClient httpClient = createHttpClientToUseForCheckingLinks();
0705:                result.setHttpClient(httpClient);
0706:                result.setInternalLinksBaseUrl(getInternalLinksBaseUrl());
0707:                result.setLog(getLog());
0708:
0709:                return result;
0710:            }
0711:
0712:            /**
0713:             * <p>
0714:             * Wait for all the tasks that do the actual checking of links to
0715:             * finish. When a task ends it will let this object know, and this
0716:             * object will notify itself once all tasks indicated that they
0717:             * finished. When this object notifies itself, this method returns.
0718:             * </p>
0719:             */
0720:            private void waitForLinkCheckingToFinish() {
0721:                synchronized (this ) {
0722:                    try {
0723:                        this .wait();
0724:                    } catch (InterruptedException e) {
0725:                        /*
0726:                         * The thread was interrupted before all broken links checking
0727:                         * tasks finished. Stop waiting for those tasks.
0728:                         * 
0729:                         * Reinterrupt the thread so methods down the stack get notified
0730:                         * so they can stop too.
0731:                         */
0732:                        Thread.currentThread().interrupt();
0733:                    }
0734:                }
0735:            }
0736:
0737:            /**
0738:             * <p>
0739:             * Stop tasks that are still active. This method will not wait for the
0740:             * tasks to stop because the current thread could be interrupted.
0741:             * </p>
0742:             */
0743:            private synchronized void stopActiveTasks() {
0744:                while (!activeTasks.isEmpty()) {
0745:                    BrokenLinkCheckerTask activeTask = (BrokenLinkCheckerTask) activeTasks
0746:                            .iterator().next();
0747:                    activeTasks.remove(activeTask);
0748:
0749:                    activeTask.stop();
0750:                }
0751:            }
0752:
0753:            /**
0754:             * <p>
0755:             * Get the URL of a link to check.
0756:             * </p>
0757:             * 
0758:             * @return a link to check, or <code>null</code> if there are no more
0759:             *         links to check.
0760:             */
0761:            synchronized String getLinkToCheck() {
0762:                String result = null;
0763:
0764:                if (!linksToCheck.isEmpty()) {
0765:                    result = (String) linksToCheck.iterator().next();
0766:                    linksToCheck.remove(result);
0767:                }
0768:
0769:                return result;
0770:            }
0771:
0772:            /**
0773:             * <p>
0774:             * Determine whether or not the links to check have been exhausted. If
0775:             * the links to check have been exhausted the task checking the links
0776:             * can stop.
0777:             * </p>
0778:             * 
0779:             * @return <code>true</code> if the links to check have been
0780:             *         exhausted, <code>false</code> otherwise.
0781:             */
0782:            synchronized boolean haveLinksToCheckBeenExhausted() {
0783:                return linksToCheck.isEmpty();
0784:            }
0785:
0786:            /**
0787:             * <p>
0788:             * Add a link to the set of broken links.
0789:             * </p>
0790:             * 
0791:             * @param link
0792:             *                the link that is broken.
0793:             * @param errorMessage
0794:             *                the message of the error causing the brokenness.
0795:             */
0796:            synchronized void addBrokenLink(String link,
0797:                    BrokenLinkErrorMessage errorMessage) {
0798:                brokenLinks.put(link, errorMessage);
0799:            }
0800:
0801:            /**
0802:             * <p>
0803:             * Handle the event that a broken links checking task has finished. If
0804:             * all tasks have finished, notify this object that it can stop waiting
0805:             * for the tasks to finish.
0806:             * </p>
0807:             */
0808:            synchronized void handleBrokenLinksCheckingTaskFinished(
0809:                    BrokenLinkCheckerTask task) {
0810:                activeTasks.remove(task);
0811:                if (activeTasks.isEmpty()) {
0812:                    notify();
0813:                }
0814:            }
0815:
0816:            /**
0817:             * <p>
0818:             * Execute the DASL to find links and get the documents with their
0819:             * links. The number of documents added to
0820:             * <code>documentsWithLinks</code> can be less than the total number of
0821:             * documents returned by the DASL, because only documents with at least
0822:             * one link eligible for checking for brokenness are added.
0823:             * </p>
0824:             * 
0825:             * @param dasl
0826:             *                the DASL to execute.
0827:             * @param documentsWithLinks
0828:             *                the map in which to store the documents found by the
0829:             *                DASL and their links (<code>Map&lt;String, Set&lt;String&gt;&gt;</code>).
0830:             *                Only documents with at least one link eligible for
0831:             *                checking for brokenness will be added.
0832:             * @return the total number of documents returned by the DASL.
0833:             * @throws IllegalStateException
0834:             *                 if an I/O error occurs or the result code is not 207.
0835:             */
0836:            private int executeFindLinksDasl(byte[] dasl, Map documentsWithLinks) {
0837:                int result;
0838:
0839:                SearchMethod searchMethod = new SearchMethod(
0840:                        getDocumentTreeToCheckRootUrl());
0841:                try {
0842:                    searchMethod.setDoAuthentication(true);
0843:                    searchMethod.setRequestBody(dasl);
0844:
0845:                    int searchResultCode;
0846:                    try {
0847:                        searchResultCode = httpClient
0848:                                .executeMethod(searchMethod);
0849:                    } catch (IOException e) {
0850:                        throw new IllegalStateException(
0851:                                "I/O error occurred during execution of find links DASL: "
0852:                                        + e);
0853:                    }
0854:
0855:                    if (searchResultCode != 207) {
0856:                        throw new IllegalStateException(
0857:                                "The execution of the find links DASL returned an unexpected result code: "
0858:                                        + searchResultCode + ", with message: "
0859:                                        + searchMethod.getStatusText());
0860:                    }
0861:
0862:                    result = retrieveFindLinksDaslResults(searchMethod,
0863:                            documentsWithLinks);
0864:                } finally {
0865:                    MethodCleanup.releaseConnection(searchMethod,
0866:                            "find links DASL", getLog());
0867:                }
0868:
0869:                return result;
0870:            }
0871:
0872:            /**
0873:             * <p>
0874:             * Retrieve the results from a (successful) find links DASL.
0875:             * </p>
0876:             * 
0877:             * @param searchMethod
0878:             *                the search method used to execute the DASL.
0879:             * @param documentsWithLinks
0880:             *                the map in which to store the documents and their
0881:             *                links (<code>Map&lt;String, Set&lt;String&gt;&gt;</code>).
0882:             *                Only documents with at least one link eligible for
0883:             *                checking for brokenness will be added.
0884:             * @return the total number of documents returned by the DASL.
0885:             */
0886:            private int retrieveFindLinksDaslResults(SearchMethod searchMethod,
0887:                    Map documentsWithLinks) {
0888:                int result = 0;
0889:
0890:                Enumeration responseUrlsEnum = searchMethod
0891:                        .getAllResponseURLs();
0892:                while (responseUrlsEnum.hasMoreElements()) {
0893:                    String documentUrl = (String) responseUrlsEnum
0894:                            .nextElement();
0895:
0896:                    result += 1;
0897:
0898:                    Enumeration propertiesEnum = searchMethod
0899:                            .getResponseProperties(documentUrl);
0900:                    while (propertiesEnum.hasMoreElements()) {
0901:                        Property property = (Property) propertiesEnum
0902:                                .nextElement();
0903:
0904:                        if (isUsableLinksProperty(property)) {
0905:                            String documentRelativeUrl = getDocumentResultUrl(documentUrl);
0906:
0907:                            Set allLinks = parseLinks(property
0908:                                    .getPropertyAsString());
0909:                            Set linksEligibleForCheckingForBrokenness = getLinksEligibleForCheckingForBrokenness(allLinks);
0910:
0911:                            if (!linksEligibleForCheckingForBrokenness
0912:                                    .isEmpty()) {
0913:                                documentsWithLinks.put(documentRelativeUrl,
0914:                                        linksEligibleForCheckingForBrokenness);
0915:                            }
0916:                        }
0917:                    }
0918:                }
0919:
0920:                return result;
0921:            }
0922:
0923:            /**
0924:             * <p>
0925:             * Get the URL of a document which should be used in the result. This is
0926:             * done by making the document URL relative to the base URL. If the
0927:             * document is not part of the tree to which the base URL points, then
0928:             * the unmodified document URL is returned.
0929:             * </p>
0930:             * 
0931:             * @param documentUrl
0932:             *                the document URL from which to derive the result URL.
0933:             * @return the URL that should be used for the document in the result.
0934:             */
0935:            private String getDocumentResultUrl(String documentUrl) {
0936:                String result;
0937:
0938:                if (documentUrl.startsWith(getDocumentsBaseUrl())) {
0939:                    int baseUrlLength = getDocumentsBaseUrl().length();
0940:                    result = documentUrl.substring(baseUrlLength);
0941:                } else {
0942:                    result = documentUrl;
0943:                }
0944:
0945:                return result;
0946:            }
0947:
0948:            /**
0949:             * <p>
0950:             * Parse the string containing a space-separated list of links and return
0951:             * a set containing the links in the string.
0952:             * </p>
0953:             * 
0954:             * @param spaceSeparatedLinks
0955:             *                the space-separated list of links to parse.
0956:             * @return the set containing the links in the string.
0957:             */
0958:            private Set parseLinks(String spaceSeparatedLinks) {
0959:                Set result = new HashSet();
0960:
0961:                StringTokenizer linksTokenizer = new StringTokenizer(
0962:                        spaceSeparatedLinks, LINK_SEPARATORS);
0963:                while (linksTokenizer.hasMoreTokens()) {
0964:                    String link = linksTokenizer.nextToken();
0965:
0966:                    result.add(link);
0967:                }
0968:
0969:                return result;
0970:            }
0971:
0972:            /**
0973:             * <p>
0974:             * Determine which links are eligible for checking for brokenness.
0975:             * </p>
0976:             * 
0977:             * @param links
0978:             *                the set of links to check for eligibility.
0979:             * @return the set of links eligible for checking for brokenness. If no
0980:             *         links are eligible an empty set is returned.
0981:             */
0982:            private Set getLinksEligibleForCheckingForBrokenness(Set links) {
0983:                Set result = new HashSet();
0984:
0985:                Iterator linksIterator = links.iterator();
0986:                while (linksIterator.hasNext()) {
0987:                    String link = (String) linksIterator.next();
0988:
0989:                    if (isLinkEligibleForCheckingForBrokenness(link)) {
0990:                        result.add(link);
0991:                    }
0992:                }
0993:
0994:                return result;
0995:            }
0996:
0997:            /**
0998:             * <p>
0999:             * Determine whether or not a link is eligible for checking for
1000:             * brokenness.
1001:             * </p>
1002:             * 
1003:             * @param link
1004:             *                the link to check.
1005:             * @return <code>true</code> if the link should be checked for
1006:             *         brokenness, <code>false</code> otherwise.
1007:             */
1008:            private boolean isLinkEligibleForCheckingForBrokenness(String link) {
1009:                boolean result;
1010:
1011:                result = LinkClassifier.isHttpLink(link)
1012:                        || LinkClassifier.isHttpsLink(link)
1013:                        || (LinkClassifier.isInternalLink(link) && !shouldInternalLinkBeIgnored(link));
1014:
1015:                return result;
1016:            }
1017:
1018:            /**
1019:             * <p>
1020:             * Determine if an internal link should be ignored.
1021:             * </p>
1022:             * 
1023:             * @param link
1024:             *                the link of which to determine whether it should be
1025:             *                ignored.
1026:             * @return <code>true</code> if the link should be ignored,
1027:             *         <code>false</code> otherwise.
1028:             */
1029:            private boolean shouldInternalLinkBeIgnored(String link) {
1030:                boolean result = false;
1031:
1032:                Iterator prefixesIterator = internalUrlPrefixesToIgnoreIterator();
1033:                while (!result && prefixesIterator.hasNext()) {
1034:                    String prefix = (String) prefixesIterator.next();
1035:
1036:                    result = link.startsWith(prefix);
1037:                }
1038:
1039:                return result;
1040:            }
1041:
1042:            /**
1043:             * <p>
1044:             * Return whether or not a WebDAV property is a property containing
1045:             * links and if it is usable.
1046:             * </p>
1047:             * 
1048:             * @param property
1049:             *                the property to check.
1050:             * @return <code>true</code> if the property is the property
1051:             *         containing links and it is usable, <code>false</code> otherwise.
1052:             */
1053:            private boolean isUsableLinksProperty(Property property) {
1054:                return property != null
1055:                        && property.getNamespaceURI().equals(
1056:                                HIPPO_CMS_NAMESPACE_URI)
1057:                        && property.getLocalName().equals(LINKS_PROPERTY_NAME)
1058:                        && 0 < property.getPropertyAsString().length();
1059:            }
1060:
1061:            /**
1062:             * <p>
1063:             * Store the result of the broken link checks in a document in the
1064:             * WebDAV repository.
1065:             * </p>
1066:             */
1067:            private void storeResult() {
1068:                BrokenLinksToXmlDocumentInRepositoryWriter resultWriter = new BrokenLinksToXmlDocumentInRepositoryWriter(
1069:                        documentsWithBrokenLinks, brokenLinks,
1070:                        getResultDocumentUrl(), httpClient, getLog());
1071:                resultWriter.writeResult();
1072:            }
1073:
1074:            /**
1075:             * <p>
1076:             * Create the HTTP client to use for executing the DASLs based on
1077:             * information passed in through the configuration.
1078:             * </p>
1079:             */
1080:            private void createHttpClient() {
1081:                HttpState httpState = createHttpState();
1082:
1083:                MultiThreadedHttpConnectionManager connectionManager = new MultiThreadedHttpConnectionManager();
1084:
1085:                httpClient = new HttpClient(connectionManager);
1086:                httpClient.setState(httpState);
1087:            }
1088:
1089:            /**
1090:             * <p>
1091:             * Create the HTTP client to use for checking the links based on
1092:             * information passed in through the configuration.
1093:             * </p>
1094:             * 
1095:             * @return the HTTP client to use for checking the links.
1096:             */
1097:            private HttpClient createHttpClientToUseForCheckingLinks() {
1098:                HttpClient result;
1099:
1100:                HttpState httpState = createHttpState();
1101:
1102:                MultiThreadedHttpConnectionManager connectionManager = new MultiThreadedHttpConnectionManager();
1103:
1104:                result = new HttpClient(connectionManager);
1105:                int timeoutMillis = getLinkCheckTimeoutSeconds()
1106:                        * NUMBER_OF_MILLIS_IN_A_SECOND;
1107:                result.setConnectionTimeout(timeoutMillis);
1108:                result.setTimeout(timeoutMillis);
1109:                result.setState(httpState);
1110:
1111:                return result;
1112:            }
1113:
1114:            /**
1115:             * <p>
1116:             * Create the HTTP state based on information passed in through the
1117:             * configuration.
1118:             * </p>
1119:             * 
1120:             * @return the HTTP state.
1121:             */
1122:            private HttpState createHttpState() {
1123:                HttpState httpState = new HttpState();
1124:
1125:                httpState.setAuthenticationPreemptive(true);
1126:
1127:                String hostname = determineHostnameOfRepository();
1128:                Credentials credentials = new UsernamePasswordCredentials(
1129:                        getRepositoryUsername(), getRepositoryPassword());
1130:                httpState.setCredentials(null, hostname, credentials);
1131:
1132:                return httpState;
1133:            }
1134:
1135:            /**
1136:             * <p>
1137:             * Determine the hostname of the repository. The hostname will be
1138:             * determined from the result document URL.
1139:             * </p>
1140:             * 
1141:             * @return the hostname of the repository.
1142:             */
1143:            private String determineHostnameOfRepository() {
1144:                URL resultDocumentUrl;
1145:                try {
1146:                    resultDocumentUrl = new URL(getResultDocumentUrl());
1147:                } catch (MalformedURLException e) {
1148:                    throw new IllegalStateException(
1149:                            "The result document URL is not valid: "
1150:                                    + getResultDocumentUrl());
1151:                }
1152:                return resultDocumentUrl.getHost();
1153:            }
1154:
1155:            /**
1156:             * <p>
1157:             * Generate a DASL to retrieve a set of documents containing links. The
1158:             * DASL is returned as a byte array containing the UTF-8 representation
1159:             * of the XML of the DASL.
1160:             * </p>
1161:             * 
1162:             * @return a byte array representation of the XML of the DASL.
1163:             * @throws IllegalStateException
1164:             *                 if an I/O error occurs while generating the DASL.
1165:             */
1166:            private byte[] generateFindLinksDasl(int offset) {
1167:                byte[] result;
1168:
1169:                try {
1170:                    String findLinksDasl = generateFindLinksDaslAsString(offset);
1171:
1172:                    ByteArrayOutputStream findLinksDaslOutput = new ByteArrayOutputStream();
1173:                    try {
1174:                        OutputStreamWriter findLinksDaslWriter = createUtf8Writer(findLinksDaslOutput);
1175:                        try {
1176:                            findLinksDaslWriter.write(findLinksDasl);
1177:                        } finally {
1178:                            WriterCleanup.close(findLinksDaslWriter,
1179:                                    "find links DASL", getLog());
1180:                        }
1181:                    } finally {
1182:                        StreamCleanup.close(findLinksDaslOutput,
1183:                                "find links DASL", getLog());
1184:                    }
1185:
1186:                    result = findLinksDaslOutput.toByteArray();
1187:                } catch (IOException e) {
1188:                    throw new IllegalStateException(
1189:                            "Unable to generate the find links DASL: " + e);
1190:                }
1191:
1192:                return result;
1193:            }
1194:
1195:            /**
1196:             * <p>
1197:             * Generate a DASL to retrieve a set of documents containing links. The
1198:             * DASL is returned as String representation of the XML of the DASL.
1199:             * </p>
1200:             * 
1201:             * @return a String representation of the XML of the DASL.
1202:             * @throws IOException
1203:             *                 if an I/O error occurs.
1204:             */
1205:            private String generateFindLinksDaslAsString(int offset)
1206:                    throws IOException {
1207:                String result = loadFindLinksDaslTemplate();
1208:
1209:                result = result.replaceAll(RESULTS_OFFSET_TAG, String
1210:                        .valueOf(offset));
1211:                result = result.replaceAll(NUMBER_OF_RESULTS_LIMIT_TAG, String
1212:                        .valueOf(getDocumentBatchSize()));
1213:
1214:                return result;
1215:            }
1216:
1217:            /**
1218:             * <p>
1219:             * Load the find links DASL template.
1220:             * </p>
1221:             * 
1222:             * @return the find links DASL template.
1223:             * @throws IOException
1224:             *                 if an I/O error occurs.
1225:             */
1226:            private String loadFindLinksDaslTemplate() throws IOException {
1227:                String result;
1228:
1229:                InputStream findLinksDaslTemplateInput = getClass()
1230:                        .getResourceAsStream(
1231:                                FIND_LINKS_DASL_TEMPLATE_RESOURCE_NAME);
1232:                try {
1233:                    InputStreamReader findLinksDaslTemplateReader = createUtf8Reader(findLinksDaslTemplateInput);
1234:                    try {
1235:                        BufferedReader bufferedFindLinksDaslTemplateReader = new BufferedReader(
1236:                                findLinksDaslTemplateReader);
1237:                        try {
1238:                            StringBuffer findLinksDaslTemplateBuffer = new StringBuffer(
1239:                                    2000);
1240:                            String line = bufferedFindLinksDaslTemplateReader
1241:                                    .readLine();
1242:                            while (line != null) {
1243:                                findLinksDaslTemplateBuffer.append(line);
1244:
1245:                                line = bufferedFindLinksDaslTemplateReader
1246:                                        .readLine();
1247:                            }
1248:
1249:                            result = findLinksDaslTemplateBuffer.toString();
1250:                        } finally {
1251:                            ReaderCleanup.close(
1252:                                    bufferedFindLinksDaslTemplateReader,
1253:                                    "buffered find links DASL template",
1254:                                    getLog());
1255:                        }
1256:                    } finally {
1257:                        ReaderCleanup.close(findLinksDaslTemplateReader,
1258:                                "find links DASL template", getLog());
1259:                    }
1260:                } finally {
1261:                    StreamCleanup.close(findLinksDaslTemplateInput,
1262:                            "find links DASL template", getLog());
1263:                }
1264:
1265:                return result;
1266:            }
1267:
1268:            /**
1269:             * <p>
1270:             * Create an input stream reader that uses the UTF-8 encoding to convert
1271:             * bytes to characters.
1272:             * </p>
1273:             * 
1274:             * @param input
1275:             *                the input stream to wrap.
1276:             * @return a reader reading the input stream as a UTF-8 encoded string.
1277:             */
1278:            private InputStreamReader createUtf8Reader(InputStream input) {
1279:                InputStreamReader findLinksDaslTemplateReader;
1280:                try {
1281:                    findLinksDaslTemplateReader = new InputStreamReader(input,
1282:                            UTF_8_ENCODING_NAME);
1283:                } catch (UnsupportedEncodingException e) {
1284:                    throw new IllegalStateException("JVM must support UTF-8: "
1285:                            + e);
1286:                }
1287:                return findLinksDaslTemplateReader;
1288:            }
1289:
1290:            /**
1291:             * <p>
1292:             * Create an output stream writer that uses the UTF-8 encoding to
1293:             * convert characters to bytes.
1294:             * </p>
1295:             * 
1296:             * @param output
1297:             *                the output stream to wrap.
1298:             * @return a writer writing characters to the output stream as UTF-8
1299:             *         encoded bytes.
1300:             */
1301:            private OutputStreamWriter createUtf8Writer(OutputStream output) {
1302:                OutputStreamWriter result;
1303:
1304:                try {
1305:                    result = new OutputStreamWriter(output, UTF_8_ENCODING_NAME);
1306:                } catch (UnsupportedEncodingException e) {
1307:                    throw new IllegalStateException("JVM must support UTF-8: "
1308:                            + e);
1309:                }
1310:
1311:                return result;
1312:            }
1313:
1314:            /**
1315:             * <p>
1316:             * Get the document tree to check root URL from the configuration.
1317:             * </p>
1318:             * 
1319:             * @return the root URL of the document tree to check.
1320:             */
1321:            private String getDocumentTreeToCheckRootUrl() {
1322:                return configuration.getDocumentTreeToCheckRootUrl();
1323:            }
1324:
1325:            /**
1326:             * <p>
1327:             * Get the base URL to which the URLs of the documents must be relative
1328:             * from the configuration.
1329:             * </p>
1330:             * 
1331:             * @return the base URL to which the URLs of the documents must be
1332:             *         relative.
1333:             */
1334:            private String getDocumentsBaseUrl() {
1335:                return configuration.getDocumentsBaseUrl();
1336:            }
1337:
1338:            /**
1339:             * <p>
1340:             * Get the iterator over the prefixes of internal links to ignore from
1341:             * the configuration.
1342:             * </p>
1343:             * 
1344:             * @return an iterator over the prefixes to ignore.
1345:             */
1346:            private Iterator internalUrlPrefixesToIgnoreIterator() {
1347:                return configuration.internalUrlPrefixesToIgnoreIterator();
1348:            }
1349:
1350:            /**
1351:             * <p>
1352:             * Get the base URL for internal links from the configuration.
1353:             * </p>
1354:             * 
1355:             * @return the base URL for internal links.
1356:             */
1357:            private String getInternalLinksBaseUrl() {
1358:                return configuration.getInternalLinksBaseUrl();
1359:            }
1360:
1361:            /**
1362:             * <p>
1363:             * Get the repository username from the configuration.
1364:             * </p>
1365:             * 
1366:             * @return the repository username.
1367:             */
1368:            private String getRepositoryUsername() {
1369:                return configuration.getRepositoryUsername();
1370:            }
1371:
1372:            /**
1373:             * <p>
1374:             * Get the repository password from the configuration.
1375:             * </p>
1376:             * 
1377:             * @return the repository password.
1378:             */
1379:            private String getRepositoryPassword() {
1380:                return configuration.getRepositoryPassword();
1381:            }
1382:
1383:            /**
1384:             * <p>
1385:             * Get the result document URL from the configuration.
1386:             * </p>
1387:             * 
1388:             * @return the result document URL.
1389:             */
1390:            private String getResultDocumentUrl() {
1391:                return configuration.getResultDocumentUrl();
1392:            }
1393:
1394:            /**
1395:             * <p>
1396:             * Get the document batch size from the configuration.
1397:             * </p>
1398:             * 
1399:             * @return the document batch size.
1400:             */
1401:            private int getDocumentBatchSize() {
1402:                return configuration.getDocumentBatchSize();
1403:            }
1404:
1405:            /**
1406:             * <p>
1407:             * Get the number of threads to use for checking links from the
1408:             * configuration.
1409:             * </p>
1410:             * 
1411:             * @return the number of threads to use for checking links.
1412:             */
1413:            private int getNumberOfLinkCheckingThreads() {
1414:                return configuration.getNumberOfLinkCheckingThreads();
1415:            }
1416:
1417:            /**
1418:             * <p>
1419:             * Get the maximum number of seconds to wait for a response when
1420:             * checking a link from the configuration.
1421:             * </p>
1422:             * 
1423:             * @return the maximum number of seconds to wait for a response.
1424:             */
1425:            private int getLinkCheckTimeoutSeconds() {
1426:                return configuration.getLinkCheckTimeoutSeconds();
1427:            }
1428:
1429:            /**
1430:             * <p>
1431:             * Get the log from the configuration.
1432:             * </p>
1433:             * 
1434:             * @return the log.
1435:             */
1436:            private BrokenLinkCheckerLog getLog() {
1437:                return configuration.getLog();
1438:            }
1439:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.