001: /*
002: * Copyright (c) 2004-2005, Hewlett-Packard Company and Massachusetts
003: * Institute of Technology. All rights reserved.
004: *
005: * Redistribution and use in source and binary forms, with or without
006: * modification, are permitted provided that the following conditions are
007: * met:
008: *
009: * - Redistributions of source code must retain the above copyright
010: * notice, this list of conditions and the following disclaimer.
011: *
012: * - Redistributions in binary form must reproduce the above copyright
013: * notice, this list of conditions and the following disclaimer in the
014: * documentation and/or other materials provided with the distribution.
015: *
016: * - Neither the name of the Hewlett-Packard Company nor the name of the
017: * Massachusetts Institute of Technology nor the names of their
018: * contributors may be used to endorse or promote products derived from
019: * this software without specific prior written permission.
020: *
021: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
022: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
023: * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
024: * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
025: * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
026: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
027: * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
028: * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
029: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
030: * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
031: * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
032: * DAMAGE.
033: */
034: package org.dspace.checker;
035:
036: import java.io.File;
037: import java.io.IOException;
038: import java.io.InputStream;
039: import java.security.DigestInputStream;
040: import java.security.MessageDigest;
041: import java.security.NoSuchAlgorithmException;
042: import java.sql.SQLException;
043: import java.util.Date;
044:
045: import org.apache.log4j.Logger;
046: import org.apache.log4j.PropertyConfigurator;
047: import org.dspace.core.ConfigurationManager;
048: import org.dspace.core.Utils;
049:
050: /**
051: * <p>
052: * Main class for the checksum checker tool, which calculates checksums for each
053: * bitstream whose ID is in the most_recent_checksum table, and compares it
054: * against the last calculated checksum for that bitstream.
055: * </p>
056: *
057: * @author Jim Downing
058: * @author Grace Carpenter
059: * @author Nathan Sarr
060: *
061: *
062: * @todo the accessor methods are currently unused - are they useful?
063: * @todo check for any existing resource problems
064: */
065: public final class CheckerCommand {
066: /** Usual Log4J logger. */
067: private static final Logger LOG = Logger
068: .getLogger(CheckerCommand.class);
069:
070: /** Default digest algorithm (MD5). */
071: private static final String DEFAULT_DIGEST_ALGORITHM = "MD5";
072:
073: /** 4 Meg byte array for reading file. */
074: private int BYTE_ARRAY_SIZE = 4 * 1024;
075:
076: /** BitstreamInfoDAO dependency. */
077: private BitstreamInfoDAO bitstreamInfoDAO = null;
078:
079: /** BitstreamDAO dependency. */
080: private BitstreamDAO bitstreamDAO = null;
081:
082: /**
083: * Checksum history Data access object
084: */
085: private ChecksumHistoryDAO checksumHistoryDAO = null;
086:
087: /** start time for current process. */
088: private Date processStartDate = null;
089:
090: /**
091: * Dispatcher to be used for processing run.
092: */
093: private BitstreamDispatcher dispatcher = null;
094:
095: /**
096: * Container/logger with details about each bitstream and checksum results.
097: */
098: private ChecksumResultsCollector collector = null;
099:
100: /** Report all processing */
101: private boolean reportVerbose = false;
102:
103: /**
104: * Default constructor uses DSpace plugin manager to construct dependencies.
105: */
106: public CheckerCommand() {
107: bitstreamInfoDAO = new BitstreamInfoDAO();
108: bitstreamDAO = new BitstreamDAO();
109: checksumHistoryDAO = new ChecksumHistoryDAO();
110: }
111:
112: /**
113: * <p>
114: * Uses the options set up on this checker to determine a mode of execution,
115: * and then accepts bitstream ids from the dispatcher and checks their
116: * bitstreams against the db records.
117: * </p>
118: *
119: * <p>
120: * N.B. a valid BitstreamDispatcher must be provided using
121: * setBitstreamDispatcher before calling this method
122: * </p>
123: */
124: public void process() {
125: LOG.debug("Begin Checker Processing");
126:
127: if (dispatcher == null) {
128: throw new IllegalStateException(
129: "No BitstreamDispatcher provided");
130: }
131:
132: if (collector == null) {
133: collector = new ResultsLogger(processStartDate);
134: }
135:
136: // update missing bitstreams that were entered into the
137: // bitstream table - this always done.
138: bitstreamInfoDAO.updateMissingBitstreams();
139:
140: int id = dispatcher.next();
141:
142: while (id != BitstreamDispatcher.SENTINEL) {
143: LOG.debug("Processing bitstream id = " + id);
144: BitstreamInfo info = checkBitstream(id);
145:
146: if (reportVerbose
147: || (info.getChecksumCheckResult() != ChecksumCheckResults.CHECKSUM_MATCH)) {
148: collector.collect(info);
149: }
150:
151: id = dispatcher.next();
152: }
153: }
154:
155: /**
156: * Check a specified bitstream.
157: *
158: * @param id
159: * the bitstream id
160: *
161: * @return the information about the bitstream and its checksum data
162: */
163: private BitstreamInfo checkBitstream(final int id) {
164: // get bitstream info from bitstream table
165: BitstreamInfo info = bitstreamInfoDAO.findByBitstreamId(id);
166:
167: // requested id was not found in bitstream
168: // or most_recent_checksum table
169: if (info == null) {
170: // Note: this case should only occur if id is requested at
171: // command line, since ref integrity checks should
172: // prevent id from appearing in most_recent_checksum
173: // but not bitstream table, or vice versa
174: info = new BitstreamInfo(id);
175: processNullInfoBitstream(info);
176: } else if (!info.getToBeProcessed()) {
177: // most_recent_checksum.to_be_processed is marked
178: // 'false' for this bitstream id.
179: // Do not do any db updates
180: info
181: .setChecksumCheckResult(ChecksumCheckResults.BITSTREAM_NOT_PROCESSED);
182: } else if (info.getDeleted()) {
183: // bitstream id is marked 'deleted' in bitstream table.
184: processDeletedBitstream(info);
185: } else {
186: processBitstream(info);
187: }
188:
189: return info;
190: }
191:
192: /**
193: * Digest the stream and get the checksum value.
194: *
195: * @param stream
196: * InputStream to digest.
197: * @param algorithm
198: * the algorithm to use when digesting.
199: * @todo Document the algorithm parameter
200: * @return digest
201: *
202: * @throws java.security.NoSuchAlgorithmException
203: * if the requested algorithm is not provided by the system
204: * security provider.
205: * @throws java.io.IOException
206: * If an exception arises whilst reading the stream
207: */
208: private String digestStream(InputStream stream, String algorithm)
209: throws java.security.NoSuchAlgorithmException,
210: java.io.IOException {
211: // create the digest stream
212: DigestInputStream dStream = new DigestInputStream(stream,
213: MessageDigest.getInstance(algorithm));
214:
215: byte[] bytes = new byte[BYTE_ARRAY_SIZE];
216:
217: // make sure all the data is read by the digester
218: while (dStream.read(bytes, 0, BYTE_ARRAY_SIZE) != -1) {
219: // no-op
220: }
221:
222: return Utils.toHex(dStream.getMessageDigest().digest());
223: }
224:
225: /**
226: * Compares two checksums.
227: *
228: * @param checksumA
229: * the first checksum
230: * @param checksumB
231: * the second checksum
232: *
233: * @return a result code (constants defined in Util)
234: */
235: private String compareChecksums(String checksumA, String checksumB) {
236: String result = ChecksumCheckResults.CHECKSUM_NO_MATCH;
237:
238: if ((checksumA == null) || (checksumB == null)) {
239: result = ChecksumCheckResults.CHECKSUM_PREV_NOT_FOUND;
240: } else if (checksumA.equals(checksumB)) {
241: result = ChecksumCheckResults.CHECKSUM_MATCH;
242: }
243:
244: return result;
245: }
246:
247: /**
248: * Process bitstream that was marked 'deleted' in bitstream table. A deleted
249: * bitstream should only be checked once afterwards it should be marked
250: * 'to_be_processed=false'. Note that to_be_processed must be manually
251: * updated in db to allow for future processing.
252: *
253: * @param info
254: * a deleted bitstream.
255: */
256: private void processDeletedBitstream(BitstreamInfo info) {
257: info.setProcessStartDate(new Date());
258: info
259: .setChecksumCheckResult(ChecksumCheckResults.BITSTREAM_MARKED_DELETED);
260: info.setProcessStartDate(new Date());
261: info.setProcessEndDate(new Date());
262: info.setToBeProcessed(false);
263: bitstreamInfoDAO.update(info);
264: checksumHistoryDAO.insertHistory(info);
265: }
266:
267: /**
268: * Process bitstream whose ID was not found in most_recent_checksum or
269: * bitstream table. No updates can be done. The missing bitstream is output
270: * to the log file.
271: *
272: * @param info
273: * A not found BitStreamInfo
274: * @todo is this method required?
275: */
276: private void processNullInfoBitstream(BitstreamInfo info) {
277: info.setInfoFound(false);
278: info.setProcessStartDate(new Date());
279: info.setProcessEndDate(new Date());
280: info
281: .setChecksumCheckResult(ChecksumCheckResults.BITSTREAM_INFO_NOT_FOUND);
282: }
283:
284: /**
285: * <p>
286: * Process general case bistream.
287: * </p>
288: *
289: * <p>
290: * Note: bitstream will have timestamp indicating it was "checked", even if
291: * actual checksumming never took place.
292: * </p>
293: *
294: * @todo Why does bitstream have a timestamp indicating it's checked if
295: * checksumming doesn't occur?
296: *
297: * @param info
298: * BitstreamInfo to handle
299: */
300: private void processBitstream(BitstreamInfo info) {
301: info.setProcessStartDate(new Date());
302:
303: if (info.getChecksumAlgorithm() == null) {
304: info.setChecksumAlgorithm(DEFAULT_DIGEST_ALGORITHM);
305: }
306:
307: try {
308: InputStream bitstream = bitstreamDAO.getBitstream(info
309: .getBitstreamId());
310:
311: info.setBitstreamFound(true);
312:
313: String checksum = digestStream(bitstream, info
314: .getChecksumAlgorithm());
315:
316: info.setCalculatedChecksum(checksum);
317:
318: // compare new checksum to previous checksum
319: info
320: .setChecksumCheckResult(compareChecksums(info
321: .getStoredChecksum(), info
322: .getCalculatedChecksum()));
323: } catch (IOException e) {
324: // bitstream located, but file missing from asset store
325: info
326: .setChecksumCheckResult(ChecksumCheckResults.BITSTREAM_NOT_FOUND);
327: info.setToBeProcessed(false);
328: LOG.error(
329: "Error retrieving bitstream ID "
330: + info.getBitstreamId() + " from "
331: + "asset store.", e);
332: } catch (SQLException e) {
333: // ??this code only executes if an sql
334: // exception occurs in *DSpace* code, probably
335: // indicating a general db problem?
336: info
337: .setChecksumCheckResult(ChecksumCheckResults.BITSTREAM_INFO_NOT_FOUND);
338: LOG.error("Error retrieving metadata for bitstream ID "
339: + info.getBitstreamId(), e);
340: } catch (NoSuchAlgorithmException e) {
341: info
342: .setChecksumCheckResult(ChecksumCheckResults.CHECKSUM_ALGORITHM_INVALID);
343: info.setToBeProcessed(false);
344: LOG.error("Invalid digest algorithm type for bitstream ID"
345: + info.getBitstreamId(), e);
346: } finally {
347: info.setProcessEndDate(new Date());
348:
349: // record new checksum and comparison result in db
350: bitstreamInfoDAO.update(info);
351: checksumHistoryDAO.insertHistory(info);
352: }
353: }
354:
355: /**
356: * Get dispatcher being used by this run of the checker.
357: *
358: * @return the dispatcher being used by this run.
359: */
360: public BitstreamDispatcher getDispatcher() {
361: return dispatcher;
362: }
363:
364: /**
365: * Set the dispatcher to be used by this run of the checker.
366: *
367: * @param dispatcher
368: * Dispatcher to use.
369: */
370: public void setDispatcher(BitstreamDispatcher dispatcher) {
371: this .dispatcher = dispatcher;
372: }
373:
374: /**
375: * Get the collector that holds/logs the results for this process run.
376: *
377: * @return The ChecksumResultsCollecter being used.
378: */
379: public ChecksumResultsCollector getCollector() {
380: return collector;
381: }
382:
383: /**
384: * Set the collector that holds/logs the results for this process run.
385: *
386: * @param collector
387: * the collector to be used for this run
388: */
389: public void setCollector(ChecksumResultsCollector collector) {
390: this .collector = collector;
391: }
392:
393: /**
394: * Get time at which checker process began.
395: *
396: * @return start time
397: */
398: public Date getProcessStartDate() {
399: return processStartDate;
400: }
401:
402: /**
403: * Set time at which checker process began.
404: *
405: * @param startDate
406: * start time
407: */
408: public void setProcessStartDate(Date startDate) {
409: processStartDate = startDate;
410: }
411:
412: /**
413: * Determine if any errors are reported
414: *
415: * @return true if only errors reported
416: */
417: public boolean isReportVerbose() {
418: return reportVerbose;
419: }
420:
421: /**
422: * Set report errors only
423: *
424: * @param reportErrorsOnly
425: * true to report only errors in the logs.
426: */
427: public void setReportVerbose(boolean reportVerbose) {
428: this.reportVerbose = reportVerbose;
429: }
430: }
|