001: /*
002: * ContainerAdapter.java
003: *
004: * Version: $Revision: 1.6 $
005: *
006: * Date: $Date: 2006/05/02 01:24:11 $
007: *
008: * Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts
009: * Institute of Technology. All rights reserved.
010: *
011: * Redistribution and use in source and binary forms, with or without
012: * modification, are permitted provided that the following conditions are
013: * met:
014: *
015: * - Redistributions of source code must retain the above copyright
016: * notice, this list of conditions and the following disclaimer.
017: *
018: * - Redistributions in binary form must reproduce the above copyright
019: * notice, this list of conditions and the following disclaimer in the
020: * documentation and/or other materials provided with the distribution.
021: *
022: * - Neither the name of the Hewlett-Packard Company nor the name of the
023: * Massachusetts Institute of Technology nor the names of their
024: * contributors may be used to endorse or promote products derived from
025: * this software without specific prior written permission.
026: *
027: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
028: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
029: * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
030: * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
031: * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
032: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
033: * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
034: * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
035: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
036: * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
037: * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
038: * DAMAGE.
039: */
040:
041: package org.dspace.app.xmlui.objectmanager;
042:
043: import java.io.ByteArrayInputStream;
044: import java.io.IOException;
045: import java.sql.SQLException;
046:
047: import org.dspace.app.xmlui.wing.AttributeMap;
048: import org.dspace.app.xmlui.wing.WingException;
049: import org.dspace.authorize.AuthorizeException;
050: import org.dspace.content.Bitstream;
051: import org.dspace.content.Collection;
052: import org.dspace.content.Community;
053: import org.dspace.content.DSpaceObject;
054: import org.dspace.content.crosswalk.CrosswalkException;
055: import org.dspace.content.crosswalk.DisseminationCrosswalk;
056: import org.dspace.core.Constants;
057: import org.jdom.Document;
058: import org.jdom.Element;
059: import org.jdom.JDOMException;
060: import org.jdom.input.SAXBuilder;
061: import org.jdom.output.SAXOutputter;
062: import org.xml.sax.SAXException;
063:
064: /**
065: * This is an adapter which translates DSpace containers
066: * (communities & collections) into METS documents. This adapter follows
067: * the DSpace METS profile however that profile does not define how a
068: * community or collection should be described, but we make the obvious
069: * decisions to deviate when nessasary from the profile.
070: *
071: * The METS document consists of three parts: descriptive metadata section,
072: * file section, and a structural map. The descriptive metadata sections holds
073: * metadata about the item being adapted using DSpace crosswalks. This is the
074: * same way the item adapter works.
075: *
076: * However the file section and structural map are a bit different. In these
077: * casses the the only files listed is the one logo that may be attached to
078: * a community or collection.
079: *
080: * @author Scott Phillips
081: */
082: public class ContainerAdapter extends AbstractAdapter {
083:
084: /** The community or collection this adapter represents. */
085: private DSpaceObject dso;
086:
087: /** A space seperated list of descriptive metadata sections */
088: private StringBuffer dmdSecIDS;
089:
090: /**
091: * Construct a new CommunityCollectionMETSAdapter.
092: *
093: * @param dso
094: * A DSpace Community or Collection to adapt.
095: * @param contextPath
096: * The contextPath of this webapplication.
097: */
098: public ContainerAdapter(DSpaceObject dso, String contextPath) {
099: super (contextPath);
100: this .dso = dso;
101: }
102:
103: /** Return the container, community or collection, object */
104: public DSpaceObject getContainer() {
105: return this .dso;
106: }
107:
108: /**
109: *
110: *
111: *
112: * Required abstract methods
113: *
114: *
115: *
116: */
117:
118: /**
119: * Return the URL of this community/collection in the interface
120: */
121: protected String getMETSOBJID() {
122: if (dso.getHandle() != null)
123: return contextPath + "/handle/" + dso.getHandle();
124: return null;
125: }
126:
127: /**
128: * @return Return the URL for editing this item
129: */
130: protected String getMETSOBJEDIT() {
131: return null;
132: }
133:
134: /**
135: * Use the handle as the id for this METS document
136: */
137: protected String getMETSID() {
138: if (dso.getHandle() == null) {
139: if (dso instanceof Collection)
140: return "collection:" + dso.getID();
141: else
142: return "community:" + dso.getID();
143: } else
144: return "hdl:" + dso.getHandle();
145: }
146:
147: /**
148: * Return the profile to use for communities and collections.
149: *
150: */
151: protected String getMETSProfile() throws WingException {
152: return "DSPACE METS SIP Profile 1.0";
153: }
154:
155: /**
156: * Return a friendly label for the METS document to say we are a community
157: * or collection.
158: */
159: protected String getMETSLabel() {
160: if (dso instanceof Community)
161: return "DSpace Community";
162: else
163: return "DSpace Collection";
164: }
165:
166: /**
167: * Return a unique id for the given bitstream
168: */
169: protected String getFileID(Bitstream bitstream) {
170: return "file_" + bitstream.getID();
171: }
172:
173: /**
174: * Return a group id for the given bitstream
175: */
176: protected String getGroupFileID(Bitstream bitstream) {
177: return "group_file_" + bitstream.getID();
178: }
179:
180: /**
181: *
182: *
183: *
184: * METS structural methods
185: *
186: *
187: *
188: */
189:
190: /**
191: * Render the METS descriptive section. This will create a new metadata
192: * section for each crosswalk configured.
193: *
194: * Example:
195: * <dmdSec>
196: * <mdWrap MDTYPE="MODS">
197: * <xmlData>
198: * ... content from the crosswalk ...
199: * </xmlDate>
200: * </mdWrap>
201: * </dmdSec
202: */
203: protected void renderDescriptiveSection() throws WingException,
204: SAXException, CrosswalkException, IOException, SQLException {
205: AttributeMap attributes;
206:
207: String groupID = getGenericID("group_dmd_");
208: dmdSecIDS = new StringBuffer();
209:
210: // Add DIM descriptive metadata if it was requested or if no metadata types
211: // were specified. Further more since this is the default type we also use a
212: // faster rendering method that the crosswalk API.
213: if (dmdTypes.size() == 0 || dmdTypes.contains("DIM")) {
214: // Metadata element's ID
215: String dmdID = getGenericID("dmd_");
216:
217: // Keep track of all descriptive sections
218: dmdSecIDS.append(dmdID);
219:
220: // ////////////////////////////////
221: // Start a new dmdSec for each crosswalk.
222: attributes = new AttributeMap();
223: attributes.put("ID", dmdID);
224: attributes.put("GROUPID", groupID);
225: startElement(METS, "dmdSec", attributes);
226:
227: // ////////////////////////////////
228: // Start metadata wrapper
229: attributes = new AttributeMap();
230: attributes.put("MDTYPE", "OTHER");
231: attributes.put("OTHERMDTYPE", "DIM");
232: startElement(METS, "mdWrap", attributes);
233:
234: // ////////////////////////////////
235: // Start the xml data
236: startElement(METS, "xmlData");
237:
238: // ///////////////////////////////
239: // Start the DIM element
240: attributes = new AttributeMap();
241: attributes.put("dspaceType", Constants.typeText[dso
242: .getType()]);
243: startElement(DIM, "dim", attributes);
244:
245: // Add each field for this collection
246: if (dso.getType() == Constants.COLLECTION) {
247: Collection collection = (Collection) dso;
248:
249: String description = collection
250: .getMetadata("introductory_text");
251: String description_abstract = collection
252: .getMetadata("short_description");
253: String description_table = collection
254: .getMetadata("side_bar_text");
255: String identifier_uri = "http://hdl.handle.net/"
256: + collection.getHandle();
257: String provenance = collection
258: .getMetadata("provenance_description");
259: String rights = collection
260: .getMetadata("copyright_text");
261: String rights_license = collection
262: .getMetadata("license");
263: String title = collection.getMetadata("name");
264:
265: createField("dc", "description", null, null,
266: description);
267: createField("dc", "description", "abstract", null,
268: description_abstract);
269: createField("dc", "description", "tableofcontents",
270: null, description_table);
271: createField("dc", "identifier", "uri", null,
272: identifier_uri);
273: createField("dc", "provenance", null, null, provenance);
274: createField("dc", "rights", null, null, rights);
275: createField("dc", "rights", "license", null,
276: rights_license);
277: createField("dc", "title", null, null, title);
278: } else if (dso.getType() == Constants.COMMUNITY) {
279: Community community = (Community) dso;
280:
281: String description = community
282: .getMetadata("introductory_text");
283: String description_abstract = community
284: .getMetadata("short_description");
285: String description_table = community
286: .getMetadata("side_bar_text");
287: String identifier_uri = "http://hdl.handle.net/"
288: + community.getHandle();
289: String rights = community.getMetadata("copyright_text");
290: String title = community.getMetadata("name");
291:
292: createField("dc", "description", null, null,
293: description);
294: createField("dc", "description", "abstract", null,
295: description_abstract);
296: createField("dc", "description", "tableofcontents",
297: null, description_table);
298: createField("dc", "identifier", "uri", null,
299: identifier_uri);
300: createField("dc", "rights", null, null, rights);
301: createField("dc", "title", null, null, title);
302: }
303:
304: // ///////////////////////////////
305: // End the DIM element
306: endElement(DIM, "dim");
307:
308: // ////////////////////////////////
309: // End elements
310: endElement(METS, "xmlData");
311: endElement(METS, "mdWrap");
312: endElement(METS, "dmdSec");
313:
314: }
315:
316: for (String dmdType : dmdTypes) {
317: // If DIM was requested then it was generated above without using
318: // the crosswalk API. So we can skip this one.
319: if ("DIM".equals(dmdType))
320: continue;
321:
322: DisseminationCrosswalk crosswalk = getDisseminationCrosswalk(dmdType);
323:
324: if (crosswalk == null)
325: continue;
326:
327: String dmdID = getGenericID("dmd_");
328: // Add our id to the list.
329: dmdSecIDS.append(" " + dmdID);
330:
331: // ////////////////////////////////
332: // Start a new dmdSec for each crosswalk.
333: attributes = new AttributeMap();
334: attributes.put("ID", dmdID);
335: attributes.put("GROUPID", groupID);
336: startElement(METS, "dmdSec", attributes);
337:
338: // ////////////////////////////////
339: // Start metadata wrapper
340: attributes = new AttributeMap();
341: if (isDefinedMETStype(dmdType)) {
342: attributes.put("MDTYPE", dmdType);
343: } else {
344: attributes.put("MDTYPE", "OTHER");
345: attributes.put("OTHERMDTYPE", dmdType);
346: }
347: startElement(METS, "mdWrap", attributes);
348:
349: // ////////////////////////////////
350: // Start the xml data
351: startElement(METS, "xmlData");
352:
353: // ///////////////////////////////
354: // Send the actual XML content
355: try {
356: Element dissemination = crosswalk
357: .disseminateElement(dso);
358:
359: SAXFilter filter = new SAXFilter(contentHandler,
360: lexicalHandler, namespaces);
361: // Allow the basics for XML
362: filter.allowElements().allowIgnorableWhitespace()
363: .allowCharacters().allowCDATA()
364: .allowPrefixMappings();
365:
366: SAXOutputter outputter = new SAXOutputter();
367: outputter.setContentHandler(filter);
368: outputter.setLexicalHandler(filter);
369: outputter.output(dissemination);
370: } catch (JDOMException jdome) {
371: throw new WingException(jdome);
372: } catch (AuthorizeException ae) {
373: // just ignore the authorize exception and continue on with
374: //out parsing the xml document.
375: }
376:
377: // ////////////////////////////////
378: // End elements
379: endElement(METS, "xmlData");
380: endElement(METS, "mdWrap");
381: endElement(METS, "dmdSec");
382:
383: // Record keeping
384: if (dmdSecIDS == null) {
385: dmdSecIDS = new StringBuffer(dmdID);
386: } else {
387: dmdSecIDS.append(" " + dmdID);
388:
389: }
390: }
391: }
392:
393: /**
394: * Render the METS file section. If a logo is present for this
395: * container then that single bitstream is listed in the
396: * file section.
397: *
398: * Example:
399: * <fileSec>
400: * <fileGrp USE="LOGO">
401: * <file ... >
402: * <fLocate ... >
403: * </file>
404: * </fileGrp>
405: * </fileSec>
406: */
407: protected void renderFileSection() throws SAXException {
408: AttributeMap attributes;
409:
410: // Get the Community or Collection logo.
411: Bitstream logo = getLogo();
412:
413: if (logo != null) {
414: // ////////////////////////////////
415: // Start the file section
416: startElement(METS, "fileSec");
417:
418: // ////////////////////////////////
419: // Start a new fileGrp for the logo.
420: attributes = new AttributeMap();
421: attributes.put("USE", "LOGO");
422: startElement(METS, "fileGrp", attributes);
423:
424: // ////////////////////////////////
425: // Add the actual file element
426: String fileID = getFileID(logo);
427: String groupID = getGroupFileID(logo);
428: renderFile(null, logo, fileID, groupID);
429:
430: // ////////////////////////////////
431: // End th file group and file section
432: endElement(METS, "fileGrp");
433: endElement(METS, "fileSec");
434: }
435: }
436:
437: /**
438: * Render the container's structural map. This includes a refrence
439: * to the container's logo, if available, otherwise it is an empty
440: * division that just states it is a DSpace community or Collection.
441: *
442: * Examlpe:
443: * <structMap TYPE="LOGICAL" LABEL="DSpace">
444: * <div TYPE="DSpace Collection" DMDID="space seperated list of ids">
445: * <fptr FILEID="logo id"/>
446: * </div>
447: * </structMap>
448: */
449: protected void renderStructureMap() throws SQLException,
450: SAXException {
451: AttributeMap attributes;
452:
453: // ///////////////////////
454: // Start a new structure map
455: attributes = new AttributeMap();
456: attributes.put("TYPE", "LOGICAL");
457: attributes.put("LABEL", "DSpace");
458: startElement(METS, "structMap", attributes);
459:
460: // ////////////////////////////////
461: // Start the special first division
462: attributes = new AttributeMap();
463: attributes.put("TYPE", getMETSLabel());
464: // add references to the Descriptive metadata
465: if (dmdSecIDS != null)
466: attributes.put("DMDID", dmdSecIDS.toString());
467: startElement(METS, "div", attributes);
468:
469: // add a fptr pointer to the logo.
470: Bitstream logo = getLogo();
471: if (logo != null) {
472: // ////////////////////////////////
473: // Add a refrence to the logo as the primary bitstream.
474: attributes = new AttributeMap();
475: attributes.put("FILEID", getFileID(logo));
476: startElement(METS, "fptr", attributes);
477: endElement(METS, "fptr");
478:
479: // ///////////////////////////////////////////////
480: // Add a div for the publicaly viewable bitstreams (i.e. the logo)
481: attributes = new AttributeMap();
482: attributes.put("ID", getGenericID("div_"));
483: attributes.put("TYPE", "DSpace Content Bitstream");
484: startElement(METS, "div", attributes);
485:
486: // ////////////////////////////////
487: // Add a refrence to the logo as the primary bitstream.
488: attributes = new AttributeMap();
489: attributes.put("FILEID", getFileID(logo));
490: startElement(METS, "fptr", attributes);
491: endElement(METS, "fptr");
492:
493: // //////////////////////////
494: // End the logo division
495: endElement(METS, "div");
496: }
497:
498: // ////////////////////////////////
499: // End the special first division
500: endElement(METS, "div");
501:
502: // ///////////////////////
503: // End the structure map
504: endElement(METS, "structMap");
505: }
506:
507: /**
508: *
509: *
510: *
511: * Private helpfull methods
512: *
513: *
514: *
515: */
516:
517: /**
518: * Return the logo bitstream associated with this community or collection.
519: * If there is no logo then null is returned.
520: */
521: private Bitstream getLogo() {
522: if (dso instanceof Community) {
523: Community community = (Community) dso;
524: return community.getLogo();
525: } else if (dso instanceof Collection) {
526:
527: Collection collection = (Collection) dso;
528: return collection.getLogo();
529: }
530: return null;
531: }
532:
533: /**
534: * Count how many occurance there is of the given
535: * character in the given string.
536: *
537: * @param string The string value to be counted.
538: * @param character the character to count in the string.
539: */
540: private int countOccurances(String string, char character) {
541: if (string == null || string.length() == 0)
542: return 0;
543:
544: int fromIndex = -1;
545: int count = 0;
546:
547: while (true) {
548: fromIndex = string.indexOf('>', fromIndex + 1);
549:
550: if (fromIndex == -1)
551: break;
552:
553: count++;
554: }
555:
556: return count;
557: }
558:
559: /**
560: * Check if the given character sequence is located in the given
561: * string at the specified index. If it is then return true, otherwise false.
562: *
563: * @param string The string to test against
564: * @param index The location within the string
565: * @param characters The character sequence to look for.
566: * @return true if the character sequence was found, otherwise false.
567: */
568: private boolean substringCompare(String string, int index,
569: char... characters) {
570: // Is the string long enough?
571: if (string.length() <= index + characters.length)
572: return false;
573:
574: // Do all the characters match?
575: for (char character : characters) {
576: if (string.charAt(index) != character)
577: return false;
578: index++;
579: }
580:
581: return false;
582: }
583:
584: /**
585: * Create a new DIM field element with the given attributes.
586: *
587: * @param schema The schema the DIM field belongs too.
588: * @param element The element the DIM field belongs too.
589: * @param qualifier The qualifier the DIM field belongs too.
590: * @param language The language the DIM field belongs too.
591: * @param value The value of the DIM field.
592: * @return A new DIM field element
593: * @throws SAXException
594: */
595: private void createField(String schema, String element,
596: String qualifier, String language, String value)
597: throws SAXException {
598: // ///////////////////////////////
599: // Field element for each metadata field.
600: AttributeMap attributes = new AttributeMap();
601: attributes.put("mdschema", schema);
602: attributes.put("element", element);
603: if (qualifier != null)
604: attributes.put("qualifier", qualifier);
605: if (language != null)
606: attributes.put("language", language);
607: startElement(DIM, "field", attributes);
608:
609: // Only try and add the metadata's value, but only if it is non null.
610: if (value != null) {
611: // First, preform a queck check to see if the value may be XML.
612: int countOpen = countOccurances(value, '<');
613: int countClose = countOccurances(value, '>');
614:
615: // If it passed the quick test, then try and parse the value.
616: Element xmlDocument = null;
617: if (countOpen > 0 && countOpen == countClose) {
618: // This may be XML, First try and remove any bad entity refrences.
619: int amp = -1;
620: while ((amp = value.indexOf('&', amp + 1)) > -1) {
621: // Is it an xml entity named by number?
622: if (substringCompare(value, amp + 1, '#'))
623: continue;
624:
625: // &
626: if (substringCompare(value, amp + 1, 'a', 'm', 'p',
627: ';'))
628: continue;
629:
630: // '
631: if (substringCompare(value, amp + 1, 'a', 'p', 'o',
632: 's', ';'))
633: continue;
634:
635: // "
636: if (substringCompare(value, amp + 1, 'q', 'u', 'o',
637: 't', ';'))
638: continue;
639:
640: // <
641: if (substringCompare(value, amp + 1, 'l', 't', ';'))
642: continue;
643:
644: // >
645: if (substringCompare(value, amp + 1, 'g', 't', ';'))
646: continue;
647:
648: // Replace the ampersand with an XML entity.
649: value = value.substring(0, amp) + "&"
650: + value.substring(amp + 1);
651: }
652:
653: // Second try and parse the XML into a mini-dom
654: try {
655: // Wrap the value inside a root element (which will be trimed out
656: // by the SAX filter and set the default namespace to XHTML.
657: String xml = "<fragment xmlns=\"http://www.w3.org/1999/xhtml\">"
658: + value + "</fragment>";
659:
660: ByteArrayInputStream inputStream = new ByteArrayInputStream(
661: xml.getBytes());
662:
663: SAXBuilder builder = new SAXBuilder();
664: Document document = builder.build(inputStream);
665:
666: xmlDocument = document.getRootElement();
667: } catch (Exception e) {
668: // ignore any errors we get, and just add the string literaly.
669: }
670: }
671:
672: // Third, If we have xml, attempt to serialize the dom.
673: if (xmlDocument != null) {
674: SAXFilter filter = new SAXFilter(contentHandler,
675: lexicalHandler, namespaces);
676: // Allow the basics for XML
677: filter.allowElements().allowIgnorableWhitespace()
678: .allowCharacters().allowCDATA()
679: .allowPrefixMappings();
680: // Special option, only allow elements below the second level to pass through. This
681: // will trim out the METS declaration and only leave the actual METS parts to be
682: // included.
683: filter.allowElements(1);
684:
685: SAXOutputter outputter = new SAXOutputter();
686: outputter.setContentHandler(filter);
687: outputter.setLexicalHandler(filter);
688: try {
689: outputter.output(xmlDocument);
690: } catch (JDOMException jdome) {
691: // serialization failed so let's just fallback sending the plain characters.
692: sendCharacters(value);
693: }
694: } else {
695: // We don't have XML, so just send the plain old characters.
696: sendCharacters(value);
697: }
698: }
699:
700: // //////////////////////////////
701: // Close out field
702: endElement(DIM, "field");
703: }
704: }
|