001: package it.unimi.dsi.mg4j.index.cluster;
002:
003: /*
004: * MG4J: Managing Gigabytes for Java
005: *
006: * Copyright (C) 2006-2007 Sebastiano Vigna
007: *
008: * This library is free software; you can redistribute it and/or modify it
009: * under the terms of the GNU Lesser General Public License as published by the Free
010: * Software Foundation; either version 2.1 of the License, or (at your option)
011: * any later version.
012: *
013: * This library is distributed in the hope that it will be useful, but
014: * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
015: * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
016: * for more details.
017: *
018: * You should have received a copy of the GNU Lesser General Public License
019: * along with this program; if not, write to the Free Software
020: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
021: *
022: */
023:
024: import it.unimi.dsi.util.Properties;
025:
026: import java.io.Serializable;
027: import java.util.Arrays;
028:
029: /** A documental partitioning and clustering strategy that partitions an index into contiguous segments.
030: *
031: * <p>To partition an index in contiguous parts, you must provide an array
032: * of <em>cutpoints</em>, which define each part. More precisely, given
033: * cutpoints <var>c</var><sub>0</sub>,<var>c</var><sub>2</sub>,…,<var>c</var><sub><var>k</var></sub>,
034: * the global index will be partitioned into <var>k</var> local indices containing the documents
035: * from <var>c</var><sub>0</sub> (included) to <var>c</var><sub>1</sub> (excluded), from
036: * <var>c</var><sub>1</sub> (included) to <var>c</var><sub>2</sub> and so on. Note that
037: * necessarily <var>c</var><sub>0</sub>=0 and <var>c</var><sub><var>k</var></sub>=<var>N</var>,
038: * where <var>N</var> is the number of globally indexed documents.
039: *
040: * <p>The {@link #properties()} method provides two properties, <samp>pointerfrom</samp> and <samp>pointerto</samp>,
041: * that contain the first (included) and last (excluded) pointer in the local index.
042: *
043: * @author Alessandro Arrabito
044: * @author Sebastiano Vigna
045: */
046:
047: public class ContiguousDocumentalStrategy implements
048: DocumentalPartitioningStrategy, DocumentalClusteringStrategy,
049: Serializable {
050:
051: private static final long serialVersionUID = 0L;
052:
053: /** The cutpoints.*/
054: private final int[] cutPoint;
055: /** The (cached) number of segments. */
056: private final int k;
057:
058: /** Creates a new contiguous strategy with the given cutpoints.
059: *
060: * <P>Note that {@link DocumentalStrategies} has ready-made factory
061: * methods for the common cases.
062: *
063: * @param cutPoint an array of cutpoints (see the class description}.
064: */
065:
066: public ContiguousDocumentalStrategy(final int... cutPoint) {
067: if (cutPoint.length == 0)
068: throw new IllegalArgumentException("Empty cutpoint array");
069: if (cutPoint[0] != 0)
070: throw new IllegalArgumentException(
071: "The first cutpoint must be 0");
072: this .cutPoint = cutPoint;
073: this .k = cutPoint.length - 1;
074: }
075:
076: public int numberOfLocalIndices() {
077: return k;
078: }
079:
080: public int localIndex(final int globalPointer) {
081: if (globalPointer >= cutPoint[k])
082: throw new IndexOutOfBoundsException(Integer
083: .toString(globalPointer));
084: for (int i = k; i-- != 0;)
085: if (globalPointer >= cutPoint[i])
086: return i;
087: throw new IndexOutOfBoundsException(Integer
088: .toString(globalPointer));
089: }
090:
091: public int localPointer(final int globalPointer) {
092: return globalPointer - cutPoint[localIndex(globalPointer)];
093: }
094:
095: public int globalPointer(final int localIndex,
096: final int localPointer) {
097: return localPointer + cutPoint[localIndex];
098: }
099:
100: public int numberOfDocuments(final int localIndex) {
101: return cutPoint[localIndex + 1] - cutPoint[localIndex];
102: }
103:
104: public Properties[] properties() {
105: Properties[] properties = new Properties[k];
106: for (int i = 0; i < k; i++) {
107: properties[i] = new Properties();
108: properties[i].addProperty("pointerfrom", cutPoint[i]);
109: properties[i].addProperty("pointerto", cutPoint[i + 1]);
110: }
111: return properties;
112: }
113:
114: public String toString() {
115: return Arrays.toString(cutPoint);
116: }
117: }
|