001: package prefuse.data.util;
002:
003: import java.util.Comparator;
004: import java.util.Iterator;
005:
006: import prefuse.data.Table;
007: import prefuse.data.expression.AndPredicate;
008: import prefuse.data.expression.ColumnExpression;
009: import prefuse.data.expression.ComparisonPredicate;
010: import prefuse.data.expression.Expression;
011: import prefuse.data.expression.ExpressionAnalyzer;
012: import prefuse.data.expression.NotPredicate;
013: import prefuse.data.expression.OrPredicate;
014: import prefuse.data.expression.Predicate;
015: import prefuse.data.expression.RangePredicate;
016: import prefuse.data.tuple.TupleSet;
017: import prefuse.util.PrefuseConfig;
018: import prefuse.util.collections.CompositeIntIterator;
019: import prefuse.util.collections.IntIterator;
020:
021: /**
022: * Factory class that creates optimized filter iterators. When possible,
023: * this factory will attempt to create an optimized query plan by using
024: * available indexes, in many incrasing performance by only visiting
025: * the tuples which will pass the filter condition.
026: *
027: * @author <a href="http://jheer.org">jeffrey heer</a>
028: */
029: public class FilterIteratorFactory {
030:
031: private static final int OPTIMIZATION_THRESHOLD = PrefuseConfig
032: .getInt("data.filter.optimizeThreshold");
033:
034: // we can stash our query plan generation and optimization here to deal
035: // with it all in one spot, and keep the rest of the classes clean
036:
037: /**
038: * Get a filtered iterator over the tuples in the given set,
039: * filtered by the given predicate.
040: * @param ts the TupleSet to iterate over
041: * @param p the filter predicate
042: * @return a filtered iterator over the tuples
043: */
044: public static Iterator tuples(TupleSet ts, Predicate p) {
045: // no predicate means no filtering
046: if (p == null)
047: return ts.tuples();
048:
049: // attempt to generate an optimized query plan
050: Iterator iter = null;
051: if (ts instanceof Table) {
052: Table t = (Table) ts;
053: IntIterator ii = getOptimizedIterator(t, p);
054: if (ii != null)
055: iter = t.tuples(ii);
056: }
057:
058: // optimization fails, scan the entire table
059: if (iter == null) {
060: iter = new FilterIterator(ts.tuples(), p);
061: }
062:
063: return iter;
064: }
065:
066: /**
067: * Get a filtered iterator over the rows in the given table,
068: * filtered by the given predicate.
069: * @param t the Table to iterate over
070: * @param p the filter predicate
071: * @return a filtered iterator over the table rows
072: */
073: public static IntIterator rows(Table t, Predicate p) {
074: // attempt to generate an optimized query plan
075: IntIterator iter = null;
076: iter = getOptimizedIterator(t, p);
077:
078: // optimization fails, scan the entire table
079: if (iter == null) {
080: iter = new FilterRowIterator(t.rows(), t, p);
081: }
082: return iter;
083: }
084:
085: /**
086: * Get an optimized iterator over the rows of a table, if possible.
087: * @param t the Table to iterator over
088: * @param p the filter predicate
089: * @return an optimized iterator, or null if no optimization was found
090: */
091: protected static IntIterator getOptimizedIterator(Table t,
092: Predicate p) {
093: if (t.getRowCount() < OPTIMIZATION_THRESHOLD)
094: return null; // avoid overhead for small tables
095:
096: if (p instanceof ColumnExpression) {
097: // try to optimize a boolean column
098: return getColumnIterator(t, ((ColumnExpression) p)
099: .getColumnName(), true);
100: } else if (p instanceof NotPredicate) {
101: // try to optimize the negation a boolean column
102: Predicate pp = ((NotPredicate) p).getPredicate();
103: if (pp instanceof ColumnExpression) {
104: return getColumnIterator(t, ((ColumnExpression) pp)
105: .getColumnName(), false);
106: }
107: } else if (p instanceof AndPredicate) {
108: // try to optimize an and clause
109: return getAndIterator(t, (AndPredicate) p);
110: } else if (p instanceof OrPredicate) {
111: // try to optimize an or clause
112: return getOrIterator(t, (OrPredicate) p);
113: } else if (p instanceof ComparisonPredicate) {
114: // try to optimize a comparison (=, !=, <, > ,etc)
115: return getComparisonIterator(t, (ComparisonPredicate) p);
116: } else if (p instanceof RangePredicate) {
117: // try to optimize a bounded range of values
118: return getRangeIterator(t, (RangePredicate) p);
119: }
120:
121: return null;
122: }
123:
124: protected static IntIterator getColumnIterator(Table t,
125: String field, boolean val) {
126: if (t.getColumnType(field) != boolean.class)
127: return null; // only works for boolean-valued columns
128:
129: Index index = t.getIndex(field);
130: if (index == null) {
131: return null;
132: } else {
133: return index.rows(val);
134: }
135: }
136:
137: protected static IntIterator getOrIterator(Table t, OrPredicate op) {
138: int size = op.size();
139: if (size > 1) {
140: // if all subclauses can be optimized, we can optimize the query
141: IntIterator[] rows = new IntIterator[size];
142: for (int i = 0; i < rows.length; ++i) {
143: rows[i] = getOptimizedIterator(t, op.get(i));
144:
145: // all clauses must be optimized to avoid linear scan
146: if (rows[i] == null)
147: return null;
148: }
149: // group iterators, and filter for uniqueness
150: return new UniqueRowIterator(new CompositeIntIterator(rows));
151: } else if (size == 1) {
152: // only one clause, optimize for that
153: return getOptimizedIterator(t, op.get(0));
154: } else {
155: // no woman, no cry
156: return null;
157: }
158: }
159:
160: protected static IntIterator getAndIterator(Table t, AndPredicate ap) {
161: // possible TODO: add scoring to select best optimized iterator
162: // for now just work from the end backwards and take the first
163: // optimized iterator we find
164: IntIterator rows = null;
165: Predicate clause = null;
166: for (int i = ap.size(); --i >= 0;) {
167: clause = ap.get(i);
168: if ((rows = getOptimizedIterator(t, clause)) != null)
169: break;
170: }
171:
172: // exit if we didn't optimize
173: if (rows == null)
174: return null;
175:
176: // if only one clause, no extras needed
177: if (ap.size() == 1)
178: return rows;
179:
180: // otherwise get optimized source, run through other clauses
181: return new FilterRowIterator(rows, t, ap
182: .getSubPredicate(clause));
183: }
184:
185: protected static IntIterator getComparisonIterator(Table t,
186: ComparisonPredicate cp) {
187: Expression l = cp.getLeftExpression();
188: Expression r = cp.getRightExpression();
189: int operation = cp.getOperation();
190:
191: // not equals operations aren't handled by the index
192: if (operation == ComparisonPredicate.NEQ)
193: return null;
194:
195: ColumnExpression col;
196: Expression lit;
197:
198: // make sure columns are of the right type
199: if (l instanceof ColumnExpression
200: && !ExpressionAnalyzer.hasDependency(r)) {
201: col = (ColumnExpression) l;
202: lit = r;
203: } else if (r instanceof ColumnExpression
204: && !ExpressionAnalyzer.hasDependency(l)) {
205: col = (ColumnExpression) r;
206: lit = l;
207: } else {
208: return null;
209: }
210:
211: // if table has index of the right type, use it
212: Comparator cmp = cp.getComparator();
213: Index index = t.getIndex(col.getColumnName());
214:
215: if (index == null || !cmp.equals(index.getComparator()))
216: return null;
217:
218: Class ltype = lit.getClass();
219: if (ltype == int.class) {
220: int val = lit.getInt(null); // literal value, so null is safe
221: switch (operation) {
222: case ComparisonPredicate.LT:
223: return index.rows(Integer.MIN_VALUE, val,
224: Index.TYPE_AIE);
225: case ComparisonPredicate.GT:
226: return index.rows(val, Integer.MAX_VALUE,
227: Index.TYPE_AEI);
228: case ComparisonPredicate.EQ:
229: return index.rows(val, val, Index.TYPE_AII);
230: case ComparisonPredicate.LTEQ:
231: return index.rows(Integer.MIN_VALUE, val,
232: Index.TYPE_AII);
233: case ComparisonPredicate.GTEQ:
234: return index.rows(val, Integer.MAX_VALUE,
235: Index.TYPE_AII);
236: default:
237: throw new IllegalStateException(); // should never occur
238: }
239: } else if (ltype == long.class) {
240: long val = lit.getLong(null); // literal value, so null is safe
241: switch (operation) {
242: case ComparisonPredicate.LT:
243: return index.rows(Long.MIN_VALUE, val, Index.TYPE_AIE);
244: case ComparisonPredicate.GT:
245: return index.rows(val, Long.MAX_VALUE, Index.TYPE_AEI);
246: case ComparisonPredicate.EQ:
247: return index.rows(val, val, Index.TYPE_AII);
248: case ComparisonPredicate.LTEQ:
249: return index.rows(Long.MIN_VALUE, val, Index.TYPE_AII);
250: case ComparisonPredicate.GTEQ:
251: return index.rows(val, Long.MAX_VALUE, Index.TYPE_AII);
252: default:
253: throw new IllegalStateException(); // should never occur
254: }
255: } else if (ltype == float.class) {
256: float val = lit.getFloat(null); // literal value, so null is safe
257: switch (operation) {
258: case ComparisonPredicate.LT:
259: return index.rows(Float.MIN_VALUE, val, Index.TYPE_AIE);
260: case ComparisonPredicate.GT:
261: return index.rows(val, Float.MAX_VALUE, Index.TYPE_AEI);
262: case ComparisonPredicate.EQ:
263: return index.rows(val, val, Index.TYPE_AII);
264: case ComparisonPredicate.LTEQ:
265: return index.rows(Float.MIN_VALUE, val, Index.TYPE_AII);
266: case ComparisonPredicate.GTEQ:
267: return index.rows(val, Float.MAX_VALUE, Index.TYPE_AII);
268: default:
269: throw new IllegalStateException(); // should never occur
270: }
271: } else if (ltype == double.class) {
272: double val = lit.getDouble(null); // literal value, so null is safe
273: switch (operation) {
274: case ComparisonPredicate.LT:
275: return index
276: .rows(Double.MIN_VALUE, val, Index.TYPE_AIE);
277: case ComparisonPredicate.GT:
278: return index
279: .rows(val, Double.MAX_VALUE, Index.TYPE_AEI);
280: case ComparisonPredicate.EQ:
281: return index.rows(val, val, Index.TYPE_AII);
282: case ComparisonPredicate.LTEQ:
283: return index
284: .rows(Double.MIN_VALUE, val, Index.TYPE_AII);
285: case ComparisonPredicate.GTEQ:
286: return index
287: .rows(val, Double.MAX_VALUE, Index.TYPE_AII);
288: default:
289: throw new IllegalStateException(); // should never occur
290: }
291: } else {
292: Object val = lit.get(null); // literal value, so null is safe
293: switch (operation) {
294: case ComparisonPredicate.LT:
295: return index.rows(null, val, Index.TYPE_AIE);
296: case ComparisonPredicate.GT:
297: return index.rows(val, null, Index.TYPE_AEI);
298: case ComparisonPredicate.EQ:
299: return index.rows(val, val, Index.TYPE_AII);
300: case ComparisonPredicate.LTEQ:
301: return index.rows(null, val, Index.TYPE_AII);
302: case ComparisonPredicate.GTEQ:
303: return index.rows(val, null, Index.TYPE_AII);
304: default:
305: throw new IllegalStateException(); // should never occur
306: }
307: }
308: }
309:
310: protected static IntIterator getRangeIterator(Table t,
311: RangePredicate rp) {
312: ColumnExpression col;
313: Expression l, r;
314:
315: // make sure columns are of the right type
316: if (!(rp.getMiddleExpression() instanceof ColumnExpression)
317: || ExpressionAnalyzer.hasDependency(rp
318: .getLeftExpression())
319: || ExpressionAnalyzer.hasDependency(rp
320: .getRightExpression())) {
321: return null;
322: }
323:
324: // assign variables
325: col = (ColumnExpression) rp.getMiddleExpression();
326: l = rp.getLeftExpression();
327: r = rp.getRightExpression();
328:
329: // if table has index of the right type, use it
330: Comparator cmp = rp.getComparator();
331: Index index = t.getIndex(col.getColumnName());
332:
333: if (index == null || !cmp.equals(index.getComparator()))
334: return null;
335:
336: int operation = rp.getOperation();
337: Class ltype = t.getColumnType(col.getColumnName());
338:
339: // TODO safety check literal types
340:
341: // get the index type
342: int indexType;
343: switch (operation) {
344: case RangePredicate.IN_IN:
345: indexType = Index.TYPE_AII;
346: break;
347: case RangePredicate.IN_EX:
348: indexType = Index.TYPE_AIE;
349: break;
350: case RangePredicate.EX_IN:
351: indexType = Index.TYPE_AEI;
352: break;
353: case RangePredicate.EX_EX:
354: indexType = Index.TYPE_AEE;
355: break;
356: default:
357: throw new IllegalStateException(); // should never occur
358: }
359:
360: // get the indexed rows
361: if (ltype == int.class) {
362: return index
363: .rows(l.getInt(null), r.getInt(null), indexType);
364: } else if (ltype == long.class) {
365: return index.rows(l.getLong(null), r.getLong(null),
366: indexType);
367: } else if (ltype == float.class) {
368: return index.rows(l.getFloat(null), r.getFloat(null),
369: indexType);
370: } else if (ltype == double.class) {
371: return index.rows(l.getDouble(null), r.getDouble(null),
372: indexType);
373: } else {
374: return index.rows(l.get(null), r.get(null), indexType);
375: }
376: }
377:
378: } // end of class FilterIteratorFactory
|