001: /*
002: * Copyright 2003-2005 Sun Microsystems, Inc. All Rights Reserved.
003: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
004: *
005: * This code is free software; you can redistribute it and/or modify it
006: * under the terms of the GNU General Public License version 2 only, as
007: * published by the Free Software Foundation. Sun designates this
008: * particular file as subject to the "Classpath" exception as provided
009: * by Sun in the LICENSE file that accompanied this code.
010: *
011: * This code is distributed in the hope that it will be useful, but WITHOUT
012: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
013: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
014: * version 2 for more details (a copy is included in the LICENSE file that
015: * accompanied this code).
016: *
017: * You should have received a copy of the GNU General Public License version
018: * 2 along with this work; if not, write to the Free Software Foundation,
019: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
020: *
021: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
022: * CA 95054 USA or visit www.sun.com if you need additional information or
023: * have any questions.
024: */
025:
026: package com.sun.java.util.jar.pack;
027:
028: import java.util.*;
029: import java.io.*;
030:
031: /**
032: * Population-based coding.
033: * See the section "Encodings of Uncorrelated Values" in the Pack200 spec.
034: * @author John Rose
035: * @version 1.17, 05/05/07
036: */
037: // This tactic alone reduces the final zipped rt.jar by about a percent.
038: class PopulationCoding implements Constants, CodingMethod {
039: Histogram vHist; // histogram of all values
040: int[] fValues; // list of favored values
041: int fVlen; // inclusive max index
042: long[] symtab; // int map of favored value -> token [1..#fValues]
043:
044: CodingMethod favoredCoding;
045: CodingMethod tokenCoding;
046: CodingMethod unfavoredCoding;
047:
048: int L = -1; //preferred L value for tokenCoding
049:
050: public void setFavoredValues(int[] fValues, int fVlen) {
051: // Note: {f} is allFavoredValues[1..fvlen], not [0..fvlen-1].
052: // This is because zero is an exceptional favored value index.
053: assert (fValues[0] == 0); // must be empty
054: assert (this .fValues == null); // do not do this twice
055: this .fValues = fValues;
056: this .fVlen = fVlen;
057: if (L >= 0) {
058: setL(L); // reassert
059: }
060: }
061:
062: public void setFavoredValues(int[] fValues) {
063: int fVlen = fValues.length - 1;
064: setFavoredValues(fValues, fVlen);
065: }
066:
067: public void setHistogram(Histogram vHist) {
068: this .vHist = vHist;
069: }
070:
071: public void setL(int L) {
072: this .L = L;
073: if (L >= 0 && fValues != null && tokenCoding == null) {
074: tokenCoding = fitTokenCoding(fVlen, L);
075: assert (tokenCoding != null);
076: }
077: }
078:
079: public static Coding fitTokenCoding(int fVlen, int L) {
080: // Find the smallest B s.t. (B,H,0) covers fVlen.
081: if (fVlen < 256)
082: // H/L do not matter when B==1
083: return BandStructure.BYTE1;
084: Coding longest = BandStructure.UNSIGNED5.setL(L);
085: if (!longest.canRepresentUnsigned(fVlen))
086: return null; // failure; L is too sharp and fVlen too large
087: Coding tc = longest;
088: for (Coding shorter = longest;;) {
089: shorter = shorter.setB(shorter.B() - 1);
090: if (shorter.umax() < fVlen)
091: break;
092: tc = shorter; // shorten it by reducing B
093: }
094: return tc;
095: }
096:
097: public void setFavoredCoding(CodingMethod favoredCoding) {
098: this .favoredCoding = favoredCoding;
099: }
100:
101: public void setTokenCoding(CodingMethod tokenCoding) {
102: this .tokenCoding = tokenCoding;
103: this .L = -1;
104: if (tokenCoding instanceof Coding && fValues != null) {
105: Coding tc = (Coding) tokenCoding;
106: if (tc == fitTokenCoding(fVlen, tc.L()))
107: this .L = tc.L();
108: ;
109: // Otherwise, it's a non-default coding.
110: }
111: }
112:
113: public void setUnfavoredCoding(CodingMethod unfavoredCoding) {
114: this .unfavoredCoding = unfavoredCoding;
115: }
116:
117: public int favoredValueMaxLength() {
118: if (L == 0)
119: return Integer.MAX_VALUE;
120: else
121: return BandStructure.UNSIGNED5.setL(L).umax();
122: }
123:
124: public void resortFavoredValues() {
125: Coding tc = (Coding) tokenCoding;
126: // Make a local copy before reordering.
127: fValues = BandStructure.realloc(fValues, 1 + fVlen);
128: // Resort favoredValues within each byte-size cadre.
129: int fillp = 1; // skip initial zero
130: for (int n = 1; n <= tc.B(); n++) {
131: int nmax = tc.byteMax(n);
132: if (nmax > fVlen)
133: nmax = fVlen;
134: if (nmax < tc.byteMin(n))
135: break;
136: int low = fillp;
137: int high = nmax + 1;
138: if (high == low)
139: continue;
140: assert (high > low) : high + "!>" + low;
141: assert (tc.getLength(low) == n) : n + " != len(" + (low)
142: + ") == " + tc.getLength(low);
143: assert (tc.getLength(high - 1) == n) : n + " != len("
144: + (high - 1) + ") == " + tc.getLength(high - 1);
145: int midTarget = low + (high - low) / 2;
146: int mid = low;
147: // Divide the values into cadres, and sort within each.
148: int prevCount = -1;
149: int prevLimit = low;
150: for (int i = low; i < high; i++) {
151: int val = fValues[i];
152: int count = vHist.getFrequency(val);
153: if (prevCount != count) {
154: if (n == 1) {
155: // For the single-byte encoding, keep strict order
156: // among frequency groups.
157: Arrays.sort(fValues, prevLimit, i);
158: } else if (Math.abs(mid - midTarget) > Math.abs(i
159: - midTarget)) {
160: // Find a single inflection point
161: // close to the middle of the byte-size cadre.
162: mid = i;
163: }
164: prevCount = count;
165: prevLimit = i;
166: }
167: }
168: if (n == 1) {
169: Arrays.sort(fValues, prevLimit, high);
170: } else {
171: // Sort up to the midpoint, if any.
172: Arrays.sort(fValues, low, mid);
173: Arrays.sort(fValues, mid, high);
174: }
175: assert (tc.getLength(low) == tc.getLength(mid));
176: assert (tc.getLength(low) == tc.getLength(high - 1));
177: fillp = nmax + 1;
178: }
179: assert (fillp == fValues.length);
180:
181: // Reset symtab.
182: symtab = null;
183: }
184:
185: public int getToken(int value) {
186: if (symtab == null)
187: symtab = makeSymtab();
188: int pos = Arrays.binarySearch(symtab, (long) value << 32);
189: if (pos < 0)
190: pos = -pos - 1;
191: if (pos < symtab.length && value == (int) (symtab[pos] >>> 32))
192: return (int) symtab[pos];
193: else
194: return 0;
195: }
196:
197: public int[][] encodeValues(int[] values, int start, int end) {
198: // Compute token sequence.
199: int[] tokens = new int[end - start];
200: int nuv = 0;
201: for (int i = 0; i < tokens.length; i++) {
202: int val = values[start + i];
203: int tok = getToken(val);
204: if (tok != 0)
205: tokens[i] = tok;
206: else
207: nuv += 1;
208: }
209: // Compute unfavored value sequence.
210: int[] unfavoredValues = new int[nuv];
211: nuv = 0; // reset
212: for (int i = 0; i < tokens.length; i++) {
213: if (tokens[i] != 0)
214: continue; // already covered
215: int val = values[start + i];
216: unfavoredValues[nuv++] = val;
217: }
218: assert (nuv == unfavoredValues.length);
219: return new int[][] { tokens, unfavoredValues };
220: }
221:
222: private long[] makeSymtab() {
223: long[] symtab = new long[fVlen];
224: for (int token = 1; token <= fVlen; token++) {
225: symtab[token - 1] = ((long) fValues[token] << 32) | token;
226: }
227: // Index by value:
228: Arrays.sort(symtab);
229: return symtab;
230: }
231:
232: private Coding getTailCoding(CodingMethod c) {
233: while (c instanceof AdaptiveCoding)
234: c = ((AdaptiveCoding) c).tailCoding;
235: return (Coding) c;
236: }
237:
238: // CodingMethod methods.
239: public void writeArrayTo(OutputStream out, int[] a, int start,
240: int end) throws IOException {
241: int[][] vals = encodeValues(a, start, end);
242: writeSequencesTo(out, vals[0], vals[1]);
243: }
244:
245: void writeSequencesTo(OutputStream out, int[] tokens, int[] uValues)
246: throws IOException {
247: favoredCoding.writeArrayTo(out, fValues, 1, 1 + fVlen);
248: getTailCoding(favoredCoding).writeTo(out,
249: computeSentinelValue());
250: tokenCoding.writeArrayTo(out, tokens, 0, tokens.length);
251: if (uValues.length > 0)
252: unfavoredCoding.writeArrayTo(out, uValues, 0,
253: uValues.length);
254: }
255:
256: int computeSentinelValue() {
257: Coding fc = getTailCoding(favoredCoding);
258: if (fc.isDelta()) {
259: // repeat the last favored value, using delta=0
260: return 0;
261: } else {
262: // else repeat the shorter of the min or last value
263: int min = fValues[1];
264: int last = min;
265: // (remember that fVlen is an inclusive limit in fValues)
266: for (int i = 2; i <= fVlen; i++) {
267: last = fValues[i];
268: min = moreCentral(min, last);
269: }
270: int endVal;
271: if (fc.getLength(min) <= fc.getLength(last))
272: return min;
273: else
274: return last;
275: }
276: }
277:
278: public void readArrayFrom(InputStream in, int[] a, int start,
279: int end) throws IOException {
280: // Parameters are fCode, L, uCode.
281: setFavoredValues(readFavoredValuesFrom(in, end - start));
282: // Read the tokens. Read them into the final array, for the moment.
283: tokenCoding.readArrayFrom(in, a, start, end);
284: // Decode the favored tokens.
285: int headp = 0, tailp = -1;
286: int uVlen = 0;
287: for (int i = start; i < end; i++) {
288: int tok = a[i];
289: if (tok == 0) {
290: // Make a linked list, and decode in a second pass.
291: if (tailp < 0) {
292: headp = i;
293: } else {
294: a[tailp] = i;
295: }
296: tailp = i;
297: uVlen += 1;
298: } else {
299: a[i] = fValues[tok];
300: }
301: }
302: // Walk the linked list of "zero" locations, decoding unfavored vals.
303: int[] uValues = new int[uVlen];
304: if (uVlen > 0)
305: unfavoredCoding.readArrayFrom(in, uValues, 0, uVlen);
306: for (int i = 0; i < uVlen; i++) {
307: int nextp = a[headp];
308: a[headp] = uValues[i];
309: headp = nextp;
310: }
311: }
312:
313: int[] readFavoredValuesFrom(InputStream in, int maxForDebug)
314: throws IOException {
315: int[] fValues = new int[1000]; // realloc as needed
316: // The set uniqueValuesForDebug records all favored values.
317: // As each new value is added, we assert that the value
318: // was not already in the set.
319: HashSet uniqueValuesForDebug = null;
320: assert ((uniqueValuesForDebug = new HashSet()) != null);
321: int fillp = 1;
322: maxForDebug += fillp;
323: int min = Integer.MIN_VALUE; // farthest from the center
324: //int min2 = Integer.MIN_VALUE; // emulate buggy 150.7 spec.
325: int last = 0;
326: CodingMethod fcm = favoredCoding;
327: while (fcm instanceof AdaptiveCoding) {
328: AdaptiveCoding ac = (AdaptiveCoding) fcm;
329: int len = ac.headLength;
330: while (fillp + len > fValues.length)
331: fValues = BandStructure.realloc(fValues);
332: int newFillp = fillp + len;
333: ac.headCoding.readArrayFrom(in, fValues, fillp, newFillp);
334: while (fillp < newFillp) {
335: int val = fValues[fillp++];
336: assert (uniqueValuesForDebug.add(new Integer(val)));
337: assert (fillp <= maxForDebug);
338: last = val;
339: min = moreCentral(min, val);
340: //min2 = moreCentral2(min2, val, min);
341: }
342: fcm = ac.tailCoding;
343: }
344: Coding fc = (Coding) fcm;
345: if (fc.isDelta()) {
346: for (long state = 0;;) {
347: // Read a new value:
348: state += fc.readFrom(in);
349: int val;
350: if (fc.isSubrange())
351: val = fc.reduceToUnsignedRange(state);
352: else
353: val = (int) state;
354: state = val;
355: if (fillp > 1 && (val == last || val == min)) //|| val == min2
356: break;
357: if (fillp == fValues.length)
358: fValues = BandStructure.realloc(fValues);
359: fValues[fillp++] = val;
360: assert (uniqueValuesForDebug.add(new Integer(val)));
361: assert (fillp <= maxForDebug);
362: last = val;
363: min = moreCentral(min, val);
364: //min2 = moreCentral(min2, val);
365: }
366: } else {
367: for (;;) {
368: int val = fc.readFrom(in);
369: if (fillp > 1 && (val == last || val == min)) //|| val == min2
370: break;
371: if (fillp == fValues.length)
372: fValues = BandStructure.realloc(fValues);
373: fValues[fillp++] = val;
374: assert (uniqueValuesForDebug.add(new Integer(val)));
375: assert (fillp <= maxForDebug);
376: last = val;
377: min = moreCentral(min, val);
378: //min2 = moreCentral2(min2, val, min);
379: }
380: }
381: return BandStructure.realloc(fValues, fillp);
382: }
383:
384: private static int moreCentral(int x, int y) {
385: int kx = (x >> 31) ^ (x << 1);
386: int ky = (y >> 31) ^ (y << 1);
387: // bias kx/ky to get an unsigned comparison:
388: kx -= Integer.MIN_VALUE;
389: ky -= Integer.MIN_VALUE;
390: int xy = (kx < ky ? x : y);
391: // assert that this ALU-ish version is the same:
392: assert (xy == moreCentralSlow(x, y));
393: return xy;
394: }
395:
396: // private static int moreCentral2(int x, int y, int min) {
397: // // Strict implementation of buggy 150.7 specification.
398: // // The bug is that the spec. says absolute-value ties are broken
399: // // in favor of positive numbers, but the suggested implementation
400: // // (also mentioned in the spec.) breaks ties in favor of negatives.
401: // if (x + y == 0) return (x > y? x : y);
402: // return min;
403: // }
404: private static int moreCentralSlow(int x, int y) {
405: int ax = x;
406: if (ax < 0)
407: ax = -ax;
408: if (ax < 0)
409: return y; //x is MIN_VALUE
410: int ay = y;
411: if (ay < 0)
412: ay = -ay;
413: if (ay < 0)
414: return x; //y is MIN_VALUE
415: if (ax < ay)
416: return x;
417: if (ax > ay)
418: return y;
419: // At this point the absolute values agree, and the negative wins.
420: return x < y ? x : y;
421: }
422:
423: static final int[] LValuesCoded = { -1, 4, 8, 16, 32, 64, 128, 192,
424: 224, 240, 248, 252 };
425:
426: public byte[] getMetaCoding(Coding dflt) {
427: int K = fVlen;
428: int LCoded = 0;
429: if (tokenCoding instanceof Coding) {
430: Coding tc = (Coding) tokenCoding;
431: if (tc.B() == 1) {
432: LCoded = 1;
433: } else if (L >= 0) {
434: assert (L == tc.L());
435: for (int i = 1; i < LValuesCoded.length; i++) {
436: if (LValuesCoded[i] == L) {
437: LCoded = i;
438: break;
439: }
440: }
441: }
442: }
443: CodingMethod tokenDflt = null;
444: if (LCoded != 0 && tokenCoding == fitTokenCoding(fVlen, L)) {
445: // A simple L value is enough to recover the tokenCoding.
446: tokenDflt = tokenCoding;
447: }
448: int FDef = (favoredCoding == dflt) ? 1 : 0;
449: int UDef = (unfavoredCoding == dflt || unfavoredCoding == null) ? 1
450: : 0;
451: int TDef = (tokenCoding == tokenDflt) ? 1 : 0;
452: int TDefL = (TDef == 1) ? LCoded : 0;
453: assert (TDef == ((TDefL > 0) ? 1 : 0));
454: ByteArrayOutputStream bytes = new ByteArrayOutputStream(10);
455: bytes.write(_meta_pop + FDef + 2 * UDef + 4 * TDefL);
456: try {
457: if (FDef == 0)
458: bytes.write(favoredCoding.getMetaCoding(dflt));
459: if (TDef == 0)
460: bytes.write(tokenCoding.getMetaCoding(dflt));
461: if (UDef == 0)
462: bytes.write(unfavoredCoding.getMetaCoding(dflt));
463: } catch (IOException ee) {
464: throw new RuntimeException(ee);
465: }
466: return bytes.toByteArray();
467: }
468:
469: public static int parseMetaCoding(byte[] bytes, int pos,
470: Coding dflt, CodingMethod res[]) {
471: int op = bytes[pos++] & 0xFF;
472: if (op < _meta_pop || op >= _meta_limit)
473: return pos - 1; // backup
474: op -= _meta_pop;
475: int FDef = op % 2;
476: int UDef = (op / 2) % 2;
477: int TDefL = (op / 4);
478: int TDef = (TDefL > 0) ? 1 : 0;
479: int L = LValuesCoded[TDefL];
480: CodingMethod[] FCode = { dflt }, TCode = { null }, UCode = { dflt };
481: if (FDef == 0)
482: pos = BandStructure
483: .parseMetaCoding(bytes, pos, dflt, FCode);
484: if (TDef == 0)
485: pos = BandStructure
486: .parseMetaCoding(bytes, pos, dflt, TCode);
487: if (UDef == 0)
488: pos = BandStructure
489: .parseMetaCoding(bytes, pos, dflt, UCode);
490: PopulationCoding pop = new PopulationCoding();
491: pop.L = L; // might be -1
492: pop.favoredCoding = FCode[0];
493: pop.tokenCoding = TCode[0]; // might be null!
494: pop.unfavoredCoding = UCode[0];
495: res[0] = pop;
496: return pos;
497: }
498:
499: private String keyString(CodingMethod m) {
500: if (m instanceof Coding)
501: return ((Coding) m).keyString();
502: if (m == null)
503: return "none";
504: return m.toString();
505: }
506:
507: public String toString() {
508: PropMap p200 = Utils.currentPropMap();
509: boolean verbose = (p200 != null && p200
510: .getBoolean(Utils.COM_PREFIX + "verbose.pop"));
511: StringBuffer res = new StringBuffer(100);
512: res.append("pop(").append("fVlen=").append(fVlen);
513: if (verbose && fValues != null) {
514: res.append(" fV=[");
515: for (int i = 1; i <= fVlen; i++) {
516: res.append(i == 1 ? "" : ",").append(fValues[i]);
517: }
518: res.append(";").append(computeSentinelValue());
519: res.append("]");
520: }
521: res.append(" fc=").append(keyString(favoredCoding));
522: res.append(" tc=").append(keyString(tokenCoding));
523: res.append(" uc=").append(keyString(unfavoredCoding));
524: res.append(")");
525: return res.toString();
526: }
527: }
|