001: /*-
002: * See the file LICENSE for redistribution information.
003: *
004: * Copyright (c) 2005,2008 Oracle. All rights reserved.
005: *
006: * $Id: DbCacheSize.java,v 1.10.2.4 2008/01/07 15:14:17 cwl Exp $
007: */
008:
009: package com.sleepycat.je.util;
010:
011: import java.io.File;
012: import java.io.PrintStream;
013: import java.math.BigInteger;
014: import java.text.NumberFormat;
015: import java.util.Random;
016:
017: import com.sleepycat.je.Database;
018: import com.sleepycat.je.DatabaseConfig;
019: import com.sleepycat.je.DatabaseEntry;
020: import com.sleepycat.je.DatabaseException;
021: import com.sleepycat.je.Environment;
022: import com.sleepycat.je.EnvironmentConfig;
023: import com.sleepycat.je.EnvironmentStats;
024: import com.sleepycat.je.OperationStatus;
025: import com.sleepycat.je.PreloadConfig;
026: import com.sleepycat.je.dbi.MemoryBudget;
027: import com.sleepycat.je.utilint.CmdUtil;
028:
029: /**
030: * Estimating JE in-memory sizes as a function of key and data size is not
031: * straightforward for two reasons. There is some fixed overhead for each btree
032: * internal node, so tree fanout and degree of node sparseness impacts memory
033: * consumption. In addition, JE compresses some of the internal nodes where
034: * possible, but compression depends on on-disk layouts.
035: *
036: * DbCacheSize is an aid for estimating cache sizes. To get an estimate of the
037: * in-memory footprint for a given database, specify the number of records and
038: * record characteristics and DbCacheSize will return a minimum and maximum
039: * estimate of the cache size required for holding the database in memory.
040: * If the user specifies the record's data size, the utility will return both
041: * values for holding just the internal nodes of the btree, and for holding the
042: * entire database in cache.
043: *
044: * Note that "cache size" is a percentage more than "btree size", to cover
045: * general environment resources like log buffers. Each invocation of the
046: * utility returns an estimate for a single database in an environment. For an
047: * environment with multiple databases, run the utility for each database, add
048: * up the btree sizes, and then add 10 percent.
049: *
050: * Note that the utility does not yet cover duplicate records and the API is
051: * subject to change release to release.
052: *
053: * The only required parameters are the number of records and key size.
054: * Data size, non-tree cache overhead, btree fanout, and other parameters
055: * can also be provided. For example:
056: *
057: * $ java DbCacheSize -records 554719 -key 16 -data 100
058: * Inputs: records=554719 keySize=16 dataSize=100 nodeMax=128 density=80%
059: * overhead=10%
060: *
061: * Cache Size Btree Size Description
062: * -------------- -------------- -----------
063: * 30,547,440 27,492,696 Minimum, internal nodes only
064: * 41,460,720 37,314,648 Maximum, internal nodes only
065: * 114,371,644 102,934,480 Minimum, internal nodes and leaf nodes
066: * 125,284,924 112,756,432 Maximum, internal nodes and leaf nodes
067: *
068: * Btree levels: 3
069: *
* This says that the minimum cache size to hold only the internal nodes of the
* btree in cache is approximately 30MB. The maximum size to hold the entire
* database in cache, both internal nodes and data records, is approximately
* 125MB.
073: */
public class DbCacheSize {

    /* Formats byte counts with grouping separators (e.g. 30,547,440). */
    private static final NumberFormat INT_FORMAT = NumberFormat
        .getIntegerInstance();

    /* Report header; the two numeric columns are right-aligned below it. */
    private static final String HEADER = " Cache Size Btree Size Description\n"
        + "-------------- -------------- -----------";
    // 12345678901234 12345678901234
    // 12
    private static final int COLUMN_WIDTH = 14;    // width of each numeric column
    private static final int COLUMN_SEPARATOR = 2; // spaces between columns

    /* Inputs supplied by the caller (see constructor). */
    private long records;   // total key/data pairs in the database
    private int keySize;    // average key bytes per record
    private int dataSize;   // average data bytes per record; -1 when unspecified
    private int nodeMax;    // maximum entries per btree node
    private int density;    // percentage of node entries occupied (1-100)
    private long overhead;  // non-btree overhead in bytes; 0 means "assume 10%"

    /* Outputs computed by caclulateCacheSizes(). */
    private long minInBtreeSize;
    private long maxInBtreeSize;
    private long minInCacheSize;
    private long maxInCacheSize;
    private long maxInBtreeSizeWithData;
    private long maxInCacheSizeWithData;
    private long minInBtreeSizeWithData;
    private long minInCacheSizeWithData;
    private int nLevels = 1;  // btree depth, including the root level
101:
102: public DbCacheSize(long records, int keySize, int dataSize,
103: int nodeMax, int density, long overhead) {
104: this .records = records;
105: this .keySize = keySize;
106: this .dataSize = dataSize;
107: this .nodeMax = nodeMax;
108: this .density = density;
109: this .overhead = overhead;
110: }
111:
    /** @return minimum cache size estimate, internal nodes only. */
    public long getMinCacheSizeInternalNodesOnly() {
        return minInCacheSize;
    }

    /** @return maximum cache size estimate, internal nodes only. */
    public long getMaxCacheSizeInternalNodesOnly() {
        return maxInCacheSize;
    }

    /** @return minimum btree size estimate, internal nodes only. */
    public long getMinBtreeSizeInternalNodesOnly() {
        return minInBtreeSize;
    }

    /** @return maximum btree size estimate, internal nodes only. */
    public long getMaxBtreeSizeInternalNodesOnly() {
        return maxInBtreeSize;
    }

    /** @return minimum cache size estimate, internal and leaf nodes. */
    public long getMinCacheSizeWithData() {
        return minInCacheSizeWithData;
    }

    /** @return maximum cache size estimate, internal and leaf nodes. */
    public long getMaxCacheSizeWithData() {
        return maxInCacheSizeWithData;
    }

    /** @return minimum btree size estimate, internal and leaf nodes. */
    public long getMinBtreeSizeWithData() {
        return minInBtreeSizeWithData;
    }

    /** @return maximum btree size estimate, internal and leaf nodes. */
    public long getMaxBtreeSizeWithData() {
        return maxInBtreeSizeWithData;
    }

    /** @return estimated number of btree levels, including the root. */
    public int getNLevels() {
        return nLevels;
    }
147:
148: public static void main(String[] args) {
149:
150: try {
151: long records = 0;
152: int keySize = 0;
153: int dataSize = -1;
154: int nodeMax = 128;
155: int density = 80;
156: long overhead = 0;
157: File measureDir = null;
158: boolean measureRandom = false;
159:
160: for (int i = 0; i < args.length; i += 1) {
161: String name = args[i];
162: String val = null;
163: if (i < args.length - 1 && !args[i + 1].startsWith("-")) {
164: i += 1;
165: val = args[i];
166: }
167: if (name.equals("-records")) {
168: if (val == null) {
169: usage("No value after -records");
170: }
171: try {
172: records = Long.parseLong(val);
173: } catch (NumberFormatException e) {
174: usage(val + " is not a number");
175: }
176: if (records <= 0) {
177: usage(val + " is not a positive integer");
178: }
179: } else if (name.equals("-key")) {
180: if (val == null) {
181: usage("No value after -key");
182: }
183: try {
184: keySize = Integer.parseInt(val);
185: } catch (NumberFormatException e) {
186: usage(val + " is not a number");
187: }
188: if (keySize <= 0) {
189: usage(val + " is not a positive integer");
190: }
191: } else if (name.equals("-data")) {
192: if (val == null) {
193: usage("No value after -data");
194: }
195: try {
196: dataSize = Integer.parseInt(val);
197: } catch (NumberFormatException e) {
198: usage(val + " is not a number");
199: }
200: if (dataSize < 0) {
201: usage(val + " is not a non-negative integer");
202: }
203: } else if (name.equals("-nodemax")) {
204: if (val == null) {
205: usage("No value after -nodemax");
206: }
207: try {
208: nodeMax = Integer.parseInt(val);
209: } catch (NumberFormatException e) {
210: usage(val + " is not a number");
211: }
212: if (nodeMax <= 0) {
213: usage(val + " is not a positive integer");
214: }
215: } else if (name.equals("-density")) {
216: if (val == null) {
217: usage("No value after -density");
218: }
219: try {
220: density = Integer.parseInt(val);
221: } catch (NumberFormatException e) {
222: usage(val + " is not a number");
223: }
224: if (density < 1 || density > 100) {
225: usage(val + " is not betwen 1 and 100");
226: }
227: } else if (name.equals("-overhead")) {
228: if (val == null) {
229: usage("No value after -overhead");
230: }
231: try {
232: overhead = Long.parseLong(val);
233: } catch (NumberFormatException e) {
234: usage(val + " is not a number");
235: }
236: if (overhead < 0) {
237: usage(val + " is not a non-negative integer");
238: }
239: } else if (name.equals("-measure")) {
240: if (val == null) {
241: usage("No value after -measure");
242: }
243: measureDir = new File(val);
244: } else if (name.equals("-measurerandom")) {
245: measureRandom = true;
246: } else {
247: usage("Unknown arg: " + name);
248: }
249: }
250:
251: if (records == 0) {
252: usage("-records not specified");
253: }
254:
255: if (keySize == 0) {
256: usage("-key not specified");
257: }
258:
259: DbCacheSize dbCacheSize = new DbCacheSize(records, keySize,
260: dataSize, nodeMax, density, overhead);
261: dbCacheSize.caclulateCacheSizes();
262: dbCacheSize.printCacheSizes(System.out);
263:
264: if (measureDir != null) {
265: measure(System.out, measureDir, records, keySize,
266: dataSize, nodeMax, measureRandom);
267: }
268: } catch (Throwable e) {
269: e.printStackTrace(System.out);
270: }
271: }
272:
273: private static void usage(String msg) {
274:
275: if (msg != null) {
276: System.out.println(msg);
277: }
278:
279: System.out
280: .println("usage:"
281: + "\njava "
282: + CmdUtil.getJavaCommand(DbCacheSize.class)
283: + "\n -records <count>"
284: + "\n # Total records (key/data pairs); required"
285: + "\n -key <bytes> "
286: + "\n # Average key bytes per record; required"
287: + "\n [-data <bytes>]"
288: + "\n # Average data bytes per record; if omitted no leaf"
289: + "\n # node sizes are included in the output"
290: + "\n [-nodemax <entries>]"
291: + "\n # Number of entries per Btree node; default: 128"
292: + "\n [-density <percentage>]"
293: + "\n # Percentage of node entries occupied; default: 80"
294: + "\n [-overhead <bytes>]"
295: + "\n # Overhead of non-Btree objects (log buffers, locks,"
296: + "\n # etc); default: 10% of total cache size"
297: + "\n [-measure <environmentHomeDirectory>]"
298: + "\n # An empty directory used to write a database to find"
299: + "\n # the actual cache size; default: do not measure"
300: + "\n [-measurerandom"
301: + "\n # With -measure insert randomly generated keys;"
302: + "\n # default: insert sequential keys");
303:
304: System.exit(2);
305: }
306:
307: private void caclulateCacheSizes() {
308: int nodeAvg = (nodeMax * density) / 100;
309: long nBinEntries = (records * nodeMax) / nodeAvg;
310: long nBinNodes = (nBinEntries + nodeMax - 1) / nodeMax;
311:
312: long nInNodes = 0;
313: long lnSize = 0;
314:
315: for (long n = nBinNodes; n > 0; n /= nodeMax) {
316: nInNodes += n;
317: nLevels += 1;
318: }
319:
320: minInBtreeSize = nInNodes
321: * calcInSize(nodeMax, nodeAvg, keySize, true);
322: maxInBtreeSize = nInNodes
323: * calcInSize(nodeMax, nodeAvg, keySize, false);
324: minInCacheSize = calculateOverhead(minInBtreeSize, overhead);
325: maxInCacheSize = calculateOverhead(maxInBtreeSize, overhead);
326:
327: if (dataSize >= 0) {
328: lnSize = records * calcLnSize(dataSize);
329: }
330:
331: maxInBtreeSizeWithData = maxInBtreeSize + lnSize;
332: maxInCacheSizeWithData = calculateOverhead(
333: maxInBtreeSizeWithData, overhead);
334: minInBtreeSizeWithData = minInBtreeSize + lnSize;
335: minInCacheSizeWithData = calculateOverhead(
336: minInBtreeSizeWithData, overhead);
337: }
338:
339: private void printCacheSizes(PrintStream out) {
340:
341: out.println("Inputs:" + " records=" + records + " keySize="
342: + keySize + " dataSize=" + dataSize + " nodeMax="
343: + nodeMax + " density=" + density + '%' + " overhead="
344: + ((overhead > 0) ? overhead : 10) + "%");
345:
346: out.println();
347: out.println(HEADER);
348: out.println(line(minInBtreeSize, minInCacheSize,
349: "Minimum, internal nodes only"));
350: out.println(line(maxInBtreeSize, maxInCacheSize,
351: "Maximum, internal nodes only"));
352: if (dataSize >= 0) {
353: out.println(line(minInBtreeSizeWithData,
354: minInCacheSizeWithData,
355: "Minimum, internal nodes and leaf nodes"));
356: out.println(line(maxInBtreeSizeWithData,
357: maxInCacheSizeWithData,
358: "Maximum, internal nodes and leaf nodes"));
359: } else {
360: out.println("\nTo get leaf node sizing specify -data");
361: }
362:
363: out.println("\nBtree levels: " + nLevels);
364: }
365:
366: private int calcInSize(int nodeMax, int nodeAvg, int keySize,
367: boolean lsnCompression) {
368:
369: /* Fixed overhead */
370: int size = MemoryBudget.IN_FIXED_OVERHEAD;
371:
372: /* Byte state array plus keys and nodes arrays */
373: size += MemoryBudget.byteArraySize(nodeMax)
374: + (nodeMax * (2 * MemoryBudget.OBJECT_ARRAY_ITEM_OVERHEAD));
375:
376: /* LSN array */
377: if (lsnCompression) {
378: size += MemoryBudget.byteArraySize(nodeMax * 2);
379: } else {
380: size += MemoryBudget.ARRAY_OVERHEAD
381: + (nodeMax * MemoryBudget.LONG_OVERHEAD);
382: }
383:
384: /* Keys for populated entries plus the identifier key */
385: size += (nodeAvg + 1) * MemoryBudget.byteArraySize(keySize);
386:
387: return size;
388: }
389:
390: private int calcLnSize(int dataSize) {
391:
392: return MemoryBudget.LN_OVERHEAD
393: + MemoryBudget.byteArraySize(dataSize);
394: }
395:
396: private long calculateOverhead(long btreeSize, long overhead) {
397: long cacheSize;
398: if (overhead == 0) {
399: cacheSize = (100 * btreeSize) / 90;
400: } else {
401: cacheSize = btreeSize + overhead;
402: }
403: return cacheSize;
404: }
405:
406: private String line(long btreeSize, long cacheSize, String comment) {
407:
408: StringBuffer buf = new StringBuffer(100);
409:
410: column(buf, INT_FORMAT.format(cacheSize));
411: column(buf, INT_FORMAT.format(btreeSize));
412: column(buf, comment);
413:
414: return buf.toString();
415: }
416:
417: private void column(StringBuffer buf, String str) {
418:
419: int start = buf.length();
420:
421: while (buf.length() - start + str.length() < COLUMN_WIDTH) {
422: buf.append(' ');
423: }
424:
425: buf.append(str);
426:
427: for (int i = 0; i < COLUMN_SEPARATOR; i += 1) {
428: buf.append(' ');
429: }
430: }
431:
    /**
     * Measures actual cache consumption by writing a database with the
     * given characteristics into an empty environment directory.  Stats
     * are reported twice: after inserting all records (internal and leaf
     * nodes resident), and again after closing, reopening and preloading
     * (internal nodes only).
     *
     * @throws DatabaseException if any JE operation fails.
     */
    private static void measure(PrintStream out, File dir,
        long records, int keySize, int dataSize, int nodeMax,
        boolean randomKeys) throws DatabaseException {

        /* Refuse to write into a directory that already contains files. */
        String[] fileNames = dir.list();
        if (fileNames != null && fileNames.length > 0) {
            usage("Directory is not empty: " + dir);
        }

        Environment env = openEnvironment(dir, true);
        Database db = openDatabase(env, nodeMax, true);

        try {
            out
                .println("\nMeasuring with cache size: "
                    + INT_FORMAT.format(env.getConfig()
                        .getCacheSize()));
            insertRecords(out, env, db, records, keySize, dataSize,
                randomKeys);
            printStats(out, env,
                "Stats for internal and leaf nodes (after insert)");

            /* Close and reopen so that only the nodes brought in by
             * preload are resident for the second measurement. */
            db.close();
            env.close();
            env = openEnvironment(dir, false);
            db = openDatabase(env, nodeMax, false);

            out
                .println("\nPreloading with cache size: "
                    + INT_FORMAT.format(env.getConfig()
                        .getCacheSize()));
            preloadRecords(out, db);
            printStats(out, env,
                "Stats for internal nodes only (after preload)");
        } finally {
            /* Closes whichever handles are current; closing an
             * already-closed handle is reported, not propagated. */
            try {
                db.close();
                env.close();
            } catch (Exception e) {
                out.println("During close: " + e);
            }
        }
    }
475:
476: private static Environment openEnvironment(File dir,
477: boolean allowCreate) throws DatabaseException {
478:
479: EnvironmentConfig envConfig = new EnvironmentConfig();
480: envConfig.setAllowCreate(allowCreate);
481: envConfig.setCachePercent(90);
482: return new Environment(dir, envConfig);
483: }
484:
485: private static Database openDatabase(Environment env, int nodeMax,
486: boolean allowCreate) throws DatabaseException {
487:
488: DatabaseConfig dbConfig = new DatabaseConfig();
489: dbConfig.setAllowCreate(allowCreate);
490: dbConfig.setNodeMaxEntries(nodeMax);
491: return env.openDatabase(null, "foo", dbConfig);
492: }
493:
494: private static void insertRecords(PrintStream out, Environment env,
495: Database db, long records, int keySize, int dataSize,
496: boolean randomKeys) throws DatabaseException {
497:
498: DatabaseEntry key = new DatabaseEntry();
499: DatabaseEntry data = new DatabaseEntry(new byte[dataSize]);
500: BigInteger bigInt = BigInteger.ZERO;
501: Random rnd = new Random(123);
502:
503: for (int i = 0; i < records; i += 1) {
504:
505: if (randomKeys) {
506: byte[] a = new byte[keySize];
507: rnd.nextBytes(a);
508: key.setData(a);
509: } else {
510: bigInt = bigInt.add(BigInteger.ONE);
511: byte[] a = bigInt.toByteArray();
512: if (a.length < keySize) {
513: byte[] a2 = new byte[keySize];
514: System.arraycopy(a, 0, a2, a2.length - a.length,
515: a.length);
516: a = a2;
517: } else if (a.length > keySize) {
518: out.println("*** Key doesn't fit value=" + bigInt
519: + " byte length=" + a.length);
520: return;
521: }
522: key.setData(a);
523: }
524:
525: OperationStatus status = db.putNoOverwrite(null, key, data);
526: if (status == OperationStatus.KEYEXIST && randomKeys) {
527: i -= 1;
528: out.println("Random key already exists -- retrying");
529: continue;
530: }
531: if (status != OperationStatus.SUCCESS) {
532: out.println("*** " + status);
533: return;
534: }
535:
536: if (i % 10000 == 0) {
537: EnvironmentStats stats = env.getStats(null);
538: if (stats.getNNodesScanned() > 0) {
539: out
540: .println("*** Ran out of cache memory at record "
541: + i
542: + " -- try increasing the Java heap size ***");
543: return;
544: }
545: out.print(".");
546: out.flush();
547: }
548: }
549: }
550:
551: private static void preloadRecords(final PrintStream out,
552: final Database db) throws DatabaseException {
553:
554: Thread thread = new Thread() {
555: public void run() {
556: while (true) {
557: try {
558: out.print(".");
559: out.flush();
560: Thread.sleep(5 * 1000);
561: } catch (InterruptedException e) {
562: break;
563: }
564: }
565: }
566: };
567: thread.start();
568: db.preload(PreloadConfig.DEFAULT);
569: thread.interrupt();
570: try {
571: thread.join();
572: } catch (InterruptedException e) {
573: e.printStackTrace(out);
574: }
575: }
576:
577: private static void printStats(PrintStream out, Environment env,
578: String msg) throws DatabaseException {
579:
580: out.println();
581: out.println(msg + ':');
582:
583: EnvironmentStats stats = env.getStats(null);
584:
585: out.println("CacheSize="
586: + INT_FORMAT.format(stats.getCacheTotalBytes())
587: + " BtreeSize="
588: + INT_FORMAT.format(stats.getCacheDataBytes())
589: + " NCacheMiss="
590: + INT_FORMAT.format(stats.getNCacheMiss()));
591:
592: if (stats.getNNodesScanned() > 0) {
593: out.println("*** All records did not fit in the cache ***");
594: }
595: }
596: }
|