001 /* 002 * JBoss, Home of Professional Open Source. 003 * Copyright 2008, Red Hat Middleware LLC, and individual contributors 004 * as indicated by the @author tags. See the copyright.txt file in the 005 * distribution for a full listing of individual contributors. 006 * 007 * This is free software; you can redistribute it and/or modify it 008 * under the terms of the GNU Lesser General Public License as 009 * published by the Free Software Foundation; either version 2.1 of 010 * the License, or (at your option) any later version. 011 * 012 * This software is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * You should have received a copy of the GNU Lesser General Public 018 * License along with this software; if not, write to the Free 019 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 020 * 02110-1301 USA, or see the FSF site: http://www.fsf.org. 021 */ 022 package org.jboss.dna.common.stats; 023 024 import java.math.BigDecimal; 025 import java.text.DecimalFormat; 026 import java.util.ArrayList; 027 import java.util.Arrays; 028 import java.util.Collections; 029 import java.util.Iterator; 030 import java.util.LinkedList; 031 import java.util.List; 032 import org.jboss.dna.common.math.MathOperations; 033 import org.jboss.dna.common.util.StringUtil; 034 035 public class Histogram<T extends Number> { 036 037 public static final int DEFAULT_BUCKET_COUNT = 10; 038 public static final int DEFAULT_SIGNIFICANT_FIGURES = 4; 039 040 protected final MathOperations<T> math; 041 protected final List<T> values; 042 private int bucketCount = DEFAULT_BUCKET_COUNT; 043 private int significantFigures = DEFAULT_SIGNIFICANT_FIGURES; 044 private BigDecimal bucketWidth; 045 private LinkedList<Bucket> buckets; 046 private BucketingStrategy actualValueStrategy = new DefaultBucketingStrategy(); 047 private BucketingStrategy bucketingStrategy = actualValueStrategy; 048 049 public Histogram( MathOperations<T> operations, List<T> values ) { 050 this.math = operations; 051 this.values = new LinkedList<T>(values); 052 this.buckets = new LinkedList<Bucket>(); 053 this.bucketWidth = null; 054 // Sort the data using natural order ... 055 Collections.sort(this.values, this.math.getComparator()); 056 } 057 058 public Histogram( MathOperations<T> operations, T... values ) { 059 this(operations, Arrays.asList(values)); 060 } 061 062 public BucketingStrategy getStrategy() { 063 return this.bucketingStrategy; 064 } 065 066 /** 067 * @return math 068 */ 069 public MathOperations<T> getMathOperations() { 070 return this.math; 071 } 072 073 /** 074 * Set the histogram to use the standard deviation to determine the bucket sizes. 075 * @param median 076 * @param standardDeviation 077 * @param sigma 078 */ 079 public void setStrategy( double median, double standardDeviation, int sigma ) { 080 this.bucketingStrategy = new StandardDeviationBucketingStrategy(median, standardDeviation, sigma); 081 this.bucketWidth = null; 082 } 083 084 /** 085 * Set the histogram to use the supplied minimum and maximum values to determine the bucket size. 086 * @param minimum 087 * @param maximum 088 */ 089 public void setStrategy( T minimum, T maximum ) { 090 this.bucketingStrategy = new ExplicitBucketingStrategy(minimum, maximum); 091 this.bucketWidth = null; 092 } 093 094 /** 095 * Set the histogram to use the actual minimum and maximum values to determine the bucket sizes. 096 */ 097 public void setStrategyToDefault() { 098 this.bucketingStrategy = this.actualValueStrategy; 099 this.bucketWidth = null; 100 } 101 102 public int getSignificantFigures() { 103 return significantFigures; 104 } 105 106 /** 107 * Set the number of significant figures used in the calculation of the bucket widths. 108 * @param significantFigures the number of significant figures for the bucket widths 109 * @return this histogram, useful for method-chaining 110 * @see #DEFAULT_SIGNIFICANT_FIGURES 111 */ 112 public Histogram<T> setSignificantFigures( int significantFigures ) { 113 if (significantFigures != this.significantFigures) { 114 this.significantFigures = significantFigures; 115 this.bucketWidth = null; 116 this.buckets.clear(); 117 } 118 return this; 119 } 120 121 /** 122 * Return the number of buckets in this histogram. 123 * @return the number of buckets. 124 */ 125 public int getBucketCount() { 126 return bucketCount; 127 } 128 129 /** 130 * Set the number of buckets that this histogram will use. 131 * @param count the number of buckets 132 * @return this histogram, useful for method-chaining 133 * @see #DEFAULT_BUCKET_COUNT 134 */ 135 public Histogram<T> setBucketCount( int count ) { 136 if (count != this.bucketCount) { 137 this.bucketCount = count; 138 this.bucketWidth = null; 139 this.buckets.clear(); 140 } 141 return this; 142 } 143 144 /** 145 * Get the buckets in this histogram. If the histogram has not yet been computed, this method will cause it to be generated. 146 * The resulting list should not be modified. 147 * @return the histogram buckets. 148 */ 149 public List<Bucket> getBuckets() { 150 compute(); 151 return this.buckets; 152 } 153 154 protected void compute() { 155 // Only compute if there is not already a histogram ... 156 if (this.bucketWidth != null) return; 157 158 // Find the lower and upper bounds of the histogram using the strategy ... 159 T lowerBound = this.bucketingStrategy.getLowerBound(); 160 T upperBound = this.bucketingStrategy.getUpperBound(); 161 162 // Find the actual minimum and maximum values ... 163 T actualMinimum = this.actualValueStrategy.getLowerBound(); 164 T actualMaximum = this.actualValueStrategy.getUpperBound(); 165 166 // Create the buckets ... 167 List<T> boundaries = getBucketBoundaries(this.math, lowerBound, upperBound, actualMinimum, actualMaximum, this.bucketCount, this.significantFigures); 168 this.buckets.clear(); 169 int numBuckets = boundaries.isEmpty() ? 0 : boundaries.size() - 1; 170 for (int i = 0; i != numBuckets; ++i) { 171 this.buckets.add(new Bucket(boundaries.get(i), boundaries.get(i + 1))); 172 } 173 174 // Create the histogram by adding values to each range ... 175 Iterator<Bucket> intervalIterator = this.buckets.iterator(); 176 Bucket currentInterval = null; 177 for (T value : this.values) { 178 while (currentInterval == null || currentInterval.checkValue(value, !intervalIterator.hasNext()) > 0) { 179 if (!intervalIterator.hasNext()) break; 180 currentInterval = intervalIterator.next(); 181 } 182 if (currentInterval != null) currentInterval.addValue(value); 183 } 184 } 185 186 /** 187 * Return the total number of values that have gone into this histogram. 188 * @return the total number of values 189 * @see Bucket#getPercentageOfValues() 190 */ 191 public long getTotalNumberOfValues() { 192 return this.values.size(); 193 } 194 195 protected float getMaximumPercentage() { 196 float maxPercentage = 0.0f; 197 for (Bucket bucket : this.buckets) { 198 maxPercentage = Math.max(maxPercentage, bucket.getPercentageOfValues()); 199 } 200 return maxPercentage; 201 } 202 203 protected long getMaximumCount() { 204 long maxCount = 0l; 205 for (Bucket bucket : this.buckets) { 206 maxCount = Math.max(maxCount, bucket.getNumberOfValues()); 207 } 208 return maxCount; 209 } 210 211 /** 212 * Generate a textual (horizontal) bar graph of this histogram. 213 * @param maxBarLength the maximum bar length, or 0 if the bar length is to represent actual counts 214 * @return the strings that make up the histogram 215 */ 216 public List<String> getTextGraph( int maxBarLength ) { 217 compute(); 218 if (maxBarLength < 1) maxBarLength = (int)this.getMaximumCount(); 219 final float barLengthForHundredPercent = this.buckets.isEmpty() ? maxBarLength : 100.0f * maxBarLength / getMaximumPercentage(); 220 final String fullLengthBar = StringUtil.createString('*', (int)barLengthForHundredPercent); 221 List<String> result = new LinkedList<String>(); 222 // First calculate the labels and the max length ... 223 int maxLowerBoundLength = 0; 224 int maxUpperBoundLength = 0; 225 for (Bucket bucket : this.buckets) { 226 maxLowerBoundLength = Math.max(bucket.getLowerBound().toString().length(), maxLowerBoundLength); 227 maxUpperBoundLength = Math.max(bucket.getUpperBound().toString().length(), maxUpperBoundLength); 228 } 229 230 // Create the header ... 231 int rangeWidth = 1 + maxLowerBoundLength + 3 + maxUpperBoundLength + 1; 232 int barWidth = maxBarLength + 20; 233 result.add(StringUtil.justifyLeft("Ranges", rangeWidth, ' ') + " Distribution"); 234 result.add(StringUtil.createString('-', rangeWidth) + ' ' + StringUtil.createString('-', barWidth)); 235 for (Bucket bucket : this.buckets) { 236 float percent = bucket.getPercentageOfValues(); 237 long number = bucket.getNumberOfValues(); 238 StringBuilder sb = new StringBuilder(); 239 sb.append("["); 240 sb.append(StringUtil.justifyLeft(bucket.getLowerBound().toString(), maxLowerBoundLength, ' ')); 241 sb.append(" - "); 242 sb.append(StringUtil.justifyLeft(bucket.getUpperBound().toString(), maxUpperBoundLength, ' ')); 243 sb.append("] "); 244 int barLength = Math.max((int)(barLengthForHundredPercent * percent / 100.0f), 0); 245 if (barLength == 0 && number != 0) barLength = 1; // make sure there is a bar for all non-zero buckets 246 sb.append(fullLengthBar.substring(0, barLength)); 247 if (number != 0) { 248 sb.append(" "); 249 sb.append(number); 250 sb.append(" ("); 251 sb.append(new DecimalFormat("###.#").format(percent)); 252 sb.append("%)"); 253 } 254 result.add(sb.toString()); 255 } 256 return result; 257 } 258 259 protected static <T> List<T> getBucketBoundaries( MathOperations<T> math, T lowerBound, T upperBound, T actualMinimum, T actualMaximum, int bucketCount, int bucketWidthSigFigs ) { 260 lowerBound = math.compare(lowerBound, actualMinimum) < 0 ? actualMinimum : lowerBound; 261 upperBound = math.compare(actualMaximum, upperBound) < 0 ? actualMaximum : upperBound; 262 if (math.compare(lowerBound, upperBound) == 0) { 263 List<T> boundaries = new ArrayList<T>(); 264 boundaries.add(lowerBound); 265 boundaries.add(upperBound); 266 return boundaries; 267 } 268 final boolean extraLowerBucketNeeded = math.compare(lowerBound, actualMinimum) > 0; 269 final boolean extraUpperBucketNeeded = math.compare(actualMaximum, upperBound) > 0; 270 if (extraLowerBucketNeeded) --bucketCount; 271 if (extraUpperBucketNeeded) --bucketCount; 272 273 // Compute the delta between the lower and upper bound ... 274 T totalWidth = math.subtract(upperBound, lowerBound); 275 int totalWidthScale = math.getExponentInScientificNotation(totalWidth); 276 277 // Modify the lower bound by rounding down to the next lower meaningful value, 278 // using the scale of the totalWidth to determine how to round down. 279 T roundedLowerBound = math.roundDown(lowerBound, -totalWidthScale); 280 T roundedUpperBound = math.roundUp(upperBound, -totalWidthScale); 281 282 // Create the ranges ... 283 double finalLowerBound = math.doubleValue(roundedLowerBound); 284 double finalUpperBound = math.doubleValue(roundedUpperBound); 285 double finalBucketCount = bucketCount; 286 double bucketWidth = (finalUpperBound - finalLowerBound) / finalBucketCount; 287 288 // DoubleOperations doubleOps = new DoubleOperations(); 289 // bucketWidth = doubleOps.keepSignificantFigures(bucketWidth,bucketWidthSigFigs); 290 291 List<T> boundaries = new ArrayList<T>(); 292 if (bucketWidth > 0.0d) { 293 if (extraLowerBucketNeeded) boundaries.add(actualMinimum); 294 double nextBoundary = finalLowerBound; 295 for (int i = 0; i != bucketCount; ++i) { 296 boundaries.add(math.create(nextBoundary)); 297 nextBoundary = nextBoundary + bucketWidth; 298 // nextBoundary = doubleOps.roundUp(nextBoundary + bucketWidth, bucketWidthSigFigs ); 299 } 300 boundaries.add(roundedUpperBound); 301 if (extraUpperBucketNeeded) boundaries.add(actualMaximum); 302 } 303 return boundaries; 304 } 305 306 /** 307 * Represents a bucket in a histogram. 308 */ 309 public class Bucket implements Comparable<Bucket> { 310 311 private final T lowerBound; 312 private final T upperBound; 313 private final T width; 314 private long numValues; 315 316 protected Bucket( T lowerBound, T upperBound ) { 317 this.lowerBound = lowerBound; 318 this.upperBound = upperBound; 319 this.width = Histogram.this.math.subtract(upperBound, lowerBound); 320 } 321 322 /** 323 * Get the lower bound of this bucket. 324 * @return the lower bound 325 */ 326 public T getLowerBound() { 327 return lowerBound; 328 } 329 330 /** 331 * Get the upper bound of this bucket. 332 * @return the upper bound 333 */ 334 public T getUpperBound() { 335 return upperBound; 336 } 337 338 /** 339 * Get the width of this bucket. 340 * @return the width 341 */ 342 public T getWidth() { 343 return this.width; 344 } 345 346 /** 347 * Return the percentage of values in the histogram that appear in this bucket. 348 * @return the percentage of all values in the histogram that appear in this bucket. 349 */ 350 public float getPercentageOfValues() { 351 float total = Histogram.this.getTotalNumberOfValues(); 352 if (total == 0.0f) return 0.0f; 353 float numValuesFloat = this.numValues; 354 return 100.0f * numValuesFloat / total; 355 } 356 357 /** 358 * Add a value to this bucket 359 * @param value 360 */ 361 protected void addValue( T value ) { 362 ++this.numValues; 363 } 364 365 /** 366 * Get the number of values in this bucket. 367 * @return the number of values 368 */ 369 public long getNumberOfValues() { 370 return this.numValues; 371 } 372 373 /** 374 * Check whether the value fits in this bucket. 375 * @param value the value to check 376 * @param isLast 377 * @return 0 if the value fits in this bucket, -1 if the value fits in a prior bucket, or 1 if the value fits in a later 378 * bucket 379 */ 380 public int checkValue( T value, boolean isLast ) { 381 if (Histogram.this.math.compare(this.lowerBound, value) > 0) return -1; 382 if (isLast) { 383 if (Histogram.this.math.compare(value, this.upperBound) > 0) return 1; 384 } else { 385 if (Histogram.this.math.compare(value, this.upperBound) >= 0) return 1; 386 } 387 return 0; 388 } 389 390 public int compareTo( Bucket that ) { 391 // This is lower if 'that' has a lowerBound that is greater than 'this' lower bound ... 392 if (Histogram.this.math.compare(this.lowerBound, that.lowerBound) < 0) return -1; 393 if (Histogram.this.math.compare(this.lowerBound, that.lowerBound) > 0) return 1; 394 // The lower bounds are the same, so 'this' is lower if 'that' has an upperBound that is greater than 'this' lower 395 // bound ... 396 if (Histogram.this.math.compare(this.upperBound, that.upperBound) < 0) return -1; 397 if (Histogram.this.math.compare(this.upperBound, that.upperBound) > 0) return 1; 398 return 0; 399 } 400 401 protected Class<T> getNumberClass() { 402 return Histogram.this.math.getOperandClass(); 403 } 404 405 @Override 406 public boolean equals( Object obj ) { 407 if (obj != null && obj.getClass() == this.getClass()) { 408 Bucket that = (Bucket)obj; 409 if (this.getNumberClass().isAssignableFrom(that.getNumberClass())) { 410 if (Histogram.this.math.compare(this.lowerBound, that.lowerBound) != 0) return false; 411 if (Histogram.this.math.compare(this.upperBound, that.upperBound) != 0) return false; 412 if (Histogram.this.math.compare(this.width, that.width) != 0) return false; 413 return true; 414 } 415 } 416 return false; 417 } 418 419 @Override 420 public String toString() { 421 return "[" + this.lowerBound + "," + this.upperBound + ")"; 422 } 423 424 } 425 426 public abstract class BucketingStrategy { 427 428 public List<T> getValues() { 429 return Histogram.this.values; 430 } 431 432 public abstract T getLowerBound(); 433 434 public abstract T getUpperBound(); 435 } 436 437 public class DefaultBucketingStrategy extends BucketingStrategy { 438 439 @Override 440 public T getLowerBound() { 441 if (getValues().isEmpty()) return Histogram.this.math.createZeroValue(); 442 return getValues().get(0); 443 } 444 445 @Override 446 public T getUpperBound() { 447 if (getValues().isEmpty()) return Histogram.this.math.createZeroValue(); 448 return getValues().get(getValues().size() - 1); 449 } 450 } 451 452 public class ExplicitBucketingStrategy extends BucketingStrategy { 453 454 private final T lowerBound; 455 private final T upperBound; 456 457 protected ExplicitBucketingStrategy( T lowerBound, T upperBound ) { 458 this.lowerBound = lowerBound; 459 this.upperBound = upperBound; 460 } 461 462 @Override 463 public T getLowerBound() { 464 return this.lowerBound; 465 } 466 467 @Override 468 public T getUpperBound() { 469 return this.upperBound; 470 } 471 } 472 473 public class StandardDeviationBucketingStrategy extends BucketingStrategy { 474 475 private final double median; 476 private final double standardDeviation; 477 private final int numberOfDeviationsAboveAndBelow; 478 479 protected StandardDeviationBucketingStrategy( double median, double standardDeviation, int numDeviationsAboveAndBelow ) { 480 this.median = median; 481 this.standardDeviation = Math.abs(standardDeviation); 482 this.numberOfDeviationsAboveAndBelow = Math.abs(numDeviationsAboveAndBelow); 483 } 484 485 @Override 486 public T getLowerBound() { 487 double lower = this.median - (standardDeviation * numberOfDeviationsAboveAndBelow); 488 return Histogram.this.math.create(lower); 489 } 490 491 @Override 492 public T getUpperBound() { 493 double upper = this.median + (standardDeviation * numberOfDeviationsAboveAndBelow); 494 return Histogram.this.math.create(upper); 495 } 496 } 497 498 }