001    /*
002     * JBoss, Home of Professional Open Source.
003     * Copyright 2008, Red Hat Middleware LLC, and individual contributors
004     * as indicated by the @author tags. See the copyright.txt file in the
005     * distribution for a full listing of individual contributors. 
006     *
007     * This is free software; you can redistribute it and/or modify it
008     * under the terms of the GNU Lesser General Public License as
009     * published by the Free Software Foundation; either version 2.1 of
010     * the License, or (at your option) any later version.
011     *
012     * This software is distributed in the hope that it will be useful,
013     * but WITHOUT ANY WARRANTY; without even the implied warranty of
014     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015     * Lesser General Public License for more details.
016     *
017     * You should have received a copy of the GNU Lesser General Public
018     * License along with this software; if not, write to the Free
019     * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
020     * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
021     */
022    package org.jboss.dna.common.stats;
023    
024    import java.math.BigDecimal;
025    import java.text.DecimalFormat;
026    import java.util.ArrayList;
027    import java.util.Arrays;
028    import java.util.Collections;
029    import java.util.Iterator;
030    import java.util.LinkedList;
031    import java.util.List;
032    import org.jboss.dna.common.math.MathOperations;
033    import org.jboss.dna.common.util.StringUtil;
034    
035    public class Histogram<T extends Number> {
036    
037        public static final int DEFAULT_BUCKET_COUNT = 10;
038        public static final int DEFAULT_SIGNIFICANT_FIGURES = 4;
039    
040        protected final MathOperations<T> math;
041        protected final List<T> values;
042        private int bucketCount = DEFAULT_BUCKET_COUNT;
043        private int significantFigures = DEFAULT_SIGNIFICANT_FIGURES;
044        private BigDecimal bucketWidth;
045        private LinkedList<Bucket> buckets;
046        private BucketingStrategy actualValueStrategy = new DefaultBucketingStrategy();
047        private BucketingStrategy bucketingStrategy = actualValueStrategy;
048    
049        public Histogram( MathOperations<T> operations, List<T> values ) {
050            this.math = operations;
051            this.values = new LinkedList<T>(values);
052            this.buckets = new LinkedList<Bucket>();
053            this.bucketWidth = null;
054            // Sort the data using natural order ...
055            Collections.sort(this.values, this.math.getComparator());
056        }
057    
058        public Histogram( MathOperations<T> operations, T... values ) {
059            this(operations, Arrays.asList(values));
060        }
061    
062        public BucketingStrategy getStrategy() {
063            return this.bucketingStrategy;
064        }
065    
066        /**
067         * @return math
068         */
069        public MathOperations<T> getMathOperations() {
070            return this.math;
071        }
072    
073        /**
074         * Set the histogram to use the standard deviation to determine the bucket sizes.
075         * @param median
076         * @param standardDeviation
077         * @param sigma
078         */
079        public void setStrategy( double median, double standardDeviation, int sigma ) {
080            this.bucketingStrategy = new StandardDeviationBucketingStrategy(median, standardDeviation, sigma);
081            this.bucketWidth = null;
082        }
083    
084        /**
085         * Set the histogram to use the supplied minimum and maximum values to determine the bucket size.
086         * @param minimum
087         * @param maximum
088         */
089        public void setStrategy( T minimum, T maximum ) {
090            this.bucketingStrategy = new ExplicitBucketingStrategy(minimum, maximum);
091            this.bucketWidth = null;
092        }
093    
094        /**
095         * Set the histogram to use the actual minimum and maximum values to determine the bucket sizes.
096         */
097        public void setStrategyToDefault() {
098            this.bucketingStrategy = this.actualValueStrategy;
099            this.bucketWidth = null;
100        }
101    
102        public int getSignificantFigures() {
103            return significantFigures;
104        }
105    
106        /**
107         * Set the number of significant figures used in the calculation of the bucket widths.
108         * @param significantFigures the number of significant figures for the bucket widths
109         * @return this histogram, useful for method-chaining
110         * @see #DEFAULT_SIGNIFICANT_FIGURES
111         */
112        public Histogram<T> setSignificantFigures( int significantFigures ) {
113            if (significantFigures != this.significantFigures) {
114                this.significantFigures = significantFigures;
115                this.bucketWidth = null;
116                this.buckets.clear();
117            }
118            return this;
119        }
120    
121        /**
122         * Return the number of buckets in this histogram.
123         * @return the number of buckets.
124         */
125        public int getBucketCount() {
126            return bucketCount;
127        }
128    
129        /**
130         * Set the number of buckets that this histogram will use.
131         * @param count the number of buckets
132         * @return this histogram, useful for method-chaining
133         * @see #DEFAULT_BUCKET_COUNT
134         */
135        public Histogram<T> setBucketCount( int count ) {
136            if (count != this.bucketCount) {
137                this.bucketCount = count;
138                this.bucketWidth = null;
139                this.buckets.clear();
140            }
141            return this;
142        }
143    
144        /**
145         * Get the buckets in this histogram. If the histogram has not yet been computed, this method will cause it to be generated.
146         * The resulting list should not be modified.
147         * @return the histogram buckets.
148         */
149        public List<Bucket> getBuckets() {
150            compute();
151            return this.buckets;
152        }
153    
154        protected void compute() {
155            // Only compute if there is not already a histogram ...
156            if (this.bucketWidth != null) return;
157    
158            // Find the lower and upper bounds of the histogram using the strategy ...
159            T lowerBound = this.bucketingStrategy.getLowerBound();
160            T upperBound = this.bucketingStrategy.getUpperBound();
161    
162            // Find the actual minimum and maximum values ...
163            T actualMinimum = this.actualValueStrategy.getLowerBound();
164            T actualMaximum = this.actualValueStrategy.getUpperBound();
165    
166            // Create the buckets ...
167            List<T> boundaries = getBucketBoundaries(this.math, lowerBound, upperBound, actualMinimum, actualMaximum, this.bucketCount, this.significantFigures);
168            this.buckets.clear();
169            int numBuckets = boundaries.isEmpty() ? 0 : boundaries.size() - 1;
170            for (int i = 0; i != numBuckets; ++i) {
171                this.buckets.add(new Bucket(boundaries.get(i), boundaries.get(i + 1)));
172            }
173    
174            // Create the histogram by adding values to each range ...
175            Iterator<Bucket> intervalIterator = this.buckets.iterator();
176            Bucket currentInterval = null;
177            for (T value : this.values) {
178                while (currentInterval == null || currentInterval.checkValue(value, !intervalIterator.hasNext()) > 0) {
179                    if (!intervalIterator.hasNext()) break;
180                    currentInterval = intervalIterator.next();
181                }
182                if (currentInterval != null) currentInterval.addValue(value);
183            }
184        }
185    
186        /**
187         * Return the total number of values that have gone into this histogram.
188         * @return the total number of values
189         * @see Bucket#getPercentageOfValues()
190         */
191        public long getTotalNumberOfValues() {
192            return this.values.size();
193        }
194    
195        protected float getMaximumPercentage() {
196            float maxPercentage = 0.0f;
197            for (Bucket bucket : this.buckets) {
198                maxPercentage = Math.max(maxPercentage, bucket.getPercentageOfValues());
199            }
200            return maxPercentage;
201        }
202    
203        protected long getMaximumCount() {
204            long maxCount = 0l;
205            for (Bucket bucket : this.buckets) {
206                maxCount = Math.max(maxCount, bucket.getNumberOfValues());
207            }
208            return maxCount;
209        }
210    
211        /**
212         * Generate a textual (horizontal) bar graph of this histogram.
213         * @param maxBarLength the maximum bar length, or 0 if the bar length is to represent actual counts
214         * @return the strings that make up the histogram
215         */
216        public List<String> getTextGraph( int maxBarLength ) {
217            compute();
218            if (maxBarLength < 1) maxBarLength = (int)this.getMaximumCount();
219            final float barLengthForHundredPercent = this.buckets.isEmpty() ? maxBarLength : 100.0f * maxBarLength / getMaximumPercentage();
220            final String fullLengthBar = StringUtil.createString('*', (int)barLengthForHundredPercent);
221            List<String> result = new LinkedList<String>();
222            // First calculate the labels and the max length ...
223            int maxLowerBoundLength = 0;
224            int maxUpperBoundLength = 0;
225            for (Bucket bucket : this.buckets) {
226                maxLowerBoundLength = Math.max(bucket.getLowerBound().toString().length(), maxLowerBoundLength);
227                maxUpperBoundLength = Math.max(bucket.getUpperBound().toString().length(), maxUpperBoundLength);
228            }
229    
230            // Create the header ...
231            int rangeWidth = 1 + maxLowerBoundLength + 3 + maxUpperBoundLength + 1;
232            int barWidth = maxBarLength + 20;
233            result.add(StringUtil.justifyLeft("Ranges", rangeWidth, ' ') + " Distribution");
234            result.add(StringUtil.createString('-', rangeWidth) + ' ' + StringUtil.createString('-', barWidth));
235            for (Bucket bucket : this.buckets) {
236                float percent = bucket.getPercentageOfValues();
237                long number = bucket.getNumberOfValues();
238                StringBuilder sb = new StringBuilder();
239                sb.append("[");
240                sb.append(StringUtil.justifyLeft(bucket.getLowerBound().toString(), maxLowerBoundLength, ' '));
241                sb.append(" - ");
242                sb.append(StringUtil.justifyLeft(bucket.getUpperBound().toString(), maxUpperBoundLength, ' '));
243                sb.append("] ");
244                int barLength = Math.max((int)(barLengthForHundredPercent * percent / 100.0f), 0);
245                if (barLength == 0 && number != 0) barLength = 1; // make sure there is a bar for all non-zero buckets
246                sb.append(fullLengthBar.substring(0, barLength));
247                if (number != 0) {
248                    sb.append(" ");
249                    sb.append(number);
250                    sb.append(" (");
251                    sb.append(new DecimalFormat("###.#").format(percent));
252                    sb.append("%)");
253                }
254                result.add(sb.toString());
255            }
256            return result;
257        }
258    
259        protected static <T> List<T> getBucketBoundaries( MathOperations<T> math, T lowerBound, T upperBound, T actualMinimum, T actualMaximum, int bucketCount, int bucketWidthSigFigs ) {
260            lowerBound = math.compare(lowerBound, actualMinimum) < 0 ? actualMinimum : lowerBound;
261            upperBound = math.compare(actualMaximum, upperBound) < 0 ? actualMaximum : upperBound;
262            if (math.compare(lowerBound, upperBound) == 0) {
263                List<T> boundaries = new ArrayList<T>();
264                boundaries.add(lowerBound);
265                boundaries.add(upperBound);
266                return boundaries;
267            }
268            final boolean extraLowerBucketNeeded = math.compare(lowerBound, actualMinimum) > 0;
269            final boolean extraUpperBucketNeeded = math.compare(actualMaximum, upperBound) > 0;
270            if (extraLowerBucketNeeded) --bucketCount;
271            if (extraUpperBucketNeeded) --bucketCount;
272    
273            // Compute the delta between the lower and upper bound ...
274            T totalWidth = math.subtract(upperBound, lowerBound);
275            int totalWidthScale = math.getExponentInScientificNotation(totalWidth);
276    
277            // Modify the lower bound by rounding down to the next lower meaningful value,
278            // using the scale of the totalWidth to determine how to round down.
279            T roundedLowerBound = math.roundDown(lowerBound, -totalWidthScale);
280            T roundedUpperBound = math.roundUp(upperBound, -totalWidthScale);
281    
282            // Create the ranges ...
283            double finalLowerBound = math.doubleValue(roundedLowerBound);
284            double finalUpperBound = math.doubleValue(roundedUpperBound);
285            double finalBucketCount = bucketCount;
286            double bucketWidth = (finalUpperBound - finalLowerBound) / finalBucketCount;
287    
288            // DoubleOperations doubleOps = new DoubleOperations();
289            // bucketWidth = doubleOps.keepSignificantFigures(bucketWidth,bucketWidthSigFigs);
290    
291            List<T> boundaries = new ArrayList<T>();
292            if (bucketWidth > 0.0d) {
293                if (extraLowerBucketNeeded) boundaries.add(actualMinimum);
294                double nextBoundary = finalLowerBound;
295                for (int i = 0; i != bucketCount; ++i) {
296                    boundaries.add(math.create(nextBoundary));
297                    nextBoundary = nextBoundary + bucketWidth;
298                    // nextBoundary = doubleOps.roundUp(nextBoundary + bucketWidth, bucketWidthSigFigs );
299                }
300                boundaries.add(roundedUpperBound);
301                if (extraUpperBucketNeeded) boundaries.add(actualMaximum);
302            }
303            return boundaries;
304        }
305    
306        /**
307         * Represents a bucket in a histogram.
308         */
309        public class Bucket implements Comparable<Bucket> {
310    
311            private final T lowerBound;
312            private final T upperBound;
313            private final T width;
314            private long numValues;
315    
316            protected Bucket( T lowerBound, T upperBound ) {
317                this.lowerBound = lowerBound;
318                this.upperBound = upperBound;
319                this.width = Histogram.this.math.subtract(upperBound, lowerBound);
320            }
321    
322            /**
323             * Get the lower bound of this bucket.
324             * @return the lower bound
325             */
326            public T getLowerBound() {
327                return lowerBound;
328            }
329    
330            /**
331             * Get the upper bound of this bucket.
332             * @return the upper bound
333             */
334            public T getUpperBound() {
335                return upperBound;
336            }
337    
338            /**
339             * Get the width of this bucket.
340             * @return the width
341             */
342            public T getWidth() {
343                return this.width;
344            }
345    
346            /**
347             * Return the percentage of values in the histogram that appear in this bucket.
348             * @return the percentage of all values in the histogram that appear in this bucket.
349             */
350            public float getPercentageOfValues() {
351                float total = Histogram.this.getTotalNumberOfValues();
352                if (total == 0.0f) return 0.0f;
353                float numValuesFloat = this.numValues;
354                return 100.0f * numValuesFloat / total;
355            }
356    
357            /**
358             * Add a value to this bucket
359             * @param value
360             */
361            protected void addValue( T value ) {
362                ++this.numValues;
363            }
364    
365            /**
366             * Get the number of values in this bucket.
367             * @return the number of values
368             */
369            public long getNumberOfValues() {
370                return this.numValues;
371            }
372    
373            /**
374             * Check whether the value fits in this bucket.
375             * @param value the value to check
376             * @param isLast
377             * @return 0 if the value fits in this bucket, -1 if the value fits in a prior bucket, or 1 if the value fits in a later
378             * bucket
379             */
380            public int checkValue( T value, boolean isLast ) {
381                if (Histogram.this.math.compare(this.lowerBound, value) > 0) return -1;
382                if (isLast) {
383                    if (Histogram.this.math.compare(value, this.upperBound) > 0) return 1;
384                } else {
385                    if (Histogram.this.math.compare(value, this.upperBound) >= 0) return 1;
386                }
387                return 0;
388            }
389    
390            public int compareTo( Bucket that ) {
391                // This is lower if 'that' has a lowerBound that is greater than 'this' lower bound ...
392                if (Histogram.this.math.compare(this.lowerBound, that.lowerBound) < 0) return -1;
393                if (Histogram.this.math.compare(this.lowerBound, that.lowerBound) > 0) return 1;
394                // The lower bounds are the same, so 'this' is lower if 'that' has an upperBound that is greater than 'this' lower
395                // bound ...
396                if (Histogram.this.math.compare(this.upperBound, that.upperBound) < 0) return -1;
397                if (Histogram.this.math.compare(this.upperBound, that.upperBound) > 0) return 1;
398                return 0;
399            }
400    
401            protected Class<T> getNumberClass() {
402                return Histogram.this.math.getOperandClass();
403            }
404    
405            @Override
406            public boolean equals( Object obj ) {
407                if (obj != null && obj.getClass() == this.getClass()) {
408                    Bucket that = (Bucket)obj;
409                    if (this.getNumberClass().isAssignableFrom(that.getNumberClass())) {
410                        if (Histogram.this.math.compare(this.lowerBound, that.lowerBound) != 0) return false;
411                        if (Histogram.this.math.compare(this.upperBound, that.upperBound) != 0) return false;
412                        if (Histogram.this.math.compare(this.width, that.width) != 0) return false;
413                        return true;
414                    }
415                }
416                return false;
417            }
418    
419            @Override
420            public String toString() {
421                return "[" + this.lowerBound + "," + this.upperBound + ")";
422            }
423    
424        }
425    
426        public abstract class BucketingStrategy {
427    
428            public List<T> getValues() {
429                return Histogram.this.values;
430            }
431    
432            public abstract T getLowerBound();
433    
434            public abstract T getUpperBound();
435        }
436    
437        public class DefaultBucketingStrategy extends BucketingStrategy {
438    
439            @Override
440            public T getLowerBound() {
441                if (getValues().isEmpty()) return Histogram.this.math.createZeroValue();
442                return getValues().get(0);
443            }
444    
445            @Override
446            public T getUpperBound() {
447                if (getValues().isEmpty()) return Histogram.this.math.createZeroValue();
448                return getValues().get(getValues().size() - 1);
449            }
450        }
451    
452        public class ExplicitBucketingStrategy extends BucketingStrategy {
453    
454            private final T lowerBound;
455            private final T upperBound;
456    
457            protected ExplicitBucketingStrategy( T lowerBound, T upperBound ) {
458                this.lowerBound = lowerBound;
459                this.upperBound = upperBound;
460            }
461    
462            @Override
463            public T getLowerBound() {
464                return this.lowerBound;
465            }
466    
467            @Override
468            public T getUpperBound() {
469                return this.upperBound;
470            }
471        }
472    
473        public class StandardDeviationBucketingStrategy extends BucketingStrategy {
474    
475            private final double median;
476            private final double standardDeviation;
477            private final int numberOfDeviationsAboveAndBelow;
478    
479            protected StandardDeviationBucketingStrategy( double median, double standardDeviation, int numDeviationsAboveAndBelow ) {
480                this.median = median;
481                this.standardDeviation = Math.abs(standardDeviation);
482                this.numberOfDeviationsAboveAndBelow = Math.abs(numDeviationsAboveAndBelow);
483            }
484    
485            @Override
486            public T getLowerBound() {
487                double lower = this.median - (standardDeviation * numberOfDeviationsAboveAndBelow);
488                return Histogram.this.math.create(lower);
489            }
490    
491            @Override
492            public T getUpperBound() {
493                double upper = this.median + (standardDeviation * numberOfDeviationsAboveAndBelow);
494                return Histogram.this.math.create(upper);
495            }
496        }
497    
498    }