001    /*
002     * JBoss, Home of Professional Open Source.
003     * Copyright 2008, Red Hat Middleware LLC, and individual contributors
004     * as indicated by the @author tags. See the copyright.txt file in the
005     * distribution for a full listing of individual contributors. 
006     *
007     * This is free software; you can redistribute it and/or modify it
008     * under the terms of the GNU Lesser General Public License as
009     * published by the Free Software Foundation; either version 2.1 of
010     * the License, or (at your option) any later version.
011     *
012     * This software is distributed in the hope that it will be useful,
013     * but WITHOUT ANY WARRANTY; without even the implied warranty of
014     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015     * Lesser General Public License for more details.
016     *
017     * You should have received a copy of the GNU Lesser General Public
018     * License along with this software; if not, write to the Free
019     * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
020     * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
021     */
022    package org.jboss.dna.common.stats;
023    
024    import java.util.Collections;
025    import java.util.Comparator;
026    import java.util.LinkedList;
027    import java.util.List;
028    import java.util.concurrent.locks.Lock;
029    import net.jcip.annotations.ThreadSafe;
030    import org.jboss.dna.common.math.MathOperations;
031    import org.jboss.dna.common.text.Inflector;
032    import org.jboss.dna.common.util.StringUtil;
033    
034    /**
035     * Encapsulation of the statistics for a series of values to which new values are frequently added. The statistics include the
036     * {@link #getMinimum() minimum}, {@link #getMaximum() maximum}, {@link #getTotal() total (aggregate sum)},
037     * {@link #getMean() mean (average)}, {@link #getMedian() median}, {@link #getStandardDeviation() standard deviation} and the
038     * {@link #getHistogram() histogram} of the values.
039     * <p>
040     * This class uses an efficient running calculation of the mean and standard deviation that is not as susceptible to roundoff
041     * errors as other traditional algorithms. The recursive algorithm is as follows, where M is the median value, sigma is the
042     * standard deviation, and S is a variable used in the calculation of sigma:
043     * 
044     * <pre>
045     *   M(1) = x(1)
046     *   S(1) = 0
047     *   M(k) = M(k-1) + ( x(k) - M(k-1) ) / k
048     *   S(k) = S(k-1) + ( x(k) - M(k-1) ) * (x(k) - M(k))
049     * </pre>
050     * 
051     * Then, the standard deviation for n values in x is
052     * 
053     * <pre>
054     * sigma = sqrt(S(n) / n)
055     * </pre>
056     * 
057     * </p>
058     * Unlike the other quantities, the median value (the value at which half of the values are greater and half the values are lower)
059     * cannot be calculated incrementally. Therefore, this class does record the values so that the median can be properly calculated.
060     * This fact should be kept in mind when performing statistics on large numbers of values.
061     * </p>
062     * <p>
063     * This class is threadsafe.
064     * </p>
065     * @param <T> the number type for these statistics
066     */
067    @ThreadSafe
068    public class DetailedStatistics<T extends Number> extends SimpleStatistics<T> {
069    
070        private T median;
071        private Double medianValue;
072        private double s = 0.0d; // used in the calculation of standard deviation (sigma)
073        private double sigma = 0.0d;
074        private final List<T> values = new LinkedList<T>();
075        private final List<T> unmodifiableValues = Collections.unmodifiableList(this.values);
076        private Histogram<T> histogram;
077    
078        public DetailedStatistics( MathOperations<T> operations ) {
079            super(operations);
080            this.medianValue = 0.0d;
081            this.median = this.math.createZeroValue();
082        }
083    
084        /**
085         * Get the values that have been recorded in these statistics. The contents of this list may change if new values are
086         * {@link #add(Number) added} in another thread.
087         * @return the unmodifiable collection of values, in insertion order
088         */
089        public List<T> getValues() {
090            return this.unmodifiableValues;
091        }
092    
093        @Override
094        protected void doAddValue( T value ) {
095            if (value == null) {
096                return;
097            }
098            double previousMean = this.getMeanValue();
099            super.doAddValue(value);
100            this.values.add(value);
101            this.medianValue = null;
102    
103            // Calculate the mean and standard deviation ...
104            int count = getCount();
105            if (count == 1) {
106                this.s = 0.0d;
107                this.sigma = 0.0d;
108            } else {
109                double dValue = value.doubleValue();
110                double dCount = count;
111                // M(k) = M(k-1) + ( x(k) - M(k-1) ) / k
112                double meanValue = previousMean + ((dValue - previousMean) / dCount);
113                // S(k) = S(k-1) + ( x(k) - M(k-1) ) * ( x(k) - M(k) )
114                this.s = this.s + (dValue - previousMean) * (dValue - meanValue);
115                // sigma = sqrt( S(n) / (n-1) )
116                this.sigma = Math.sqrt(this.s / dCount);
117            }
118        }
119    
120        /**
121         * Return the approximate mean (average) value represented as an instance of the operand type. Note that this may truncate if
122         * the operand type is not able to have the required precision. For the accurate mean, see {@link #getMedianValue() }.
123         * @return the mean (average), or 0.0 if the {@link #getCount() count} is 0
124         */
125        public T getMedian() {
126            getMedianValue();
127            return this.median;
128        }
129    
130        /**
131         * Return the median value.
132         * @return the median value, or 0.0 if the {@link #getCount() count} is 0
133         * @see #getMedian()
134         */
135        public double getMedianValue() {
136            Lock lock = this.getLock().writeLock();
137            try {
138                lock.lock();
139                int count = this.values.size();
140                if (count == 0) {
141                    return 0.0d;
142                }
143                if (this.medianValue == null) {
144                    // Sort the values in numerical order..
145                    Comparator<T> comparator = this.math.getComparator();
146                    Collections.sort(this.values, comparator);
147                    this.medianValue = 0.0d;
148                    // If there is only one value, then the median is that value ...
149                    if (count == 1) {
150                        this.medianValue = this.values.get(0).doubleValue();
151                    }
152                    // If there is an odd number of values, find value that is in the middle ..
153                    else if (count % 2 != 0) {
154                        this.medianValue = this.values.get(((count + 1) / 2) - 1).doubleValue();
155                    }
156                    // Otherwise, there is an even number of values, so find the average of the middle two values ...
157                    else {
158                        int upperMiddleValueIndex = count / 2;
159                        int lowerMiddleValueIndex = upperMiddleValueIndex - 1;
160                        double lowerValue = this.values.get(lowerMiddleValueIndex).doubleValue();
161                        double upperValue = this.values.get(upperMiddleValueIndex).doubleValue();
162                        this.medianValue = (lowerValue + upperValue) / 2.0d;
163                    }
164                    this.median = this.math.create(this.medianValue);
165                    this.histogram = null;
166                }
167            } finally {
168                lock.unlock();
169            }
170            return this.medianValue;
171        }
172    
173        /**
174         * Return the standard deviation. The standard deviation is a measure of the variation in a series of values. Values with a
175         * lower standard deviation has less variance in the values than a series of values with a higher standard deviation.
176         * @return the standard deviation, or 0.0 if the {@link #getCount() count} is 0 or if all of the values are the same.
177         */
178        public double getStandardDeviation() {
179            Lock lock = this.getLock().readLock();
180            lock.lock();
181            try {
182                return this.sigma;
183            } finally {
184                lock.unlock();
185            }
186        }
187    
188        /**
189         * Return the histogram of the {@link #getValues() values}. This method returns a histogram where all of the buckets are
190         * distributed normally and all have the same width. In this case, the 'numSigmas' should be set to 0. For other variations,
191         * see {@link #getHistogram(int)}.
192         * @return the histogram
193         * @see #getHistogram(int)
194         */
195        public Histogram<T> getHistogram() {
196            return getHistogram(0);
197        }
198    
199        /**
200         * Return the histogram of the {@link #getValues() values}. This method is capable of creating two kinds of histograms. The
201         * first kind is a histogram where all of the buckets are distributed normally and all have the same width. In this case, the
202         * 'numSigmas' should be set to 0. See {@link #getHistogram()}.
203         * <p>
204         * The second kind of histogram is more useful when most of the data that is clustered near one value. This histogram is
205         * focused around the values that are up to 'numSigmas' above and below the {@link #getMedian() median}, and all values
206         * outside of this range are placed in the first and last bucket.
207         * </p>
208         * @param numSigmas the number of standard deviations from the {@link #getMedian() median}, or 0 if the buckets of the
209         * histogram should be evenly distributed
210         * @return the histogram
211         * @see #getHistogram()
212         */
213        public Histogram<T> getHistogram( int numSigmas ) {
214            Lock lock = this.getLock().writeLock();
215            lock.lock();
216            try {
217                Histogram<T> hist = new Histogram<T>(this.math, this.values);
218                if (numSigmas > 0) {
219                    // The 'getMediaValue()' method will reset the current histogram, so don't set it...
220                    hist.setStrategy(this.getMedianValue(), this.getStandardDeviation(), numSigmas);
221                }
222                this.histogram = hist;
223                return this.histogram;
224            } finally {
225                lock.unlock();
226            }
227        }
228    
229        @Override
230        protected void doReset() {
231            super.doReset();
232            this.medianValue = 0.0d;
233            this.median = this.math.createZeroValue();
234            this.s = 0.0d;
235            this.sigma = 0.0d;
236            this.values.clear();
237        }
238    
239        @Override
240        public String toString() {
241            int count = this.getCount();
242            String samples = Inflector.getInstance().pluralize("sample", count);
243            return StringUtil.createString("{0} {1}: min={2}; avg={3}; median={4}; stddev={5}; max={6}", count, samples, this.getMinimum(), this.getMean(), this.getMedian(), this.getStandardDeviation(),
244                                           this.getMaximum());
245        }
246    
247    }