001    /*
002     * JBoss DNA (http://www.jboss.org/dna)
003     * See the COPYRIGHT.txt file distributed with this work for information
004     * regarding copyright ownership.  Some portions may be licensed
005     * to Red Hat, Inc. under one or more contributor license agreements.
006     * See the AUTHORS.txt file in the distribution for a full listing of 
007     * individual contributors. 
008     *
009     * JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA
010     * is licensed to you under the terms of the GNU Lesser General Public License as
011     * published by the Free Software Foundation; either version 2.1 of
012     * the License, or (at your option) any later version.
013     *
014     * JBoss DNA is distributed in the hope that it will be useful,
015     * but WITHOUT ANY WARRANTY; without even the implied warranty of
016     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
017     * Lesser General Public License for more details.
018     *
019     * You should have received a copy of the GNU Lesser General Public
020     * License along with this software; if not, write to the Free
021     * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
022     * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
023     */
024    package org.jboss.dna.common.statistic;
025    
026    import java.util.Collections;
027    import java.util.Comparator;
028    import java.util.LinkedList;
029    import java.util.List;
030    import java.util.concurrent.locks.Lock;
031    import net.jcip.annotations.ThreadSafe;
032    import org.jboss.dna.common.math.MathOperations;
033    import org.jboss.dna.common.text.Inflector;
034    import org.jboss.dna.common.util.StringUtil;
035    
036    /**
037     * Encapsulation of the statistics for a series of values to which new values are frequently added. The statistics include the
038     * {@link #getMinimum() minimum}, {@link #getMaximum() maximum}, {@link #getTotal() total (aggregate sum)},
039     * {@link #getMean() mean (average)}, {@link #getMedian() median}, {@link #getStandardDeviation() standard deviation} and the
040     * {@link #getHistogram() histogram} of the values.
041     * <p>
042     * This class uses an efficient running calculation of the mean and standard deviation that is not as susceptible to roundoff
043     * errors as other traditional algorithms. The recursive algorithm is as follows, where M is the median value, sigma is the
044     * standard deviation, and S is a variable used in the calculation of sigma:
045     * 
046     * <pre>
047     *   M(1) = x(1)
048     *   S(1) = 0
049     *   M(k) = M(k-1) + ( x(k) - M(k-1) ) / k
050     *   S(k) = S(k-1) + ( x(k) - M(k-1) ) * (x(k) - M(k))
051     * </pre>
052     * 
053     * Then, the standard deviation for n values in x is
054     * 
055     * <pre>
056     * sigma = sqrt(S(n) / n)
057     * </pre>
058     * 
059     * </p>
060     * Unlike the other quantities, the median value (the value at which half of the values are greater and half the values are lower)
061     * cannot be calculated incrementally. Therefore, this class does record the values so that the median can be properly calculated.
062     * This fact should be kept in mind when performing statistics on large numbers of values.
063     * </p>
064     * <p>
065     * This class is threadsafe.
066     * </p>
067     * @param <T> the number type for these statistics
068     */
069    @ThreadSafe
070    public class DetailedStatistics<T extends Number> extends SimpleStatistics<T> {
071    
072        private T median;
073        private Double medianValue;
074        private double s = 0.0d; // used in the calculation of standard deviation (sigma)
075        private double sigma = 0.0d;
076        private final List<T> values = new LinkedList<T>();
077        private final List<T> unmodifiableValues = Collections.unmodifiableList(this.values);
078        private Histogram<T> histogram;
079    
080        public DetailedStatistics( MathOperations<T> operations ) {
081            super(operations);
082            this.medianValue = 0.0d;
083            this.median = this.math.createZeroValue();
084        }
085    
086        /**
087         * Get the values that have been recorded in these statistics. The contents of this list may change if new values are
088         * {@link #add(Number) added} in another thread.
089         * @return the unmodifiable collection of values, in insertion order
090         */
091        public List<T> getValues() {
092            return this.unmodifiableValues;
093        }
094    
095        @Override
096        protected void doAddValue( T value ) {
097            if (value == null) {
098                return;
099            }
100            double previousMean = this.getMeanValue();
101            super.doAddValue(value);
102            this.values.add(value);
103            this.medianValue = null;
104    
105            // Calculate the mean and standard deviation ...
106            int count = getCount();
107            if (count == 1) {
108                this.s = 0.0d;
109                this.sigma = 0.0d;
110            } else {
111                double dValue = value.doubleValue();
112                double dCount = count;
113                // M(k) = M(k-1) + ( x(k) - M(k-1) ) / k
114                double meanValue = previousMean + ((dValue - previousMean) / dCount);
115                // S(k) = S(k-1) + ( x(k) - M(k-1) ) * ( x(k) - M(k) )
116                this.s = this.s + (dValue - previousMean) * (dValue - meanValue);
117                // sigma = sqrt( S(n) / (n-1) )
118                this.sigma = Math.sqrt(this.s / dCount);
119            }
120        }
121    
122        /**
123         * Return the approximate mean (average) value represented as an instance of the operand type. Note that this may truncate if
124         * the operand type is not able to have the required precision. For the accurate mean, see {@link #getMedianValue() }.
125         * @return the mean (average), or 0.0 if the {@link #getCount() count} is 0
126         */
127        public T getMedian() {
128            getMedianValue();
129            return this.median;
130        }
131    
132        /**
133         * Return the median value.
134         * @return the median value, or 0.0 if the {@link #getCount() count} is 0
135         * @see #getMedian()
136         */
137        public double getMedianValue() {
138            Lock lock = this.getLock().writeLock();
139            try {
140                lock.lock();
141                int count = this.values.size();
142                if (count == 0) {
143                    return 0.0d;
144                }
145                if (this.medianValue == null) {
146                    // Sort the values in numerical order..
147                    Comparator<T> comparator = this.math.getComparator();
148                    Collections.sort(this.values, comparator);
149                    this.medianValue = 0.0d;
150                    // If there is only one value, then the median is that value ...
151                    if (count == 1) {
152                        this.medianValue = this.values.get(0).doubleValue();
153                    }
154                    // If there is an odd number of values, find value that is in the middle ..
155                    else if (count % 2 != 0) {
156                        this.medianValue = this.values.get(((count + 1) / 2) - 1).doubleValue();
157                    }
158                    // Otherwise, there is an even number of values, so find the average of the middle two values ...
159                    else {
160                        int upperMiddleValueIndex = count / 2;
161                        int lowerMiddleValueIndex = upperMiddleValueIndex - 1;
162                        double lowerValue = this.values.get(lowerMiddleValueIndex).doubleValue();
163                        double upperValue = this.values.get(upperMiddleValueIndex).doubleValue();
164                        this.medianValue = (lowerValue + upperValue) / 2.0d;
165                    }
166                    this.median = this.math.create(this.medianValue);
167                    this.histogram = null;
168                }
169            } finally {
170                lock.unlock();
171            }
172            return this.medianValue;
173        }
174    
175        /**
176         * Return the standard deviation. The standard deviation is a measure of the variation in a series of values. Values with a
177         * lower standard deviation has less variance in the values than a series of values with a higher standard deviation.
178         * @return the standard deviation, or 0.0 if the {@link #getCount() count} is 0 or if all of the values are the same.
179         */
180        public double getStandardDeviation() {
181            Lock lock = this.getLock().readLock();
182            lock.lock();
183            try {
184                return this.sigma;
185            } finally {
186                lock.unlock();
187            }
188        }
189    
190        /**
191         * Return the histogram of the {@link #getValues() values}. This method returns a histogram where all of the buckets are
192         * distributed normally and all have the same width. In this case, the 'numSigmas' should be set to 0. For other variations,
193         * see {@link #getHistogram(int)}.
194         * @return the histogram
195         * @see #getHistogram(int)
196         */
197        public Histogram<T> getHistogram() {
198            return getHistogram(0);
199        }
200    
201        /**
202         * Return the histogram of the {@link #getValues() values}. This method is capable of creating two kinds of histograms. The
203         * first kind is a histogram where all of the buckets are distributed normally and all have the same width. In this case, the
204         * 'numSigmas' should be set to 0. See {@link #getHistogram()}.
205         * <p>
206         * The second kind of histogram is more useful when most of the data that is clustered near one value. This histogram is
207         * focused around the values that are up to 'numSigmas' above and below the {@link #getMedian() median}, and all values
208         * outside of this range are placed in the first and last bucket.
209         * </p>
210         * @param numSigmas the number of standard deviations from the {@link #getMedian() median}, or 0 if the buckets of the
211         * histogram should be evenly distributed
212         * @return the histogram
213         * @see #getHistogram()
214         */
215        public Histogram<T> getHistogram( int numSigmas ) {
216            Lock lock = this.getLock().writeLock();
217            lock.lock();
218            try {
219                Histogram<T> hist = new Histogram<T>(this.math, this.values);
220                if (numSigmas > 0) {
221                    // The 'getMediaValue()' method will reset the current histogram, so don't set it...
222                    hist.setStrategy(this.getMedianValue(), this.getStandardDeviation(), numSigmas);
223                }
224                this.histogram = hist;
225                return this.histogram;
226            } finally {
227                lock.unlock();
228            }
229        }
230    
231        @Override
232        protected void doReset() {
233            super.doReset();
234            this.medianValue = 0.0d;
235            this.median = this.math.createZeroValue();
236            this.s = 0.0d;
237            this.sigma = 0.0d;
238            this.values.clear();
239        }
240    
241        @Override
242        public String toString() {
243            int count = this.getCount();
244            String samples = Inflector.getInstance().pluralize("sample", count);
245            return StringUtil.createString("{0} {1}: min={2}; avg={3}; median={4}; stddev={5}; max={6}", count, samples, this.getMinimum(), this.getMean(), this.getMedian(), this.getStandardDeviation(),
246                                           this.getMaximum());
247        }
248    
249    }