001 /* 002 * JBoss DNA (http://www.jboss.org/dna) 003 * See the COPYRIGHT.txt file distributed with this work for information 004 * regarding copyright ownership. Some portions may be licensed 005 * to Red Hat, Inc. under one or more contributor license agreements. 006 * See the AUTHORS.txt file in the distribution for a full listing of 007 * individual contributors. 008 * 009 * JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA 010 * is licensed to you under the terms of the GNU Lesser General Public License as 011 * published by the Free Software Foundation; either version 2.1 of 012 * the License, or (at your option) any later version. 013 * 014 * JBoss DNA is distributed in the hope that it will be useful, 015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 017 * Lesser General Public License for more details. 018 * 019 * You should have received a copy of the GNU Lesser General Public 020 * License along with this software; if not, write to the Free 021 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 022 * 02110-1301 USA, or see the FSF site: http://www.fsf.org. 023 */ 024 package org.jboss.dna.common.statistic; 025 026 import java.util.Collections; 027 import java.util.Comparator; 028 import java.util.LinkedList; 029 import java.util.List; 030 import java.util.concurrent.locks.Lock; 031 import net.jcip.annotations.ThreadSafe; 032 import org.jboss.dna.common.math.MathOperations; 033 import org.jboss.dna.common.text.Inflector; 034 import org.jboss.dna.common.util.StringUtil; 035 036 /** 037 * Encapsulation of the statistics for a series of values to which new values are frequently added. The statistics include the 038 * {@link #getMinimum() minimum}, {@link #getMaximum() maximum}, {@link #getTotal() total (aggregate sum)}, 039 * {@link #getMean() mean (average)}, {@link #getMedian() median}, {@link #getStandardDeviation() standard deviation} and the 040 * {@link #getHistogram() histogram} of the values. 041 * <p> 042 * This class uses an efficient running calculation of the mean and standard deviation that is not as susceptible to roundoff 043 * errors as other traditional algorithms. The recursive algorithm is as follows, where M is the median value, sigma is the 044 * standard deviation, and S is a variable used in the calculation of sigma: 045 * 046 * <pre> 047 * M(1) = x(1) 048 * S(1) = 0 049 * M(k) = M(k-1) + ( x(k) - M(k-1) ) / k 050 * S(k) = S(k-1) + ( x(k) - M(k-1) ) * (x(k) - M(k)) 051 * </pre> 052 * 053 * Then, the standard deviation for n values in x is 054 * 055 * <pre> 056 * sigma = sqrt(S(n) / n) 057 * </pre> 058 * 059 * </p> 060 * Unlike the other quantities, the median value (the value at which half of the values are greater and half the values are lower) 061 * cannot be calculated incrementally. Therefore, this class does record the values so that the median can be properly calculated. 062 * This fact should be kept in mind when performing statistics on large numbers of values. 063 * </p> 064 * <p> 065 * This class is threadsafe. 066 * </p> 067 * @param <T> the number type for these statistics 068 */ 069 @ThreadSafe 070 public class DetailedStatistics<T extends Number> extends SimpleStatistics<T> { 071 072 private T median; 073 private Double medianValue; 074 private double s = 0.0d; // used in the calculation of standard deviation (sigma) 075 private double sigma = 0.0d; 076 private final List<T> values = new LinkedList<T>(); 077 private final List<T> unmodifiableValues = Collections.unmodifiableList(this.values); 078 private Histogram<T> histogram; 079 080 public DetailedStatistics( MathOperations<T> operations ) { 081 super(operations); 082 this.medianValue = 0.0d; 083 this.median = this.math.createZeroValue(); 084 } 085 086 /** 087 * Get the values that have been recorded in these statistics. The contents of this list may change if new values are 088 * {@link #add(Number) added} in another thread. 089 * @return the unmodifiable collection of values, in insertion order 090 */ 091 public List<T> getValues() { 092 return this.unmodifiableValues; 093 } 094 095 @Override 096 protected void doAddValue( T value ) { 097 if (value == null) { 098 return; 099 } 100 double previousMean = this.getMeanValue(); 101 super.doAddValue(value); 102 this.values.add(value); 103 this.medianValue = null; 104 105 // Calculate the mean and standard deviation ... 106 int count = getCount(); 107 if (count == 1) { 108 this.s = 0.0d; 109 this.sigma = 0.0d; 110 } else { 111 double dValue = value.doubleValue(); 112 double dCount = count; 113 // M(k) = M(k-1) + ( x(k) - M(k-1) ) / k 114 double meanValue = previousMean + ((dValue - previousMean) / dCount); 115 // S(k) = S(k-1) + ( x(k) - M(k-1) ) * ( x(k) - M(k) ) 116 this.s = this.s + (dValue - previousMean) * (dValue - meanValue); 117 // sigma = sqrt( S(n) / (n-1) ) 118 this.sigma = Math.sqrt(this.s / dCount); 119 } 120 } 121 122 /** 123 * Return the approximate mean (average) value represented as an instance of the operand type. Note that this may truncate if 124 * the operand type is not able to have the required precision. For the accurate mean, see {@link #getMedianValue() }. 125 * @return the mean (average), or 0.0 if the {@link #getCount() count} is 0 126 */ 127 public T getMedian() { 128 getMedianValue(); 129 return this.median; 130 } 131 132 /** 133 * Return the median value. 134 * @return the median value, or 0.0 if the {@link #getCount() count} is 0 135 * @see #getMedian() 136 */ 137 public double getMedianValue() { 138 Lock lock = this.getLock().writeLock(); 139 try { 140 lock.lock(); 141 int count = this.values.size(); 142 if (count == 0) { 143 return 0.0d; 144 } 145 if (this.medianValue == null) { 146 // Sort the values in numerical order.. 147 Comparator<T> comparator = this.math.getComparator(); 148 Collections.sort(this.values, comparator); 149 this.medianValue = 0.0d; 150 // If there is only one value, then the median is that value ... 151 if (count == 1) { 152 this.medianValue = this.values.get(0).doubleValue(); 153 } 154 // If there is an odd number of values, find value that is in the middle .. 155 else if (count % 2 != 0) { 156 this.medianValue = this.values.get(((count + 1) / 2) - 1).doubleValue(); 157 } 158 // Otherwise, there is an even number of values, so find the average of the middle two values ... 159 else { 160 int upperMiddleValueIndex = count / 2; 161 int lowerMiddleValueIndex = upperMiddleValueIndex - 1; 162 double lowerValue = this.values.get(lowerMiddleValueIndex).doubleValue(); 163 double upperValue = this.values.get(upperMiddleValueIndex).doubleValue(); 164 this.medianValue = (lowerValue + upperValue) / 2.0d; 165 } 166 this.median = this.math.create(this.medianValue); 167 this.histogram = null; 168 } 169 } finally { 170 lock.unlock(); 171 } 172 return this.medianValue; 173 } 174 175 /** 176 * Return the standard deviation. The standard deviation is a measure of the variation in a series of values. Values with a 177 * lower standard deviation has less variance in the values than a series of values with a higher standard deviation. 178 * @return the standard deviation, or 0.0 if the {@link #getCount() count} is 0 or if all of the values are the same. 179 */ 180 public double getStandardDeviation() { 181 Lock lock = this.getLock().readLock(); 182 lock.lock(); 183 try { 184 return this.sigma; 185 } finally { 186 lock.unlock(); 187 } 188 } 189 190 /** 191 * Return the histogram of the {@link #getValues() values}. This method returns a histogram where all of the buckets are 192 * distributed normally and all have the same width. In this case, the 'numSigmas' should be set to 0. For other variations, 193 * see {@link #getHistogram(int)}. 194 * @return the histogram 195 * @see #getHistogram(int) 196 */ 197 public Histogram<T> getHistogram() { 198 return getHistogram(0); 199 } 200 201 /** 202 * Return the histogram of the {@link #getValues() values}. This method is capable of creating two kinds of histograms. The 203 * first kind is a histogram where all of the buckets are distributed normally and all have the same width. In this case, the 204 * 'numSigmas' should be set to 0. See {@link #getHistogram()}. 205 * <p> 206 * The second kind of histogram is more useful when most of the data that is clustered near one value. This histogram is 207 * focused around the values that are up to 'numSigmas' above and below the {@link #getMedian() median}, and all values 208 * outside of this range are placed in the first and last bucket. 209 * </p> 210 * @param numSigmas the number of standard deviations from the {@link #getMedian() median}, or 0 if the buckets of the 211 * histogram should be evenly distributed 212 * @return the histogram 213 * @see #getHistogram() 214 */ 215 public Histogram<T> getHistogram( int numSigmas ) { 216 Lock lock = this.getLock().writeLock(); 217 lock.lock(); 218 try { 219 Histogram<T> hist = new Histogram<T>(this.math, this.values); 220 if (numSigmas > 0) { 221 // The 'getMediaValue()' method will reset the current histogram, so don't set it... 222 hist.setStrategy(this.getMedianValue(), this.getStandardDeviation(), numSigmas); 223 } 224 this.histogram = hist; 225 return this.histogram; 226 } finally { 227 lock.unlock(); 228 } 229 } 230 231 @Override 232 protected void doReset() { 233 super.doReset(); 234 this.medianValue = 0.0d; 235 this.median = this.math.createZeroValue(); 236 this.s = 0.0d; 237 this.sigma = 0.0d; 238 this.values.clear(); 239 } 240 241 @Override 242 public String toString() { 243 int count = this.getCount(); 244 String samples = Inflector.getInstance().pluralize("sample", count); 245 return StringUtil.createString("{0} {1}: min={2}; avg={3}; median={4}; stddev={5}; max={6}", count, samples, this.getMinimum(), this.getMean(), this.getMedian(), this.getStandardDeviation(), 246 this.getMaximum()); 247 } 248 249 }