001 /* 002 * JBoss, Home of Professional Open Source. 003 * Copyright 2008, Red Hat Middleware LLC, and individual contributors 004 * as indicated by the @author tags. See the copyright.txt file in the 005 * distribution for a full listing of individual contributors. 006 * 007 * This is free software; you can redistribute it and/or modify it 008 * under the terms of the GNU Lesser General Public License as 009 * published by the Free Software Foundation; either version 2.1 of 010 * the License, or (at your option) any later version. 011 * 012 * This software is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * You should have received a copy of the GNU Lesser General Public 018 * License along with this software; if not, write to the Free 019 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 020 * 02110-1301 USA, or see the FSF site: http://www.fsf.org. 021 */ 022 package org.jboss.dna.common.stats; 023 024 import java.util.Collections; 025 import java.util.Comparator; 026 import java.util.LinkedList; 027 import java.util.List; 028 import java.util.concurrent.locks.Lock; 029 import net.jcip.annotations.ThreadSafe; 030 import org.jboss.dna.common.math.MathOperations; 031 import org.jboss.dna.common.text.Inflector; 032 import org.jboss.dna.common.util.StringUtil; 033 034 /** 035 * Encapsulation of the statistics for a series of values to which new values are frequently added. The statistics include the 036 * {@link #getMinimum() minimum}, {@link #getMaximum() maximum}, {@link #getTotal() total (aggregate sum)}, 037 * {@link #getMean() mean (average)}, {@link #getMedian() median}, {@link #getStandardDeviation() standard deviation} and the 038 * {@link #getHistogram() histogram} of the values. 039 * <p> 040 * This class uses an efficient running calculation of the mean and standard deviation that is not as susceptible to roundoff 041 * errors as other traditional algorithms. The recursive algorithm is as follows, where M is the median value, sigma is the 042 * standard deviation, and S is a variable used in the calculation of sigma: 043 * 044 * <pre> 045 * M(1) = x(1) 046 * S(1) = 0 047 * M(k) = M(k-1) + ( x(k) - M(k-1) ) / k 048 * S(k) = S(k-1) + ( x(k) - M(k-1) ) * (x(k) - M(k)) 049 * </pre> 050 * 051 * Then, the standard deviation for n values in x is 052 * 053 * <pre> 054 * sigma = sqrt(S(n) / n) 055 * </pre> 056 * 057 * </p> 058 * Unlike the other quantities, the median value (the value at which half of the values are greater and half the values are lower) 059 * cannot be calculated incrementally. Therefore, this class does record the values so that the median can be properly calculated. 060 * This fact should be kept in mind when performing statistics on large numbers of values. 061 * </p> 062 * <p> 063 * This class is threadsafe. 064 * </p> 065 * @param <T> the number type for these statistics 066 */ 067 @ThreadSafe 068 public class DetailedStatistics<T extends Number> extends SimpleStatistics<T> { 069 070 private T median; 071 private Double medianValue; 072 private double s = 0.0d; // used in the calculation of standard deviation (sigma) 073 private double sigma = 0.0d; 074 private final List<T> values = new LinkedList<T>(); 075 private final List<T> unmodifiableValues = Collections.unmodifiableList(this.values); 076 private Histogram<T> histogram; 077 078 public DetailedStatistics( MathOperations<T> operations ) { 079 super(operations); 080 this.medianValue = 0.0d; 081 this.median = this.math.createZeroValue(); 082 } 083 084 /** 085 * Get the values that have been recorded in these statistics. The contents of this list may change if new values are 086 * {@link #add(Number) added} in another thread. 087 * @return the unmodifiable collection of values, in insertion order 088 */ 089 public List<T> getValues() { 090 return this.unmodifiableValues; 091 } 092 093 @Override 094 protected void doAddValue( T value ) { 095 if (value == null) { 096 return; 097 } 098 double previousMean = this.getMeanValue(); 099 super.doAddValue(value); 100 this.values.add(value); 101 this.medianValue = null; 102 103 // Calculate the mean and standard deviation ... 104 int count = getCount(); 105 if (count == 1) { 106 this.s = 0.0d; 107 this.sigma = 0.0d; 108 } else { 109 double dValue = value.doubleValue(); 110 double dCount = count; 111 // M(k) = M(k-1) + ( x(k) - M(k-1) ) / k 112 double meanValue = previousMean + ((dValue - previousMean) / dCount); 113 // S(k) = S(k-1) + ( x(k) - M(k-1) ) * ( x(k) - M(k) ) 114 this.s = this.s + (dValue - previousMean) * (dValue - meanValue); 115 // sigma = sqrt( S(n) / (n-1) ) 116 this.sigma = Math.sqrt(this.s / dCount); 117 } 118 } 119 120 /** 121 * Return the approximate mean (average) value represented as an instance of the operand type. Note that this may truncate if 122 * the operand type is not able to have the required precision. For the accurate mean, see {@link #getMedianValue() }. 123 * @return the mean (average), or 0.0 if the {@link #getCount() count} is 0 124 */ 125 public T getMedian() { 126 getMedianValue(); 127 return this.median; 128 } 129 130 /** 131 * Return the median value. 132 * @return the median value, or 0.0 if the {@link #getCount() count} is 0 133 * @see #getMedian() 134 */ 135 public double getMedianValue() { 136 Lock lock = this.getLock().writeLock(); 137 try { 138 lock.lock(); 139 int count = this.values.size(); 140 if (count == 0) { 141 return 0.0d; 142 } 143 if (this.medianValue == null) { 144 // Sort the values in numerical order.. 145 Comparator<T> comparator = this.math.getComparator(); 146 Collections.sort(this.values, comparator); 147 this.medianValue = 0.0d; 148 // If there is only one value, then the median is that value ... 149 if (count == 1) { 150 this.medianValue = this.values.get(0).doubleValue(); 151 } 152 // If there is an odd number of values, find value that is in the middle .. 153 else if (count % 2 != 0) { 154 this.medianValue = this.values.get(((count + 1) / 2) - 1).doubleValue(); 155 } 156 // Otherwise, there is an even number of values, so find the average of the middle two values ... 157 else { 158 int upperMiddleValueIndex = count / 2; 159 int lowerMiddleValueIndex = upperMiddleValueIndex - 1; 160 double lowerValue = this.values.get(lowerMiddleValueIndex).doubleValue(); 161 double upperValue = this.values.get(upperMiddleValueIndex).doubleValue(); 162 this.medianValue = (lowerValue + upperValue) / 2.0d; 163 } 164 this.median = this.math.create(this.medianValue); 165 this.histogram = null; 166 } 167 } finally { 168 lock.unlock(); 169 } 170 return this.medianValue; 171 } 172 173 /** 174 * Return the standard deviation. The standard deviation is a measure of the variation in a series of values. Values with a 175 * lower standard deviation has less variance in the values than a series of values with a higher standard deviation. 176 * @return the standard deviation, or 0.0 if the {@link #getCount() count} is 0 or if all of the values are the same. 177 */ 178 public double getStandardDeviation() { 179 Lock lock = this.getLock().readLock(); 180 lock.lock(); 181 try { 182 return this.sigma; 183 } finally { 184 lock.unlock(); 185 } 186 } 187 188 /** 189 * Return the histogram of the {@link #getValues() values}. This method returns a histogram where all of the buckets are 190 * distributed normally and all have the same width. In this case, the 'numSigmas' should be set to 0. For other variations, 191 * see {@link #getHistogram(int)}. 192 * @return the histogram 193 * @see #getHistogram(int) 194 */ 195 public Histogram<T> getHistogram() { 196 return getHistogram(0); 197 } 198 199 /** 200 * Return the histogram of the {@link #getValues() values}. This method is capable of creating two kinds of histograms. The 201 * first kind is a histogram where all of the buckets are distributed normally and all have the same width. In this case, the 202 * 'numSigmas' should be set to 0. See {@link #getHistogram()}. 203 * <p> 204 * The second kind of histogram is more useful when most of the data that is clustered near one value. This histogram is 205 * focused around the values that are up to 'numSigmas' above and below the {@link #getMedian() median}, and all values 206 * outside of this range are placed in the first and last bucket. 207 * </p> 208 * @param numSigmas the number of standard deviations from the {@link #getMedian() median}, or 0 if the buckets of the 209 * histogram should be evenly distributed 210 * @return the histogram 211 * @see #getHistogram() 212 */ 213 public Histogram<T> getHistogram( int numSigmas ) { 214 Lock lock = this.getLock().writeLock(); 215 lock.lock(); 216 try { 217 Histogram<T> hist = new Histogram<T>(this.math, this.values); 218 if (numSigmas > 0) { 219 // The 'getMediaValue()' method will reset the current histogram, so don't set it... 220 hist.setStrategy(this.getMedianValue(), this.getStandardDeviation(), numSigmas); 221 } 222 this.histogram = hist; 223 return this.histogram; 224 } finally { 225 lock.unlock(); 226 } 227 } 228 229 @Override 230 protected void doReset() { 231 super.doReset(); 232 this.medianValue = 0.0d; 233 this.median = this.math.createZeroValue(); 234 this.s = 0.0d; 235 this.sigma = 0.0d; 236 this.values.clear(); 237 } 238 239 @Override 240 public String toString() { 241 int count = this.getCount(); 242 String samples = Inflector.getInstance().pluralize("sample", count); 243 return StringUtil.createString("{0} {1}: min={2}; avg={3}; median={4}; stddev={5}; max={6}", count, samples, this.getMinimum(), this.getMean(), this.getMedian(), this.getStandardDeviation(), 244 this.getMaximum()); 245 } 246 247 }