1 /*******************************************************************************
\r
2 * Copyright (c) 2007 VTT Technical Research Centre of Finland and others.
\r
3 * All rights reserved. This program and the accompanying materials
\r
4 * are made available under the terms of the Eclipse Public License v1.0
\r
5 * which accompanies this distribution, and is available at
\r
6 * http://www.eclipse.org/legal/epl-v10.html
\r
9 * VTT Technical Research Centre of Finland - initial API and implementation
\r
10 *******************************************************************************/
\r
11 package org.simantics.history.util;
\r
13 import java.util.Formatter;
\r
14 import java.util.Map;
\r
15 import java.util.Map.Entry;
\r
16 import java.util.TreeMap;
\r
18 import org.simantics.databoard.primitives.MutableInteger;
\r
21 * This class gives a rough distribution classification for double values.
\r
23 * It calculates the frequency of values in each class. All values are
\r
24 * accounted for. There is one value between each two consecutive samples:
\r
25 * n(sample intervals) = n(samples) - 1 <p>
\r
27 * Classes are derived using a logarithmic scale.
\r
28 * Each class accounts for all values within the range [ base ^ class .. base ^ (class+1) ).
\r
30 * Example for classes of base number 2.0
\r
33 * -10 [ 0,98ms .. 1,95ms )
\r
34 * -9 [ 1,95ms .. 3,91ms )
\r
35 * -8 [ 3,91ms .. 7,81ms )
\r
36 * -7 [ 7,81ms .. 15,63ms )
\r
37 * -6 [ 15,63ms .. 31,25ms )
\r
38 * -5 [ 31,25ms .. 62,50ms )
\r
39 * -4 [ 62,50ms .. 125,00ms )
\r
40 * -3 [ 125,00ms .. 250,00ms )
\r
41 * -2 [ 250,00ms .. 500,00ms )
\r
42 * -1 [ 500,00ms .. 1 000,00ms )
\r
43 * 0 [ 1 000,00ms .. 2 000,00ms )
\r
44 * +1 [ 2 000,00ms .. 4 000,00ms )
\r
45 * +2 [ 4 000,00ms .. 8 000,00ms )
\r
46 * +3 [ 8 000,00ms .. 16 000,00ms )
\r
47 * +4 [ 16 000,00ms .. 32 000,00ms )
\r
48 * +5 [ 32 000,00ms .. 64 000,00ms )
\r
49 * +6 [ 64 000,00ms .. 128 000,00ms )
\r
50 * +7 [ 128 000,00ms .. 256 000,00ms )
\r
51 * +8 [ 256 000,00ms .. 512 000,00ms )
\r
52 * +9 [ 512 000,00ms .. 1 024 000,00ms )
\r
53 * +10 [ 1 024 000,00ms .. 2 048 000,00ms )
\r
55 * @author Toni Kalajainen <toni.kalajainen@vtt.fi>
\r
57 public class ClassDistribution {
\r
59 // Optimization that tries to circumvent the slow Math.log:
\r
60 // the previously entered value and its class are remembered
\r
61 private transient double lastEnteredValue;
\r
62 private transient int lastClass;
\r
65 private transient double log_base;
\r
68 TreeMap<Integer, MutableInteger> distribution;
\r
70 public ClassDistribution() {
\r
74 public ClassDistribution(double base) {
\r
76 log_base = Math.log( base );
\r
77 lastEnteredValue = 0.001;
\r
78 lastClass = (int) Math.floor( Math.log( 0.001 ) / log_base );
\r
79 distribution = new TreeMap<Integer, MutableInteger>();
\r
82 public ClassDistribution(double base, TreeMap<Integer, MutableInteger> initialBreakdown) {
\r
84 log_base = Math.log( base );
\r
85 lastEnteredValue = 0.001;
\r
86 lastClass = (int) Math.floor( Math.log( 0.001 ) / log_base );
\r
87 distribution = new TreeMap<Integer, MutableInteger>( initialBreakdown );
\r
90 public double getBase() {
\r
94 public void setBase(double base) {
\r
96 log_base = Math.log( base );
\r
99 public double getSmallest() {
\r
100 if (distribution.isEmpty()) return 1.0;
\r
101 int clazz = distribution.firstKey();
\r
102 return getClassAvg(clazz);
\r
110 public double getMedian() {
\r
113 for (MutableInteger v : distribution.values()) n += v.value;
\r
115 double median = (double) n/2;
\r
117 for (Entry<Integer, MutableInteger> e : distribution.entrySet()) {
\r
118 sum += e.getValue().value;
\r
119 if (sum==median) {
\r
120 int c = e.getKey();
\r
121 Integer nextC = distribution.higherKey( c );
\r
123 return getClassMax( c );
\r
125 return ( getClassMax( c ) + getClassMin( nextC ) ) /2;
\r
128 int c = e.getKey();
\r
129 return getClassAvg( c );
\r
136 * Get an index to the class with highest frequency.
\r
138 * @return interval class or 0 if there are no samples
\r
140 public int getLargestClassIndex() {
\r
143 for (Entry<Integer, MutableInteger> e : distribution.entrySet()) {
\r
144 if (e.getValue().value > nResult) {
\r
145 nResult = e.getValue().value;
\r
146 result = e.getKey();
\r
153 * Write the distribution into a user-supplied map.
\r
155 * @param result map where breakdown is written to
\r
157 public void getDistribution(Map<Integer, Integer> result) {
\r
159 for (Entry<Integer, MutableInteger> e : distribution.entrySet()) {
\r
160 result.put( e.getKey(), Integer.valueOf(e.getValue().value));
\r
164 public void setDistribution(Map<Integer, Integer> map) {
\r
165 this.distribution.clear();
\r
166 for (Entry<Integer, Integer> e : map.entrySet()) {
\r
167 distribution.put( e.getKey(), new MutableInteger(e.getValue()) );
\r
172 * Create a snapshot copy of the distribution.
\r
174 * @return a histogram
\r
176 public TreeMap<Integer, Integer> getDistribution() {
\r
177 TreeMap<Integer, Integer> result = new TreeMap<Integer, Integer>();
\r
178 getDistribution(result);
\r
184 * Add new value to the distribution.
\r
188 public void addValue(double value) {
\r
189 Integer k = Integer.valueOf( getClassIndex(value) );
\r
190 MutableInteger r = distribution.get(k);
\r
192 r = new MutableInteger();
\r
193 distribution.put(k, r);
\r
199 * Get lowest value of a class
\r
201 * @param intervalClass
\r
202 * @return min value
\r
204 public double getClassMin(int intervalClass) {
\r
205 return Math.pow(base, intervalClass);
\r
209 * Get highest value of a class
\r
211 * @param intervalClass
\r
212 * @return max value
\r
214 public double getClassMax(int intervalClass) {
\r
215 return Math.pow(base, intervalClass+1);
\r
219 * Get average value of a class
\r
221 * @param intervalClass
\r
222 * @return average interval
\r
224 public double getClassAvg(int intervalClass) {
\r
225 double min = getClassMin(intervalClass);
\r
226 double max = getClassMax(intervalClass);
\r
227 return (max-min)/2+min;
\r
231 * Get class index for a value
\r
233 * @return class index
\r
235 public int getClassIndex(double interval) {
\r
236 if (interval == lastEnteredValue) return lastClass;
\r
237 lastClass = (int) Math.floor( Math.log(interval) / log_base );
\r
238 lastEnteredValue = interval;
\r
243 public String toString() {
\r
244 StringBuilder sb = new StringBuilder();
\r
245 Formatter f = new Formatter(sb);
\r
246 sb.append("Index Range Count\n");
\r
247 for (Entry<Integer, MutableInteger> e : distribution.entrySet()) {
\r
248 int ic = e.getKey();
\r
249 double start = getClassMin(ic);
\r
250 double end = getClassMax(ic);
\r
251 double avg = getClassAvg(ic);
\r
252 int count = e.getValue().value;
\r
258 format = " %+3d [ %(,8fm .. %(,8fm ) = %d, avg = %(,8fm\n";
\r
264 format = " %+3d [ %(,8.2fm .. %(,8.2fm ) = %d, avg = %(,8.2fm\n";
\r
266 format = " %+3d [ %(9.0f .. %(9.0f ) = %d, avg = %(8.1f\n";
\r
268 f.format(format , ic, start, end, count, avg);
\r
270 return sb.toString();
\r