/*******************************************************************************
* Copyright (c) 2007 VTT Technical Research Centre of Finland and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* VTT Technical Research Centre of Finland - initial API and implementation
*******************************************************************************/
package org.simantics.history.util;
import java.util.Formatter;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;
import org.simantics.databoard.primitives.MutableInteger;
/**
* This class gives a rough distribution classification for double values.
*
* It calculates the frequency of values in each class. All values are
* accounted. There is a value between two samples.
* n(sample intervals) = n(samples) - 1
*
* Classes are derieved using logarithmic scale.
* Each class accounts all values within a range [ base ^ class .. base ^ (class-1) ).
*
* Example for classes of base number 2.0
*
* Class Ranges
* -10 [ 0,98ms .. 1,95ms )
* -9 [ 1,95ms .. 3,91ms )
* -8 [ 3,91ms .. 7,81ms )
* -7 [ 7,81ms .. 15,63ms )
* -6 [ 15,63ms .. 31,25ms )
* -5 [ 31,25ms .. 62,50ms )
* -4 [ 62,50ms .. 125,00ms )
* -3 [ 125,00ms .. 250,00ms )
* -2 [ 250,00ms .. 500,00ms )
* -1 [ 500,00ms .. 1 000,00ms )
* 0 [ 1 000,00ms .. 2 000,00ms )
* +1 [ 2 000,00ms .. 4 000,00ms )
* +2 [ 4 000,00ms .. 8 000,00ms )
* +3 [ 8 000,00ms .. 16 000,00ms )
* +4 [ 16 000,00ms .. 32 000,00ms )
* +5 [ 32 000,00ms .. 64 000,00ms )
* +6 [ 64 000,00ms .. 128 000,00ms )
* +7 [ 128 000,00ms .. 256 000,00ms )
* +8 [ 256 000,00ms .. 512 000,00ms )
* +9 [ 512 000,00ms .. 1 024 000,00ms )
* +10 [ 1 024 000,00ms .. 2 048 000,00ms )
*
* @author Toni Kalajainen
*/
public class ClassDistribution {
// Optimization that tries to cirumvent slow Math.log
// previously entered value and its class is remembered
private transient double lastEnteredValue;
private transient int lastClass;
double base;
private transient double log_base;
/** Distribution */
TreeMap distribution;
public ClassDistribution() {
this( 2.0 );
}
public ClassDistribution(double base) {
this.base = base;
log_base = Math.log( base );
lastEnteredValue = 0.001;
lastClass = (int) Math.floor( Math.log( 0.001 ) / log_base );
distribution = new TreeMap();
}
public ClassDistribution(double base, TreeMap initialBreakdown) {
this.base = base;
log_base = Math.log( base );
lastEnteredValue = 0.001;
lastClass = (int) Math.floor( Math.log( 0.001 ) / log_base );
distribution = new TreeMap( initialBreakdown );
}
public double getBase() {
return base;
}
public void setBase(double base) {
this.base = base;
log_base = Math.log( base );
}
public double getSmallest() {
if (distribution.isEmpty()) return 1.0;
int clazz = distribution.firstKey();
return getClassAvg(clazz);
}
/**
* Get median value
*
* @return
*/
public double getMedian() {
// Count total
int n = 0;
for (MutableInteger v : distribution.values()) n += v.value;
double median = (double) n/2;
double sum = 0;
for (Entry e : distribution.entrySet()) {
sum += e.getValue().value;
if (sum==median) {
int c = e.getKey();
Integer nextC = distribution.higherKey( c );
if (nextC == null)
return getClassMax( c );
return ( getClassMax( c ) + getClassMin( nextC ) ) /2;
}
if (sum>median) {
int c = e.getKey();
return getClassAvg( c );
}
}
return 0;
}
/**
* Get an index to the class with highest frequency.
*
* @return interval class or 0 if there are no samples
*/
public int getLargestClassIndex() {
int result = 0;
int nResult = -1;
for (Entry e : distribution.entrySet()) {
if (e.getValue().value > nResult) {
nResult = e.getValue().value;
result = e.getKey();
}
}
return result;
}
/**
* Write distribution with user given map.
*
* @param result map where breakdown is written to
*/
public void getDistribution(Map result) {
result.clear();
for (Entry e : distribution.entrySet()) {
result.put( e.getKey(), Integer.valueOf(e.getValue().value));
}
}
public void setDistribution(Map map) {
this.distribution.clear();
for (Entry e : map.entrySet()) {
distribution.put( e.getKey(), new MutableInteger(e.getValue()) );
}
}
/**
* Create a snapshot copy of the distribution.
*
* @return a histogram
*/
public TreeMap getDistribution() {
TreeMap result = new TreeMap();
getDistribution(result);
return result;
}
/**
* Add new value to the distribution.
*
* @param value
*/
public void addValue(double value) {
Integer k = Integer.valueOf( getClassIndex(value) );
MutableInteger r = distribution.get(k);
if ( r == null ) {
r = new MutableInteger();
distribution.put(k, r);
}
r.value ++;
}
/**
* Get lowest value of a class
*
* @param intervalClass
* @return min value
*/
public double getClassMin(int intervalClass) {
return Math.pow(base, intervalClass);
}
/**
* Get highest value of a class
*
* @param intervalClass
* @return max value
*/
public double getClassMax(int intervalClass) {
return Math.pow(base, intervalClass+1);
}
/**
* Get average value of a class
*
* @param intervalClass
* @return average interval
*/
public double getClassAvg(int intervalClass) {
double min = getClassMin(intervalClass);
double max = getClassMax(intervalClass);
return (max-min)/2+min;
}
/**
* Get class index for a value
* @param interval
* @return class index
*/
public int getClassIndex(double interval) {
if (interval == lastEnteredValue) return lastClass;
lastClass = (int) Math.floor( Math.log(interval) / log_base );
lastEnteredValue = interval;
return lastClass;
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
Formatter f = new Formatter(sb);
sb.append("Index Range Count\n");
for (Entry e : distribution.entrySet()) {
int ic = e.getKey();
double start = getClassMin(ic);
double end = getClassMax(ic);
double avg = getClassAvg(ic);
int count = e.getValue().value;
String format;
if (start<0.001) {
start *= 1000.0;
end *= 1000.0;
avg *= 1000.0;
format = " %+3d [ %(,8fm .. %(,8fm ) = %d, avg = %(,8fm\n";
} else
if (start<1) {
start *= 1000.0;
end *= 1000.0;
avg *= 1000.0;
format = " %+3d [ %(,8.2fm .. %(,8.2fm ) = %d, avg = %(,8.2fm\n";
} else {
format = " %+3d [ %(9.0f .. %(9.0f ) = %d, avg = %(8.1f\n";
}
f.format(format , ic, start, end, count, avg);
}
return sb.toString();
}
}