IntegerDistribution.java

/*
 * Copyright 2013 University of Glasgow.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package broadwick.statistics.distributions;

import broadwick.rng.RNG;
import java.io.Serializable;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ConcurrentSkipListMap;
import lombok.Synchronized;

/**
 * A simple class for defining custom distributions. This is, in effect, a histogram.
 */
public class IntegerDistribution implements Serializable {

    /**
     * Create an empty distribution.
     */
    public IntegerDistribution() {
        bins = new ConcurrentSkipListMap<>();
        this.init(0);
    }

    /**
     * Create the distribution with a specified number of bins.
     * @param nbins the number of bins
     */
    public IntegerDistribution(final int nbins) {
        bins = new ConcurrentSkipListMap<>();
        this.init(nbins);
    }

    /**
     * Add a number of entries (keys) to the histogram (with zero value).
     * @param nbins the number of bins to add.
     */
    @Synchronized
    private void init(final int nbins) {
        for (int i = 1; i <= nbins; i++) {
            bins.put(Integer.valueOf(i), Integer.valueOf(0));
        }
    }

    /**
     * Clear all the data from the distribution, after this method the distribution has no denominator or frequency.
     */
    @Synchronized
    public void clear() {
        bins.clear();
    }

    /**
     * Resets the value in each bin.
     * @param val the reset value.
     */
    @Synchronized
    public final void reset(final int val) {
        for (int i : getBins()) {
            bins.put(Integer.valueOf(i), val);
        }
    }

    /**
     * Resets the value in each bin.
     */
    @Synchronized
    public final void reset() {
        for (int i : getBins()) {
            bins.put(Integer.valueOf(i), 0);
        }
    }

    /**
     * Adds the content of the argument to the current object.
     * @param hist the histogram data to add.
     */
    @Synchronized
    public final void add(final IntegerDistribution hist) {
        for (Integer i : hist.getBins()) {
            if (bins.containsKey(i)) {
                bins.put(i, this.getFrequency(i) + hist.getFrequency(i));
            } else {
                bins.put(i, hist.getFrequency(i));
            }
        }
    }

    /**
     * Get the number of bins.
     * @return the number of bins.
     */
    @Synchronized
    public final int getNumBins() {
        return bins.size();
    }

    /**
     * Increment the size of a bin.
     * @param bin the bin to increment.
     */
    @Synchronized
    public final void setFrequency(final Integer bin) {
        Integer value = 1;
        if (bins.get(bin) != null) {
            value = bins.get(bin) + 1;
        }
        bins.put(bin, value);
    }

    /**
     * Set the value of a bin.
     * @param bin  the bin.
     * @param data the data.
     */
    @Synchronized
    public final void setFrequency(final Integer bin, final Integer data) {
        bins.put(bin, data);
    }

    /**
     * Get the size of the histogram at the given bin.
     * @param bin the bin.
     * @return the size of the bin, or null if the bin is not in the histogram.
     */
    @Synchronized
    public final Integer getFrequency(final Integer bin) {
        return bins.get(bin);
    }

    /**
     * Increment the size of a bin.
     * @param bin the bin to increment.
     * @deprecated use getFrequency() instead
     */
    @Synchronized
    public final void setData(final Integer bin) {
        this.setFrequency(bin);
    }

    /**
     * Set the value of a bin.
     * @param bin  the bin.
     * @param data the data.
     * @deprecated use setFrequency() instead
     */
    @Synchronized
    public final void setData(final Integer bin, final Integer data) {
        this.setFrequency(bin, data);
    }

    /**
     * Get the size of the histogram at the given bin.
     * @param bin the bin.
     * @deprecated use getFrequency() instead
     * @return the size of the bin, or null if the bin is not in the histogram.
     */
    @Synchronized
    public final Integer getData(final Integer bin) {
        return getFrequency(bin);
    }

    /**
     * Select a random bin from the cumulative distribution of the histograms contents.
     * @return the index of the random bin, or zero if no bin could be selected.
     */
    @Synchronized
    public final Integer getRandomBin() {
        final int cumulativeSum = getSumCounts();
        if (cumulativeSum == 0) {
            throw new IllegalArgumentException("Cannot select random bin: there are no bins!");
        }
        final int randomBin = GENERATOR.getInteger(0, cumulativeSum);

        final Integer[] arr = bins.keySet().toArray(new Integer[bins.size()]);
        int index = 0;
        int sum = 0;
        for (int value : bins.values()) {
            sum += value;
            if (sum >= randomBin) {
                return arr[index];
            }
            index++;
        }
        return arr[index - 1];
    }

    /**
     * Select a random bin from the cumulative distribution of the frequencies and return the frequency.
     * @return the contents of the randomly selected bin.
     */
    @Synchronized
    public final Integer getRandomBinFrequency() {
        final int cumulativeSum = getSumCounts();
        final int randomBin = GENERATOR.getInteger(0, cumulativeSum);

        int sum = 0;
        for (int value : bins.values()) {
            sum += value;
            if (sum >= randomBin) {
                return value;
            }
        }

        // return the contents of the last bin.
        return bins.keySet().toArray(new Integer[bins.size()])[bins.size() - 1];
    }

    /**
     * Creates a copy of this histogram and returns it. The returned IntegerDistribution is a completely different
     * object.
     * @return a copy of this histogram.
     */
    @Synchronized
    public final IntegerDistribution copy() {
        final IntegerDistribution clone = new IntegerDistribution(bins.size());

        for (Integer bin : this.getBins()) {
            clone.setFrequency(bin, bins.get(bin));
        }

        return clone;
    }

    /**
     * Get the sum of the counts in the histogram.
     * @return the sum of all the bins in the histogram.
     */
    @Synchronized
    public final Integer getSumCounts() {
        int sum = 0;
        for (int value : bins.values()) {
            sum += value;
        }
        return sum;
    }

    /**
     * Get the bins in the histogram. The bins are the keySet of the underlying map object.
     * @return the number of bins.
     */
    @Synchronized
    public final Collection<Integer> getBins() {
        return bins.keySet();
    }

    /**
     * Get a collection of the values held in the histogram. The bins are the keySet of the underlying map object, the
     * values are the contents if the bin.
     * @return a collection of the values held in the histogram.
     */
    @Synchronized
    public final Collection<Integer> getBinContents() {
        return bins.values();
    }

    /**
     * Get the size (the number of bins) in the histogram.
     * @return the size
     */
    @Synchronized
    public final int size() {
        return bins.size();
    }

    /**
     * Get an array of the bin values.
     * @return an array of int values.
     */
    @Synchronized
    public final synchronized int[] toArray() {
        int[] arr = new int[size()];

        int i = 0;
        final Iterator<Integer> it = bins.values().iterator();
        while (it.hasNext()) {
            arr[i] = it.next();
            i++;
        }

        return arr;
    }

    /**
     * Get an array of the bin values.
     * @return an array of long values.
     */
    @Synchronized
    public final synchronized long[] toLongArray() {
        long[] arr = new long[size()];

        int i = 0;
        final Iterator<Integer> it = bins.values().iterator();
        while (it.hasNext()) {
            arr[i] = (long) it.next();
            i++;
        }

        return arr;
    }

    /**
     * Scale the values in the histogram by a given factor. A copy of the histogram is returned, the original is not
     * modified.
     * @param factor the value by which every value in the hiistogram will be multiplied.
     * @return the scaled histogram.
     */
    public final IntegerDistribution scaleBins(final double factor) {
        final IntegerDistribution data = this.copy();

        for (Integer bin : data.getBins()) {
            final double d = data.getFrequency(bin) * factor;
            data.setFrequency(bin, (int) d);
        }

        return data;
    }

    /**
     * Normalise the frequency distribution. The returned histogram is a scaled copy of the input histogram so that the
     * sum of the returned histogram is equal to the normalising factor.
     * @param constant the normalising constant
     * @return the scaled and normalised histogram.
     */
    public final IntegerDistribution normaliseBins(final int constant) {
        final IntegerDistribution data = this.copy();
        final IntegerDistribution normalisedBins = new IntegerDistribution(this.getNumBins());
        final Map<Integer, Double> fractions = new HashMap<>(this.getNumBins());
        final double factor = constant / Double.valueOf(this.getSumCounts());

        for (Integer bin : data.getBins()) {
            final double d = data.getFrequency(bin) * factor;
            final int size = (int) Math.floor(d);
            fractions.put(bin, d - size);
            normalisedBins.setFrequency(bin, size);
        }

        // Now normalisedBins.getSumCounts() < data.getSumCounts()/runs so we need to increment the
        // bins with the (data.getSumCounts()/runs - normalisedBins.getSumCounts()) largest fractions.
        int diff = constant - normalisedBins.getSumCounts();
        while (diff > 0) {
            int binMax = -1;
            double max = 0.0;

            for (Entry<Integer, Double> entry : fractions.entrySet()) {
                if (entry.getValue() > max) {
                    max = entry.getValue();
                    binMax = entry.getKey();
                }
            }
            if (binMax > 0) {
                normalisedBins.setFrequency(binMax);
                fractions.put(binMax, 0.0);
            }
            diff--;
        }

        return normalisedBins;
    }

    /**
     * Get a string representation of the histogram.
     * @return the string.
     */
    @Override
    @Synchronized
    public final String toString() {
        final StringBuilder str = new StringBuilder(10);
        for (Map.Entry<Integer, Integer> entry : bins.entrySet()) {
            str.append(entry.getKey()).append(":").append(entry.getValue()).append("\n");
        }
        return str.toString();
    }

    /**
     * Get a string representation of the histogram in a csv format.
     * @return the string.
     */
    @Synchronized
    public final String toCsv() {
        final StringBuilder str = new StringBuilder(10);
        for (Map.Entry<Integer, Integer> entry : bins.entrySet()) {
            str.append(entry.getValue()).append(",");
        }
        if (str.length() > 0) {
            str.deleteCharAt(str.length() - 1);
        }
        return str.toString();
    }
    private ConcurrentMap<Integer, Integer> bins;
    private static final RNG GENERATOR = new RNG(RNG.Generator.Well19937c);
}