Raymond | dee0849 | 2015-04-02 10:43:13 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Licensed to the Apache Software Foundation (ASF) under one or more |
| 3 | * contributor license agreements. See the NOTICE file distributed with |
| 4 | * this work for additional information regarding copyright ownership. |
| 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 |
| 6 | * (the "License"); you may not use this file except in compliance with |
| 7 | * the License. You may obtain a copy of the License at |
| 8 | * |
| 9 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | * |
| 11 | * Unless required by applicable law or agreed to in writing, software |
| 12 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | * See the License for the specific language governing permissions and |
| 15 | * limitations under the License. |
| 16 | */ |
| 17 | |
| 18 | package org.apache.commons.math.random; |
| 19 | |
| 20 | import java.io.IOException; |
| 21 | import java.io.File; |
| 22 | import java.net.URL; |
| 23 | import java.util.List; |
| 24 | |
| 25 | import org.apache.commons.math.stat.descriptive.StatisticalSummary; |
| 26 | import org.apache.commons.math.stat.descriptive.SummaryStatistics; |
| 27 | |
| 28 | /** |
| 29 | * Represents an <a href="http://random.mat.sbg.ac.at/~ste/dipl/node11.html"> |
| 30 | * empirical probability distribution</a> -- a probability distribution derived |
| 31 | * from observed data without making any assumptions about the functional form |
| 32 | * of the population distribution that the data come from. |
| 33 | * <p>Implementations of this interface maintain data structures, called |
| 34 | * <i>distribution digests</i>, that describe empirical distributions and |
| 35 | * support the following operations: <ul> |
| 36 | * <li>loading the distribution from a file of observed data values</li> |
| 37 | * <li>dividing the input data into "bin ranges" and reporting bin frequency |
| 38 | * counts (data for histogram)</li> |
| 39 | * <li>reporting univariate statistics describing the full set of data values |
| 40 | * as well as the observations within each bin</li> |
| 41 | * <li>generating random values from the distribution</li> |
| 42 | * </ul> |
| 43 | * <p>Applications can use {@code EmpiricalDistribution} implementations to |
| 44 | * build grouped frequency histograms representing the input data or to |
| 45 | * generate random values "like" those in the input file -- i.e., the values |
| 46 | * generated will follow the distribution of the values in the file. |
| 47 | * |
| 48 | * @version $Revision: 817128 $ $Date: 2009-09-21 03:30:53 +0200 (lun. 21 sept. 2009) $ |
| 49 | */ |
| 50 | public interface EmpiricalDistribution { |
| 51 | |
| 52 | /** |
| 53 | * Computes the empirical distribution from the provided |
| 54 | * array of observed data values. |
| 55 | * |
| 56 | * @param dataArray the observed data values |
| 57 | */ |
| 58 | void load(double[] dataArray); |
| 59 | |
| 60 | /** |
| 61 | * Computes the empirical distribution from the input file. |
| 62 | * |
| 63 | * @param file the input file of observed data values |
| 64 | * @throws IOException if an I/O error occurs |
| 65 | */ |
| 66 | void load(File file) throws IOException; |
| 67 | |
| 68 | /** |
| 69 | * Computes the empirical distribution using data read from a URL. |
| 70 | * |
| 71 | * @param url URL of the input file |
| 72 | * @throws IOException if an I/O error occurs |
| 73 | */ |
| 74 | void load(URL url) throws IOException; |
| 75 | |
| 76 | /** |
| 77 | * Generates a random value from this distribution. |
| 78 | * <p><strong>Preconditions:</strong></p><ul> |
| 79 | * <li>the distribution must be loaded before invoking this method</li></ul> |
| 80 | * |
| 81 | * @return a random value generated from this distribution |
| 82 | * @throws IllegalStateException if the distribution has not been loaded |
| 83 | */ |
| 84 | double getNextValue() throws IllegalStateException; |
| 85 | |
| 86 | |
| 87 | /** |
| 88 | * Returns a |
| 89 | * {@link org.apache.commons.math.stat.descriptive.StatisticalSummary} |
| 90 | * describing this distribution. |
| 91 | * <p><strong>Preconditions:</strong></p><ul> |
| 92 | * <li>the distribution must be loaded before invoking this method</li> |
| 93 | * </ul> |
| 94 | * |
| 95 | * @return the sample statistics |
| 96 | * @throws IllegalStateException if the distribution has not been loaded |
| 97 | */ |
| 98 | StatisticalSummary getSampleStats() throws IllegalStateException; |
| 99 | |
| 100 | /** |
| 101 | * Indicates whether or not the distribution has been loaded. |
| 102 | * |
| 103 | * @return {@code true} if the distribution has been loaded |
| 104 | */ |
| 105 | boolean isLoaded(); |
| 106 | |
| 107 | /** |
| 108 | * Returns the number of bins into which the input data are divided. |
| 109 | * |
| 110 | * @return the number of bins |
| 111 | */ |
| 112 | int getBinCount(); |
| 113 | |
| 114 | /** |
| 115 | * Returns a list of |
| 116 | * {@link org.apache.commons.math.stat.descriptive.SummaryStatistics} |
| 117 | * containing statistics describing the values in each of the bins. The |
| 118 | * list is indexed by bin number. |
| 119 | * |
| 120 | * @return list of per-bin statistics |
| 121 | */ |
| 122 | List<SummaryStatistics> getBinStats(); |
| 123 | |
| 124 | /** |
| 125 | * Returns the array of upper bounds for the bins. Bins are: <br> |
| 126 | * [min,upperBounds[0]],(upperBounds[0],upperBounds[1]],..., |
| 127 | * (upperBounds[binCount-2], upperBounds[binCount-1] = max]. |
| 128 | * |
| 129 | * @return array of bin upper bounds |
| 130 | */ |
| 131 | double[] getUpperBounds(); |
| 132 | |
| 133 | } |