blob: f9dff2d6a649ab1a85a9046e08218c35f7122132 [file] [log] [blame]
Raymonddee08492015-04-02 10:43:13 -07001/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18package org.apache.commons.math.distribution;
19
20import java.io.Serializable;
21
22import org.apache.commons.math.MathRuntimeException;
23import org.apache.commons.math.exception.util.LocalizedFormats;
24import org.apache.commons.math.util.MathUtils;
25import org.apache.commons.math.util.FastMath;
26
27/**
28 * The default implementation of {@link HypergeometricDistribution}.
29 *
30 * @version $Revision: 1054524 $ $Date: 2011-01-03 05:59:18 +0100 (lun. 03 janv. 2011) $
31 */
32public class HypergeometricDistributionImpl extends AbstractIntegerDistribution
33 implements HypergeometricDistribution, Serializable {
34
35 /** Serializable version identifier */
36 private static final long serialVersionUID = -436928820673516179L;
37
38 /** The number of successes in the population. */
39 private int numberOfSuccesses;
40
41 /** The population size. */
42 private int populationSize;
43
44 /** The sample size. */
45 private int sampleSize;
46
47 /**
48 * Construct a new hypergeometric distribution with the given the population
49 * size, the number of successes in the population, and the sample size.
50 *
51 * @param populationSize the population size.
52 * @param numberOfSuccesses number of successes in the population.
53 * @param sampleSize the sample size.
54 */
55 public HypergeometricDistributionImpl(int populationSize,
56 int numberOfSuccesses, int sampleSize) {
57 super();
58 if (numberOfSuccesses > populationSize) {
59 throw MathRuntimeException
60 .createIllegalArgumentException(
61 LocalizedFormats.NUMBER_OF_SUCCESS_LARGER_THAN_POPULATION_SIZE,
62 numberOfSuccesses, populationSize);
63 }
64 if (sampleSize > populationSize) {
65 throw MathRuntimeException
66 .createIllegalArgumentException(
67 LocalizedFormats.SAMPLE_SIZE_LARGER_THAN_POPULATION_SIZE,
68 sampleSize, populationSize);
69 }
70
71 setPopulationSizeInternal(populationSize);
72 setSampleSizeInternal(sampleSize);
73 setNumberOfSuccessesInternal(numberOfSuccesses);
74 }
75
76 /**
77 * For this distribution, X, this method returns P(X ≤ x).
78 *
79 * @param x the value at which the PDF is evaluated.
80 * @return PDF for this distribution.
81 */
82 @Override
83 public double cumulativeProbability(int x) {
84 double ret;
85
86 int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
87 if (x < domain[0]) {
88 ret = 0.0;
89 } else if (x >= domain[1]) {
90 ret = 1.0;
91 } else {
92 ret = innerCumulativeProbability(domain[0], x, 1, populationSize,
93 numberOfSuccesses, sampleSize);
94 }
95
96 return ret;
97 }
98
99 /**
100 * Return the domain for the given hypergeometric distribution parameters.
101 *
102 * @param n the population size.
103 * @param m number of successes in the population.
104 * @param k the sample size.
105 * @return a two element array containing the lower and upper bounds of the
106 * hypergeometric distribution.
107 */
108 private int[] getDomain(int n, int m, int k) {
109 return new int[] { getLowerDomain(n, m, k), getUpperDomain(m, k) };
110 }
111
112 /**
113 * Access the domain value lower bound, based on <code>p</code>, used to
114 * bracket a PDF root.
115 *
116 * @param p the desired probability for the critical value
117 * @return domain value lower bound, i.e. P(X &lt; <i>lower bound</i>) &lt;
118 * <code>p</code>
119 */
120 @Override
121 protected int getDomainLowerBound(double p) {
122 return getLowerDomain(populationSize, numberOfSuccesses, sampleSize);
123 }
124
125 /**
126 * Access the domain value upper bound, based on <code>p</code>, used to
127 * bracket a PDF root.
128 *
129 * @param p the desired probability for the critical value
130 * @return domain value upper bound, i.e. P(X &lt; <i>upper bound</i>) &gt;
131 * <code>p</code>
132 */
133 @Override
134 protected int getDomainUpperBound(double p) {
135 return getUpperDomain(sampleSize, numberOfSuccesses);
136 }
137
138 /**
139 * Return the lowest domain value for the given hypergeometric distribution
140 * parameters.
141 *
142 * @param n the population size.
143 * @param m number of successes in the population.
144 * @param k the sample size.
145 * @return the lowest domain value of the hypergeometric distribution.
146 */
147 private int getLowerDomain(int n, int m, int k) {
148 return FastMath.max(0, m - (n - k));
149 }
150
151 /**
152 * Access the number of successes.
153 *
154 * @return the number of successes.
155 */
156 public int getNumberOfSuccesses() {
157 return numberOfSuccesses;
158 }
159
160 /**
161 * Access the population size.
162 *
163 * @return the population size.
164 */
165 public int getPopulationSize() {
166 return populationSize;
167 }
168
169 /**
170 * Access the sample size.
171 *
172 * @return the sample size.
173 */
174 public int getSampleSize() {
175 return sampleSize;
176 }
177
178 /**
179 * Return the highest domain value for the given hypergeometric distribution
180 * parameters.
181 *
182 * @param m number of successes in the population.
183 * @param k the sample size.
184 * @return the highest domain value of the hypergeometric distribution.
185 */
186 private int getUpperDomain(int m, int k) {
187 return FastMath.min(k, m);
188 }
189
190 /**
191 * For this distribution, X, this method returns P(X = x).
192 *
193 * @param x the value at which the PMF is evaluated.
194 * @return PMF for this distribution.
195 */
196 public double probability(int x) {
197 double ret;
198
199 int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
200 if (x < domain[0] || x > domain[1]) {
201 ret = 0.0;
202 } else {
203 double p = (double) sampleSize / (double) populationSize;
204 double q = (double) (populationSize - sampleSize) / (double) populationSize;
205 double p1 = SaddlePointExpansion.logBinomialProbability(x,
206 numberOfSuccesses, p, q);
207 double p2 =
208 SaddlePointExpansion.logBinomialProbability(sampleSize - x,
209 populationSize - numberOfSuccesses, p, q);
210 double p3 =
211 SaddlePointExpansion.logBinomialProbability(sampleSize, populationSize, p, q);
212 ret = FastMath.exp(p1 + p2 - p3);
213 }
214
215 return ret;
216 }
217
218 /**
219 * For the distribution, X, defined by the given hypergeometric distribution
220 * parameters, this method returns P(X = x).
221 *
222 * @param n the population size.
223 * @param m number of successes in the population.
224 * @param k the sample size.
225 * @param x the value at which the PMF is evaluated.
226 * @return PMF for the distribution.
227 */
228 private double probability(int n, int m, int k, int x) {
229 return FastMath.exp(MathUtils.binomialCoefficientLog(m, x) +
230 MathUtils.binomialCoefficientLog(n - m, k - x) -
231 MathUtils.binomialCoefficientLog(n, k));
232 }
233
234 /**
235 * Modify the number of successes.
236 *
237 * @param num the new number of successes.
238 * @throws IllegalArgumentException if <code>num</code> is negative.
239 * @deprecated as of 2.1 (class will become immutable in 3.0)
240 */
241 @Deprecated
242 public void setNumberOfSuccesses(int num) {
243 setNumberOfSuccessesInternal(num);
244 }
245
246 /**
247 * Modify the number of successes.
248 *
249 * @param num the new number of successes.
250 * @throws IllegalArgumentException if <code>num</code> is negative.
251 */
252 private void setNumberOfSuccessesInternal(int num) {
253 if (num < 0) {
254 throw MathRuntimeException.createIllegalArgumentException(
255 LocalizedFormats.NEGATIVE_NUMBER_OF_SUCCESSES, num);
256 }
257 numberOfSuccesses = num;
258 }
259
260 /**
261 * Modify the population size.
262 *
263 * @param size the new population size.
264 * @throws IllegalArgumentException if <code>size</code> is not positive.
265 * @deprecated as of 2.1 (class will become immutable in 3.0)
266 */
267 @Deprecated
268 public void setPopulationSize(int size) {
269 setPopulationSizeInternal(size);
270 }
271
272 /**
273 * Modify the population size.
274 *
275 * @param size the new population size.
276 * @throws IllegalArgumentException if <code>size</code> is not positive.
277 */
278 private void setPopulationSizeInternal(int size) {
279 if (size <= 0) {
280 throw MathRuntimeException.createIllegalArgumentException(
281 LocalizedFormats.NOT_POSITIVE_POPULATION_SIZE, size);
282 }
283 populationSize = size;
284 }
285
286 /**
287 * Modify the sample size.
288 *
289 * @param size the new sample size.
290 * @throws IllegalArgumentException if <code>size</code> is negative.
291 * @deprecated as of 2.1 (class will become immutable in 3.0)
292 */
293 @Deprecated
294 public void setSampleSize(int size) {
295 setSampleSizeInternal(size);
296 }
297 /**
298 * Modify the sample size.
299 *
300 * @param size the new sample size.
301 * @throws IllegalArgumentException if <code>size</code> is negative.
302 */
303 private void setSampleSizeInternal(int size) {
304 if (size < 0) {
305 throw MathRuntimeException.createIllegalArgumentException(
306 LocalizedFormats.NOT_POSITIVE_SAMPLE_SIZE, size);
307 }
308 sampleSize = size;
309 }
310
311 /**
312 * For this distribution, X, this method returns P(X &ge; x).
313 *
314 * @param x the value at which the CDF is evaluated.
315 * @return upper tail CDF for this distribution.
316 * @since 1.1
317 */
318 public double upperCumulativeProbability(int x) {
319 double ret;
320
321 final int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
322 if (x < domain[0]) {
323 ret = 1.0;
324 } else if (x > domain[1]) {
325 ret = 0.0;
326 } else {
327 ret = innerCumulativeProbability(domain[1], x, -1, populationSize, numberOfSuccesses, sampleSize);
328 }
329
330 return ret;
331 }
332
333 /**
334 * For this distribution, X, this method returns P(x0 &le; X &le; x1). This
335 * probability is computed by summing the point probabilities for the values
336 * x0, x0 + 1, x0 + 2, ..., x1, in the order directed by dx.
337 *
338 * @param x0 the inclusive, lower bound
339 * @param x1 the inclusive, upper bound
340 * @param dx the direction of summation. 1 indicates summing from x0 to x1.
341 * 0 indicates summing from x1 to x0.
342 * @param n the population size.
343 * @param m number of successes in the population.
344 * @param k the sample size.
345 * @return P(x0 &le; X &le; x1).
346 */
347 private double innerCumulativeProbability(int x0, int x1, int dx, int n,
348 int m, int k) {
349 double ret = probability(n, m, k, x0);
350 while (x0 != x1) {
351 x0 += dx;
352 ret += probability(n, m, k, x0);
353 }
354 return ret;
355 }
356
357 /**
358 * Returns the lower bound for the support for the distribution.
359 *
360 * For population size <code>N</code>,
361 * number of successes <code>m</code>, and
362 * sample size <code>n</code>,
363 * the lower bound of the support is
364 * <code>max(0, n + m - N)</code>
365 *
366 * @return lower bound of the support
367 * @since 2.2
368 */
369 public int getSupportLowerBound() {
370 return FastMath.max(0,
371 getSampleSize() + getNumberOfSuccesses() - getPopulationSize());
372 }
373
374 /**
375 * Returns the upper bound for the support of the distribution.
376 *
377 * For number of successes <code>m</code> and
378 * sample size <code>n</code>,
379 * the upper bound of the support is
380 * <code>min(m, n)</code>
381 *
382 * @return upper bound of the support
383 * @since 2.2
384 */
385 public int getSupportUpperBound() {
386 return FastMath.min(getNumberOfSuccesses(), getSampleSize());
387 }
388
389 /**
390 * Returns the mean.
391 *
392 * For population size <code>N</code>,
393 * number of successes <code>m</code>, and
394 * sample size <code>n</code>, the mean is
395 * <code>n * m / N</code>
396 *
397 * @return the mean
398 * @since 2.2
399 */
400 protected double getNumericalMean() {
401 return (double)(getSampleSize() * getNumberOfSuccesses()) / (double)getPopulationSize();
402 }
403
404 /**
405 * Returns the variance.
406 *
407 * For population size <code>N</code>,
408 * number of successes <code>m</code>, and
409 * sample size <code>n</code>, the variance is
410 * <code>[ n * m * (N - n) * (N - m) ] / [ N^2 * (N - 1) ]</code>
411 *
412 * @return the variance
413 * @since 2.2
414 */
415 public double getNumericalVariance() {
416 final double N = getPopulationSize();
417 final double m = getNumberOfSuccesses();
418 final double n = getSampleSize();
419 return ( n * m * (N - n) * (N - m) ) / ( (N*N * (N - 1)) );
420 }
421}