blob: 9f195284a9452e3da6b2a2df8fd15a5176588b03 [file] [log] [blame]
Raymonddee08492015-04-02 10:43:13 -07001/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18package org.apache.commons.math.distribution;
19
20import java.io.Serializable;
21
22import org.apache.commons.math.MathRuntimeException;
23import org.apache.commons.math.exception.util.LocalizedFormats;
24import org.apache.commons.math.util.FastMath;
25
26/**
27 * Implementation for the {@link ZipfDistribution}.
28 *
29 * @version $Revision: 1054524 $ $Date: 2011-01-03 05:59:18 +0100 (lun. 03 janv. 2011) $
30 */
31public class ZipfDistributionImpl extends AbstractIntegerDistribution
32 implements ZipfDistribution, Serializable {
33
34 /** Serializable version identifier. */
35 private static final long serialVersionUID = -140627372283420404L;
36
37 /** Number of elements. */
38 private int numberOfElements;
39
40 /** Exponent parameter of the distribution. */
41 private double exponent;
42
43 /**
44 * Create a new Zipf distribution with the given number of elements and
45 * exponent. Both values must be positive; otherwise an
46 * <code>IllegalArgumentException</code> is thrown.
47 *
48 * @param numberOfElements the number of elements
49 * @param exponent the exponent
50 * @exception IllegalArgumentException if n &le; 0 or s &le; 0.0
51 */
52 public ZipfDistributionImpl(final int numberOfElements, final double exponent)
53 throws IllegalArgumentException {
54 setNumberOfElementsInternal(numberOfElements);
55 setExponentInternal(exponent);
56 }
57
58 /**
59 * Get the number of elements (e.g. corpus size) for the distribution.
60 *
61 * @return the number of elements
62 */
63 public int getNumberOfElements() {
64 return numberOfElements;
65 }
66
67 /**
68 * Set the number of elements (e.g. corpus size) for the distribution.
69 * The parameter value must be positive; otherwise an
70 * <code>IllegalArgumentException</code> is thrown.
71 *
72 * @param n the number of elements
73 * @exception IllegalArgumentException if n &le; 0
74 * @deprecated as of 2.1 (class will become immutable in 3.0)
75 */
76 @Deprecated
77 public void setNumberOfElements(final int n) {
78 setNumberOfElementsInternal(n);
79 }
80 /**
81 * Set the number of elements (e.g. corpus size) for the distribution.
82 * The parameter value must be positive; otherwise an
83 * <code>IllegalArgumentException</code> is thrown.
84 *
85 * @param n the number of elements
86 * @exception IllegalArgumentException if n &le; 0
87 */
88 private void setNumberOfElementsInternal(final int n)
89 throws IllegalArgumentException {
90 if (n <= 0) {
91 throw MathRuntimeException.createIllegalArgumentException(
92 LocalizedFormats.INSUFFICIENT_DIMENSION, n, 0);
93 }
94 this.numberOfElements = n;
95 }
96
97 /**
98 * Get the exponent characterising the distribution.
99 *
100 * @return the exponent
101 */
102 public double getExponent() {
103 return exponent;
104 }
105
106 /**
107 * Set the exponent characterising the distribution.
108 * The parameter value must be positive; otherwise an
109 * <code>IllegalArgumentException</code> is thrown.
110 *
111 * @param s the exponent
112 * @exception IllegalArgumentException if s &le; 0.0
113 * @deprecated as of 2.1 (class will become immutable in 3.0)
114 */
115 @Deprecated
116 public void setExponent(final double s) {
117 setExponentInternal(s);
118 }
119
120 /**
121 * Set the exponent characterising the distribution.
122 * The parameter value must be positive; otherwise an
123 * <code>IllegalArgumentException</code> is thrown.
124 *
125 * @param s the exponent
126 * @exception IllegalArgumentException if s &le; 0.0
127 */
128 private void setExponentInternal(final double s)
129 throws IllegalArgumentException {
130 if (s <= 0.0) {
131 throw MathRuntimeException.createIllegalArgumentException(
132 LocalizedFormats.NOT_POSITIVE_EXPONENT,
133 s);
134 }
135 this.exponent = s;
136 }
137
138 /**
139 * The probability mass function P(X = x) for a Zipf distribution.
140 *
141 * @param x the value at which the probability density function is evaluated.
142 * @return the value of the probability mass function at x
143 */
144 public double probability(final int x) {
145 if (x <= 0 || x > numberOfElements) {
146 return 0.0;
147 }
148
149 return (1.0 / FastMath.pow(x, exponent)) / generalizedHarmonic(numberOfElements, exponent);
150
151 }
152
153 /**
154 * The probability distribution function P(X <= x) for a Zipf distribution.
155 *
156 * @param x the value at which the PDF is evaluated.
157 * @return Zipf distribution function evaluated at x
158 */
159 @Override
160 public double cumulativeProbability(final int x) {
161 if (x <= 0) {
162 return 0.0;
163 } else if (x >= numberOfElements) {
164 return 1.0;
165 }
166
167 return generalizedHarmonic(x, exponent) / generalizedHarmonic(numberOfElements, exponent);
168
169 }
170
171 /**
172 * Access the domain value lower bound, based on <code>p</code>, used to
173 * bracket a PDF root.
174 *
175 * @param p the desired probability for the critical value
176 * @return domain value lower bound, i.e.
177 * P(X &lt; <i>lower bound</i>) &lt; <code>p</code>
178 */
179 @Override
180 protected int getDomainLowerBound(final double p) {
181 return 0;
182 }
183
184 /**
185 * Access the domain value upper bound, based on <code>p</code>, used to
186 * bracket a PDF root.
187 *
188 * @param p the desired probability for the critical value
189 * @return domain value upper bound, i.e.
190 * P(X &lt; <i>upper bound</i>) &gt; <code>p</code>
191 */
192 @Override
193 protected int getDomainUpperBound(final double p) {
194 return numberOfElements;
195 }
196
197
198 /**
199 * Calculates the Nth generalized harmonic number. See
200 * <a href="http://mathworld.wolfram.com/HarmonicSeries.html">Harmonic
201 * Series</a>.
202 *
203 * @param n the term in the series to calculate (must be &ge; 1)
204 * @param m the exponent; special case m == 1.0 is the harmonic series
205 * @return the nth generalized harmonic number
206 */
207 private double generalizedHarmonic(final int n, final double m) {
208 double value = 0;
209 for (int k = n; k > 0; --k) {
210 value += 1.0 / FastMath.pow(k, m);
211 }
212 return value;
213 }
214
215 /**
216 * Returns the lower bound of the support for the distribution.
217 *
218 * The lower bound of the support is always 1 no matter the parameters.
219 *
220 * @return lower bound of the support (always 1)
221 * @since 2.2
222 */
223 public int getSupportLowerBound() {
224 return 1;
225 }
226
227 /**
228 * Returns the upper bound of the support for the distribution.
229 *
230 * The upper bound of the support is the number of elements
231 *
232 * @return upper bound of the support
233 * @since 2.2
234 */
235 public int getSupportUpperBound() {
236 return getNumberOfElements();
237 }
238
239 /**
240 * Returns the mean.
241 *
242 * For number of elements N and exponent s, the mean is
243 * <code>Hs1 / Hs</code> where
244 * <ul>
245 * <li><code>Hs1 = generalizedHarmonic(N, s - 1)</code></li>
246 * <li><code>Hs = generalizedHarmonic(N, s)</code></li>
247 * </ul>
248 *
249 * @return the mean
250 * @since 2.2
251 */
252 protected double getNumericalMean() {
253 final int N = getNumberOfElements();
254 final double s = getExponent();
255
256 final double Hs1 = generalizedHarmonic(N, s - 1);
257 final double Hs = generalizedHarmonic(N, s);
258
259 return Hs1 / Hs;
260 }
261
262 /**
263 * Returns the variance.
264 *
265 * For number of elements N and exponent s, the mean is
266 * <code>(Hs2 / Hs) - (Hs1^2 / Hs^2)</code> where
267 * <ul>
268 * <li><code>Hs2 = generalizedHarmonic(N, s - 2)</code></li>
269 * <li><code>Hs1 = generalizedHarmonic(N, s - 1)</code></li>
270 * <li><code>Hs = generalizedHarmonic(N, s)</code></li>
271 * </ul>
272 *
273 * @return the variance
274 * @since 2.2
275 */
276 protected double getNumericalVariance() {
277 final int N = getNumberOfElements();
278 final double s = getExponent();
279
280 final double Hs2 = generalizedHarmonic(N, s - 2);
281 final double Hs1 = generalizedHarmonic(N, s - 1);
282 final double Hs = generalizedHarmonic(N, s);
283
284 return (Hs2 / Hs) - ((Hs1 * Hs1) / (Hs * Hs));
285 }
286}