blob: 434819e013e663c3bb2212b4e815e4ac65250c34 [file] [log] [blame]
Raymonddee08492015-04-02 10:43:13 -07001/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17package org.apache.commons.math.stat;
18
19import java.io.Serializable;
20import java.text.NumberFormat;
21import java.util.Iterator;
22import java.util.Comparator;
23import java.util.TreeMap;
24
25import org.apache.commons.math.MathRuntimeException;
26import org.apache.commons.math.exception.util.LocalizedFormats;
27
28/**
29 * Maintains a frequency distribution.
30 * <p>
31 * Accepts int, long, char or Comparable values. New values added must be
32 * comparable to those that have been added, otherwise the add method will
33 * throw an IllegalArgumentException.</p>
34 * <p>
35 * Integer values (int, long, Integer, Long) are not distinguished by type --
36 * i.e. <code>addValue(Long.valueOf(2)), addValue(2), addValue(2l)</code> all have
37 * the same effect (similarly for arguments to <code>getCount,</code> etc.).</p>
38 * <p>
39 * char values are converted by <code>addValue</code> to Character instances.
40 * As such, these values are not comparable to integral values, so attempts
41 * to combine integral types with chars in a frequency distribution will fail.
42 * </p>
43 * <p>
44 * The values are ordered using the default (natural order), unless a
45 * <code>Comparator</code> is supplied in the constructor.</p>
46 *
47 * @version $Revision: 1054186 $ $Date: 2011-01-01 03:28:46 +0100 (sam. 01 janv. 2011) $
48 */
49public class Frequency implements Serializable {
50
51 /** Serializable version identifier */
52 private static final long serialVersionUID = -3845586908418844111L;
53
54 /** underlying collection */
55 private final TreeMap<Comparable<?>, Long> freqTable;
56
57 /**
58 * Default constructor.
59 */
60 public Frequency() {
61 freqTable = new TreeMap<Comparable<?>, Long>();
62 }
63
64 /**
65 * Constructor allowing values Comparator to be specified.
66 *
67 * @param comparator Comparator used to order values
68 */
69 @SuppressWarnings("unchecked") // TODO is the cast OK?
70 public Frequency(Comparator<?> comparator) {
71 freqTable = new TreeMap<Comparable<?>, Long>((Comparator<? super Comparable<?>>) comparator);
72 }
73
74 /**
75 * Return a string representation of this frequency
76 * distribution.
77 *
78 * @return a string representation.
79 */
80 @Override
81 public String toString() {
82 NumberFormat nf = NumberFormat.getPercentInstance();
83 StringBuilder outBuffer = new StringBuilder();
84 outBuffer.append("Value \t Freq. \t Pct. \t Cum Pct. \n");
85 Iterator<Comparable<?>> iter = freqTable.keySet().iterator();
86 while (iter.hasNext()) {
87 Comparable<?> value = iter.next();
88 outBuffer.append(value);
89 outBuffer.append('\t');
90 outBuffer.append(getCount(value));
91 outBuffer.append('\t');
92 outBuffer.append(nf.format(getPct(value)));
93 outBuffer.append('\t');
94 outBuffer.append(nf.format(getCumPct(value)));
95 outBuffer.append('\n');
96 }
97 return outBuffer.toString();
98 }
99
100 /**
101 * Adds 1 to the frequency count for v.
102 * <p>
103 * If other objects have already been added to this Frequency, v must
104 * be comparable to those that have already been added.
105 * </p>
106 *
107 * @param v the value to add.
108 * @throws IllegalArgumentException if <code>v</code> is not Comparable,
109 * or is not comparable with previous entries
110 * @deprecated use {@link #addValue(Comparable)} instead
111 */
112 @Deprecated
113 public void addValue(Object v) {
114 if (v instanceof Comparable<?>){
115 addValue((Comparable<?>) v);
116 } else {
117 throw MathRuntimeException.createIllegalArgumentException(
118 LocalizedFormats.CLASS_DOESNT_IMPLEMENT_COMPARABLE,
119 v.getClass().getName());
120 }
121 }
122
123 /**
124 * Adds 1 to the frequency count for v.
125 * <p>
126 * If other objects have already been added to this Frequency, v must
127 * be comparable to those that have already been added.
128 * </p>
129 *
130 * @param v the value to add.
131 * @throws IllegalArgumentException if <code>v</code> is not comparable with previous entries
132 */
133 public void addValue(Comparable<?> v){
134 Comparable<?> obj = v;
135 if (v instanceof Integer) {
136 obj = Long.valueOf(((Integer) v).longValue());
137 }
138 try {
139 Long count = freqTable.get(obj);
140 if (count == null) {
141 freqTable.put(obj, Long.valueOf(1));
142 } else {
143 freqTable.put(obj, Long.valueOf(count.longValue() + 1));
144 }
145 } catch (ClassCastException ex) {
146 //TreeMap will throw ClassCastException if v is not comparable
147 throw MathRuntimeException.createIllegalArgumentException(
148 LocalizedFormats.INSTANCES_NOT_COMPARABLE_TO_EXISTING_VALUES,
149 v.getClass().getName());
150 }
151 }
152
153 /**
154 * Adds 1 to the frequency count for v.
155 *
156 * @param v the value to add.
157 */
158 public void addValue(int v) {
159 addValue(Long.valueOf(v));
160 }
161
162 /**
163 * Adds 1 to the frequency count for v.
164 *
165 * @param v the value to add.
166 * @deprecated to be removed in math 3.0
167 */
168 @Deprecated
169 public void addValue(Integer v) {
170 addValue(Long.valueOf(v.longValue()));
171 }
172
173 /**
174 * Adds 1 to the frequency count for v.
175 *
176 * @param v the value to add.
177 */
178 public void addValue(long v) {
179 addValue(Long.valueOf(v));
180 }
181
182 /**
183 * Adds 1 to the frequency count for v.
184 *
185 * @param v the value to add.
186 */
187 public void addValue(char v) {
188 addValue(Character.valueOf(v));
189 }
190
191 /** Clears the frequency table */
192 public void clear() {
193 freqTable.clear();
194 }
195
196 /**
197 * Returns an Iterator over the set of values that have been added.
198 * <p>
199 * If added values are integral (i.e., integers, longs, Integers, or Longs),
200 * they are converted to Longs when they are added, so the objects returned
201 * by the Iterator will in this case be Longs.</p>
202 *
203 * @return values Iterator
204 */
205 public Iterator<Comparable<?>> valuesIterator() {
206 return freqTable.keySet().iterator();
207 }
208
209 //-------------------------------------------------------------------------
210
211 /**
212 * Returns the sum of all frequencies.
213 *
214 * @return the total frequency count.
215 */
216 public long getSumFreq() {
217 long result = 0;
218 Iterator<Long> iterator = freqTable.values().iterator();
219 while (iterator.hasNext()) {
220 result += iterator.next().longValue();
221 }
222 return result;
223 }
224
225 /**
226 * Returns the number of values = v.
227 * Returns 0 if the value is not comparable.
228 *
229 * @param v the value to lookup.
230 * @return the frequency of v.
231 * @deprecated replaced by {@link #getCount(Comparable)} as of 2.0
232 */
233 @Deprecated
234 public long getCount(Object v) {
235 return getCount((Comparable<?>) v);
236 }
237
238 /**
239 * Returns the number of values = v.
240 * Returns 0 if the value is not comparable.
241 *
242 * @param v the value to lookup.
243 * @return the frequency of v.
244 */
245 public long getCount(Comparable<?> v) {
246 if (v instanceof Integer) {
247 return getCount(((Integer) v).longValue());
248 }
249 long result = 0;
250 try {
251 Long count = freqTable.get(v);
252 if (count != null) {
253 result = count.longValue();
254 }
255 } catch (ClassCastException ex) {
256 // ignore and return 0 -- ClassCastException will be thrown if value is not comparable
257 }
258 return result;
259 }
260
261 /**
262 * Returns the number of values = v.
263 *
264 * @param v the value to lookup.
265 * @return the frequency of v.
266 */
267 public long getCount(int v) {
268 return getCount(Long.valueOf(v));
269 }
270
271 /**
272 * Returns the number of values = v.
273 *
274 * @param v the value to lookup.
275 * @return the frequency of v.
276 */
277 public long getCount(long v) {
278 return getCount(Long.valueOf(v));
279 }
280
281 /**
282 * Returns the number of values = v.
283 *
284 * @param v the value to lookup.
285 * @return the frequency of v.
286 */
287 public long getCount(char v) {
288 return getCount(Character.valueOf(v));
289 }
290
291 /**
292 * Returns the number of values in the frequency table.
293 *
294 * @return the number of unique values that have been added to the frequency table.
295 * @see #valuesIterator()
296 */
297 public int getUniqueCount(){
298 return freqTable.keySet().size();
299 }
300
301 //-------------------------------------------------------------
302
303 /**
304 * Returns the percentage of values that are equal to v
305 * (as a proportion between 0 and 1).
306 * <p>
307 * Returns <code>Double.NaN</code> if no values have been added.</p>
308 *
309 * @param v the value to lookup
310 * @return the proportion of values equal to v
311 * @deprecated replaced by {@link #getPct(Comparable)} as of 2.0
312 */
313 @Deprecated
314 public double getPct(Object v) {
315 return getPct((Comparable<?>) v);
316 }
317
318 /**
319 * Returns the percentage of values that are equal to v
320 * (as a proportion between 0 and 1).
321 * <p>
322 * Returns <code>Double.NaN</code> if no values have been added.</p>
323 *
324 * @param v the value to lookup
325 * @return the proportion of values equal to v
326 */
327 public double getPct(Comparable<?> v) {
328 final long sumFreq = getSumFreq();
329 if (sumFreq == 0) {
330 return Double.NaN;
331 }
332 return (double) getCount(v) / (double) sumFreq;
333 }
334
335 /**
336 * Returns the percentage of values that are equal to v
337 * (as a proportion between 0 and 1).
338 *
339 * @param v the value to lookup
340 * @return the proportion of values equal to v
341 */
342 public double getPct(int v) {
343 return getPct(Long.valueOf(v));
344 }
345
346 /**
347 * Returns the percentage of values that are equal to v
348 * (as a proportion between 0 and 1).
349 *
350 * @param v the value to lookup
351 * @return the proportion of values equal to v
352 */
353 public double getPct(long v) {
354 return getPct(Long.valueOf(v));
355 }
356
357 /**
358 * Returns the percentage of values that are equal to v
359 * (as a proportion between 0 and 1).
360 *
361 * @param v the value to lookup
362 * @return the proportion of values equal to v
363 */
364 public double getPct(char v) {
365 return getPct(Character.valueOf(v));
366 }
367
368 //-----------------------------------------------------------------------------------------
369
370 /**
371 * Returns the cumulative frequency of values less than or equal to v.
372 * <p>
373 * Returns 0 if v is not comparable to the values set.</p>
374 *
375 * @param v the value to lookup.
376 * @return the proportion of values equal to v
377 * @deprecated replaced by {@link #getCumFreq(Comparable)} as of 2.0
378 */
379 @Deprecated
380 public long getCumFreq(Object v) {
381 return getCumFreq((Comparable<?>) v);
382 }
383
384 /**
385 * Returns the cumulative frequency of values less than or equal to v.
386 * <p>
387 * Returns 0 if v is not comparable to the values set.</p>
388 *
389 * @param v the value to lookup.
390 * @return the proportion of values equal to v
391 */
392 public long getCumFreq(Comparable<?> v) {
393 if (getSumFreq() == 0) {
394 return 0;
395 }
396 if (v instanceof Integer) {
397 return getCumFreq(((Integer) v).longValue());
398 }
399 @SuppressWarnings("unchecked") // OK, freqTable is Comparable<?>
400 Comparator<Comparable<?>> c = (Comparator<Comparable<?>>) freqTable.comparator();
401 if (c == null) {
402 c = new NaturalComparator();
403 }
404 long result = 0;
405
406 try {
407 Long value = freqTable.get(v);
408 if (value != null) {
409 result = value.longValue();
410 }
411 } catch (ClassCastException ex) {
412 return result; // v is not comparable
413 }
414
415 if (c.compare(v, freqTable.firstKey()) < 0) {
416 return 0; // v is comparable, but less than first value
417 }
418
419 if (c.compare(v, freqTable.lastKey()) >= 0) {
420 return getSumFreq(); // v is comparable, but greater than the last value
421 }
422
423 Iterator<Comparable<?>> values = valuesIterator();
424 while (values.hasNext()) {
425 Comparable<?> nextValue = values.next();
426 if (c.compare(v, nextValue) > 0) {
427 result += getCount(nextValue);
428 } else {
429 return result;
430 }
431 }
432 return result;
433 }
434
435 /**
436 * Returns the cumulative frequency of values less than or equal to v.
437 * <p>
438 * Returns 0 if v is not comparable to the values set.</p>
439 *
440 * @param v the value to lookup
441 * @return the proportion of values equal to v
442 */
443 public long getCumFreq(int v) {
444 return getCumFreq(Long.valueOf(v));
445 }
446
447 /**
448 * Returns the cumulative frequency of values less than or equal to v.
449 * <p>
450 * Returns 0 if v is not comparable to the values set.</p>
451 *
452 * @param v the value to lookup
453 * @return the proportion of values equal to v
454 */
455 public long getCumFreq(long v) {
456 return getCumFreq(Long.valueOf(v));
457 }
458
459 /**
460 * Returns the cumulative frequency of values less than or equal to v.
461 * <p>
462 * Returns 0 if v is not comparable to the values set.</p>
463 *
464 * @param v the value to lookup
465 * @return the proportion of values equal to v
466 */
467 public long getCumFreq(char v) {
468 return getCumFreq(Character.valueOf(v));
469 }
470
471 //----------------------------------------------------------------------------------------------
472
473 /**
474 * Returns the cumulative percentage of values less than or equal to v
475 * (as a proportion between 0 and 1).
476 * <p>
477 * Returns <code>Double.NaN</code> if no values have been added.
478 * Returns 0 if at least one value has been added, but v is not comparable
479 * to the values set.</p>
480 *
481 * @param v the value to lookup
482 * @return the proportion of values less than or equal to v
483 * @deprecated replaced by {@link #getCumPct(Comparable)} as of 2.0
484 */
485 @Deprecated
486 public double getCumPct(Object v) {
487 return getCumPct((Comparable<?>) v);
488
489 }
490
491 /**
492 * Returns the cumulative percentage of values less than or equal to v
493 * (as a proportion between 0 and 1).
494 * <p>
495 * Returns <code>Double.NaN</code> if no values have been added.
496 * Returns 0 if at least one value has been added, but v is not comparable
497 * to the values set.</p>
498 *
499 * @param v the value to lookup
500 * @return the proportion of values less than or equal to v
501 */
502 public double getCumPct(Comparable<?> v) {
503 final long sumFreq = getSumFreq();
504 if (sumFreq == 0) {
505 return Double.NaN;
506 }
507 return (double) getCumFreq(v) / (double) sumFreq;
508 }
509
510 /**
511 * Returns the cumulative percentage of values less than or equal to v
512 * (as a proportion between 0 and 1).
513 * <p>
514 * Returns 0 if v is not comparable to the values set.</p>
515 *
516 * @param v the value to lookup
517 * @return the proportion of values less than or equal to v
518 */
519 public double getCumPct(int v) {
520 return getCumPct(Long.valueOf(v));
521 }
522
523 /**
524 * Returns the cumulative percentage of values less than or equal to v
525 * (as a proportion between 0 and 1).
526 * <p>
527 * Returns 0 if v is not comparable to the values set.</p>
528 *
529 * @param v the value to lookup
530 * @return the proportion of values less than or equal to v
531 */
532 public double getCumPct(long v) {
533 return getCumPct(Long.valueOf(v));
534 }
535
536 /**
537 * Returns the cumulative percentage of values less than or equal to v
538 * (as a proportion between 0 and 1).
539 * <p>
540 * Returns 0 if v is not comparable to the values set.</p>
541 *
542 * @param v the value to lookup
543 * @return the proportion of values less than or equal to v
544 */
545 public double getCumPct(char v) {
546 return getCumPct(Character.valueOf(v));
547 }
548
549 /**
550 * A Comparator that compares comparable objects using the
551 * natural order. Copied from Commons Collections ComparableComparator.
552 */
553 private static class NaturalComparator<T extends Comparable<T>> implements Comparator<Comparable<T>>, Serializable {
554
555 /** Serializable version identifier */
556 private static final long serialVersionUID = -3852193713161395148L;
557
558 /**
559 * Compare the two {@link Comparable Comparable} arguments.
560 * This method is equivalent to:
561 * <pre>(({@link Comparable Comparable})o1).{@link Comparable#compareTo compareTo}(o2)</pre>
562 *
563 * @param o1 the first object
564 * @param o2 the second object
565 * @return result of comparison
566 * @throws NullPointerException when <i>o1</i> is <code>null</code>,
567 * or when <code>((Comparable)o1).compareTo(o2)</code> does
568 * @throws ClassCastException when <i>o1</i> is not a {@link Comparable Comparable},
569 * or when <code>((Comparable)o1).compareTo(o2)</code> does
570 */
571 @SuppressWarnings("unchecked") // cast to (T) may throw ClassCastException, see Javadoc
572 public int compare(Comparable<T> o1, Comparable<T> o2) {
573 return o1.compareTo((T) o2);
574 }
575 }
576
577 /** {@inheritDoc} */
578 @Override
579 public int hashCode() {
580 final int prime = 31;
581 int result = 1;
582 result = prime * result +
583 ((freqTable == null) ? 0 : freqTable.hashCode());
584 return result;
585 }
586
587 /** {@inheritDoc} */
588 @Override
589 public boolean equals(Object obj) {
590 if (this == obj)
591 return true;
592 if (!(obj instanceof Frequency))
593 return false;
594 Frequency other = (Frequency) obj;
595 if (freqTable == null) {
596 if (other.freqTable != null)
597 return false;
598 } else if (!freqTable.equals(other.freqTable))
599 return false;
600 return true;
601 }
602
603}