osdl.net!shemminger | b3f23c9 | 2005-02-09 22:05:41 +0000 | [diff] [blame] | 1 | /* |
| 2 | * Experimental data distribution table generator |
Stephen Hemminger | ebfd0f3 | 2006-06-15 16:22:09 -0700 | [diff] [blame] | 3 | * Taken from the uncopyrighted NISTnet code (public domain). |
osdl.net!shemminger | b3f23c9 | 2005-02-09 22:05:41 +0000 | [diff] [blame] | 4 | * |
Stephen Hemminger | ebfd0f3 | 2006-06-15 16:22:09 -0700 | [diff] [blame] | 5 | * Read in a series of "random" data values, either |
osdl.net!shemminger | b3f23c9 | 2005-02-09 22:05:41 +0000 | [diff] [blame] | 6 | * experimentally or generated from some probability distribution. |
| 7 | * From this, create the inverse distribution table used to approximate |
| 8 | * the distribution. |
| 9 | */ |
| 10 | #include <stdio.h> |
| 11 | #include <stdlib.h> |
| 12 | #include <math.h> |
| 13 | #include <malloc.h> |
| 14 | #include <string.h> |
| 15 | #include <sys/types.h> |
| 16 | #include <sys/stat.h> |
| 17 | |
| 18 | |
| 19 | double * |
| 20 | readdoubles(FILE *fp, int *number) |
| 21 | { |
| 22 | struct stat info; |
| 23 | double *x; |
| 24 | int limit; |
| 25 | int n=0, i; |
| 26 | |
| 27 | fstat(fileno(fp), &info); |
| 28 | if (info.st_size > 0) { |
| 29 | limit = 2*info.st_size/sizeof(double); /* @@ approximate */ |
| 30 | } else { |
| 31 | limit = 10000; |
| 32 | } |
| 33 | |
| 34 | x = calloc(limit, sizeof(double)); |
| 35 | if (!x) { |
| 36 | perror("double alloc"); |
| 37 | exit(3); |
| 38 | } |
| 39 | |
| 40 | for (i=0; i<limit; ++i){ |
| 41 | fscanf(fp, "%lf", &x[i]); |
| 42 | if (feof(fp)) |
| 43 | break; |
| 44 | ++n; |
| 45 | } |
| 46 | *number = n; |
| 47 | return x; |
| 48 | } |
| 49 | |
| 50 | void |
| 51 | arraystats(double *x, int limit, double *mu, double *sigma, double *rho) |
| 52 | { |
| 53 | int n=0, i; |
| 54 | double sumsquare=0.0, sum=0.0, top=0.0; |
| 55 | double sigma2=0.0; |
| 56 | |
| 57 | for (i=0; i<limit; ++i){ |
| 58 | sumsquare += x[i]*x[i]; |
| 59 | sum += x[i]; |
| 60 | ++n; |
| 61 | } |
| 62 | *mu = sum/(double)n; |
| 63 | *sigma = sqrt((sumsquare - (double)n*(*mu)*(*mu))/(double)(n-1)); |
| 64 | |
| 65 | for (i=1; i < n; ++i){ |
| 66 | top += ((double)x[i]- *mu)*((double)x[i-1]- *mu); |
| 67 | sigma2 += ((double)x[i-1] - *mu)*((double)x[i-1] - *mu); |
| 68 | |
| 69 | } |
| 70 | *rho = top/sigma2; |
| 71 | } |
| 72 | |
| 73 | /* Create a (normalized) distribution table from a set of observed |
| 74 | * values. The table is fixed to run from (as it happens) -4 to +4, |
| 75 | * with granularity .00002. |
| 76 | */ |
| 77 | |
| 78 | #define TABLESIZE 16384/4 |
| 79 | #define TABLEFACTOR 8192 |
| 80 | #ifndef MINSHORT |
| 81 | #define MINSHORT -32768 |
| 82 | #define MAXSHORT 32767 |
| 83 | #endif |
| 84 | |
| 85 | /* Since entries in the inverse are scaled by TABLEFACTOR, and can't be bigger |
| 86 | * than MAXSHORT, we don't bother looking at a larger domain than this: |
| 87 | */ |
| 88 | #define DISTTABLEDOMAIN ((MAXSHORT/TABLEFACTOR)+1) |
| 89 | #define DISTTABLEGRANULARITY 50000 |
| 90 | #define DISTTABLESIZE (DISTTABLEDOMAIN*DISTTABLEGRANULARITY*2) |
| 91 | |
| 92 | static int * |
| 93 | makedist(double *x, int limit, double mu, double sigma) |
| 94 | { |
| 95 | int *table; |
| 96 | int i, index, first=DISTTABLESIZE, last=0; |
| 97 | double input; |
| 98 | |
| 99 | table = calloc(DISTTABLESIZE, sizeof(int)); |
| 100 | if (!table) { |
| 101 | perror("table alloc"); |
| 102 | exit(3); |
| 103 | } |
| 104 | |
| 105 | for (i=0; i < limit; ++i) { |
| 106 | /* Normalize value */ |
| 107 | input = (x[i]-mu)/sigma; |
| 108 | |
| 109 | index = (int)rint((input+DISTTABLEDOMAIN)*DISTTABLEGRANULARITY); |
| 110 | if (index < 0) index = 0; |
| 111 | if (index >= DISTTABLESIZE) index = DISTTABLESIZE-1; |
| 112 | ++table[index]; |
| 113 | if (index > last) |
| 114 | last = index +1; |
| 115 | if (index < first) |
| 116 | first = index; |
| 117 | } |
| 118 | return table; |
| 119 | } |
| 120 | |
| 121 | /* replace an array by its cumulative distribution */ |
| 122 | static void |
| 123 | cumulativedist(int *table, int limit, int *total) |
| 124 | { |
| 125 | int accum=0; |
| 126 | |
| 127 | while (--limit >= 0) { |
| 128 | accum += *table; |
| 129 | *table++ = accum; |
| 130 | } |
| 131 | *total = accum; |
| 132 | } |
| 133 | |
| 134 | static short * |
| 135 | inverttable(int *table, int inversesize, int tablesize, int cumulative) |
| 136 | { |
| 137 | int i, inverseindex, inversevalue; |
| 138 | short *inverse; |
| 139 | double findex, fvalue; |
| 140 | |
| 141 | inverse = (short *)malloc(inversesize*sizeof(short)); |
| 142 | for (i=0; i < inversesize; ++i) { |
| 143 | inverse[i] = MINSHORT; |
| 144 | } |
| 145 | for (i=0; i < tablesize; ++i) { |
| 146 | findex = ((double)i/(double)DISTTABLEGRANULARITY) - DISTTABLEDOMAIN; |
| 147 | fvalue = (double)table[i]/(double)cumulative; |
| 148 | inverseindex = (int)rint(fvalue*inversesize); |
| 149 | inversevalue = (int)rint(findex*TABLEFACTOR); |
| 150 | if (inversevalue <= MINSHORT) inversevalue = MINSHORT+1; |
| 151 | if (inversevalue > MAXSHORT) inversevalue = MAXSHORT; |
Stephen Hemminger | 2d3af16 | 2017-04-12 10:10:44 -0700 | [diff] [blame] | 152 | if (inverseindex >= inversesize) inverseindex = inversesize- 1; |
| 153 | |
osdl.net!shemminger | b3f23c9 | 2005-02-09 22:05:41 +0000 | [diff] [blame] | 154 | inverse[inverseindex] = inversevalue; |
| 155 | } |
| 156 | return inverse; |
| 157 | |
| 158 | } |
| 159 | |
| 160 | /* Run simple linear interpolation over the table to fill in missing entries */ |
| 161 | static void |
| 162 | interpolatetable(short *table, int limit) |
| 163 | { |
| 164 | int i, j, last, lasti = -1; |
| 165 | |
| 166 | last = MINSHORT; |
| 167 | for (i=0; i < limit; ++i) { |
| 168 | if (table[i] == MINSHORT) { |
| 169 | for (j=i; j < limit; ++j) |
| 170 | if (table[j] != MINSHORT) |
| 171 | break; |
| 172 | if (j < limit) { |
| 173 | table[i] = last + (i-lasti)*(table[j]-last)/(j-lasti); |
| 174 | } else { |
| 175 | table[i] = last + (i-lasti)*(MAXSHORT-last)/(limit-lasti); |
| 176 | } |
| 177 | } else { |
| 178 | last = table[i]; |
| 179 | lasti = i; |
| 180 | } |
| 181 | } |
| 182 | } |
| 183 | |
| 184 | static void |
| 185 | printtable(const short *table, int limit) |
| 186 | { |
| 187 | int i; |
| 188 | |
| 189 | printf("# This is the distribution table for the experimental distribution.\n"); |
| 190 | |
| 191 | for (i=0 ; i < limit; ++i) { |
| 192 | printf("%d%c", table[i], |
| 193 | (i % 8) == 7 ? '\n' : ' '); |
| 194 | } |
| 195 | } |
| 196 | |
Stephen Hemminger | 81c6179 | 2006-12-13 17:05:50 -0800 | [diff] [blame] | 197 | int |
osdl.net!shemminger | b3f23c9 | 2005-02-09 22:05:41 +0000 | [diff] [blame] | 198 | main(int argc, char **argv) |
| 199 | { |
| 200 | FILE *fp; |
| 201 | double *x; |
| 202 | double mu, sigma, rho; |
| 203 | int limit; |
| 204 | int *table; |
| 205 | short *inverse; |
| 206 | int total; |
| 207 | |
| 208 | if (argc > 1) { |
| 209 | if (!(fp = fopen(argv[1], "r"))) { |
| 210 | perror(argv[1]); |
| 211 | exit(1); |
| 212 | } |
| 213 | } else { |
| 214 | fp = stdin; |
Stephen Hemminger | e9e9365 | 2016-03-27 10:47:46 -0700 | [diff] [blame] | 215 | } |
osdl.net!shemminger | b3f23c9 | 2005-02-09 22:05:41 +0000 | [diff] [blame] | 216 | x = readdoubles(fp, &limit); |
| 217 | if (limit <= 0) { |
| 218 | fprintf(stderr, "Nothing much read!\n"); |
| 219 | exit(2); |
| 220 | } |
| 221 | arraystats(x, limit, &mu, &sigma, &rho); |
| 222 | #ifdef DEBUG |
| 223 | fprintf(stderr, "%d values, mu %10.4f, sigma %10.4f, rho %10.4f\n", |
| 224 | limit, mu, sigma, rho); |
| 225 | #endif |
Stephen Hemminger | e9e9365 | 2016-03-27 10:47:46 -0700 | [diff] [blame] | 226 | |
osdl.net!shemminger | b3f23c9 | 2005-02-09 22:05:41 +0000 | [diff] [blame] | 227 | table = makedist(x, limit, mu, sigma); |
| 228 | free((void *) x); |
| 229 | cumulativedist(table, DISTTABLESIZE, &total); |
| 230 | inverse = inverttable(table, TABLESIZE, DISTTABLESIZE, total); |
| 231 | interpolatetable(inverse, TABLESIZE); |
| 232 | printtable(inverse, TABLESIZE); |
| 233 | return 0; |
| 234 | } |