mbligh | 51fca2c | 2008-10-10 21:08:59 +0000 | [diff] [blame^] | 1 | |
| 2 | /* |
| 3 | * This is the latest version of hackbench.c, that tests scheduler and |
| 4 | * unix-socket (or pipe) performance. |
| 5 | * |
| 6 | * Usage: hackbench [-pipe] <num groups> [process|thread] [loops] |
| 7 | * |
| 8 | * Build it with: |
| 9 | * gcc -g -Wall -O2 -o hackbench hackbench.c -lpthread |
| 10 | */ |
| 11 | #if 0 |
| 12 | |
| 13 | Date: Fri, 04 Jan 2008 14:06:26 +0800 |
| 14 | From: "Zhang, Yanmin" <yanmin_zhang@linux.intel.com> |
| 15 | To: LKML <linux-kernel@vger.kernel.org> |
| 16 | Subject: Improve hackbench |
| 17 | Cc: Ingo Molnar <mingo@elte.hu>, Arjan van de Ven <arjan@infradead.org> |
| 18 | |
| 19 | hackbench tests the Linux scheduler. The original program is at |
| 20 | http://devresources.linux-foundation.org/craiger/hackbench/src/hackbench.c |
| 21 | Based on this multi-process version, a nice person created a multi-thread |
| 22 | version. Pls. see |
| 23 | http://www.bullopensource.org/posix/pi-futex/hackbench_pth.c |
| 24 | |
| 25 | When I integrated them into my automation testing system, I found |
| 26 | a couple of issues and did some improvements. |
| 27 | |
| 28 | 1) Merge hackbench: I integrated hackbench_pth.c into hackbench and added a |
| 29 | new parameter which can be used to choose process mode or thread mode. The |
| 30 | default mode is process. |
| 31 | |
| 32 | 2) It runs too fast and ends in a couple of seconds. Sometimes it's too hard to debug |
| 33 | the issues. On my ia64 Montecito machines, the result looks weird when comparing |
| 34 | process mode and thread mode. |
| 35 | I want a stable result and hope the testing could run for a stable longer time, so I |
| 36 | might use performance tools to debug issues. |
| 37 | I added another new parameter,`loops`, which can be used to change variable loops, |
| 38 | so more messages will be passed from writers to receivers. Parameter 'loops' is equal to |
| 39 | 100 by default. |
| 40 | |
| 41 | For example on my 8-core x86_64: |
| 42 | [ymzhang@lkp-st01-x8664 hackbench]$ uname -a |
| 43 | Linux lkp-st01-x8664 2.6.24-rc6 #1 SMP Fri Dec 21 08:32:31 CST 2007 x86_64 x86_64 x86_64 GNU/Linux |
| 44 | [ymzhang@lkp-st01-x8664 hackbench]$ ./hackbench |
| 45 | Usage: hackbench [-pipe] <num groups> [process|thread] [loops] |
| 46 | [ymzhang@lkp-st01-x8664 hackbench]$ ./hackbench 150 process 1000 |
| 47 | Time: 151.533 |
| 48 | [ymzhang@lkp-st01-x8664 hackbench]$ ./hackbench 150 thread 1000 |
| 49 | Time: 153.666 |
| 50 | |
| 51 | |
| 52 | With the same new parameters, I captured the SLUB issue discussed on LKML recently. |
| 53 | |
| 54 | 3) hackbench_pth.c will fail on ia64 machine because pthread_attr_setstacksize always |
| 55 | fails if the stack size is less than 196*1024. I moved this statement within a __ia64__ check. |
| 56 | |
| 57 | |
| 58 | This new program could be compiled with command line: |
| 59 | #gcc -g -Wall -o hackbench hackbench.c -lpthread |
| 60 | |
| 61 | |
| 62 | Thank Ingo for his great comments! |
| 63 | |
| 64 | -yanmin |
| 65 | |
| 66 | --- |
| 67 | |
| 68 | * Nathan Lynch <ntl@pobox.com> wrote: |
| 69 | |
| 70 | > Here's a fixlet for the hackbench program found at |
| 71 | > |
| 72 | > http://people.redhat.com/mingo/cfs-scheduler/tools/hackbench.c |
| 73 | > |
| 74 | > When redirecting hackbench output I am seeing multiple copies of the |
| 75 | > "Running with %d*40 (== %d) tasks" line. Need to flush the buffered |
| 76 | > output before forking. |
| 77 | |
| 78 | #endif |
| 79 | |
| 80 | /* Test groups of 20 processes spraying to 20 receivers */ |
| 81 | #include <pthread.h> |
| 82 | #include <stdio.h> |
| 83 | #include <stdlib.h> |
| 84 | #include <string.h> |
| 85 | #include <errno.h> |
| 86 | #include <unistd.h> |
| 87 | #include <sys/types.h> |
| 88 | #include <sys/socket.h> |
| 89 | #include <sys/wait.h> |
| 90 | #include <sys/time.h> |
| 91 | #include <sys/poll.h> |
| 92 | #include <limits.h> |
| 93 | |
/* Size in bytes of every message a sender writes and a receiver reads. */
#define DATASIZE 100

/* How many rounds each sender makes over all of its receivers. */
static unsigned int loops = 100;

/*
 * Worker flavour: zero selects thread mode, any other value selects
 * process mode, which is the default.
 */
static unsigned int process_mode = 1;

/* Non-zero makes fdpair() create pipes instead of AF_UNIX socket pairs. */
static int use_pipes = 0;
| 102 | |
/*
 * Arguments handed to the sender workers of one group.
 *
 * num_fds   - number of valid entries in out_fds
 * ready_out - descriptor on which a worker announces that it is ready
 * wakefd    - descriptor a worker polls while waiting for the start signal
 * out_fds   - descriptors the sender writes to, one per receiver; the
 *             storage is allocated together with the structure itself
 */
struct sender_context {
	unsigned int num_fds;
	int ready_out;
	int wakefd;
	int out_fds[];	/* C99 flexible array member (was the GNU out_fds[0]) */
};
| 109 | |
/*
 * Arguments handed to a single receiver worker.
 *
 * num_packets - how many DATASIZE-byte messages the receiver reads
 * in_fds      - the descriptor pair of the channel: the receiver reads
 *               from in_fds[0] and, in process mode, closes in_fds[1]
 * ready_out   - descriptor on which the worker announces that it is ready
 * wakefd      - descriptor the worker polls while waiting for the start
 *               signal
 */
struct receiver_context {
	unsigned int num_packets;
	int in_fds[2];
	int ready_out;
	int wakefd;
};
| 116 | |
| 117 | |
/*
 * Report a fatal error and terminate.
 *
 * Prints msg followed by the textual form of the current errno on stderr,
 * then exits with status 1.  Never returns.
 */
static void barf(const char *msg)
{
	const char *reason = strerror(errno);

	fprintf(stderr, "%s (error: %s)\n", msg, reason);
	exit(1);
}
| 123 | |
/*
 * Print the command-line synopsis on stdout and exit with status 1.
 * Never returns.
 *
 * Declared with (void) so the definition is a real prototype and a call
 * with arguments is diagnosed by the compiler.
 */
static void print_usage_exit(void)
{
	printf("Usage: hackbench [-pipe] <num groups> [process|thread] [loops]\n");
	exit(1);
}
| 129 | |
| 130 | static void fdpair(int fds[2]) |
| 131 | { |
| 132 | if (use_pipes) { |
| 133 | if (pipe(fds) == 0) |
| 134 | return; |
| 135 | } else { |
| 136 | if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) == 0) |
| 137 | return; |
| 138 | } |
| 139 | barf("Creating fdpair"); |
| 140 | } |
| 141 | |
/*
 * Announce readiness and block until the start signal arrives.
 *
 * ready_out - descriptor to which one byte is written to say "I am ready"
 * wakefd    - descriptor polled for readability; the call returns once it
 *             has data
 *
 * The wake-up byte is only polled for, never read, so it stays in the
 * channel for every other worker polling the same descriptor.
 * Any failure is fatal (reported through barf()).
 */
static void ready(int ready_out, int wakefd)
{
	/* Only the arrival of the byte matters; give it a defined value. */
	char dummy = 0;
	struct pollfd pollfd = { .fd = wakefd, .events = POLLIN };

	/* Tell them we're ready. */
	if (write(ready_out, &dummy, 1) != 1)
		barf("CLIENT: ready write");

	/* Wait for "GO" signal */
	if (poll(&pollfd, 1, -1) != 1)
		barf("poll");
}
| 156 | |
| 157 | /* Sender sprays loops messages down each file descriptor */ |
| 158 | static void *sender(struct sender_context *ctx) |
| 159 | { |
| 160 | char data[DATASIZE]; |
| 161 | unsigned int i, j; |
| 162 | |
| 163 | ready(ctx->ready_out, ctx->wakefd); |
| 164 | |
| 165 | /* Now pump to every receiver. */ |
| 166 | for (i = 0; i < loops; i++) { |
| 167 | for (j = 0; j < ctx->num_fds; j++) { |
| 168 | int ret, done = 0; |
| 169 | |
| 170 | again: |
| 171 | ret = write(ctx->out_fds[j], data + done, sizeof(data)-done); |
| 172 | if (ret < 0) |
| 173 | barf("SENDER: write"); |
| 174 | done += ret; |
| 175 | if (done < sizeof(data)) |
| 176 | goto again; |
| 177 | } |
| 178 | } |
| 179 | |
| 180 | return NULL; |
| 181 | } |
| 182 | |
| 183 | |
| 184 | /* One receiver per fd */ |
| 185 | static void *receiver(struct receiver_context* ctx) |
| 186 | { |
| 187 | unsigned int i; |
| 188 | |
| 189 | if (process_mode) |
| 190 | close(ctx->in_fds[1]); |
| 191 | |
| 192 | /* Wait for start... */ |
| 193 | ready(ctx->ready_out, ctx->wakefd); |
| 194 | |
| 195 | /* Receive them all */ |
| 196 | for (i = 0; i < ctx->num_packets; i++) { |
| 197 | char data[DATASIZE]; |
| 198 | int ret, done = 0; |
| 199 | |
| 200 | again: |
| 201 | ret = read(ctx->in_fds[0], data + done, DATASIZE - done); |
| 202 | if (ret < 0) |
| 203 | barf("SERVER: read"); |
| 204 | done += ret; |
| 205 | if (done < DATASIZE) |
| 206 | goto again; |
| 207 | } |
| 208 | |
| 209 | return NULL; |
| 210 | } |
| 211 | |
| 212 | pthread_t create_worker(void *ctx, void *(*func)(void *)) |
| 213 | { |
| 214 | pthread_attr_t attr; |
| 215 | pthread_t childid; |
| 216 | int err; |
| 217 | |
| 218 | if (process_mode) { |
| 219 | /* process mode */ |
| 220 | /* Fork the receiver. */ |
| 221 | switch (fork()) { |
| 222 | case -1: barf("fork()"); |
| 223 | case 0: |
| 224 | (*func) (ctx); |
| 225 | exit(0); |
| 226 | } |
| 227 | |
| 228 | return (pthread_t) 0; |
| 229 | } |
| 230 | |
| 231 | if (pthread_attr_init(&attr) != 0) |
| 232 | barf("pthread_attr_init:"); |
| 233 | |
| 234 | #ifndef __ia64__ |
| 235 | if (pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN) != 0) |
| 236 | barf("pthread_attr_setstacksize"); |
| 237 | #endif |
| 238 | |
| 239 | if ((err=pthread_create(&childid, &attr, func, ctx)) != 0) { |
| 240 | fprintf(stderr, "pthread_create failed: %s (%d)\n", strerror(err), err); |
| 241 | exit(-1); |
| 242 | } |
| 243 | return (childid); |
| 244 | } |
| 245 | |
| 246 | void reap_worker(pthread_t id) |
| 247 | { |
| 248 | int status; |
| 249 | |
| 250 | if (process_mode) { |
| 251 | /* process mode */ |
| 252 | wait(&status); |
| 253 | if (!WIFEXITED(status)) |
| 254 | exit(1); |
| 255 | } else { |
| 256 | void *status; |
| 257 | |
| 258 | pthread_join(id, &status); |
| 259 | } |
| 260 | } |
| 261 | |
| 262 | /* One group of senders and receivers */ |
| 263 | static unsigned int group(pthread_t *pth, |
| 264 | unsigned int num_fds, |
| 265 | int ready_out, |
| 266 | int wakefd) |
| 267 | { |
| 268 | unsigned int i; |
| 269 | struct sender_context* snd_ctx = malloc (sizeof(struct sender_context) |
| 270 | +num_fds*sizeof(int)); |
| 271 | |
| 272 | for (i = 0; i < num_fds; i++) { |
| 273 | int fds[2]; |
| 274 | struct receiver_context* ctx = malloc (sizeof(*ctx)); |
| 275 | |
| 276 | if (!ctx) |
| 277 | barf("malloc()"); |
| 278 | |
| 279 | |
| 280 | /* Create the pipe between client and server */ |
| 281 | fdpair(fds); |
| 282 | |
| 283 | ctx->num_packets = num_fds*loops; |
| 284 | ctx->in_fds[0] = fds[0]; |
| 285 | ctx->in_fds[1] = fds[1]; |
| 286 | ctx->ready_out = ready_out; |
| 287 | ctx->wakefd = wakefd; |
| 288 | |
| 289 | pth[i] = create_worker(ctx, (void *)(void *)receiver); |
| 290 | |
| 291 | snd_ctx->out_fds[i] = fds[1]; |
| 292 | if (process_mode) |
| 293 | close(fds[0]); |
| 294 | } |
| 295 | |
| 296 | /* Now we have all the fds, fork the senders */ |
| 297 | for (i = 0; i < num_fds; i++) { |
| 298 | snd_ctx->ready_out = ready_out; |
| 299 | snd_ctx->wakefd = wakefd; |
| 300 | snd_ctx->num_fds = num_fds; |
| 301 | |
| 302 | pth[num_fds+i] = create_worker(snd_ctx, (void *)(void *)sender); |
| 303 | } |
| 304 | |
| 305 | /* Close the fds we have left */ |
| 306 | if (process_mode) |
| 307 | for (i = 0; i < num_fds; i++) |
| 308 | close(snd_ctx->out_fds[i]); |
| 309 | |
| 310 | /* Return number of children to reap */ |
| 311 | return num_fds * 2; |
| 312 | } |
| 313 | |
| 314 | int main(int argc, char *argv[]) |
| 315 | { |
| 316 | unsigned int i, num_groups = 10, total_children; |
| 317 | struct timeval start, stop, diff; |
| 318 | unsigned int num_fds = 20; |
| 319 | int readyfds[2], wakefds[2]; |
| 320 | char dummy; |
| 321 | pthread_t *pth_tab; |
| 322 | |
| 323 | if (argv[1] && strcmp(argv[1], "-pipe") == 0) { |
| 324 | use_pipes = 1; |
| 325 | argc--; |
| 326 | argv++; |
| 327 | } |
| 328 | |
| 329 | if (argc >= 2 && (num_groups = atoi(argv[1])) == 0) |
| 330 | print_usage_exit(); |
| 331 | |
| 332 | printf("Running with %d*40 (== %d) tasks.\n", |
| 333 | num_groups, num_groups*40); |
| 334 | |
| 335 | fflush(NULL); |
| 336 | |
| 337 | if (argc > 2) { |
| 338 | if ( !strcmp(argv[2], "process") ) |
| 339 | process_mode = 1; |
| 340 | else if ( !strcmp(argv[2], "thread") ) |
| 341 | process_mode = 0; |
| 342 | else |
| 343 | print_usage_exit(); |
| 344 | } |
| 345 | |
| 346 | if (argc > 3) |
| 347 | loops = atoi(argv[3]); |
| 348 | |
| 349 | pth_tab = malloc(num_fds * 2 * num_groups * sizeof(pthread_t)); |
| 350 | |
| 351 | if (!pth_tab) |
| 352 | barf("main:malloc()"); |
| 353 | |
| 354 | fdpair(readyfds); |
| 355 | fdpair(wakefds); |
| 356 | |
| 357 | total_children = 0; |
| 358 | for (i = 0; i < num_groups; i++) |
| 359 | total_children += group(pth_tab+total_children, num_fds, readyfds[1], wakefds[0]); |
| 360 | |
| 361 | /* Wait for everyone to be ready */ |
| 362 | for (i = 0; i < total_children; i++) |
| 363 | if (read(readyfds[0], &dummy, 1) != 1) |
| 364 | barf("Reading for readyfds"); |
| 365 | |
| 366 | gettimeofday(&start, NULL); |
| 367 | |
| 368 | /* Kick them off */ |
| 369 | if (write(wakefds[1], &dummy, 1) != 1) |
| 370 | barf("Writing to start them"); |
| 371 | |
| 372 | /* Reap them all */ |
| 373 | for (i = 0; i < total_children; i++) |
| 374 | reap_worker(pth_tab[i]); |
| 375 | |
| 376 | gettimeofday(&stop, NULL); |
| 377 | |
| 378 | /* Print time... */ |
| 379 | timersub(&stop, &start, &diff); |
| 380 | printf("Time: %lu.%03lu\n", diff.tv_sec, diff.tv_usec/1000); |
| 381 | exit(0); |
| 382 | } |
| 383 | |
| 384 | |