Branch and cache miss speedups

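Just some low-hanging fruit, e.g. in the LFSR code: replace the
goto in the value-generation loop with a plain if/else, and mark the
out-of-range retry condition as unlikely.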

Signed-off-by: Jens Axboe <axboe@fb.com>
diff --git a/lib/lfsr.c b/lib/lfsr.c
index 927b2a1..9771318 100644
--- a/lib/lfsr.c
+++ b/lib/lfsr.c
@@ -2,6 +2,7 @@
 #include <math.h>
 
 #include "lfsr.h"
+#include "../compiler/compiler.h"
 
 /*
  * LFSR taps retrieved from:
@@ -132,11 +133,9 @@
 		if (fl->cycle_length && !--fl->cycle_length) {
 			__lfsr_next(fl, fl->spin + 1);
 			fl->cycle_length = fl->cached_cycle_length;
-			goto check;
-		}
-		__lfsr_next(fl, fl->spin);
-check: ;
-	} while (fl->last_val > fl->max_val);
+		} else
+			__lfsr_next(fl, fl->spin);
+	} while (fio_unlikely(fl->last_val > fl->max_val));
 
 	*off = fl->last_val;
 	return 0;