Ivan Nikulin | 58cecf1 | 2016-09-15 10:44:19 +0200 | [diff] [blame] | 1 | /* Copyright 2016 Google Inc. All Rights Reserved. |
Eugene Kliuchnikov | dd8fa3e | 2016-09-22 11:32:23 +0200 | [diff] [blame] | 2 | Author: zip753@gmail.com (Ivan Nikulin) |
Ivan Nikulin | 58cecf1 | 2016-09-15 10:44:19 +0200 | [diff] [blame] | 3 | |
| 4 | Distributed under MIT license. |
| 5 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
| 6 | */ |
| 7 | |
| 8 | /* Backward reference visualization tool. Accepts file with backward references |
| 9 | as an input and produces PGM image with histogram of those references. */ |
| 10 | |
| 11 | #include <algorithm> /* min */ |
| 12 | #include <cassert> |
| 13 | #include <cstring> /* memset */ |
| 14 | #include <cmath> /* log, round */ |
| 15 | #include <cstdio> /* fscanf, fprintf */ |
| 16 | #include <cstdint> |
| 17 | |
Eugene Kliuchnikov | dd8fa3e | 2016-09-22 11:32:23 +0200 | [diff] [blame] | 18 | #include <gflags/gflags.h> |
| 19 | using gflags::ParseCommandLineFlags; |
| 20 | |
Ivan Nikulin | 58cecf1 | 2016-09-15 10:44:19 +0200 | [diff] [blame] | 21 | #include "./read_dist.h" |
| 22 | |
Eugene Kliuchnikov | dd8fa3e | 2016-09-22 11:32:23 +0200 | [diff] [blame] | 23 | DEFINE_int32(height, 1000, "Height of the resulting histogam."); |
| 24 | DEFINE_int32(width, 8000, "Width of the resulting histogam."); |
| 25 | DEFINE_int32(size, 1e8, "Size of the compressed file."); |
| 26 | DEFINE_int32(brotli_window, -1, "Size of brotli window in bits."); |
| 27 | DEFINE_uint64(min_distance, 0, "Minimum distance."); |
| 28 | DEFINE_uint64(max_distance, 1 << 30, "Maximum distance."); |
| 29 | DEFINE_bool(with_copies, false, "True if input contains copy length."); |
| 30 | DEFINE_bool(simple, false, "True if using only black and white pixels."); |
| 31 | DEFINE_bool(linear, false, "True if using linear distance mapping."); |
| 32 | DEFINE_uint64(skip, 0, "Number of bytes to skip."); |
Ivan Nikulin | 58cecf1 | 2016-09-15 10:44:19 +0200 | [diff] [blame] | 33 | |
| 34 | inline double DistanceTransform(double x) { |
| 35 | static bool linear = FLAGS_linear; |
| 36 | if (linear) { |
| 37 | return x; |
| 38 | } else { |
| 39 | /* Using log^2 scale because log scale produces big white gap at the bottom |
| 40 | of image. */ |
| 41 | return log(x) * log(x); |
| 42 | } |
| 43 | } |
| 44 | |
| 45 | /* Mapping pixel density on arc function to increase contrast. */ |
| 46 | inline double DensityTransform(double x) { |
| 47 | double z = 255 - x; |
| 48 | return sqrt(255 * 255 - z * z); |
| 49 | } |
| 50 | |
| 51 | inline int GetMaxDistance() { |
| 52 | return FLAGS_max_distance; |
| 53 | } |
| 54 | |
| 55 | void AdjustPosition(int* pos) { |
| 56 | static uint32_t offset = 0; |
| 57 | static int last = 0; |
| 58 | static uint32_t window_size = (1 << FLAGS_brotli_window); |
| 59 | assert(*pos >= 0 && *pos < window_size); |
| 60 | if (*pos < last) { |
| 61 | offset += window_size; |
| 62 | } |
| 63 | last = *pos; |
| 64 | *pos += offset; |
| 65 | } |
| 66 | |
Ivan Nikulin | 0e52c59 | 2016-09-15 16:59:52 +0200 | [diff] [blame] | 67 | void BuildHistogram(FILE* fin, int** histo) { |
Ivan Nikulin | 58cecf1 | 2016-09-15 10:44:19 +0200 | [diff] [blame] | 68 | int height = FLAGS_height; |
| 69 | int width = FLAGS_width; |
| 70 | int skip = FLAGS_skip; |
| 71 | size_t min_distance = FLAGS_min_distance; |
| 72 | |
| 73 | printf("height = %d, width = %d\n", height, width); |
| 74 | |
| 75 | for (int i = 0; i < height; i++) { |
| 76 | for (int j = 0; j < width; j++) { |
Ivan Nikulin | 0e52c59 | 2016-09-15 16:59:52 +0200 | [diff] [blame] | 77 | histo[i][j] = 0; |
Ivan Nikulin | 58cecf1 | 2016-09-15 10:44:19 +0200 | [diff] [blame] | 78 | } |
| 79 | } |
| 80 | |
| 81 | int max_pos = FLAGS_size - skip; |
| 82 | double min_dist = min_distance > 0 ? DistanceTransform(min_distance) : 0; |
| 83 | double max_dist = DistanceTransform(GetMaxDistance()) - min_dist; |
| 84 | int copy, pos, distance, x, y; |
| 85 | double dist; |
| 86 | while (ReadBackwardReference(fin, ©, &pos, &distance)) { |
| 87 | if (pos == -1) continue; // In case when only insert is present. |
| 88 | if (distance < min_distance || distance >= GetMaxDistance()) continue; |
Eugene Kliuchnikov | dd8fa3e | 2016-09-22 11:32:23 +0200 | [diff] [blame] | 89 | if (FLAGS_brotli_window != -1) { |
Ivan Nikulin | 58cecf1 | 2016-09-15 10:44:19 +0200 | [diff] [blame] | 90 | AdjustPosition(&pos); |
| 91 | } |
| 92 | if (pos >= skip && distance <= pos) { |
| 93 | pos -= skip; |
| 94 | if (pos >= max_pos) break; |
| 95 | dist = DistanceTransform(static_cast<double>(distance)) - min_dist; |
| 96 | |
| 97 | x = std::min(static_cast<int>(round(dist / max_dist * height)), |
| 98 | height - 1); |
| 99 | y = 1ul * pos * width / max_pos; |
| 100 | if (!(y >= 0 && y < width)) { |
| 101 | printf("pos = %d, max_pos = %d, y = %d\n", pos, max_pos, y); |
| 102 | assert(y >= 0 && y < width); |
| 103 | } |
| 104 | |
| 105 | if (FLAGS_with_copies) { |
| 106 | int right = 1ul * (pos + copy - 1) * width / max_pos; |
| 107 | if (right < 0) { |
| 108 | printf("pos = %d, distance = %d, copy = %d, y = %d, right = %d\n", |
| 109 | pos, distance, copy, y, right); |
| 110 | assert(right >= 0); |
| 111 | } |
| 112 | if (y == right) { |
Ivan Nikulin | 0e52c59 | 2016-09-15 16:59:52 +0200 | [diff] [blame] | 113 | histo[x][y] += copy; |
Ivan Nikulin | 58cecf1 | 2016-09-15 10:44:19 +0200 | [diff] [blame] | 114 | } else { |
| 115 | int pos2 = static_cast<int>(ceil(1.0 * (y + 1) * max_pos / width)); |
Ivan Nikulin | 0e52c59 | 2016-09-15 16:59:52 +0200 | [diff] [blame] | 116 | histo[x][y] += pos2 - pos; |
Ivan Nikulin | 58cecf1 | 2016-09-15 10:44:19 +0200 | [diff] [blame] | 117 | for (int i = y + 1; i < right && i < width; ++i) { |
Ivan Nikulin | 0e52c59 | 2016-09-15 16:59:52 +0200 | [diff] [blame] | 118 | histo[x][i] += max_pos / width; // Sometimes 1 more, but who cares. |
Ivan Nikulin | 58cecf1 | 2016-09-15 10:44:19 +0200 | [diff] [blame] | 119 | } |
| 120 | // Make sure the match doesn't go beyond the image. |
| 121 | if (right < width) { |
| 122 | pos2 = static_cast<int>(ceil(1.0 * right * max_pos / width)); |
Ivan Nikulin | 0e52c59 | 2016-09-15 16:59:52 +0200 | [diff] [blame] | 123 | histo[x][right] += pos + copy - 1 - pos2 + 1; |
Ivan Nikulin | 58cecf1 | 2016-09-15 10:44:19 +0200 | [diff] [blame] | 124 | } |
| 125 | } |
| 126 | } else { |
Ivan Nikulin | 0e52c59 | 2016-09-15 16:59:52 +0200 | [diff] [blame] | 127 | histo[x][y]++; |
Ivan Nikulin | 58cecf1 | 2016-09-15 10:44:19 +0200 | [diff] [blame] | 128 | } |
| 129 | } |
| 130 | } |
| 131 | } |
| 132 | |
Ivan Nikulin | 0e52c59 | 2016-09-15 16:59:52 +0200 | [diff] [blame] | 133 | void ConvertToPixels(int** histo, uint8_t** pixel) { |
Ivan Nikulin | 58cecf1 | 2016-09-15 10:44:19 +0200 | [diff] [blame] | 134 | int height = FLAGS_height; |
| 135 | int width = FLAGS_width; |
| 136 | |
| 137 | int maxs = 0; |
| 138 | for (int i = 0; i < height; i++) { |
| 139 | for (int j = 0; j < width; j++) { |
Ivan Nikulin | 0e52c59 | 2016-09-15 16:59:52 +0200 | [diff] [blame] | 140 | if (maxs < histo[i][j]) maxs = histo[i][j]; |
Ivan Nikulin | 58cecf1 | 2016-09-15 10:44:19 +0200 | [diff] [blame] | 141 | } |
| 142 | } |
| 143 | |
| 144 | bool simple = FLAGS_simple; |
Ivan Nikulin | 0e52c59 | 2016-09-15 16:59:52 +0200 | [diff] [blame] | 145 | double max_histo = static_cast<double>(maxs); |
Ivan Nikulin | 58cecf1 | 2016-09-15 10:44:19 +0200 | [diff] [blame] | 146 | for (int i = 0; i < height; i++) { |
| 147 | for (int j = 0; j < width; j++) { |
| 148 | if (simple) { |
Ivan Nikulin | 0e52c59 | 2016-09-15 16:59:52 +0200 | [diff] [blame] | 149 | pixel[i][j] = histo[i][j] > 0 ? 0 : 255; |
Ivan Nikulin | 58cecf1 | 2016-09-15 10:44:19 +0200 | [diff] [blame] | 150 | } else { |
| 151 | pixel[i][j] = static_cast<uint8_t>( |
Ivan Nikulin | 0e52c59 | 2016-09-15 16:59:52 +0200 | [diff] [blame] | 152 | 255 - DensityTransform(histo[i][j] / max_histo * 255)); |
Ivan Nikulin | 58cecf1 | 2016-09-15 10:44:19 +0200 | [diff] [blame] | 153 | } |
| 154 | } |
| 155 | } |
| 156 | } |
| 157 | |
| 158 | void DrawPixels(uint8_t** pixel, FILE* fout) { |
| 159 | int height = FLAGS_height; |
| 160 | int width = FLAGS_width; |
| 161 | |
| 162 | fprintf(fout, "P5\n%d %d\n255\n", width, height); |
| 163 | for (int i = height - 1; i >= 0; i--) { |
| 164 | fwrite(pixel[i], 1, width, fout); |
| 165 | } |
| 166 | } |
| 167 | |
| 168 | int main(int argc, char* argv[]) { |
Eugene Kliuchnikov | dd8fa3e | 2016-09-22 11:32:23 +0200 | [diff] [blame] | 169 | ParseCommandLineFlags(&argc, &argv, true); |
| 170 | if (argc != 3) { |
| 171 | printf("usage: draw_histogram.cc data output_file\n"); |
Ivan Nikulin | 58cecf1 | 2016-09-15 10:44:19 +0200 | [diff] [blame] | 172 | return 1; |
| 173 | } |
| 174 | |
| 175 | int height = FLAGS_height; |
| 176 | int width = FLAGS_width; |
| 177 | |
| 178 | FILE* fin = fopen(argv[1], "r"); |
Eugene Kliuchnikov | dd8fa3e | 2016-09-22 11:32:23 +0200 | [diff] [blame] | 179 | FILE* fout = fopen(argv[2], "wb"); |
Ivan Nikulin | 58cecf1 | 2016-09-15 10:44:19 +0200 | [diff] [blame] | 180 | |
Ivan Nikulin | 58cecf1 | 2016-09-15 10:44:19 +0200 | [diff] [blame] | 181 | uint8_t** pixel = new uint8_t*[height]; |
Ivan Nikulin | 0e52c59 | 2016-09-15 16:59:52 +0200 | [diff] [blame] | 182 | int** histo = new int*[height]; |
Ivan Nikulin | 58cecf1 | 2016-09-15 10:44:19 +0200 | [diff] [blame] | 183 | for (int i = 0; i < height; i++) { |
| 184 | pixel[i] = new uint8_t[width]; |
Ivan Nikulin | 0e52c59 | 2016-09-15 16:59:52 +0200 | [diff] [blame] | 185 | histo[i] = new int[width]; |
Ivan Nikulin | 58cecf1 | 2016-09-15 10:44:19 +0200 | [diff] [blame] | 186 | } |
| 187 | |
Ivan Nikulin | 0e52c59 | 2016-09-15 16:59:52 +0200 | [diff] [blame] | 188 | BuildHistogram(fin, histo); |
Ivan Nikulin | 58cecf1 | 2016-09-15 10:44:19 +0200 | [diff] [blame] | 189 | fclose(fin); |
| 190 | |
Ivan Nikulin | 0e52c59 | 2016-09-15 16:59:52 +0200 | [diff] [blame] | 191 | ConvertToPixels(histo, pixel); |
Ivan Nikulin | 58cecf1 | 2016-09-15 10:44:19 +0200 | [diff] [blame] | 192 | |
| 193 | DrawPixels(pixel, fout); |
| 194 | fclose(fout); |
| 195 | |
| 196 | return 0; |
| 197 | } |