blob: d0977dcf6c4dc7a5719491d7010442588a38bf61 [file] [log] [blame]
/* Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2 * Use of this source code is governed by a BSD-style license that can be
3 * found in the LICENSE file.
4 *
5 * This flex program reads /var/log/messages as it grows and saves kernel
6 * warnings to files. It keeps track of warnings it has seen (based on
7 * file/line only, ignoring differences in the stack trace), and reports only
8 * the first warning of each kind, but maintains a count of all warnings by
9 * using their hashes as buckets in a UMA sparse histogram. It also invokes
10 * the crash collector, which collects the warnings and prepares them for later
11 * shipment to the crash server.
12 */
13
14%{
15#include <fcntl.h>
16#include <inttypes.h>
17#include <pwd.h>
18#include <stdarg.h>
19#include <sys/inotify.h>
20#include <sys/select.h>
21#include <sys/stat.h>
22#include <sys/types.h>
23#include <unistd.h>
24
25#include "metrics/c_metrics_library.h"
26
/* Hooks invoked from the scanner rules and from YY_INPUT; they are defined
 * in the user-code section after the rules.
 */
int WarnStart(void);
void WarnEnd(void);
void WarnInput(char *buf, int *result, size_t max_size);

/* Route all scanner input through WarnInput().  `result` is passed by
 * address so WarnInput() can store the number of bytes it read.
 */
#define YY_INPUT(buf, result, max_size) WarnInput(buf, &result, max_size)
32
33%}
34
/* Define a few useful regular expressions. */

/* D:         a single decimal digit.
 * PREFIX:    start of a log line up to and including the kernel timestamp,
 *            i.e. " kernel: [" ... digits "." digits "]".
 * CUT_HERE:  the "------------[ cut here" marker line.
 * WARNING:   beginning of a " WARNING: at " line (the location follows).
 * END_TRACE: the "---[ end trace" marker line.
 */
D [0-9]
PREFIX .*" kernel: [ "*{D}+"."{D}+"]"
CUT_HERE {PREFIX}" ------------[ cut here".*
WARNING {PREFIX}" WARNING: at "
END_TRACE {PREFIX}" ---[ end trace".*

/* Use exclusive start conditions. */
/* PRE_WARN: the rest of the WARNING line is about to be read.
 * WARN:     the body of a warning is being copied to the dump file.
 */
%x PRE_WARN WARN
45
46%%
 /* The scanner itself. */

 /* A "cut here" line directly followed by the start of a "WARNING: at"
  * line begins a warning; PRE_WARN then consumes the rest of that line.
  */
^{CUT_HERE}\n{WARNING} BEGIN(PRE_WARN);
.|\n /* ignore all other input in state 0 */
 /* yytext is the remainder of the WARNING line.  WarnStart() returns
  * nonzero when the warning should be collected; only then is the text
  * echoed to yyout and the WARN state entered.
  */
<PRE_WARN>.*\n if (WarnStart()) {
 BEGIN(WARN); ECHO;
 } else {
 BEGIN(0);
 }

 /* Assume the warning ends at the "end trace" line */
<WARN>^{END_TRACE}\n ECHO; BEGIN(0); WarnEnd();
 /* Any other line inside a warning is copied to yyout unchanged. */
<WARN>^.*\n ECHO;
60
61%%
62
/* The "already seen" set is a bitmap with one bit per hash bucket; hashes
 * are reduced with HASH_BITMAP_MASK before being used as a bit number.
 */
#define HASH_BITMAP_SIZE (1 << 15) /* size in bits */
#define HASH_BITMAP_MASK (HASH_BITMAP_SIZE - 1)

/* Name of the UMA sparse histogram that receives every warning hash. */
const char warn_hist_name[] = "Platform.KernelWarningHashes";
/* Bitmap of hash buckets already reported, 32 buckets per word. */
uint32_t hash_bitmap[HASH_BITMAP_SIZE / 32];
/* Metrics library handle, created and initialized in main(). */
CMetricsLibrary metrics_library;

const char *prog_name; /* the name of this program */
int yyin_fd; /* instead of FILE *yyin to avoid buffering */
int i_fd; /* for inotify, to detect file changes */
int testing; /* 1 if running test */
int filter; /* 1 when using as filter (for development) */
int fifo; /* 1 when reading from fifo (for devel) */
int draining; /* 1 when draining renamed log file */

/* Input log; main() overrides it for --test and --fifo. */
const char *msg_path = "/var/log/messages";
/* Directory holding the dump file; main() recreates it in normal mode. */
const char warn_dump_dir[] = "/var/run/kwarn";
/* File each collected warning is written to; overridden for --test. */
const char *warn_dump_path = "/var/run/kwarn/warning";
/* Shell command run after each collected warning; set in main(). */
const char *crash_reporter_command;
82
83static void Die(const char *format, ...) {
84 va_list ap;
85 va_start(ap, format);
86 fprintf(stderr, "%s: ", prog_name);
87 vfprintf(stderr, format, ap);
88 exit(1);
89}
90
91static void RunCrashReporter(void) {
92 int status = system(crash_reporter_command);
93 if (status != 0)
94 Die("%s exited with status %d\n", crash_reporter_command, status);
95}
96
/* Hash a NUL-terminated string: for each character the running value is
 * multiplied by 33 (shift left by 5, plus itself) and the character is
 * added.  Arithmetic wraps modulo 2^32.  The empty string hashes to 0.
 */
static uint32_t StringHash(const char *string) {
	uint32_t acc = 0;
	const char *p;

	for (p = string; *p != '\0'; p++)
		acc = (acc << 5) + acc + *p;
	return acc;
}
104
105/* We expect only a handful of different warnings per boot session, so the
106 * probability of a collision is very low, and statistically it won't matter
107 * (unless warnings with the same hash also happens in tandem, which is even
108 * rarer).
109 */
110static int HashSeen(uint32_t hash) {
111 int word_index = (hash & HASH_BITMAP_MASK) / 32;
112 int bit_index = (hash & HASH_BITMAP_MASK) % 32;
113 return hash_bitmap[word_index] & 1 << bit_index;
114}
115
116static void SetHashSeen(uint32_t hash) {
117 int word_index = (hash & HASH_BITMAP_MASK) / 32;
118 int bit_index = (hash & HASH_BITMAP_MASK) % 32;
119 hash_bitmap[word_index] |= 1 << bit_index;
120}
121
122int WarnStart(void) {
123 uint32_t hash;
124
125 if (filter)
126 return 1;
127
128 hash = StringHash(yytext);
129 if (!(testing || fifo || filter)) {
130 CMetricsLibrarySendSparseToUMA(metrics_library, warn_hist_name, (int) hash);
131 }
132 if (HashSeen(hash))
133 return 0;
134 SetHashSeen(hash);
135
136 yyout = fopen(warn_dump_path, "w");
137 if (yyout == NULL)
138 Die("fopen %s failed: %s\n", warn_dump_path, strerror(errno));
139 fprintf(yyout, "%08x\n", hash);
140 return 1;
141}
142
143void WarnEnd(void) {
144 if (filter)
145 return;
146 fclose(yyout);
147 yyout = stdout; /* for debugging */
148 RunCrashReporter();
149}
150
151static void WarnOpenInput(const char *path) {
152 yyin_fd = open(path, O_RDONLY);
153 if (yyin_fd < 0)
154 Die("could not open %s: %s\n", path, strerror(errno));
155 if (!fifo) {
156 /* Set up notification of file growth and rename. */
157 i_fd = inotify_init();
158 if (i_fd < 0)
159 Die("inotify_init: %s\n", strerror(errno));
160 if (inotify_add_watch(i_fd, path, IN_MODIFY | IN_MOVE_SELF) < 0)
161 Die("inotify_add_watch: %s\n", strerror(errno));
162 }
163}
164
165/* We replace the default YY_INPUT() for the following reasons:
166 *
167 * 1. We want to read data as soon as it becomes available, but the default
168 * YY_INPUT() uses buffered I/O.
169 *
170 * 2. We want to block on end of input and wait for the file to grow.
171 *
172 * 3. We want to detect log rotation, and reopen the input file as needed.
173 */
174void WarnInput(char *buf, int *result, size_t max_size) {
175 while (1) {
176 *result = read(yyin_fd, buf, max_size);
177 if (*result < 0)
178 Die("read: %s", strerror(errno));
179 if (*result > 0 || fifo || filter)
180 return;
181 if (draining) {
182 /* Assume we're done with this log, and move to next
183 * log. Rsyslogd may keep writing to the old log file
184 * for a while, but we don't care since we don't have
185 * to be exact.
186 */
187 close(yyin_fd);
188 if (YYSTATE == WARN) {
189 /* Be conservative in case we lose the warn
190 * terminator during the switch---or we may
191 * collect personally identifiable information.
192 */
193 WarnEnd();
194 }
195 BEGIN(0); /* see above comment */
196 sleep(1); /* avoid race with log rotator */
197 WarnOpenInput(msg_path);
198 draining = 0;
199 continue;
200 }
201 /* Nothing left to read, so we must wait. */
202 struct inotify_event event;
203 int n = read(i_fd, &event, sizeof(event));
204 if (n <= 0)
205 Die("inotify: %s\n", strerror(errno));
206 if (event.mask & IN_MOVE_SELF) {
207 /* The file has been renamed. Before switching
208 * to the new one, we process any remaining
209 * content of this file.
210 */
211 draining = 1;
212 }
213 }
214}
215
216int main(int argc, char **argv) {
217 int result;
218 struct passwd *user;
219 prog_name = argv[0];
220
221 if (argc == 2 && strcmp(argv[1], "--test") == 0)
222 testing = 1;
223 else if (argc == 2 && strcmp(argv[1], "--filter") == 0)
224 filter = 1;
225 else if (argc == 2 && strcmp(argv[1], "--fifo") == 0) {
226 fifo = 1;
227 } else if (argc != 1) {
228 fprintf(stderr,
229 "usage: %s [single-flag]\n"
230 "flags (for testing only):\n"
231 "--fifo\tinput is fifo \"fifo\", output is stdout\n"
232 "--filter\tinput is stdin, output is stdout\n"
233 "--test\trun self-test\n",
234 prog_name);
235 exit(1);
236 }
237
238 metrics_library = CMetricsLibraryNew();
239 CMetricsLibraryInit(metrics_library);
240
241 crash_reporter_command = testing ?
242 "./warn_collector_test_reporter.sh" :
243 "/sbin/crash_reporter --kernel_warning";
244
245 /* When filtering with --filter (for development) use stdin for input.
246 * Otherwise read input from a file or a fifo.
247 */
248 yyin_fd = fileno(stdin);
249 if (testing) {
250 msg_path = "messages";
251 warn_dump_path = "warning";
252 }
253 if (fifo) {
254 msg_path = "fifo";
255 }
256 if (!filter) {
257 WarnOpenInput(msg_path);
258 }
259
260 /* Create directory for dump file. Still need to be root here. */
261 unlink(warn_dump_path);
262 if (!testing && !fifo && !filter) {
263 rmdir(warn_dump_dir);
264 result = mkdir(warn_dump_dir, 0755);
265 if (result < 0)
266 Die("could not create %s: %s\n",
267 warn_dump_dir, strerror(errno));
268 }
269
270 if (0) {
271 /* TODO(semenzato): put this back in once we decide it's safe
272 * to make /var/spool/crash rwxrwxrwx root, or use a different
273 * owner and setuid for the crash reporter as well.
274 */
275
276 /* Get low privilege uid, gid. */
277 user = getpwnam("chronos");
278 if (user == NULL)
279 Die("getpwnam failed\n");
280
281 /* Change dump directory ownership. */
282 if (chown(warn_dump_dir, user->pw_uid, user->pw_gid) < 0)
283 Die("chown: %s\n", strerror(errno));
284
285 /* Drop privileges. */
286 if (setuid(user->pw_uid) < 0) {
287 Die("setuid: %s\n", strerror(errno));
288 }
289 }
290
291 /* Go! */
292 return yylex();
293}
294
/* Flex should really know not to generate these functions.
 *
 * This function exists only to reference yyunput() and input(), so that
 * the compiler does not warn about them being unused.
 * NOTE(review): presumably nothing ever calls it; confirm before relying
 * on that.
 */
void UnusedFunctionWarningSuppressor(void) {
 yyunput(0, 0);
 (void) input();
}
300}