genksyms: track symbol checksum changes

Sometimes it is preferable to avoid changes of exported symbol checksums
(to avoid breaking externally provided modules).  When a checksum change
occurs, it can be hard to figure out what caused this change: underlying
types may have changed, or additional type information may simply have
become available at the point where a symbol is exported.

Add a new --reference option to genksyms which allows it to report why
checksums change, based on the type information dumps it creates with the
--dump-types flag.  Genksyms will read in such a dump from a previous run,
and report which symbols have changed (and why).

The behavior can be controlled for an entire build as follows: If
KBUILD_SYMTYPES is set, genksyms uses --dump-types to produce *.symtypes
dump files.  If any *.symref files exist, those will be used as the
reference to check against.  If KBUILD_PRESERVE is set, checksum changes
will fail the build.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
diff --git a/scripts/genksyms/genksyms.c b/scripts/genksyms/genksyms.c
index c249274..ddac174 100644
--- a/scripts/genksyms/genksyms.c
+++ b/scripts/genksyms/genksyms.c
@@ -42,7 +42,8 @@
 int cur_line = 1;
 char *cur_filename;
 
-static int flag_debug, flag_dump_defs, flag_dump_types, flag_warnings;
+static int flag_debug, flag_dump_defs, flag_reference, flag_dump_types,
+	   flag_preserve, flag_warnings;
 static const char *arch = "";
 static const char *mod_prefix = "";
 
@@ -58,6 +59,8 @@
 
 static int equal_list(struct string_list *a, struct string_list *b);
 static void print_list(FILE * f, struct string_list *list);
+static void print_location(void);
+static void print_type_name(enum symbol_type type, const char *name);
 
 /*----------------------------------------------------------------------*/
 
@@ -151,27 +154,68 @@
 
 	for (sym = symtab[h]; sym; sym = sym->hash_next)
 		if (map_to_ns(sym->type) == map_to_ns(ns) &&
-		    strcmp(name, sym->name) == 0)
+		    strcmp(name, sym->name) == 0 &&
+		    sym->is_declared)
 			break;
 
 	return sym;
 }
 
-struct symbol *add_symbol(const char *name, enum symbol_type type,
-			  struct string_list *defn, int is_extern)
+static int is_unknown_symbol(struct symbol *sym)
+{
+	struct string_list *defn;
+
+	return ((sym->type == SYM_STRUCT ||
+		 sym->type == SYM_UNION ||
+		 sym->type == SYM_ENUM) &&
+		(defn = sym->defn)  && defn->tag == SYM_NORMAL &&
+			strcmp(defn->string, "}") == 0 &&
+		(defn = defn->next) && defn->tag == SYM_NORMAL &&
+			strcmp(defn->string, "UNKNOWN") == 0 &&
+		(defn = defn->next) && defn->tag == SYM_NORMAL &&
+			strcmp(defn->string, "{") == 0);
+}
+
+struct symbol *__add_symbol(const char *name, enum symbol_type type,
+			    struct string_list *defn, int is_extern,
+			    int is_reference)
 {
 	unsigned long h = crc32(name) % HASH_BUCKETS;
 	struct symbol *sym;
+	enum symbol_status status = STATUS_UNCHANGED;
 
 	for (sym = symtab[h]; sym; sym = sym->hash_next) {
-		if (map_to_ns(sym->type) == map_to_ns(type)
-		    && strcmp(name, sym->name) == 0) {
-			if (!equal_list(sym->defn, defn))
+		if (map_to_ns(sym->type) == map_to_ns(type) &&
+		    strcmp(name, sym->name) == 0) {
+			if (is_reference)
+				/* fall through */ ;
+			else if (sym->type == type &&
+				 equal_list(sym->defn, defn)) {
+				sym->is_declared = 1;
+				return sym;
+			} else if (!sym->is_declared) {
+				status = is_unknown_symbol(sym) ?
+					STATUS_DEFINED : STATUS_MODIFIED;
+			} else {
 				error_with_pos("redefinition of %s", name);
-			return sym;
+				return sym;
+			}
+			break;
 		}
 	}
 
+	if (sym) {
+		struct symbol **psym;
+
+		for (psym = &symtab[h]; *psym; psym = &(*psym)->hash_next) {
+			if (*psym == sym) {
+				*psym = sym->hash_next;
+				break;
+			}
+		}
+		--nsyms;
+	}
+
 	sym = xmalloc(sizeof(*sym));
 	sym->name = name;
 	sym->type = type;
@@ -183,6 +227,9 @@
 	sym->hash_next = symtab[h];
 	symtab[h] = sym;
 
+	sym->is_declared = !is_reference;
+	sym->status = status;
+
 	if (flag_debug) {
 		fprintf(debugfile, "Defn for %s %s == <",
 			symbol_type_name[type], name);
@@ -196,6 +243,18 @@
 	return sym;
 }
 
+struct symbol *add_symbol(const char *name, enum symbol_type type,
+			  struct string_list *defn, int is_extern)
+{
+	return __add_symbol(name, type, defn, is_extern, 0);
+}
+
+struct symbol *add_reference_symbol(const char *name, enum symbol_type type,
+				    struct string_list *defn, int is_extern)
+{
+	return __add_symbol(name, type, defn, is_extern, 1);
+}
+
 /*----------------------------------------------------------------------*/
 
 void free_node(struct string_list *node)
@@ -236,6 +295,82 @@
 	return !a && !b;
 }
 
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+struct string_list *read_node(FILE *f)
+{
+	char buffer[256];
+	struct string_list node = {
+		.string = buffer,
+		.tag = SYM_NORMAL };
+	int c;
+
+	while ((c = fgetc(f)) != EOF) {
+		if (c == ' ') {
+			if (node.string == buffer)
+				continue;
+			break;
+		} else if (c == '\n') {
+			if (node.string == buffer)
+				return NULL;
+			ungetc(c, f);
+			break;
+		}
+		if (node.string >= buffer + sizeof(buffer) - 1) {
+			fprintf(stderr, "Token too long\n");
+			exit(1);
+		}
+		*node.string++ = c;
+	}
+	if (node.string == buffer)
+		return NULL;
+	*node.string = 0;
+	node.string = buffer;
+
+	if (node.string[1] == '#') {
+		int n;
+
+		for (n = 0; n < ARRAY_SIZE(symbol_type_name); n++) {
+			if (node.string[0] == symbol_type_name[n][0]) {
+				node.tag = n;
+				node.string += 2;
+				return copy_node(&node);
+			}
+		}
+		fprintf(stderr, "Unknown type %c\n", node.string[0]);
+		exit(1);
+	}
+	return copy_node(&node);
+}
+
+static void read_reference(FILE *f)
+{
+	while (!feof(f)) {
+		struct string_list *defn = NULL;
+		struct string_list *sym, *def;
+		int is_extern = 0;
+
+		sym = read_node(f);
+		if (!sym)
+			continue;
+		def = read_node(f);
+		if (def && def->tag == SYM_NORMAL &&
+		    !strcmp(def->string, "extern")) {
+			is_extern = 1;
+			free_node(def);
+			def = read_node(f);
+		}
+		while (def) {
+			def->next = defn;
+			defn = def;
+			def = read_node(f);
+		}
+		add_reference_symbol(xstrdup(sym->string), sym->tag,
+					      defn, is_extern);
+		free_node(sym);
+	}
+}
+
 static void print_node(FILE * f, struct string_list *list)
 {
 	if (list->tag != SYM_NORMAL) {
@@ -311,6 +446,7 @@
 
 		case SYM_TYPEDEF:
 			subsym = find_symbol(cur->string, cur->tag);
+			/* FIXME: Bad reference files can segfault here. */
 			if (subsym->expansion_trail) {
 				if (flag_dump_defs)
 					fprintf(debugfile, "%s ", cur->string);
@@ -347,9 +483,22 @@
 				t = n;
 
 				n = xmalloc(sizeof(*n));
-				n->string = xstrdup("{ UNKNOWN }");
+				n->string = xstrdup("{");
 				n->tag = SYM_NORMAL;
 				n->next = t;
+				t = n;
+
+				n = xmalloc(sizeof(*n));
+				n->string = xstrdup("UNKNOWN");
+				n->tag = SYM_NORMAL;
+				n->next = t;
+				t = n;
+
+				n = xmalloc(sizeof(*n));
+				n->string = xstrdup("}");
+				n->tag = SYM_NORMAL;
+				n->next = t;
+				t = n;
 
 				subsym =
 				    add_symbol(cur->string, cur->tag, n, 0);
@@ -397,20 +546,42 @@
 		error_with_pos("export undefined symbol %s", name);
 	else {
 		unsigned long crc;
+		int has_changed = 0;
 
 		if (flag_dump_defs)
 			fprintf(debugfile, "Export %s == <", name);
 
 		expansion_trail = (struct symbol *)-1L;
 
+		sym->expansion_trail = expansion_trail;
+		expansion_trail = sym;
 		crc = expand_and_crc_sym(sym, 0xffffffff) ^ 0xffffffff;
 
 		sym = expansion_trail;
 		while (sym != (struct symbol *)-1L) {
 			struct symbol *n = sym->expansion_trail;
+
+			if (sym->status != STATUS_UNCHANGED) {
+				if (!has_changed) {
+					print_location();
+					fprintf(stderr, "%s: %s: modversion "
+						"changed because of changes "
+						"in ", flag_preserve ? "error" :
+						       "warning", name);
+				} else
+					fprintf(stderr, ", ");
+				print_type_name(sym->type, sym->name);
+				if (sym->status == STATUS_DEFINED)
+					fprintf(stderr, " (became defined)");
+				has_changed = 1;
+				if (flag_preserve)
+					errors++;
+			}
 			sym->expansion_trail = 0;
 			sym = n;
 		}
+		if (has_changed)
+			fprintf(stderr, "\n");
 
 		if (flag_dump_defs)
 			fputs(">\n", debugfile);
@@ -421,13 +592,26 @@
 }
 
 /*----------------------------------------------------------------------*/
+
+static void print_location(void)
+{
+	fprintf(stderr, "%s:%d: ", cur_filename ? : "<stdin>", cur_line);
+}
+
+static void print_type_name(enum symbol_type type, const char *name)
+{
+	if (type != SYM_NORMAL)
+		fprintf(stderr, "%s %s", symbol_type_name[type], name);
+	else
+		fprintf(stderr, "%s", name);
+}
+
 void error_with_pos(const char *fmt, ...)
 {
 	va_list args;
 
 	if (flag_warnings) {
-		fprintf(stderr, "%s:%d: ", cur_filename ? : "<stdin>",
-			cur_line);
+		print_location();
 
 		va_start(args, fmt);
 		vfprintf(stderr, fmt, args);
@@ -445,7 +629,9 @@
 	      "  -a, --arch            Select architecture\n"
 	      "  -d, --debug           Increment the debug level (repeatable)\n"
 	      "  -D, --dump            Dump expanded symbol defs (for debugging only)\n"
-	      "  -T, --dump-types file Dump expanded types into file (for debugging only)\n"
+	      "  -r, --reference file  Read reference symbols from a file\n"
+	      "  -T, --dump-types file Dump expanded types into file\n"
+	      "  -p, --preserve        Preserve reference modversions or fail\n"
 	      "  -w, --warnings        Enable warnings\n"
 	      "  -q, --quiet           Disable warnings (default)\n"
 	      "  -h, --help            Print this message\n"
@@ -454,7 +640,9 @@
 	      "  -a                    Select architecture\n"
 	      "  -d                    Increment the debug level (repeatable)\n"
 	      "  -D                    Dump expanded symbol defs (for debugging only)\n"
-	      "  -T file               Dump expanded types into file (for debugging only)\n"
+	      "  -r file               Read reference symbols from a file\n"
+	      "  -T file               Dump expanded types into file\n"
+	      "  -p                    Preserve reference modversions or fail\n"
 	      "  -w                    Enable warnings\n"
 	      "  -q                    Disable warnings (default)\n"
 	      "  -h                    Print this message\n"
@@ -465,7 +653,7 @@
 
 int main(int argc, char **argv)
 {
-	FILE *dumpfile = NULL;
+	FILE *dumpfile = NULL, *ref_file = NULL;
 	int o;
 
 #ifdef __GNU_LIBRARY__
@@ -475,16 +663,18 @@
 		{"warnings", 0, 0, 'w'},
 		{"quiet", 0, 0, 'q'},
 		{"dump", 0, 0, 'D'},
+		{"reference", 1, 0, 'r'},
 		{"dump-types", 1, 0, 'T'},
+		{"preserve", 0, 0, 'p'},
 		{"version", 0, 0, 'V'},
 		{"help", 0, 0, 'h'},
 		{0, 0, 0, 0}
 	};
 
-	while ((o = getopt_long(argc, argv, "a:dwqVDT:h",
+	while ((o = getopt_long(argc, argv, "a:dwqVDr:T:ph",
 				&long_opts[0], NULL)) != EOF)
 #else				/* __GNU_LIBRARY__ */
-	while ((o = getopt(argc, argv, "a:dwqVDT:h")) != EOF)
+	while ((o = getopt(argc, argv, "a:dwqVDr:T:ph")) != EOF)
 #endif				/* __GNU_LIBRARY__ */
 		switch (o) {
 		case 'a':
@@ -505,6 +695,14 @@
 		case 'D':
 			flag_dump_defs = 1;
 			break;
+		case 'r':
+			flag_reference = 1;
+			ref_file = fopen(optarg, "r");
+			if (!ref_file) {
+				perror(optarg);
+				return 1;
+			}
+			break;
 		case 'T':
 			flag_dump_types = 1;
 			dumpfile = fopen(optarg, "w");
@@ -513,6 +711,9 @@
 				return 1;
 			}
 			break;
+		case 'p':
+			flag_preserve = 1;
+			break;
 		case 'h':
 			genksyms_usage();
 			return 0;
@@ -533,6 +734,9 @@
 		/* setlinebuf(debugfile); */
 	}
 
+	if (flag_reference)
+		read_reference(ref_file);
+
 	yyparse();
 
 	if (flag_dump_types && visited_symbols) {