Blame - tools/perf/util/levenshtein.c - kernel/msm-4.9

blob: e521d1516df6eac70c6c2b3d9f925dd9708f4907 [file] [log] [blame]

Ingo Molnar	0780060	2009-04-20 15:00:56 +0200	[diff] [blame]	1	#include "cache.h"
				2	#include "levenshtein.h"
				3
				4	/*
				5	* This function implements the Damerau-Levenshtein algorithm to
				6	* calculate a distance between strings.
				7	*
				8	* Basically, it says how many letters need to be swapped, substituted,
				9	* deleted from, or added to string1, at least, to get string2.
				10	*
				11	* The idea is to build a distance matrix for the substrings of both
				12	* strings. To avoid a large space complexity, only the last three rows
				13	* are kept in memory (if swaps had the same or higher cost as one deletion
				14	* plus one insertion, only two rows would be needed).
				15	*
				16	* At any stage, "i + 1" denotes the length of the current substring of
				17	* string1 that the distance is calculated for.
				18	*
				19	* row2 holds the current row, row1 the previous row (i.e. for the substring
				20	* of string1 of length "i"), and row0 the row before that.
				21	*
				22	* In other words, at the start of the big loop, row2[j + 1] contains the
				23	* Damerau-Levenshtein distance between the substring of string1 of length
				24	* "i" and the substring of string2 of length "j + 1".
				25	*
				26	* All the big loop does is determine the partial minimum-cost paths.
				27	*
				28	* It does so by calculating the costs of the path ending in characters
				29	* i (in string1) and j (in string2), respectively, given that the last
				30	* operation is a substition, a swap, a deletion, or an insertion.
				31	*
				32	* This implementation allows the costs to be weighted:
				33	*
				34	* - w (as in "sWap")
				35	* - s (as in "Substitution")
				36	* - a (for insertion, AKA "Add")
				37	* - d (as in "Deletion")
				38	*
				39	* Note that this algorithm calculates a distance _iff_ d == a.
				40	*/
				41	int levenshtein(const char string1, const char string2,
				42	int w, int s, int a, int d)
				43	{
				44	int len1 = strlen(string1), len2 = strlen(string2);
				45	int row0 = malloc(sizeof(int) (len2 + 1));
				46	int row1 = malloc(sizeof(int) (len2 + 1));
				47	int row2 = malloc(sizeof(int) (len2 + 1));
				48	int i, j;
				49
				50	for (j = 0; j <= len2; j++)
				51	row1[j] = j * a;
				52	for (i = 0; i < len1; i++) {
				53	int *dummy;
				54
				55	row2[0] = (i + 1) * d;
				56	for (j = 0; j < len2; j++) {
				57	/* substitution */
				58	row2[j + 1] = row1[j] + s * (string1[i] != string2[j]);
				59	/* swap */
				60	if (i > 0 && j > 0 && string1[i - 1] == string2[j] &&
				61	string1[i] == string2[j - 1] &&
				62	row2[j + 1] > row0[j - 1] + w)
				63	row2[j + 1] = row0[j - 1] + w;
				64	/* deletion */
				65	if (row2[j + 1] > row1[j + 1] + d)
				66	row2[j + 1] = row1[j + 1] + d;
				67	/* insertion */
				68	if (row2[j + 1] > row2[j] + a)
				69	row2[j + 1] = row2[j] + a;
				70	}
				71
				72	dummy = row0;
				73	row0 = row1;
				74	row1 = row2;
				75	row2 = dummy;
				76	}
				77
				78	i = row1[len2];
				79	free(row0);
				80	free(row1);
				81	free(row2);
				82
				83	return i;
				84	}