/*
  [Header: soundexmodule.c,v 1.2 95/05/02 15:40:45 dwwillia Exp ]

  Perform soundex comparisons on strings.

  Soundex is an algorithm that hashes English strings into a numerical
  value.  Strings that sound the same are hashed to the same value.  This
  allows for non-literal string matching.

  From: David Wayne Williams <dwwillia@iucf.indiana.edu>
*/
12
13#include <string.h>
14#include <ctype.h>
15#include "Python.h"
16
/* Map one character to its Soundex digit.

   Returns '1'..'6' for consonants that take part in the hash, or 0 for
   every character that is skipped (vowels, W, H, Y and anything that is
   not a letter).  The caller must pass a value representable as
   unsigned char, as required by toupper().
 */
static char soundex_digit(int ch)
{
    switch (toupper(ch))
    {
    case 'B': case 'F': case 'P': case 'V':
        return '1';
    case 'C': case 'G': case 'J': case 'K':
    case 'Q': case 'S': case 'X': case 'Z':
        return '2';
    case 'D': case 'T':
        return '3';
    case 'L':
        return '4';
    case 'M': case 'N':
        return '5';
    case 'R':
        return '6';
    default:
        return 0;
    }
}

/* Compute the 6-character soundex hash of a string.

   str     NUL-terminated input string.
   result  output buffer of at least 7 bytes; always receives exactly
           6 characters followed by a terminating NUL.

   An empty input hashes to "000000".  Otherwise the first character is
   copied upper-cased (whatever it is), and the remaining characters are
   translated as follows:

     1) All vowels, W, H and Y (and any non-letter) are skipped.

     2) BFPV     = 1
        CGJKQSXZ = 2
        DT       = 3
        L        = 4
        MN       = 5
        R        = 6

     3) A digit equal to the character most recently written to result
        is not written again, i.e. duplicate digits are removed.

   The hash is cut off at 6 characters and padded on the right with '0'.
 */
void soundex_hash(char *str, char *result)
{
    char *sptr = str;       /* pointer into str */
    char *rptr = result;    /* pointer into result */

    if (*str == '\0')
    {
        strcpy(result, "000000");
        return;
    }

    /* Preserve the first character of the input string.  The cast to
       unsigned char keeps toupper() defined for bytes with the high
       bit set on platforms where char is signed.
     */
    *(rptr++) = (char) toupper((unsigned char) *(sptr++));

    /* Translate the rest of the input until the hash is full or the
       input is exhausted.
     */
    for (; (rptr - result) < 6 && *sptr != '\0'; sptr++)
    {
        char digit = soundex_digit((unsigned char) *sptr);

        if (digit != 0 && *(rptr - 1) != digit)
            *(rptr++) = digit;
    }

    /* Pad 0's on right side of string out to 6 characters.
     */
    while (rptr < result + 6)
        *(rptr++) = '0';

    /* Terminate the result string.
     */
    result[6] = '\0';
}
105
106
107static PyObject *
108sound_similar(PyObject *self, PyObject *args)
109{
110 char *str1, *str2;
111 int return_value;
112 char res1[7], res2[7];
113
114 if(!PyArg_ParseTuple(args, "ss", &str1, &str2))
115 return NULL;
116
117 soundex_hash(str1, res1);
118 soundex_hash(str2, res2);
119
120 if(!strcmp(res1,res2))
121 return Py_BuildValue("i",1);
122 else
123 return Py_BuildValue("i",0);
124}
125
126/* Python Method Table.
127 */
128static PyMethodDef SoundexMethods[] =
129{
130 {"sound_similar", sound_similar, 1},
131 {NULL, NULL } /* sentinel */
132};
133
134
135/* Register the method table.
136 */
137void
138initsoundex()
139{
140 (void) Py_InitModule("soundex",SoundexMethods);
141}