blob: 3ef6824892afcb0a6c3b4e9282e764aa57239314 [file] [log] [blame]
Guido van Rossum31affb21995-06-14 22:31:38 +00001/*
Guido van Rossumac562971995-06-14 22:49:20 +00002 [Header: soundexmodule.c,v 1.2 95/05/02 15:40:45 dwwillia Exp ]
Guido van Rossum31affb21995-06-14 22:31:38 +00003
4 Perform soundex comparisons on strings.
5
6 Soundex is an algorithm that hashes English strings into a numerical value.
7 Strings that sound the same are hashed to the same value. This allows
8 for non-literal string matching.
9
10 From: David Wayne Williams <dwwillia@iucf.indiana.edu>
Guido van Rossum50384121996-05-23 22:54:17 +000011
12 Apr 29 1996 - added get_soundex method that returns the soundex of a
13 string (chrish@qnx.com)
14 May 2 1996 - added doc strings (chrish@qnx.com)
Guido van Rossum31affb21995-06-14 22:31:38 +000015*/
16
17#include <string.h>
18#include <ctype.h>
19#include "Python.h"
20
/* Docstring for the module as a whole; handed to Py_InitModule4() by
   initsoundex(). */
static char soundex_module__doc__[] =
    "Perform Soundex comparisons on strings, "
    "allowing non-literal matching.";
23
/* Map one input character to its Soundex digit.

   ch must be a value representable as unsigned char (callers cast before
   passing it), because that is what toupper() requires.

   Returns the digit character '1'..'6' for a coded consonant, or '\0' for
   every character that is skipped (vowels, W, H, Y and anything that is
   not a coded letter).

       BFPV     = 1
       CGJKQSXZ = 2
       DT       = 3
       L        = 4
       MN       = 5
       R        = 6
 */
static char soundex_code(int ch)
{
    switch (toupper(ch)) {
    case 'B': case 'F': case 'P': case 'V':
        return '1';
    case 'C': case 'G': case 'J': case 'K':
    case 'Q': case 'S': case 'X': case 'Z':
        return '2';
    case 'D': case 'T':
        return '3';
    case 'L':
        return '4';
    case 'M': case 'N':
        return '5';
    case 'R':
        return '6';
    default:
        return '\0';
    }
}

/* Compute the six-character Soundex-style hash of a string.

   str    - NUL-terminated input string; it is only read.
   result - caller-supplied buffer of at least 7 bytes.  On return it holds
            exactly six characters followed by a terminating NUL.

   An empty input produces "000000".  Otherwise the first input character
   is upper-cased and copied as-is, and the remaining characters are
   translated to digits by soundex_code().  A digit is appended only when
   it differs from the character most recently written to result, so
   skipped characters do not separate repeated digits.  Translation stops
   once six characters have been produced; shorter results are padded on
   the right with '0'.
 */
static void soundex_hash(const char *str, char *result)
{
    enum { HASH_LEN = 6 };      /* characters in a hash, excluding the NUL */
    const char *in = str;       /* read cursor into str */
    char *out = result;         /* write cursor into result */

    if (*str == '\0') {
        strcpy(result, "000000");
        return;
    }

    /* Preserve the first character of the input string.  The cast to
       unsigned char keeps toupper() defined for bytes >= 0x80 on platforms
       where plain char is signed. */
    *out++ = (char)toupper((unsigned char)*in++);

    for (; (out - result) < HASH_LEN && *in != '\0'; in++) {
        char code = soundex_code((unsigned char)*in);

        /* out[-1] is always valid here: at least the first character has
           been written.  Comparing against it drops adjacent duplicates. */
        if (code != '\0' && out[-1] != code) {
            *out++ = code;
        }
    }

    /* Pad 0's on the right side out to HASH_LEN characters. */
    while (out < result + HASH_LEN) {
        *out++ = '0';
    }

    /* Terminate the result string; out now points at result[HASH_LEN]. */
    *out = '\0';
}
116
117
Guido van Rossum50384121996-05-23 22:54:17 +0000118/* Return the actual soundex value. */
119/* Added by Chris Herborth (chrish@qnx.com) */
120static char soundex_get_soundex__doc__[] =
121 "Return the (English) Soundex hash value for a string.";
122static PyObject *
123get_soundex(PyObject *self, PyObject *args)
124{
125 char *str;
Guido van Rossum50384121996-05-23 22:54:17 +0000126 char sdx[7];
127
128 if(!PyArg_ParseTuple( args, "s", &str))
129 return NULL;
130
131 soundex_hash(str, sdx);
132
133 return PyString_FromString(sdx);
134}
135
136static char soundex_sound_similar__doc__[] =
137 "Compare two strings to see if they sound similar (English).";
Guido van Rossum31affb21995-06-14 22:31:38 +0000138static PyObject *
139sound_similar(PyObject *self, PyObject *args)
140{
141 char *str1, *str2;
Guido van Rossum31affb21995-06-14 22:31:38 +0000142 char res1[7], res2[7];
143
144 if(!PyArg_ParseTuple(args, "ss", &str1, &str2))
145 return NULL;
146
147 soundex_hash(str1, res1);
148 soundex_hash(str2, res2);
149
150 if(!strcmp(res1,res2))
151 return Py_BuildValue("i",1);
152 else
153 return Py_BuildValue("i",0);
154}
155
156/* Python Method Table.
157 */
158static PyMethodDef SoundexMethods[] =
159{
Guido van Rossum50384121996-05-23 22:54:17 +0000160 {"sound_similar", sound_similar, 1, soundex_sound_similar__doc__},
161 {"get_soundex", get_soundex, 1, soundex_get_soundex__doc__},
162
Guido van Rossum31affb21995-06-14 22:31:38 +0000163 {NULL, NULL } /* sentinel */
164};
165
166
167/* Register the method table.
168 */
Guido van Rossum3886bb61998-12-04 18:50:17 +0000169DL_EXPORT(void)
Guido van Rossum31affb21995-06-14 22:31:38 +0000170initsoundex()
171{
Guido van Rossum50384121996-05-23 22:54:17 +0000172 (void) Py_InitModule4("soundex",
173 SoundexMethods,
174 soundex_module__doc__,
175 (PyObject *)NULL,
176 PYTHON_API_VERSION);
Guido van Rossum31affb21995-06-14 22:31:38 +0000177}