blob: cfa93399528aa8df3274b32755066858d5152e53 [file] [log] [blame]
Guido van Rossum31affb21995-06-14 22:31:38 +00001/*
Guido van Rossumac562971995-06-14 22:49:20 +00002 [Header: soundexmodule.c,v 1.2 95/05/02 15:40:45 dwwillia Exp ]
Guido van Rossum31affb21995-06-14 22:31:38 +00003
4 Perform soundex comparisons on strings.
5
6 Soundex is an algorithm that hashes English strings into a numerical value.
7 Strings that sound the same are hashed to the same value. This allows
8 for non-literal string matching.
9
10 From: David Wayne Williams <dwwillia@iucf.indiana.edu>
Guido van Rossum50384121996-05-23 22:54:17 +000011
12 Apr 29 1996 - added get_soundex method that returns the soundex of a
13 string (chrish@qnx.com)
14 May 2 1996 - added doc strings (chrish@qnx.com)
Guido van Rossum31affb21995-06-14 22:31:38 +000015*/
16
17#include <string.h>
18#include <ctype.h>
19#include "Python.h"
20
/* Doc string attached to the "soundex" module object. */
static char soundex_module__doc__[] =
    "Perform Soundex comparisons on strings, "
    "allowing non-literal matching.";
23
/* Compute the Soundex-style hash of a NUL-terminated string.
 *
 *   str    - input string; it is only read, never modified.
 *   result - output buffer of at least 7 bytes.  On return it holds
 *            exactly 6 hash characters followed by a terminating '\0'.
 *
 * An empty input hashes to "000000".  Otherwise the first character is
 * preserved (upper-cased) and the rest of the string is translated:
 *
 *   1) All vowels, W, H, Y and any non-letter are skipped.
 *
 *   2) BFPV     = 1
 *      CGJKQSXZ = 2
 *      DT       = 3
 *      L        = 4
 *      MN       = 5
 *      R        = 6
 *
 *   3) A digit is dropped when it equals the character most recently
 *      stored in result.  Skipped characters do not reset this check,
 *      so equal codes separated only by skipped letters also collapse.
 *
 * Translation stops once 6 characters have been produced; shorter codes
 * are padded on the right with '0'.
 */
void soundex_hash(char *str, char *result)
{
    enum { CODE_LEN = 6 };      /* hash characters, excluding the NUL */

    const char *sptr = str;     /* read position in str */
    char *rptr = result;        /* write position in result */

    if (*sptr != '\0') {
        /* Preserve the first character of the input string.  The cast to
           unsigned char keeps toupper() well-defined for bytes >= 0x80 on
           platforms where plain char is signed. */
        *rptr++ = (char)toupper((unsigned char)*sptr++);

        for (; (rptr - result) < CODE_LEN && *sptr != '\0'; sptr++) {
            char digit;

            switch (toupper((unsigned char)*sptr)) {
            case 'B': case 'F': case 'P': case 'V':
                digit = '1';
                break;
            case 'C': case 'G': case 'J': case 'K':
            case 'Q': case 'S': case 'X': case 'Z':
                digit = '2';
                break;
            case 'D': case 'T':
                digit = '3';
                break;
            case 'L':
                digit = '4';
                break;
            case 'M': case 'N':
                digit = '5';
                break;
            case 'R':
                digit = '6';
                break;
            default:
                /* Vowels, W, H, Y and non-letters contribute nothing. */
                continue;
            }

            /* Only emit the first of adjacent equal translations.  rptr - 1
               is always valid here: the first character is already stored. */
            if (*(rptr - 1) != digit)
                *rptr++ = digit;
        }
    }

    /* Pad 0's on the right side of the string out to CODE_LEN characters. */
    while (rptr < result + CODE_LEN)
        *rptr++ = '0';

    /* Terminate the result string. */
    *rptr = '\0';
}
112
113
Guido van Rossum50384121996-05-23 22:54:17 +0000114/* Return the actual soundex value. */
115/* Added by Chris Herborth (chrish@qnx.com) */
116static char soundex_get_soundex__doc__[] =
117 "Return the (English) Soundex hash value for a string.";
118static PyObject *
119get_soundex(PyObject *self, PyObject *args)
120{
121 char *str;
122 int retval;
123 char sdx[7];
124
125 if(!PyArg_ParseTuple( args, "s", &str))
126 return NULL;
127
128 soundex_hash(str, sdx);
129
130 return PyString_FromString(sdx);
131}
132
133static char soundex_sound_similar__doc__[] =
134 "Compare two strings to see if they sound similar (English).";
Guido van Rossum31affb21995-06-14 22:31:38 +0000135static PyObject *
136sound_similar(PyObject *self, PyObject *args)
137{
138 char *str1, *str2;
139 int return_value;
140 char res1[7], res2[7];
141
142 if(!PyArg_ParseTuple(args, "ss", &str1, &str2))
143 return NULL;
144
145 soundex_hash(str1, res1);
146 soundex_hash(str2, res2);
147
148 if(!strcmp(res1,res2))
149 return Py_BuildValue("i",1);
150 else
151 return Py_BuildValue("i",0);
152}
153
154/* Python Method Table.
155 */
156static PyMethodDef SoundexMethods[] =
157{
Guido van Rossum50384121996-05-23 22:54:17 +0000158 {"sound_similar", sound_similar, 1, soundex_sound_similar__doc__},
159 {"get_soundex", get_soundex, 1, soundex_get_soundex__doc__},
160
Guido van Rossum31affb21995-06-14 22:31:38 +0000161 {NULL, NULL } /* sentinel */
162};
163
164
165/* Register the method table.
166 */
167void
168initsoundex()
169{
Guido van Rossum50384121996-05-23 22:54:17 +0000170 (void) Py_InitModule4("soundex",
171 SoundexMethods,
172 soundex_module__doc__,
173 (PyObject *)NULL,
174 PYTHON_API_VERSION);
Guido van Rossum31affb21995-06-14 22:31:38 +0000175}