Return to soundex.c CVS log | Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / php / ext / standard |
1.1 misho 1: /*
2: +----------------------------------------------------------------------+
3: | PHP Version 5 |
4: +----------------------------------------------------------------------+
5: | Copyright (c) 1997-2012 The PHP Group |
6: +----------------------------------------------------------------------+
7: | This source file is subject to version 3.01 of the PHP license, |
8: | that is bundled with this package in the file LICENSE, and is |
9: | available through the world-wide-web at the following url: |
10: | http://www.php.net/license/3_01.txt |
11: | If you did not receive a copy of the PHP license and are unable to |
12: | obtain it through the world-wide-web, please send a note to |
13: | license@php.net so we can mail you a copy immediately. |
14: +----------------------------------------------------------------------+
15: | Author: Bjørn Borud - Guardian Networks AS <borud@guardian.no> |
16: +----------------------------------------------------------------------+
17: */
1.1.1.2 ! misho 18: /* $Id$ */
1.1 misho 19:
20: #include "php.h"
21: #include <stdlib.h>
22: #include <errno.h>
23: #include <ctype.h>
24: #include "php_string.h"
25:
26: /* Simple soundex algorithm as described by Knuth in TAOCP, vol 3 */
27: /* {{{ proto string soundex(string str)
28: Calculate the soundex key of a string */
29: PHP_FUNCTION(soundex)
30: {
31: char *str;
32: int i, _small, str_len, code, last;
33: char soundex[4 + 1];
34:
35: static char soundex_table[26] =
36: {0, /* A */
37: '1', /* B */
38: '2', /* C */
39: '3', /* D */
40: 0, /* E */
41: '1', /* F */
42: '2', /* G */
43: 0, /* H */
44: 0, /* I */
45: '2', /* J */
46: '2', /* K */
47: '4', /* L */
48: '5', /* M */
49: '5', /* N */
50: 0, /* O */
51: '1', /* P */
52: '2', /* Q */
53: '6', /* R */
54: '2', /* S */
55: '3', /* T */
56: 0, /* U */
57: '1', /* V */
58: 0, /* W */
59: '2', /* X */
60: 0, /* Y */
61: '2'}; /* Z */
62:
63: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &str, &str_len) == FAILURE) {
64: return;
65: }
66: if (str_len == 0) {
67: RETURN_FALSE;
68: }
69:
70: /* build soundex string */
71: last = -1;
72: for (i = 0, _small = 0; i < str_len && _small < 4; i++) {
73: /* convert chars to upper case and strip non-letter chars */
74: /* BUG: should also map here accented letters used in non */
75: /* English words or names (also found in English text!): */
76: /* esstsett, thorn, n-tilde, c-cedilla, s-caron, ... */
77: code = toupper((int)(unsigned char)str[i]);
78: if (code >= 'A' && code <= 'Z') {
79: if (_small == 0) {
80: /* remember first valid char */
81: soundex[_small++] = code;
82: last = soundex_table[code - 'A'];
83: }
84: else {
85: /* ignore sequences of consonants with same soundex */
86: /* code in trail, and vowels unless they separate */
87: /* consonant letters */
88: code = soundex_table[code - 'A'];
89: if (code != last) {
90: if (code != 0) {
91: soundex[_small++] = code;
92: }
93: last = code;
94: }
95: }
96: }
97: }
98: /* pad with '0' and terminate with 0 ;-) */
99: while (_small < 4) {
100: soundex[_small++] = '0';
101: }
102: soundex[_small] = '\0';
103:
104: RETURN_STRINGL(soundex, _small, 1);
105: }
106: /* }}} */
107:
108: /*
109: * Local variables:
110: * tab-width: 4
111: * c-basic-offset: 4
112: * End:
113: * vim600: sw=4 ts=4 fdm=marker
114: * vim<600: sw=4 ts=4
115: */