File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / php / ext / standard / soundex.c
Revision 1.1.1.4 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Sun Jun 15 20:03:57 2014 UTC (10 years, 1 month ago) by misho
Branches: php, MAIN
CVS tags: v5_4_29, HEAD
php 5.4.29

    1: /*
    2:    +----------------------------------------------------------------------+
    3:    | PHP Version 5                                                        |
    4:    +----------------------------------------------------------------------+
    5:    | Copyright (c) 1997-2014 The PHP Group                                |
    6:    +----------------------------------------------------------------------+
    7:    | This source file is subject to version 3.01 of the PHP license,      |
    8:    | that is bundled with this package in the file LICENSE, and is        |
    9:    | available through the world-wide-web at the following url:           |
   10:    | http://www.php.net/license/3_01.txt                                  |
   11:    | If you did not receive a copy of the PHP license and are unable to   |
   12:    | obtain it through the world-wide-web, please send a note to          |
   13:    | license@php.net so we can mail you a copy immediately.               |
   14:    +----------------------------------------------------------------------+
   15:    | Author: Bjørn Borud - Guardian Networks AS <borud@guardian.no>       |
   16:    +----------------------------------------------------------------------+
   17:  */
   18: /* $Id: soundex.c,v 1.1.1.4 2014/06/15 20:03:57 misho Exp $ */
   19: 
   20: #include "php.h"
   21: #include <stdlib.h>
   22: #include <errno.h>
   23: #include <ctype.h>
   24: #include "php_string.h"
   25: 
   26: /* Simple soundex algorithm as described by Knuth in TAOCP, vol 3 */
   27: /* {{{ proto string soundex(string str)
   28:    Calculate the soundex key of a string */
   29: PHP_FUNCTION(soundex)
   30: {
   31: 	char	*str;
   32: 	int	i, _small, str_len, code, last;
   33: 	char	soundex[4 + 1];
   34: 
   35: 	static char soundex_table[26] =
   36: 	{0,							/* A */
   37: 	 '1',						/* B */
   38: 	 '2',						/* C */
   39: 	 '3',						/* D */
   40: 	 0,							/* E */
   41: 	 '1',						/* F */
   42: 	 '2',						/* G */
   43: 	 0,							/* H */
   44: 	 0,							/* I */
   45: 	 '2',						/* J */
   46: 	 '2',						/* K */
   47: 	 '4',						/* L */
   48: 	 '5',						/* M */
   49: 	 '5',						/* N */
   50: 	 0,							/* O */
   51: 	 '1',						/* P */
   52: 	 '2',						/* Q */
   53: 	 '6',						/* R */
   54: 	 '2',						/* S */
   55: 	 '3',						/* T */
   56: 	 0,							/* U */
   57: 	 '1',						/* V */
   58: 	 0,							/* W */
   59: 	 '2',						/* X */
   60: 	 0,							/* Y */
   61: 	 '2'};						/* Z */
   62: 
   63: 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &str, &str_len) == FAILURE) {
   64: 		return;
   65: 	}
   66: 	if (str_len == 0) {
   67: 		RETURN_FALSE;
   68: 	}
   69: 
   70: 	/* build soundex string */
   71: 	last = -1;
   72: 	for (i = 0, _small = 0; i < str_len && _small < 4; i++) {
   73: 		/* convert chars to upper case and strip non-letter chars */
   74: 		/* BUG: should also map here accented letters used in non */
   75: 		/* English words or names (also found in English text!): */
   76: 		/* esstsett, thorn, n-tilde, c-cedilla, s-caron, ... */
   77: 		code = toupper((int)(unsigned char)str[i]);
   78: 		if (code >= 'A' && code <= 'Z') {
   79: 			if (_small == 0) {
   80: 				/* remember first valid char */
   81: 				soundex[_small++] = code;
   82: 				last = soundex_table[code - 'A'];
   83: 			}
   84: 			else {
   85: 				/* ignore sequences of consonants with same soundex */
   86: 				/* code in trail, and vowels unless they separate */
   87: 				/* consonant letters */
   88: 				code = soundex_table[code - 'A'];
   89: 				if (code != last) {
   90: 					if (code != 0) {
   91: 						soundex[_small++] = code;
   92: 					}
   93: 					last = code;
   94: 				}
   95: 			}
   96: 		}
   97: 	}
   98: 	/* pad with '0' and terminate with 0 ;-) */
   99: 	while (_small < 4) {
  100: 		soundex[_small++] = '0';
  101: 	}
  102: 	soundex[_small] = '\0';
  103: 
  104: 	RETURN_STRINGL(soundex, _small, 1);
  105: }
  106: /* }}} */
  107: 
  108: /*
  109:  * Local variables:
  110:  * tab-width: 4
  111:  * c-basic-offset: 4
  112:  * End:
  113:  * vim600: sw=4 ts=4 fdm=marker
  114:  * vim<600: sw=4 ts=4
  115:  */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>