File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / php / ext / standard / levenshtein.c
Revision 1.1.1.4 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Sun Jun 15 20:03:57 2014 UTC (10 years, 1 month ago) by misho
Branches: php, MAIN
CVS tags: v5_4_29, HEAD
php 5.4.29

    1: /*
    2:    +----------------------------------------------------------------------+
    3:    | PHP Version 5                                                        |
    4:    +----------------------------------------------------------------------+
    5:    | Copyright (c) 1997-2014 The PHP Group                                |
    6:    +----------------------------------------------------------------------+
    7:    | This source file is subject to version 3.01 of the PHP license,      |
    8:    | that is bundled with this package in the file LICENSE, and is        |
    9:    | available through the world-wide-web at the following url:           |
   10:    | http://www.php.net/license/3_01.txt                                  |
   11:    | If you did not receive a copy of the PHP license and are unable to   |
   12:    | obtain it through the world-wide-web, please send a note to          |
   13:    | license@php.net so we can mail you a copy immediately.               |
   14:    +----------------------------------------------------------------------+
   15:    | Author: Hartmut Holzgraefe <hholzgra@php.net>                        |
   16:    +----------------------------------------------------------------------+
   17:  */
   18: /* $Id: levenshtein.c,v 1.1.1.4 2014/06/15 20:03:57 misho Exp $ */
   19: 
   20: #include "php.h"
   21: #include <stdlib.h>
   22: #include <errno.h>
   23: #include <ctype.h>
   24: #include "php_string.h"
   25: 
   26: #define LEVENSHTEIN_MAX_LENGTH 255
   27: 
   28: /* {{{ reference_levdist
   29:  * reference implementation, only optimized for memory usage, not speed */
   30: static int reference_levdist(const char *s1, int l1, const char *s2, int l2, int cost_ins, int cost_rep, int cost_del )
   31: {
   32: 	int *p1, *p2, *tmp;
   33: 	int i1, i2, c0, c1, c2;
   34: 
   35: 	if (l1 == 0) {
   36: 		return l2 * cost_ins;
   37: 	}
   38: 	if (l2 == 0) {
   39: 		return l1 * cost_del;
   40: 	}
   41: 
   42: 	if ((l1 > LEVENSHTEIN_MAX_LENGTH) || (l2 > LEVENSHTEIN_MAX_LENGTH)) {
   43: 		return -1;
   44: 	}
   45: 	p1 = safe_emalloc((l2 + 1), sizeof(int), 0);
   46: 	p2 = safe_emalloc((l2 + 1), sizeof(int), 0);
   47: 
   48: 	for (i2 = 0; i2 <= l2; i2++) {
   49: 		p1[i2] = i2 * cost_ins;
   50: 	}
   51: 	for (i1 = 0; i1 < l1 ; i1++) {
   52: 		p2[0] = p1[0] + cost_del;
   53: 
   54: 		for (i2 = 0; i2 < l2; i2++) {
   55: 			c0 = p1[i2] + ((s1[i1] == s2[i2]) ? 0 : cost_rep);
   56: 			c1 = p1[i2 + 1] + cost_del;
   57: 			if (c1 < c0) {
   58: 				c0 = c1;
   59: 			}
   60: 			c2 = p2[i2] + cost_ins;
   61: 			if (c2 < c0) {
   62: 				c0 = c2;
   63: 			}
   64: 			p2[i2 + 1] = c0;
   65: 		}
   66: 		tmp = p1;
   67: 		p1 = p2;
   68: 		p2 = tmp;
   69: 	}
   70: 	c0 = p1[l2];
   71: 
   72: 	efree(p1);
   73: 	efree(p2);
   74: 
   75: 	return c0;
   76: }
   77: /* }}} */
   78: 
   79: /* {{{ custom_levdist
   80:  */
   81: static int custom_levdist(char *str1, char *str2, char *callback_name TSRMLS_DC)
   82: {
   83: 	php_error_docref(NULL TSRMLS_CC, E_WARNING, "The general Levenshtein support is not there yet");
   84: 	/* not there yet */
   85: 
   86: 	return -1;
   87: }
   88: /* }}} */
   89: 
   90: /* {{{ proto int levenshtein(string str1, string str2[, int cost_ins, int cost_rep, int cost_del])
   91:    Calculate Levenshtein distance between two strings */
   92: PHP_FUNCTION(levenshtein)
   93: {
   94: 	int argc = ZEND_NUM_ARGS();
   95: 	char *str1, *str2;
   96: 	char *callback_name;
   97: 	int str1_len, str2_len, callback_len;
   98: 	long cost_ins, cost_rep, cost_del;
   99: 	int distance = -1;
  100: 
  101: 	switch (argc) {
  102: 		case 2: /* just two strings: use maximum performance version */
  103: 			if (zend_parse_parameters(2 TSRMLS_CC, "ss", &str1, &str1_len, &str2, &str2_len) == FAILURE) {
  104: 				return;
  105: 			}
  106: 			distance = reference_levdist(str1, str1_len, str2, str2_len, 1, 1, 1);
  107: 			break;
  108: 
  109: 		case 5: /* more general version: calc cost by ins/rep/del weights */
  110: 			if (zend_parse_parameters(5 TSRMLS_CC, "sslll", &str1, &str1_len, &str2, &str2_len, &cost_ins, &cost_rep, &cost_del) == FAILURE) {
  111: 				return;
  112: 			}
  113: 			distance = reference_levdist(str1, str1_len, str2, str2_len, cost_ins, cost_rep, cost_del);
  114: 			break;
  115: 
  116: 		case 3: /* most general version: calc cost by user-supplied function */
  117: 			if (zend_parse_parameters(3 TSRMLS_CC, "sss", &str1, &str1_len, &str2, &str2_len, &callback_name, &callback_len) == FAILURE) {
  118: 				return;
  119: 			}
  120: 			distance = custom_levdist(str1, str2, callback_name TSRMLS_CC);
  121: 			break;
  122: 
  123: 		default:
  124: 			WRONG_PARAM_COUNT;
  125: 	}
  126: 
  127: 	if (distance < 0 && /* TODO */ ZEND_NUM_ARGS() != 3) {
  128: 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Argument string(s) too long");
  129: 	}
  130: 
  131: 	RETURN_LONG(distance);
  132: }
  133: /* }}} */
  134: 
  135: /*
  136:  * Local variables:
  137:  * tab-width: 4
  138:  * c-basic-offset: 4
  139:  * End:
  140:  * vim600: sw=4 ts=4 fdm=marker
  141:  * vim<600: sw=4 ts=4
  142:  */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>