Return to levenshtein.c CVS log | Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / php / ext / standard |
1.1 ! misho 1: /* ! 2: +----------------------------------------------------------------------+ ! 3: | PHP Version 5 | ! 4: +----------------------------------------------------------------------+ ! 5: | Copyright (c) 1997-2012 The PHP Group | ! 6: +----------------------------------------------------------------------+ ! 7: | This source file is subject to version 3.01 of the PHP license, | ! 8: | that is bundled with this package in the file LICENSE, and is | ! 9: | available through the world-wide-web at the following url: | ! 10: | http://www.php.net/license/3_01.txt | ! 11: | If you did not receive a copy of the PHP license and are unable to | ! 12: | obtain it through the world-wide-web, please send a note to | ! 13: | license@php.net so we can mail you a copy immediately. | ! 14: +----------------------------------------------------------------------+ ! 15: | Author: Hartmut Holzgraefe <hholzgra@php.net> | ! 16: +----------------------------------------------------------------------+ ! 17: */ ! 18: /* $Id: levenshtein.c 321634 2012-01-01 13:15:04Z felipe $ */ ! 19: ! 20: #include "php.h" ! 21: #include <stdlib.h> ! 22: #include <errno.h> ! 23: #include <ctype.h> ! 24: #include "php_string.h" ! 25: ! 26: #define LEVENSHTEIN_MAX_LENGTH 255 ! 27: ! 28: /* {{{ reference_levdist ! 29: * reference implementation, only optimized for memory usage, not speed */ ! 30: static int reference_levdist(const char *s1, int l1, const char *s2, int l2, int cost_ins, int cost_rep, int cost_del ) ! 31: { ! 32: int *p1, *p2, *tmp; ! 33: int i1, i2, c0, c1, c2; ! 34: ! 35: if (l1 == 0) { ! 36: return l2 * cost_ins; ! 37: } ! 38: if (l2 == 0) { ! 39: return l1 * cost_del; ! 40: } ! 41: ! 42: if ((l1 > LEVENSHTEIN_MAX_LENGTH) || (l2 > LEVENSHTEIN_MAX_LENGTH)) { ! 43: return -1; ! 44: } ! 45: p1 = safe_emalloc((l2 + 1), sizeof(int), 0); ! 46: p2 = safe_emalloc((l2 + 1), sizeof(int), 0); ! 47: ! 48: for (i2 = 0; i2 <= l2; i2++) { ! 49: p1[i2] = i2 * cost_ins; ! 50: } ! 51: for (i1 = 0; i1 < l1 ; i1++) { ! 52: p2[0] = p1[0] + cost_del; ! 53: ! 54: for (i2 = 0; i2 < l2; i2++) { ! 55: c0 = p1[i2] + ((s1[i1] == s2[i2]) ? 0 : cost_rep); ! 56: c1 = p1[i2 + 1] + cost_del; ! 57: if (c1 < c0) { ! 58: c0 = c1; ! 59: } ! 60: c2 = p2[i2] + cost_ins; ! 61: if (c2 < c0) { ! 62: c0 = c2; ! 63: } ! 64: p2[i2 + 1] = c0; ! 65: } ! 66: tmp = p1; ! 67: p1 = p2; ! 68: p2 = tmp; ! 69: } ! 70: c0 = p1[l2]; ! 71: ! 72: efree(p1); ! 73: efree(p2); ! 74: ! 75: return c0; ! 76: } ! 77: /* }}} */ ! 78: ! 79: /* {{{ custom_levdist ! 80: */ ! 81: static int custom_levdist(char *str1, char *str2, char *callback_name TSRMLS_DC) ! 82: { ! 83: php_error_docref(NULL TSRMLS_CC, E_WARNING, "The general Levenshtein support is not there yet"); ! 84: /* not there yet */ ! 85: ! 86: return -1; ! 87: } ! 88: /* }}} */ ! 89: ! 90: /* {{{ proto int levenshtein(string str1, string str2[, int cost_ins, int cost_rep, int cost_del]) ! 91: Calculate Levenshtein distance between two strings */ ! 92: PHP_FUNCTION(levenshtein) ! 93: { ! 94: int argc = ZEND_NUM_ARGS(); ! 95: char *str1, *str2; ! 96: char *callback_name; ! 97: int str1_len, str2_len, callback_len; ! 98: long cost_ins, cost_rep, cost_del; ! 99: int distance = -1; ! 100: ! 101: switch (argc) { ! 102: case 2: /* just two strings: use maximum performance version */ ! 103: if (zend_parse_parameters(2 TSRMLS_CC, "ss", &str1, &str1_len, &str2, &str2_len) == FAILURE) { ! 104: return; ! 105: } ! 106: distance = reference_levdist(str1, str1_len, str2, str2_len, 1, 1, 1); ! 107: break; ! 108: ! 109: case 5: /* more general version: calc cost by ins/rep/del weights */ ! 110: if (zend_parse_parameters(5 TSRMLS_CC, "sslll", &str1, &str1_len, &str2, &str2_len, &cost_ins, &cost_rep, &cost_del) == FAILURE) { ! 111: return; ! 112: } ! 113: distance = reference_levdist(str1, str1_len, str2, str2_len, cost_ins, cost_rep, cost_del); ! 114: break; ! 115: ! 116: case 3: /* most general version: calc cost by user-supplied function */ ! 117: if (zend_parse_parameters(3 TSRMLS_CC, "sss", &str1, &str1_len, &str2, &str2_len, &callback_name, &callback_len) == FAILURE) { ! 118: return; ! 119: } ! 120: distance = custom_levdist(str1, str2, callback_name TSRMLS_CC); ! 121: break; ! 122: ! 123: default: ! 124: WRONG_PARAM_COUNT; ! 125: } ! 126: ! 127: if (distance < 0 && /* TODO */ ZEND_NUM_ARGS() != 3) { ! 128: php_error_docref(NULL TSRMLS_CC, E_WARNING, "Argument string(s) too long"); ! 129: } ! 130: ! 131: RETURN_LONG(distance); ! 132: } ! 133: /* }}} */ ! 134: ! 135: /* ! 136: * Local variables: ! 137: * tab-width: 4 ! 138: * c-basic-offset: 4 ! 139: * End: ! 140: * vim600: sw=4 ts=4 fdm=marker ! 141: * vim<600: sw=4 ts=4 ! 142: */