Annotation of embedaddon/php/ext/standard/levenshtein.c, revision 1.1.1.3

1.1       misho       1: /*
                      2:    +----------------------------------------------------------------------+
                      3:    | PHP Version 5                                                        |
                      4:    +----------------------------------------------------------------------+
1.1.1.3 ! misho       5:    | Copyright (c) 1997-2013 The PHP Group                                |
1.1       misho       6:    +----------------------------------------------------------------------+
                      7:    | This source file is subject to version 3.01 of the PHP license,      |
                      8:    | that is bundled with this package in the file LICENSE, and is        |
                      9:    | available through the world-wide-web at the following url:           |
                     10:    | http://www.php.net/license/3_01.txt                                  |
                     11:    | If you did not receive a copy of the PHP license and are unable to   |
                     12:    | obtain it through the world-wide-web, please send a note to          |
                     13:    | license@php.net so we can mail you a copy immediately.               |
                     14:    +----------------------------------------------------------------------+
                     15:    | Author: Hartmut Holzgraefe <hholzgra@php.net>                        |
                     16:    +----------------------------------------------------------------------+
                     17:  */
1.1.1.2   misho      18: /* $Id$ */
1.1       misho      19: 
                     20: #include "php.h"
                     21: #include <stdlib.h>
                     22: #include <errno.h>
                     23: #include <ctype.h>
                     24: #include "php_string.h"
                     25: 
                     26: #define LEVENSHTEIN_MAX_LENGTH 255
                     27: 
                     28: /* {{{ reference_levdist
                     29:  * reference implementation, only optimized for memory usage, not speed */
                     30: static int reference_levdist(const char *s1, int l1, const char *s2, int l2, int cost_ins, int cost_rep, int cost_del )
                     31: {
                     32:        int *p1, *p2, *tmp;
                     33:        int i1, i2, c0, c1, c2;
                     34: 
                     35:        if (l1 == 0) {
                     36:                return l2 * cost_ins;
                     37:        }
                     38:        if (l2 == 0) {
                     39:                return l1 * cost_del;
                     40:        }
                     41: 
                     42:        if ((l1 > LEVENSHTEIN_MAX_LENGTH) || (l2 > LEVENSHTEIN_MAX_LENGTH)) {
                     43:                return -1;
                     44:        }
                     45:        p1 = safe_emalloc((l2 + 1), sizeof(int), 0);
                     46:        p2 = safe_emalloc((l2 + 1), sizeof(int), 0);
                     47: 
                     48:        for (i2 = 0; i2 <= l2; i2++) {
                     49:                p1[i2] = i2 * cost_ins;
                     50:        }
                     51:        for (i1 = 0; i1 < l1 ; i1++) {
                     52:                p2[0] = p1[0] + cost_del;
                     53: 
                     54:                for (i2 = 0; i2 < l2; i2++) {
                     55:                        c0 = p1[i2] + ((s1[i1] == s2[i2]) ? 0 : cost_rep);
                     56:                        c1 = p1[i2 + 1] + cost_del;
                     57:                        if (c1 < c0) {
                     58:                                c0 = c1;
                     59:                        }
                     60:                        c2 = p2[i2] + cost_ins;
                     61:                        if (c2 < c0) {
                     62:                                c0 = c2;
                     63:                        }
                     64:                        p2[i2 + 1] = c0;
                     65:                }
                     66:                tmp = p1;
                     67:                p1 = p2;
                     68:                p2 = tmp;
                     69:        }
                     70:        c0 = p1[l2];
                     71: 
                     72:        efree(p1);
                     73:        efree(p2);
                     74: 
                     75:        return c0;
                     76: }
                     77: /* }}} */
                     78: 
                     79: /* {{{ custom_levdist
                     80:  */
                     81: static int custom_levdist(char *str1, char *str2, char *callback_name TSRMLS_DC)
                     82: {
                     83:        php_error_docref(NULL TSRMLS_CC, E_WARNING, "The general Levenshtein support is not there yet");
                     84:        /* not there yet */
                     85: 
                     86:        return -1;
                     87: }
                     88: /* }}} */
                     89: 
                     90: /* {{{ proto int levenshtein(string str1, string str2[, int cost_ins, int cost_rep, int cost_del])
                     91:    Calculate Levenshtein distance between two strings */
                     92: PHP_FUNCTION(levenshtein)
                     93: {
                     94:        int argc = ZEND_NUM_ARGS();
                     95:        char *str1, *str2;
                     96:        char *callback_name;
                     97:        int str1_len, str2_len, callback_len;
                     98:        long cost_ins, cost_rep, cost_del;
                     99:        int distance = -1;
                    100: 
                    101:        switch (argc) {
                    102:                case 2: /* just two strings: use maximum performance version */
                    103:                        if (zend_parse_parameters(2 TSRMLS_CC, "ss", &str1, &str1_len, &str2, &str2_len) == FAILURE) {
                    104:                                return;
                    105:                        }
                    106:                        distance = reference_levdist(str1, str1_len, str2, str2_len, 1, 1, 1);
                    107:                        break;
                    108: 
                    109:                case 5: /* more general version: calc cost by ins/rep/del weights */
                    110:                        if (zend_parse_parameters(5 TSRMLS_CC, "sslll", &str1, &str1_len, &str2, &str2_len, &cost_ins, &cost_rep, &cost_del) == FAILURE) {
                    111:                                return;
                    112:                        }
                    113:                        distance = reference_levdist(str1, str1_len, str2, str2_len, cost_ins, cost_rep, cost_del);
                    114:                        break;
                    115: 
                    116:                case 3: /* most general version: calc cost by user-supplied function */
                    117:                        if (zend_parse_parameters(3 TSRMLS_CC, "sss", &str1, &str1_len, &str2, &str2_len, &callback_name, &callback_len) == FAILURE) {
                    118:                                return;
                    119:                        }
                    120:                        distance = custom_levdist(str1, str2, callback_name TSRMLS_CC);
                    121:                        break;
                    122: 
                    123:                default:
                    124:                        WRONG_PARAM_COUNT;
                    125:        }
                    126: 
                    127:        if (distance < 0 && /* TODO */ ZEND_NUM_ARGS() != 3) {
                    128:                php_error_docref(NULL TSRMLS_CC, E_WARNING, "Argument string(s) too long");
                    129:        }
                    130: 
                    131:        RETURN_LONG(distance);
                    132: }
                    133: /* }}} */
                    134: 
                    135: /*
                    136:  * Local variables:
                    137:  * tab-width: 4
                    138:  * c-basic-offset: 4
                    139:  * End:
                    140:  * vim600: sw=4 ts=4 fdm=marker
                    141:  * vim<600: sw=4 ts=4
                    142:  */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>