[vlc-devel] [PATCH] Introducing Intelligent Approximate playlist search

Rémi Denis-Courmont remi at remlab.net
Thu Mar 21 22:07:25 CET 2013


	Hello,

Le dimanche 17 mars 2013 00:25:20, Abhiram Ravi a écrit :
> Changes have been made as suggested. This is the revised patch.
> 
> ---
>  src/playlist/search.c |  213
> +++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 208 insertions(+), 5 deletions(-)
> 
> diff --git a/src/playlist/search.c b/src/playlist/search.c
> index 0c711ca..b5cd4b8 100644
> --- a/src/playlist/search.c
> +++ b/src/playlist/search.c
> @@ -5,6 +5,7 @@
>   * $Id$
>   *
>   * Authors: Clément Stenac <zorglub at videolan.org>
> + *             Abhiram Ravi <abhiram.ravi.s at gmail.com>
>   *
>   * This program is free software; you can redistribute it and/or modify it
>   * under the terms of the GNU Lesser General Public License as published
> by @@ -29,6 +30,82 @@
>  #include <vlc_playlist.h>
>  #include <vlc_charset.h>
>  #include "playlist_internal.h"
> +#include <string.h>
> +#include <limits.h>
> +#include <wctype.h>
> +
> +//The delimiter string for splitting meta and query strings
> +#define REG_SPLIT N_("()[ ,.-]_")
> +//Maximum allowed error in Levenshtein Distance
> +#define MAX_ERROR 1
> +//TODO: Add a cmdline param/opt to GUI and allow user to set MAX_ERROR
> +
> +/*************************************************************************
> ** + * Levenshtein Distance between two strings
> +
> ***************************************************************************
> / +
> +/**
> + * A dynamic programming algorithm to
> + * calculate the levenshtein distance between any
> + * two strings ( used for matching query and playlist items )
> + * @param psz_str_1: The first string
> + * @param psz_str_2: The second string
> + * @return the edit distance between the two strings
> + */
> +static int levenshtein_distance ( const char* psz_str_1, const char*
> psz_str_2 )
> +{
> +    // Duplicate, convert the strings to lower case
> +    char* psz_1 = strdup ( psz_str_1 );
> +    char* psz_2 = strdup ( psz_str_2 );
> +    for ( char *p = psz_1; *p; ++p ) *p = towlower ( *p );
> +    for ( char *p = psz_2; *p; ++p ) *p = towlower ( *p );

This is a case of too easy to be true. That is unfortunatly not how ISO C 
towlower() works.

> +
> +    char char_1, char_2;
> +    int i_cost;
> +    size_t l_1 = strlen ( psz_1 );
> +    size_t l_2 = strlen ( psz_2 );
> +
> +    if ( l_1 == 0 )
> +        return l_2;
> +    if ( l_2 == 0 )
> +        return l_1;
> +
> +    //A flat 2D array
> +    int* iarray_distance = xmalloc ( ( l_1 + 1 ) * ( l_2 + 1) * sizeof (
> int ) );
> +    int i_col = l_2 + 1;
> +    for ( size_t i = 0; i <= l_1; i++ )
> +        iarray_distance [i * i_col] = i;
> +    for ( size_t i = 0; i <= l_2; i++ )
> +        iarray_distance [i] = i;
> +    for ( size_t i = 1; i <= l_1; i++ )
> +    {
> +        char_1 = psz_1 [i - 1];
> +        for ( size_t j = 1; j <= l_2; j++ )
> +        {
> +            char_2 = psz_2 [j - 1];
> +            if ( char_1 == char_2 )
> +                i_cost = 0;
> +            else
> +                i_cost = 1;
> +
> +            iarray_distance [i * i_col + j] = INT_MAX;
> +            if ( iarray_distance [(i - 1) * i_col + j] + 1 <
> iarray_distance [i * i_col + j] )
> +                iarray_distance [i * i_col + j] = iarray_distance [(i - 1)
> * i_col + j] + 1;
> +            if ( iarray_distance [i * i_col + (j - 1)] + 1 <
> iarray_distance [i * i_col + j] )
> +                iarray_distance [i * i_col + j] = iarray_distance [i *
> i_col +(j - 1)] + 1;
> +            if ( iarray_distance [(i - 1) * i_col + (j - 1)] + i_cost <
> iarray_distance [i * i_col + j] )
> +                iarray_distance [i * i_col + j] = iarray_distance [(i - 1)
> * i_col + (j - 1)] + i_cost;
> +        }
> +    }
> +    int answer = iarray_distance [l_1 * i_col + l_2];
> +
> +    // Freeing the memory
> +    free ( psz_1 );
> +    free ( psz_2 );
> +    free ( iarray_distance );
> +
> +    return answer;
> +}
> 
>  /*************************************************************************
> ** * Item search functions
> @@ -85,6 +162,84 @@ playlist_item_t* playlist_ItemGetByInput( playlist_t *
> p_playlist,
>   * Live search handling
> 
> ***************************************************************************
> /
> 
> +/*
> + * Split a given sentence based on a string of characters to use for the
> split
> + * \param ppp_split_destination - the array of words obtained after
> splitting
> + * \param psz_sentence - the input string
> + * \param psz_reg_split - the string of characters to use for the split
> + * @return - the number of words in the sentence
> + * */
> +static int split_into_words ( char*** ppp_split_destination, const char*
> psz_sentence, const char* psz_reg_split )
> +{
> +    // Making a copy to work with strtok
> +    char* psz_sentence_copy = strdup ( psz_sentence );

xstrdup() if you don't check for errors. Also above and below.

> +    const char* psz_buf;
> +
> +    // Allocating space for the required number of words
> +    char** pp_sentence_split = ( char ** ) xmalloc ( strlen (
> psz_sentence_copy ) * sizeof ( char* ) );
> +
> +    //A save pointer for strtok_r

Please avoid self-evident comments.

> +    char* p_saveptr;
> +
> +    psz_buf = strtok_r ( psz_sentence_copy, psz_reg_split, &p_saveptr );
> +
> +    int i_count = 0;
> +
> +    // Running strtok and copying all the words obtained to a new split
> string array
> +    while ( psz_buf != NULL )
> +    {
> +        pp_sentence_split [i_count] = strdup ( psz_buf );
> +        psz_buf = strtok_r ( NULL, psz_reg_split, &p_saveptr );
> +        i_count++;
> +    }
> +    *ppp_split_destination = pp_sentence_split;
> +    return i_count;
> +
> +}

-- 
Rémi Denis-Courmont
http://www.remlab.net/



More information about the vlc-devel mailing list