<div dir="ltr">Second revision. Converted characters to codepoints to take care of towlower() ( just like how vlc_strcasestr() does it )<br>---<br> src/playlist/search.c | 223 +++++++++++++++++++++++++++++++++++++++++++++++--<br>
1 file changed, 218 insertions(+), 5 deletions(-)<br><br>diff --git a/src/playlist/search.c b/src/playlist/search.c<br>index 0c711ca..9960668 100644<br>--- a/src/playlist/search.c<br>+++ b/src/playlist/search.c<br>@@ -5,6 +5,7 @@<br>
* $Id$<br> *<br> * Authors: Clément Stenac <<a href="mailto:zorglub@videolan.org" target="_blank">zorglub@videolan.org</a>><br>+ * Abhiram Ravi <<a href="mailto:abhiram.ravi.s@gmail.com" target="_blank">abhiram.ravi.s@gmail.com</a>><br>
*<br> * This program is free software; you can redistribute it and/or modify it<br> * under the terms of the GNU Lesser General Public License as published by<br>@@ -29,6 +30,93 @@<br> #include <vlc_playlist.h><br>
#include <vlc_charset.h><br> #include "playlist_internal.h"<br>+#include <string.h><br>+#include <limits.h><br>+#include <wctype.h><br>+<br>+//The delimiter string for splitting meta and query strings<br>
+#define REG_SPLIT N_("()[ ,.-]_")<br>+//Maximum allowed error in Levenshtein Distance<br>+#define MAX_ERROR 1<br>+//TODO: Add a cmdline param/opt to GUI and allow user to set MAX_ERROR<br>+<br>+/***************************************************************************<br>
+ * Levenshtein Distance between two strings<br>+ ***************************************************************************/<br>+<br>+/**<br>+ * A dynamic programming algorithm to<br>+ * calculate the levenshtein distance between any<br>
+ * two strings ( used for matching query and playlist items )<br>+ * @param psz_str_1: The first string<br>+ * @param psz_str_2: The second string<br>+ * @return the edit distance between the two strings<br>+ */<br>+static int levenshtein_distance ( const char* psz_str_1, const char* psz_str_2 )<br>
+{<br>+ // Duplicate, convert the strings to lower case<br>+ char* psz_1 = xstrdup ( psz_str_1 );<br>+ char* psz_2 = xstrdup ( psz_str_2 );<br>+ uint32_t cph;<br>+ ssize_t s;<br>+ for ( char *p = psz_1; *p; ++p )<br>
+ {<br>+ s = vlc_towc ( p, &cph );<br>+ if ( s <= 0 ) continue;<br>+ *p = towlower ( cph );<br>+ }<br>+ for ( char *p = psz_2; *p; ++p )<br>+ {<br>+ s = vlc_towc ( p, &cph );<br>
+ if ( s <= 0 ) continue;<br>+ *p = towlower ( cph );<br>+ }<br>+ char char_1, char_2;<br>+ int i_cost;<br>+ size_t l_1 = strlen ( psz_1 );<br>+ size_t l_2 = strlen ( psz_2 );<br>+<br>+ if ( l_1 == 0 )<br>
+ return l_2;<br>+ if ( l_2 == 0 )<br>+ return l_1;<br>+<br>+ //A flat 2D array<br>+ int* iarray_distance = xmalloc ( ( l_1 + 1 ) * ( l_2 + 1) * sizeof ( int ) );<br>+ int i_col = l_2 + 1;<br>+ for ( size_t i = 0; i <= l_1; i++ )<br>
+ iarray_distance [i * i_col] = i;<br>+ for ( size_t i = 0; i <= l_2; i++ )<br>+ iarray_distance [i] = i;<br>+ for ( size_t i = 1; i <= l_1; i++ )<br>+ {<br>+ char_1 = psz_1 [i - 1];<br>
+ for ( size_t j = 1; j <= l_2; j++ )<br>+ {<br>+ char_2 = psz_2 [j - 1];<br>+ if ( char_1 == char_2 )<br>+ i_cost = 0;<br>+ else<br>+ i_cost = 1;<br>
+<br>+ iarray_distance [i * i_col + j] = INT_MAX;<br>+ if ( iarray_distance [(i - 1) * i_col + j] + 1 < iarray_distance [i * i_col + j] )<br>+ iarray_distance [i * i_col + j] = iarray_distance [(i - 1) * i_col + j] + 1;<br>
+ if ( iarray_distance [i * i_col + (j - 1)] + 1 < iarray_distance [i * i_col + j] )<br>+ iarray_distance [i * i_col + j] = iarray_distance [i * i_col +(j - 1)] + 1;<br>+ if ( iarray_distance [(i - 1) * i_col + (j - 1)] + i_cost < iarray_distance [i * i_col + j] )<br>
+ iarray_distance [i * i_col + j] = iarray_distance [(i - 1) * i_col + (j - 1)] + i_cost;<br>+ }<br>+ }<br>+ int answer = iarray_distance [l_1 * i_col + l_2];<br>+<br>+ // Freeing the memory<br>
+ free ( psz_1 );<br>+ free ( psz_2 );<br>+ free ( iarray_distance );<br>+<br>+ return answer;<br>+}<br> <br> /***************************************************************************<br> * Item search functions<br>
@@ -85,6 +173,83 @@ playlist_item_t* playlist_ItemGetByInput( playlist_t * p_playlist,<br> * Live search handling<br> ***************************************************************************/<br> <br>+/*<br>+ * Split a given sentence based on a string of characters to use for the split<br>
+ * \param ppp_split_destination - the array of words obtained after splitting<br>+ * \param psz_sentence - the input string<br>+ * \param psz_reg_split - the string of characters to use for the split<br>+ * @return - the number of words in the sentence<br>
+ * */<br>+static int split_into_words ( char*** ppp_split_destination, const char* psz_sentence, const char* psz_reg_split )<br>+{<br>+ // Making a copy to work with strtok<br>+ char* psz_sentence_copy = xstrdup ( psz_sentence );<br>
+ const char* psz_buf;<br>+<br>+ // Allocating space for the required number of words<br>+ char** pp_sentence_split = ( char ** ) xmalloc ( strlen ( psz_sentence_copy ) * sizeof ( char* ) );<br>+<br>+ char* p_saveptr;<br>
+<br>+ psz_buf = strtok_r ( psz_sentence_copy, psz_reg_split, &p_saveptr );<br>+<br>+ int i_count = 0;<br>+<br>+ // Running strtok and copying all the words obtained to a new split string array<br>+ while ( psz_buf != NULL )<br>
+ {<br>+ pp_sentence_split [i_count] = xstrdup ( psz_buf );<br>+ psz_buf = strtok_r ( NULL, psz_reg_split, &p_saveptr );<br>+ i_count++;<br>+ }<br>+ *ppp_split_destination = pp_sentence_split;<br>
+ return i_count;<br>+<br>+}<br>+<br>+/*<br>+ * Match a given string with a set of query words.<br>+ * \param psz_info - the string that needs to be matched with<br>+ * \param pp_query_split - the array of query words<br>
+ * \param i_query_split - number of query words<br>+ * \param bitvector_matches - to keep track of matches<br>+ * */<br>+static void execute_match ( const char* psz_info, char** pp_query_split, int i_query_split, bool* bitvector_matches )<br>
+{<br>+ char** pp_info_split = NULL;<br>+ int i_info_split = split_into_words( &pp_info_split, psz_info, REG_SPLIT );<br>+<br>+ for ( int i = 0; i < i_query_split; i++ )<br>+ {<br>+ for ( int j = 0; j < i_info_split; j++ )<br>
+ {<br>+ bitvector_matches[i] |= ( pp_info_split[j] && levenshtein_distance( pp_info_split[j], pp_query_split[i] ) <= MAX_ERROR ) ;<br>+ }<br>+ }<br>+<br>+ for ( int i = 0; i < i_info_split; i++ )<br>
+ {<br>+ free ( pp_info_split[i] );<br>+ }<br>+ free ( pp_info_split );<br>+}<br>+<br>+/*<br>+ * Checks if a boolean array contains all 1's<br>+ * \param bitvector_matches - the boolean array<br>+ * \param len - length of the array<br>
+ * @return - true if all bits are 1.<br>+ * - false otherwise<br>+ * */<br>+static bool super_match ( bool* bitvector_matches, size_t len )<br>+{<br>+ bool b_super_match = 1;<br>+ for ( size_t i = 0; i < len; i++ )<br>
+ {<br>+ b_super_match &= bitvector_matches[i];<br>+ }<br>+ return b_super_match;<br>+}<br> /**<br> * Enable all items in the playlist<br> * @param p_root: the current root item<br>@@ -103,6 +268,9 @@ static void playlist_LiveSearchClean( playlist_item_t *p_root )<br>
<br> /**<br> * Enable/Disable items in the playlist according to the search argument<br>+ * Every string is split into words (split based on REG_SPLIT) and an item is<br>+ * enabled only if all the words in the query string have found a<br>
+ * match(approximated by MAX_ERROR) in the metadata.<br> * @param p_root: the current root item<br> * @param psz_string: the string to search<br> * @return true if an item match<br>@@ -111,12 +279,20 @@ static bool playlist_LiveSearchUpdateInternal( playlist_item_t *p_root,<br>
const char *psz_string, bool b_recursive )<br> {<br> int i;<br>+ // Splitting the query words into strings for separate word-matching<br>+ char** pp_query_split = NULL;<br>
+ int i_query_split = split_into_words( &pp_query_split, psz_string, REG_SPLIT );<br>+<br>+ // The bit vector of size = number of words in query string - used to ensure that ALL the<br>+ // words in the query string match before enabling the item in the playlist<br>
+ bool* bitvector_matches = xmalloc ( i_query_split * sizeof ( bool ) );<br>+<br> bool b_match = false;<br> for( i = 0 ; i < p_root->i_children ; i ++ )<br> {<br> bool b_enable = false;<br> playlist_item_t *p_item = p_root->pp_children[i];<br>
- // Go recurssively if their is some children<br>+ // Go recursively if there are some children<br> if( b_recursive && p_item->i_children >= 0 &&<br> playlist_LiveSearchUpdateInternal( p_item, psz_string, true ) )<br>
{<br>@@ -133,14 +309,50 @@ static bool playlist_LiveSearchUpdateInternal( playlist_item_t *p_root,<br> const char *psz_title = vlc_meta_Get( p_item->p_input->p_meta, vlc_meta_Title );<br> if( !psz_title )<br>
psz_title = p_item->p_input->psz_name;<br>+<br> const char *psz_album = vlc_meta_Get( p_item->p_input->p_meta, vlc_meta_Album );<br> const char *psz_artist = vlc_meta_Get( p_item->p_input->p_meta, vlc_meta_Artist );<br>
- b_enable = ( psz_title && vlc_strcasestr( psz_title, psz_string ) ) ||<br>- ( psz_album && vlc_strcasestr( psz_album, psz_string ) ) ||<br>- ( psz_artist && vlc_strcasestr( psz_artist, psz_string ) );<br>
+<br>+ // Initializing the bit vector to 0s<br>+ memset ( bitvector_matches, 0, i_query_split );<br>+<br>+ // First check if the words of the query are direct substrings, and enable matches for<br>
+ // the corresponding query words<br>+ for ( int i = 0; i < i_query_split; i++ )<br>+ {<br>+ bitvector_matches[i] |=<br>+ ( ( psz_title && vlc_strcasestr( psz_title, pp_query_split[i] ) ) ||<br>
+ ( psz_album && vlc_strcasestr( psz_album, pp_query_split[i] ) ) ||<br>+ ( psz_artist && vlc_strcasestr( psz_artist, pp_query_split[i] ) ) );<br>+ }<br>
+<br>+ // Matching the query words and each word of the title, album and artist.<br>+ // Uses approximate matching with the help of levenshtein distance.<br>+ if ( psz_title )<br>
+ execute_match ( psz_title, pp_query_split, i_query_split, bitvector_matches );<br>+ if ( psz_album )<br>+ execute_match ( psz_album, pp_query_split, i_query_split, bitvector_matches );<br>
+ if ( psz_artist )<br>+ execute_match ( psz_artist, pp_query_split, i_query_split, bitvector_matches );<br>+<br>+ // Enabled if all the query words matched.<br>+ b_enable |= super_match ( bitvector_matches, i_query_split );<br>
+<br> }<br> else<br>- b_enable = p_item->p_input->psz_name && vlc_strcasestr( p_item->p_input->psz_name, psz_string );<br>+ {<br>+ // If there is no meta, then we just consider the name<br>
+ const char* psz_title = p_item->p_input->psz_name;<br>+<br>+ // Initializing the bit vector to 0s<br>+ memset ( bitvector_matches, 0, i_query_split );<br>+<br>+ if ( psz_title )<br>
+ execute_match( psz_title, pp_query_split, i_query_split, bitvector_matches );<br>+<br>+ // Enabled if all the query words matched.<br>+ b_enable |= super_match ( bitvector_matches, i_query_split );<br>
+ }<br> vlc_mutex_unlock( &p_item->p_input->lock );<br> }<br> <br>@@ -151,6 +363,7 @@ static bool playlist_LiveSearchUpdateInternal( playlist_item_t *p_root,<br> <br> b_match |= b_enable;<br>
}<br>+ free ( bitvector_matches );<br> return b_match;<br> }<br> <br>-- <br>1.7.9.5<br><div class="gmail_extra"><div><br></div>
</div></div>