<div dir="ltr">(Sorry, it was a bad mistake.)<br>Fixed it! Now works for any UTF-8 string.<br><div><br>---<br> src/playlist/search.c | 239 +++++++++++++++++++++++++++++++++++++++++++++++--<br> 1 file changed, 234 insertions(+), 5 deletions(-)<br>
<br>diff --git a/src/playlist/search.c b/src/playlist/search.c<br>index 0c711ca..f2e5ed4 100644<br>--- a/src/playlist/search.c<br>+++ b/src/playlist/search.c<br>@@ -5,6 +5,7 @@<br> * $Id$<br> *<br> * Authors: Clément Stenac <<a href="mailto:zorglub@videolan.org">zorglub@videolan.org</a>><br>
+ * Abhiram Ravi <<a href="mailto:abhiram.ravi.s@gmail.com">abhiram.ravi.s@gmail.com</a>><br> *<br> * This program is free software; you can redistribute it and/or modify it<br> * under the terms of the GNU Lesser General Public License as published by<br>
@@ -29,6 +30,109 @@<br> #include <vlc_playlist.h><br> #include <vlc_charset.h><br> #include "playlist_internal.h"<br>+#include <string.h><br>+#include <limits.h><br>+#include <wctype.h><br>
+<br>+//The delimiter string for splitting meta and query strings<br>+#define REG_SPLIT N_("()[ ,.-]_")<br>+//Maximum allowed error in Levenshtein Distance<br>+#define MAX_ERROR 1<br>+//TODO: Add a cmdline param/opt to GUI and allow user to set MAX_ERROR<br>
+<br>+/***************************************************************************<br>+ * Levenshtein Distance between two strings<br>+ ***************************************************************************/<br>+/**<br>
+ * Converts a UTF-8 string into lowercase, The function handles multi-byte<br>+ * codepoints correctly, and puts them in an array of uint32_t's.<br>+ * @param psz_string : The original string<br>+ * @param pszw_output : The output array into which the codepoints are written.<br>
+ * @return : The length of the codepoint array<br>+ * XXX - pszw_output must be already malloc'ed<br>+ */<br>+static int to_lowercase ( const char* psz_string, uint32_t* pszw_output )<br>+{<br>+ ssize_t s;<br>+ size_t len = 0;<br>
+ uint32_t cph;<br>+ const char *p = psz_string;<br>+ for ( ; *p; )<br>+ {<br>+ s = vlc_towc ( p, &cph );<br>+ if ( s <= 0 )<br>+ {<br>+ s = vlc_towc ( p, &(uint32_t) { 0 });<br>
+ p += s;<br>+ continue;<br>+ }<br>+ *pszw_output = towlower ( cph );<br>+ pszw_output ++;<br>+ p += s;<br>+ len ++;<br>+ }<br>+ return len;<br>+}<br>+/**<br>
+ * A dynamic programming algorithm to<br>+ * calculate the levenshtein distance between any<br>+ * two strings ( used for matching query and playlist items )<br>+ * @param psz_str_1: The first string<br>+ * @param psz_str_2: The second string<br>
+ * @return the edit distance between the two strings<br>+ */<br>+static int levenshtein_distance ( const char* psz_str_1, const char* psz_str_2 )<br>+{<br>+ // Convert the strings to lowercase<br>+ uint32_t* psz_wide_1 = xmalloc ( strlen ( psz_str_1 ) * sizeof ( uint32_t ) );<br>
+ uint32_t* psz_wide_2 = xmalloc ( strlen ( psz_str_2 ) * sizeof ( uint32_t ) );<br>+ size_t l_1 = to_lowercase( psz_str_1, psz_wide_1 );<br>+ size_t l_2 = to_lowercase( psz_str_2, psz_wide_2 );<br>+<br>+ uint32_t char_1, char_2;<br>
+ int i_cost;<br>+<br>+ if ( l_1 == 0 )<br>+ return l_2;<br>+ if ( l_2 == 0 )<br>+ return l_1;<br>+<br>+ //A flat 2D array<br>+ int* iarray_distance = xmalloc ( ( l_1 + 1 ) * ( l_2 + 1) * sizeof ( int ) );<br>
+ int i_col = l_2 + 1;<br>+ for ( size_t i = 0; i <= l_1; i++ )<br>+ iarray_distance [i * i_col] = i;<br>+ for ( size_t i = 0; i <= l_2; i++ )<br>+ iarray_distance [i] = i;<br>+ for ( size_t i = 1; i <= l_1; i++ )<br>
+ {<br>+ char_1 = psz_wide_1 [i - 1];<br>+ for ( size_t j = 1; j <= l_2; j++ )<br>+ {<br>+ char_2 = psz_wide_2 [j - 1];<br>+ if ( char_1 == char_2 )<br>+ i_cost = 0;<br>
+ else<br>+ i_cost = 1;<br>+<br>+ iarray_distance [i * i_col + j] = INT_MAX;<br>+ if ( iarray_distance [(i - 1) * i_col + j] + 1 < iarray_distance [i * i_col + j] )<br>+ iarray_distance [i * i_col + j] = iarray_distance [(i - 1) * i_col + j] + 1;<br>
+ if ( iarray_distance [i * i_col + (j - 1)] + 1 < iarray_distance [i * i_col + j] )<br>+ iarray_distance [i * i_col + j] = iarray_distance [i * i_col +(j - 1)] + 1;<br>+ if ( iarray_distance [(i - 1) * i_col + (j - 1)] + i_cost < iarray_distance [i * i_col + j] )<br>
+ iarray_distance [i * i_col + j] = iarray_distance [(i - 1) * i_col + (j - 1)] + i_cost;<br>+ }<br>+ }<br>+ int answer = iarray_distance [l_1 * i_col + l_2];<br>+<br>+ // Freeing before returning the answer<br>
+ free ( psz_wide_1 );<br>+ free ( psz_wide_2 );<br>+ free ( iarray_distance );<br>+<br>+ return answer;<br>+}<br> <br> /***************************************************************************<br> * Item search functions<br>
@@ -85,6 +189,83 @@ playlist_item_t* playlist_ItemGetByInput( playlist_t * p_playlist,<br> * Live search handling<br> ***************************************************************************/<br> <br>+/*<br>+ * Split a given sentence based on a string of characters to use for the split<br>
+ * \param ppp_split_destination - the array of words obtained after splitting<br>+ * \param psz_sentence - the input string<br>+ * \param psz_reg_split - the string of characters to use for the split<br>+ * @return - the number of words in the sentence<br>
+ * */<br>+static int split_into_words ( char*** ppp_split_destination, const char* psz_sentence, const char* psz_reg_split )<br>+{<br>+ // Making a copy to work with strtok<br>+ char* psz_sentence_copy = xstrdup ( psz_sentence );<br>
+ const char* psz_buf;<br>+<br>+ // Allocating space for the required number of words<br>+ char** pp_sentence_split = ( char ** ) xmalloc ( strlen ( psz_sentence_copy ) * sizeof ( char* ) );<br>+<br>+ char* p_saveptr;<br>
+<br>+ psz_buf = strtok_r ( psz_sentence_copy, psz_reg_split, &p_saveptr );<br>+<br>+ int i_count = 0;<br>+<br>+ // Running strtok and copying all the words obtained to a new split string array<br>+ while ( psz_buf != NULL )<br>
+ {<br>+ pp_sentence_split [i_count] = xstrdup ( psz_buf );<br>+ psz_buf = strtok_r ( NULL, psz_reg_split, &p_saveptr );<br>+ i_count++;<br>+ }<br>+ *ppp_split_destination = pp_sentence_split;<br>
+ return i_count;<br>+<br>+}<br>+<br>+/*<br>+ * Match a given string with a set of query words.<br>+ * \param psz_info - the string that needs to be matched with<br>+ * \param pp_query_split - the array of query words<br>
+ * \param i_query_split - number of query words<br>+ * \param bitvector_matches - to keep track of matches<br>+ * */<br>+static void execute_match ( const char* psz_info, char** pp_query_split, int i_query_split, bool* bitvector_matches )<br>
+{<br>+ char** pp_info_split = NULL;<br>+ int i_info_split = split_into_words( &pp_info_split, psz_info, REG_SPLIT );<br>+<br>+ for ( int i = 0; i < i_query_split; i++ )<br>+ {<br>+ for ( int j = 0; j < i_info_split; j++ )<br>
+ {<br>+ bitvector_matches[i] |= ( pp_info_split[j] && levenshtein_distance( pp_info_split[j], pp_query_split[i] ) <= MAX_ERROR ) ;<br>+ }<br>+ }<br>+<br>+ for ( int i = 0; i < i_info_split; i++ )<br>
+ {<br>+ free ( pp_info_split[i] );<br>+ }<br>+ free ( pp_info_split );<br>+}<br>+<br>+/*<br>+ * Checks if a boolean array contains all 1's<br>+ * \param bitvector_matches - the boolean array<br>+ * \param len - length of the array<br>
+ * @return - true if all bits are 1.<br>+ * - false otherwise<br>+ * */<br>+static bool super_match ( bool* bitvector_matches, size_t len )<br>+{<br>+ bool b_super_match = 1;<br>+ for ( size_t i = 0; i < len; i++ )<br>
+ {<br>+ b_super_match &= bitvector_matches[i];<br>+ }<br>+ return b_super_match;<br>+}<br> /**<br> * Enable all items in the playlist<br> * @param p_root: the current root item<br>@@ -103,6 +284,9 @@ static void playlist_LiveSearchClean( playlist_item_t *p_root )<br>
<br> /**<br> * Enable/Disable items in the playlist according to the search argument<br>+ * Every string is split into words (split based on REG_SPLIT) and an item is<br>+ * enabled only if all the words in the query string have found a<br>
+ * match(approximated by MAX_ERROR) in the metadata.<br> * @param p_root: the current root item<br> * @param psz_string: the string to search<br> * @return true if an item match<br>@@ -111,12 +295,20 @@ static bool playlist_LiveSearchUpdateInternal( playlist_item_t *p_root,<br>
const char *psz_string, bool b_recursive )<br> {<br> int i;<br>+ // Splitting the query words into strings for separate word-matching<br>+ char** pp_query_split = NULL;<br>
+ int i_query_split = split_into_words( &pp_query_split, psz_string, REG_SPLIT );<br>+<br>+ // The bit vector of size = number of words in query string - used to ensure that ALL the<br>+ // words in the query string match before enabling the item in the playlist<br>
+ bool* bitvector_matches = xmalloc ( i_query_split * sizeof ( bool ) );<br>+<br> bool b_match = false;<br> for( i = 0 ; i < p_root->i_children ; i ++ )<br> {<br> bool b_enable = false;<br> playlist_item_t *p_item = p_root->pp_children[i];<br>
- // Go recurssively if their is some children<br>+ // Go recursively if there are some children<br> if( b_recursive && p_item->i_children >= 0 &&<br> playlist_LiveSearchUpdateInternal( p_item, psz_string, true ) )<br>
{<br>@@ -133,14 +325,50 @@ static bool playlist_LiveSearchUpdateInternal( playlist_item_t *p_root,<br> const char *psz_title = vlc_meta_Get( p_item->p_input->p_meta, vlc_meta_Title );<br> if( !psz_title )<br>
psz_title = p_item->p_input->psz_name;<br>+<br> const char *psz_album = vlc_meta_Get( p_item->p_input->p_meta, vlc_meta_Album );<br> const char *psz_artist = vlc_meta_Get( p_item->p_input->p_meta, vlc_meta_Artist );<br>
- b_enable = ( psz_title && vlc_strcasestr( psz_title, psz_string ) ) ||<br>- ( psz_album && vlc_strcasestr( psz_album, psz_string ) ) ||<br>- ( psz_artist && vlc_strcasestr( psz_artist, psz_string ) );<br>
+<br>+ // Initializing the bit vector to 0s<br>+ memset ( bitvector_matches, 0, i_query_split );<br>+<br>+ // First check if the words of the query are direct substrings, and enable matches for<br>
+ // the corresponding query words<br>+ for ( int i = 0; i < i_query_split; i++ )<br>+ {<br>+ bitvector_matches[i] |=<br>+ ( ( psz_title && vlc_strcasestr( psz_title, pp_query_split[i] ) ) ||<br>
+ ( psz_album && vlc_strcasestr( psz_album, pp_query_split[i] ) ) ||<br>+ ( psz_artist && vlc_strcasestr( psz_artist, pp_query_split[i] ) ) );<br>+ }<br>
+<br>+ // Matching the query words and each word of the title, album and artist.<br>+ // Uses approximate matching with the help of levenshtein distance.<br>+ if ( psz_title )<br>
+ execute_match ( psz_title, pp_query_split, i_query_split, bitvector_matches );<br>+ if ( psz_album )<br>+ execute_match ( psz_album, pp_query_split, i_query_split, bitvector_matches );<br>
+ if ( psz_artist )<br>+ execute_match ( psz_artist, pp_query_split, i_query_split, bitvector_matches );<br>+<br>+ // Enabled if all the query words matched.<br>+ b_enable |= super_match ( bitvector_matches, i_query_split );<br>
+<br> }<br> else<br>- b_enable = p_item->p_input->psz_name && vlc_strcasestr( p_item->p_input->psz_name, psz_string );<br>+ {<br>+ // If there is no meta, then we just consider the name<br>
+ const char* psz_title = p_item->p_input->psz_name;<br>+<br>+ // Initializing the bit vector to 0s<br>+ memset ( bitvector_matches, 0, i_query_split );<br>+<br>+ if ( psz_title )<br>
+ execute_match( psz_title, pp_query_split, i_query_split, bitvector_matches );<br>+<br>+ // Enabled if all the query words matched.<br>+ b_enable |= super_match ( bitvector_matches, i_query_split );<br>
+ }<br> vlc_mutex_unlock( &p_item->p_input->lock );<br> }<br> <br>@@ -151,6 +379,7 @@ static bool playlist_LiveSearchUpdateInternal( playlist_item_t *p_root,<br> <br> b_match |= b_enable;<br>
}<br>+ free ( bitvector_matches );<br> return b_match;<br> }<br> <br>-- <br>1.7.9.5<br><br></div></div>