[vlc-commits] commit: Add vlc_strcasestr() ( Rémi Denis-Courmont )

git at videolan.org git at videolan.org
Fri Oct 8 20:44:23 CEST 2010


vlc | branch: master | Rémi Denis-Courmont <remi at remlab.net> | Fri Oct  8 21:02:23 2010 +0300| [027cc99e4fdf949846225d8c23e97c78ad32f4b3] | committer: Rémi Denis-Courmont 

Add vlc_strcasestr()

> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=027cc99e4fdf949846225d8c23e97c78ad32f4b3
---

 include/vlc_charset.h |    1 +
 src/libvlccore.sym    |    1 +
 src/text/unicode.c    |   44 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 46 insertions(+), 0 deletions(-)

diff --git a/include/vlc_charset.h b/include/vlc_charset.h
index 8674aee..c79753c 100644
--- a/include/vlc_charset.h
+++ b/include/vlc_charset.h
@@ -40,6 +40,7 @@ VLC_EXPORT( char *, ToLocaleDup, ( const char * ) LIBVLC_USED );
 
 VLC_EXPORT( int, utf8_vfprintf, ( FILE *stream, const char *fmt, va_list ap ) );
 VLC_EXPORT( int, utf8_fprintf, ( FILE *, const char *, ... ) LIBVLC_FORMAT( 2, 3 ) );
+VLC_EXPORT( char *, vlc_strcasestr, (const char *, const char *) LIBVLC_USED );
 
 VLC_EXPORT( char *, EnsureUTF8, ( char * ) );
 VLC_EXPORT( const char *, IsUTF8, ( const char * ) LIBVLC_USED );
diff --git a/src/libvlccore.sym b/src/libvlccore.sym
index 6b293c8..413ab79 100644
--- a/src/libvlccore.sym
+++ b/src/libvlccore.sym
@@ -462,6 +462,7 @@ vlc_opendir
 vlc_readdir
 vlc_scandir
 vlc_stat
+vlc_strcasestr
 vlc_unlink
 vlc_rename
 vlc_dup
diff --git a/src/text/unicode.c b/src/text/unicode.c
index bed5f1f..c39edaf 100644
--- a/src/text/unicode.c
+++ b/src/text/unicode.c
@@ -41,6 +41,7 @@
 #  include <tchar.h>
 #endif
 #include <errno.h>
+#include <wctype.h>
 
 #if defined (ASSUME_UTF8)
 /* Cool */
@@ -340,6 +341,49 @@ static size_t vlc_towc (const char *str, uint32_t *restrict pwc)
     return charlen;
 }
 
+/**
+ * Look for a UTF-8 string within another one in a case-insensitive fashion.
+ * Beware that this is quite slow. Contrary to strcasestr(), this function
+ * works regardless of the system character encoding, and handles multibyte
+ * code points correctly. The search ends at the first undecodable sequence.
+ *
+ * @param haystack string to look into
+ * @param needle string to look for
+ * @return a pointer to the first occurrence of the needle within the haystack,
+ * or NULL if no occurrence was found.
+ */
+char *vlc_strcasestr (const char *haystack, const char *needle)
+{
+    ssize_t s;
+
+    do
+    {
+        const char *h = haystack, *n = needle;
+
+        for (;;)
+        {
+            uint32_t cph, cpn;
+
+            s = vlc_towc (n, &cpn);
+            if (s == 0) /* needle exhausted: everything matched */
+                return (char *)haystack;
+            if (unlikely(s < 0)) /* needle cannot be decoded */
+                return NULL;
+            n += s;
+
+            s = vlc_towc (h, &cph);
+            if (s <= 0 || towlower (cph) != towlower (cpn))
+                break;
+            h += s;
+        }
+        /* Step one code point; never step backwards on a decoding error. */
+        s = vlc_towc (haystack, &(uint32_t) { 0 });
+        if (s > 0)
+            haystack += s;
+    }
+    while (s > 0);
+    return NULL;
+}
 
 /**
  * Replaces invalid/overlong UTF-8 sequences with question marks.



More information about the vlc-commits mailing list