[vlc-commits] Inline IsUTF8() and EnsureUTF8()

Rémi Denis-Courmont git at videolan.org
Sun Nov 29 14:14:03 CET 2015


vlc | branch: master | Rémi Denis-Courmont <remi at remlab.net> | Sun Nov 29 14:31:49 2015 +0200| [7b331c5407c7398795e4b806419fd96ace73c33f] | committer: Rémi Denis-Courmont

Inline IsUTF8() and EnsureUTF8()

> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=7b331c5407c7398795e4b806419fd96ace73c33f
---

 include/vlc_charset.h |   61 ++++++++++++++++++++++++++++++++++++++++++++++---
 src/libvlccore.sym    |    2 --
 src/text/unicode.c    |   45 ------------------------------------
 3 files changed, 58 insertions(+), 50 deletions(-)

diff --git a/include/vlc_charset.h b/include/vlc_charset.h
index 253405d..8867065 100644
--- a/include/vlc_charset.h
+++ b/include/vlc_charset.h
@@ -50,6 +50,64 @@
  */
 VLC_API size_t vlc_towc(const char *str, uint32_t *restrict pwc);
 
+/**
+ * Checks UTF-8 validity.
+ *
+ * Checks whether a null-terminated string is a valid UTF-8 byte sequence.
+ *
+ * \param str string to check
+ *
+ * \retval non-NULL the string is valid UTF-8 (pointer to where decoding ended)
+ * \retval NULL the string is not a valid UTF-8 sequence
+ */
+VLC_USED static inline const char *IsUTF8(const char *str)
+{
+    size_t n;
+    uint32_t cp;
+
+    while ((n = vlc_towc(str, &cp)) != 0) /* a result of 0 ends the scan */
+        if (likely(n != (size_t)-1))
+            str += n; /* skip the sequence that was just decoded */
+        else
+            return NULL; /* (size_t)-1 signals an undecodable sequence */
+    return str; /* advanced pointer, not the original argument */
+}
+
+/**
+ * Removes non-UTF-8 sequences.
+ *
+ * Replaces invalid or <i>over-long</i> UTF-8 byte sequences within a
+ * null-terminated string with question marks. This is so that the string can
+ * be printed at least partially.
+ *
+ * \warning Do not use this where correctness is critical. Use IsUTF8() and
+ * handle the error case instead. This function is mainly for display or debug.
+ *
+ * \note Converting from Latin-1 to UTF-8 in place is not possible (the string
+ * size would be increased). So it is not attempted even if it would otherwise
+ * be less disruptive.
+ *
+ * \param str null-terminated string to check; it is modified in place
+ * \retval str the string was already valid UTF-8 (no changes were made)
+ * \retval NULL invalid sequences were found and replaced with question marks
+ */
+static inline char *EnsureUTF8(char *str)
+{
+    char *ret = str; /* keep the start: str itself is advanced below */
+    size_t n;
+    uint32_t cp;
+
+    while ((n = vlc_towc(str, &cp)) != 0) /* a result of 0 ends the scan */
+        if (likely(n != (size_t)-1))
+            str += n; /* skip the sequence that was just decoded */
+        else
+        {
+            *str++ = '?'; /* overwrite one byte, then retry from the next */
+            ret = NULL; /* record that the input was not clean */
+        }
+    return ret;
+}
+
 /* iconv wrappers (defined in src/extras/libc.c) */
 typedef void *vlc_iconv_t;
 VLC_API vlc_iconv_t vlc_iconv_open( const char *, const char * ) VLC_USED;
@@ -62,9 +120,6 @@ VLC_API int utf8_vfprintf( FILE *stream, const char *fmt, va_list ap );
 VLC_API int utf8_fprintf( FILE *, const char *, ... ) VLC_FORMAT( 2, 3 );
 VLC_API char * vlc_strcasestr(const char *, const char *) VLC_USED;
 
-VLC_API char * EnsureUTF8( char * );
-VLC_API const char * IsUTF8( const char * ) VLC_USED;
-
 VLC_API char * FromCharset( const char *charset, const void *data, size_t data_size ) VLC_USED;
 VLC_API void * ToCharset( const char *charset, const char *in, size_t *outsize ) VLC_USED;
 
diff --git a/src/libvlccore.sym b/src/libvlccore.sym
index 4be47ac..54a606a 100644
--- a/src/libvlccore.sym
+++ b/src/libvlccore.sym
@@ -107,7 +107,6 @@ dialog_Unregister
 dialog_VFatal
 encode_URI_component
 EndMD5
-EnsureUTF8
 es_format_Clean
 es_format_Copy
 es_format_Init
@@ -233,7 +232,6 @@ input_Stop
 input_vaControl
 input_Close
 intf_Create
-IsUTF8
 libvlc_InternalAddIntf
 libvlc_InternalPlay
 libvlc_InternalCleanup
diff --git a/src/text/unicode.c b/src/text/unicode.c
index 87a665f..c274cf2 100644
--- a/src/text/unicode.c
+++ b/src/text/unicode.c
@@ -229,51 +229,6 @@ char *vlc_strcasestr (const char *haystack, const char *needle)
 }
 
 /**
- * Replaces invalid/overlong UTF-8 sequences with question marks.
- * Note that it is not possible to convert from Latin-1 to UTF-8 on the fly,
- * so we don't try that, even though it would be less disruptive.
- *
- * @return str if it was valid UTF-8, NULL if not.
- */
-char *EnsureUTF8( char *str )
-{
-    char *ret = str;
-    size_t n;
-    uint32_t cp;
-
-    while ((n = vlc_towc (str, &cp)) != 0)
-        if (likely(n != (size_t)-1))
-            str += n;
-        else
-        {
-            *str++ = '?';
-            ret = NULL;
-        }
-    return ret;
-}
-
-
-/**
- * Checks whether a string is a valid UTF-8 byte sequence.
- *
- * @param str nul-terminated string to be checked
- *
- * @return str if it was valid UTF-8, NULL if not.
- */
-const char *IsUTF8( const char *str )
-{
-    size_t n;
-    uint32_t cp;
-
-    while ((n = vlc_towc (str, &cp)) != 0)
-        if (likely(n != (size_t)-1))
-            str += n;
-        else
-            return NULL;
-    return str;
-}
-
-/**
  * Converts a string from the given character encoding to utf-8.
  *
  * @return a nul-terminated utf-8 string, or null in case of error.



More information about the vlc-commits mailing list