[vlc-commits] Inline IsUTF8() and EnsureUTF8()
Rémi Denis-Courmont
git at videolan.org
Sun Nov 29 14:14:03 CET 2015
vlc | branch: master | Rémi Denis-Courmont <remi at remlab.net> | Sun Nov 29 14:31:49 2015 +0200| [7b331c5407c7398795e4b806419fd96ace73c33f] | committer: Rémi Denis-Courmont
Inline IsUTF8() and EnsureUTF8()
> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=7b331c5407c7398795e4b806419fd96ace73c33f
---
include/vlc_charset.h | 61 ++++++++++++++++++++++++++++++++++++++++++++++---
src/libvlccore.sym | 2 --
src/text/unicode.c | 45 ------------------------------------
3 files changed, 58 insertions(+), 50 deletions(-)
diff --git a/include/vlc_charset.h b/include/vlc_charset.h
index 253405d..8867065 100644
--- a/include/vlc_charset.h
+++ b/include/vlc_charset.h
@@ -50,6 +50,64 @@
*/
VLC_API size_t vlc_towc(const char *str, uint32_t *restrict pwc);
+/**
+ * Checks UTF-8 validity.
+ *
+ * Checks whether a null-terminated string is a valid UTF-8 byte sequence.
+ *
+ * \param str string to check
+ *
+ * \retval non-NULL the string is a valid null-terminated UTF-8 sequence (the returned pointer has been advanced past the decoded characters)
+ * \retval NULL the string is not a valid UTF-8 sequence
+ */
+VLC_USED static inline const char *IsUTF8(const char *str)
+{
+ size_t n;
+ uint32_t cp;
+
+ while ((n = vlc_towc(str, &cp)) != 0)
+ if (likely(n != (size_t)-1))
+ str += n;
+ else
+ return NULL;
+ return str;
+}
+
+/**
+ * Removes non-UTF-8 sequences.
+ *
+ * Replaces invalid or <i>over-long</i> UTF-8 byte sequences within a
+ * null-terminated string with question marks. This is so that the string can
+ * be printed at least partially.
+ *
+ * \warning Do not use this where correctness is critical. Use IsUTF8() and
+ * handle the error case instead. This function is mainly for display or debug.
+ *
+ * \note Converting from Latin-1 to UTF-8 in place is not possible (the string
+ * size would be increased). So it is not attempted even if it would otherwise
+ * be less disruptive.
+ *
+ * \retval str the string is a valid null-terminated UTF-8 sequence
+ * (i.e. no changes were made)
+ * \retval NULL the string was not a valid UTF-8 sequence (offending bytes were replaced with question marks)
+ */
+static inline char *EnsureUTF8(char *str)
+{
+ char *ret = str;
+ size_t n;
+ uint32_t cp;
+
+ while ((n = vlc_towc(str, &cp)) != 0)
+ if (likely(n != (size_t)-1))
+ str += n;
+ else
+ {
+ *str++ = '?';
+ ret = NULL;
+ }
+ return ret;
+}
+
/* iconv wrappers (defined in src/extras/libc.c) */
typedef void *vlc_iconv_t;
VLC_API vlc_iconv_t vlc_iconv_open( const char *, const char * ) VLC_USED;
@@ -62,9 +120,6 @@ VLC_API int utf8_vfprintf( FILE *stream, const char *fmt, va_list ap );
VLC_API int utf8_fprintf( FILE *, const char *, ... ) VLC_FORMAT( 2, 3 );
VLC_API char * vlc_strcasestr(const char *, const char *) VLC_USED;
-VLC_API char * EnsureUTF8( char * );
-VLC_API const char * IsUTF8( const char * ) VLC_USED;
-
VLC_API char * FromCharset( const char *charset, const void *data, size_t data_size ) VLC_USED;
VLC_API void * ToCharset( const char *charset, const char *in, size_t *outsize ) VLC_USED;
diff --git a/src/libvlccore.sym b/src/libvlccore.sym
index 4be47ac..54a606a 100644
--- a/src/libvlccore.sym
+++ b/src/libvlccore.sym
@@ -107,7 +107,6 @@ dialog_Unregister
dialog_VFatal
encode_URI_component
EndMD5
-EnsureUTF8
es_format_Clean
es_format_Copy
es_format_Init
@@ -233,7 +232,6 @@ input_Stop
input_vaControl
input_Close
intf_Create
-IsUTF8
libvlc_InternalAddIntf
libvlc_InternalPlay
libvlc_InternalCleanup
diff --git a/src/text/unicode.c b/src/text/unicode.c
index 87a665f..c274cf2 100644
--- a/src/text/unicode.c
+++ b/src/text/unicode.c
@@ -229,51 +229,6 @@ char *vlc_strcasestr (const char *haystack, const char *needle)
}
/**
- * Replaces invalid/overlong UTF-8 sequences with question marks.
- * Note that it is not possible to convert from Latin-1 to UTF-8 on the fly,
- * so we don't try that, even though it would be less disruptive.
- *
- * @return str if it was valid UTF-8, NULL if not.
- */
-char *EnsureUTF8( char *str )
-{
- char *ret = str;
- size_t n;
- uint32_t cp;
-
- while ((n = vlc_towc (str, &cp)) != 0)
- if (likely(n != (size_t)-1))
- str += n;
- else
- {
- *str++ = '?';
- ret = NULL;
- }
- return ret;
-}
-
-
-/**
- * Checks whether a string is a valid UTF-8 byte sequence.
- *
- * @param str nul-terminated string to be checked
- *
- * @return str if it was valid UTF-8, NULL if not.
- */
-const char *IsUTF8( const char *str )
-{
- size_t n;
- uint32_t cp;
-
- while ((n = vlc_towc (str, &cp)) != 0)
- if (likely(n != (size_t)-1))
- str += n;
- else
- return NULL;
- return str;
-}
-
-/**
* Converts a string from the given character encoding to utf-8.
*
* @return a nul-terminated utf-8 string, or null in case of error.
More information about the vlc-commits
mailing list