[vlc-commits] commit: Add ToCharset() helper to convert from UTF-8 ( Rémi Denis-Courmont )

git at videolan.org git at videolan.org
Tue Oct 26 20:11:37 CEST 2010


vlc | branch: master | Rémi Denis-Courmont <remi at remlab.net> | Tue Oct 26 20:48:46 2010 +0300| [f8471e1db4bea3b055d03776347742ed1f8898f2] | committer: Rémi Denis-Courmont 

Add ToCharset() helper to convert from UTF-8

> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=f8471e1db4bea3b055d03776347742ed1f8898f2
---

 include/vlc_charset.h |    1 +
 src/libvlccore.sym    |    1 +
 src/text/unicode.c    |   54 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 56 insertions(+), 0 deletions(-)

diff --git a/include/vlc_charset.h b/include/vlc_charset.h
index 55974a9..07f9dee 100644
--- a/include/vlc_charset.h
+++ b/include/vlc_charset.h
@@ -109,6 +109,7 @@ static inline char *FromLatin1 (const char *latin)
 }
 
 VLC_EXPORT( char *, FromCharset, ( const char *charset, const void *data, size_t data_size ) LIBVLC_USED );
+VLC_EXPORT( void *, ToCharset, ( const char *charset, const char *in, size_t *outsize ) LIBVLC_USED );
 
 VLC_EXPORT( double, us_strtod, ( const char *, char ** ) LIBVLC_USED );
 VLC_EXPORT( float, us_strtof, ( const char *, char ** ) LIBVLC_USED );
diff --git a/src/libvlccore.sym b/src/libvlccore.sym
index 03938fb..8d05c37 100644
--- a/src/libvlccore.sym
+++ b/src/libvlccore.sym
@@ -438,6 +438,7 @@ subpicture_region_Delete
 subpicture_region_New
 tls_ClientCreate
 tls_ClientDelete
+ToCharset
 ToLocale
 ToLocaleDup
 update_Check
diff --git a/src/text/unicode.c b/src/text/unicode.c
index c39edaf..83e82e8 100644
--- a/src/text/unicode.c
+++ b/src/text/unicode.c
@@ -466,3 +466,57 @@ char *FromCharset(const char *charset, const void *data, size_t data_size)
     return out;
 }
 
+/**
+ * Converts a nul-terminated UTF-8 string to a given character encoding.
+ * @param charset iconv name of the character set
+ * @param in nul-terminated UTF-8 string
+ * @param outsize pointer to hold the byte size of result
+ *
+ * @return A pointer to the result, which must be released using free().
+ * The UTF-8 nul terminator is included in the conversion if the target
+ * character encoding supports it. However it is not included in the returned
+ * byte size.
+ * In case of error, NULL is returned and the byte size is undefined.
+ */
+void *ToCharset(const char *charset, const char *in, size_t *outsize)
+{
+    vlc_iconv_t hd = vlc_iconv_open (charset, "UTF-8");
+    if (hd == (vlc_iconv_t)(-1))
+        return NULL;
+
+    const size_t inlen = strlen (in);
+    void *res;
+
+    /* Retry with a growing output buffer while iconv reports E2BIG. */
+    for (unsigned mul = 4; mul < 16; mul++)
+    {
+        size_t outlen = mul * (inlen + 1);
+        res = malloc (outlen);
+        if (unlikely(res == NULL))
+            break;
+
+        const char *inp = in;
+        char *outp = res;
+        size_t inb = inlen; /* payload only; the terminator is added below */
+        size_t outb = outlen;
+
+        if (vlc_iconv (hd, &inp, &inb, &outp, &outb) != (size_t)(-1))
+        {
+            *outsize = outlen - outb;
+            inb = 1; /* append nul terminator if possible */
+            if (vlc_iconv (hd, &inp, &inb, &outp, &outb) != (size_t)(-1))
+                break;
+            if (errno == EILSEQ) /* cannot translate nul terminator!? */
+                break;
+        }
+
+        const int err = errno; /* free() may clobber errno */
+        free (res);
+        res = NULL; /* never hand back a freed buffer */
+        if (err != E2BIG) /* conversion failure */
+            break;
+    }
+    vlc_iconv_close (hd);
+    return res;
+}
+



More information about the vlc-commits mailing list