[vlc-devel] commit: resolve_xml_special_chars handles non-ASCII Unicode code points (Rémi Denis-Courmont)

git version control git at videolan.org
Sun Apr 5 12:22:31 CEST 2009


vlc | branch: master | Rémi Denis-Courmont <remi at remlab.net> | Sun Apr  5 13:21:31 2009 +0300| [79de221529a46aa6f247d71fbbbcc20105802a91] | committer: Rémi Denis-Courmont 

resolve_xml_special_chars handles non-ASCII Unicode code points

> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=79de221529a46aa6f247d71fbbbcc20105802a91
---

 src/text/strings.c |   43 +++++++++++++++++++++++++++++++------------
 1 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/src/text/strings.c b/src/text/strings.c
index 77f175a..71caaaa 100644
--- a/src/text/strings.c
+++ b/src/text/strings.c
@@ -384,23 +384,42 @@ void resolve_xml_special_chars( char *psz_value )
     {
         if( *psz_value == '&' )
         {
-            const char *psz_value1 = psz_value + 1;
-            if( *psz_value1 == '#' )
-            {
+            if( psz_value[1] == '#' )
+            {   /* &#xxx; Unicode code point */
                 char *psz_end;
-                int i = strtol( psz_value+2, &psz_end, 10 );
+                unsigned long cp = strtoul( psz_value+2, &psz_end, 10 );
                 if( *psz_end == ';' )
                 {
-                    if( i >= 32 && i <= 126 )
+                    psz_value = psz_end + 1;
+                    if( cp == 0 )
+                        (void)0; /* skip nuls */
+                    else
+                    if( cp <= 0x7F )
+                    {
+                        *p_pos =            cp;
+                    }
+                    else
+                    /* Unicode code point outside ASCII.
+                     * &#xxx; representation is longer than UTF-8 :) */
+                    if( cp <= 0x7FF )
                     {
-                        *p_pos = (char)i;
-                        psz_value = psz_end+1;
+                        *p_pos++ = 0xC0 |  (cp >>  6);
+                        *p_pos   = 0x80 |  (cp        & 0x3F);
                     }
                     else
+                    if( cp <= 0xFFFF )
                     {
-                        /* Unhandled code, FIXME */
-                        *p_pos = *psz_value;
-                        psz_value++;
+                        *p_pos++ = 0xE0 |  (cp >> 12);
+                        *p_pos++ = 0x80 | ((cp >>  6) & 0x3F);
+                        *p_pos   = 0x80 |  (cp        & 0x3F);
+                    }
+                    else
+                    if( cp <= 0x1FFFFF ) /* Outside the BMP */
+                    {   /* Unicode stops at 10FFFF, but who cares? */
+                        *p_pos++ = 0xF0 |  (cp >> 18);
+                        *p_pos++ = 0x80 | ((cp >> 12) & 0x3F);
+                        *p_pos++ = 0x80 | ((cp >>  6) & 0x3F);
+                        *p_pos   = 0x80 |  (cp        & 0x3F);
                     }
                 }
                 else
@@ -411,10 +430,10 @@ void resolve_xml_special_chars( char *psz_value )
                 }
             }
             else
-            {
+            {   /* Well-known XML entity */
                 const struct xml_entity_s *ent;
 
-                ent = bsearch (psz_value1, xml_entities,
+                ent = bsearch (psz_value + 1, xml_entities,
                                sizeof (xml_entities) / sizeof (*ent),
                                sizeof (*ent), cmp_entity);
                 if (ent != NULL)




More information about the vlc-devel mailing list