[vlc-devel] commit: IsUTF8: reject surrogates and non-Unicode code points ( Rémi Denis-Courmont )
git version control
git at videolan.org
Sun Feb 7 10:41:55 CET 2010
vlc | branch: master | Rémi Denis-Courmont <remi at remlab.net> | Sun Feb 7 11:40:52 2010 +0200| [9ae7d4e604370f2dec236494c48bc33b685a014b] | committer: Rémi Denis-Courmont
IsUTF8: reject surrogates and non-Unicode code points
> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=9ae7d4e604370f2dec236494c48bc33b685a014b
---
src/text/unicode.c | 21 +++++++++++++++------
1 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/src/text/unicode.c b/src/text/unicode.c
index 1c0501a..77b7684 100644
--- a/src/text/unicode.c
+++ b/src/text/unicode.c
@@ -304,7 +304,7 @@ static char *CheckUTF8( char *str, char rep )
goto error;
}
- assert (charlen >= 2);
+ assert (charlen >= 2 && charlen <= 4);
uint32_t cp = c & ~((0xff >> (7 - charlen)) << (7 - charlen));
for (int i = 1; i < charlen; i++)
@@ -318,11 +318,20 @@ static char *CheckUTF8( char *str, char rep )
cp = (cp << 6) | (ptr[i] & 0x3f);
}
- if (cp < 128) // overlong (special case for ASCII)
- goto error;
- if (cp < (1u << (5 * charlen - 3))) // overlong
- goto error;
-
+ switch (charlen)
+ {
+ case 4:
+ if (cp > 0x10FFFF) // beyond Unicode
+ goto error;
+ case 3:
+ if (cp >= 0xD800 && cp < 0xE000) // UTF-16 surrogate
+ goto error;
+ case 2:
+ if (cp < 128) // ASCII overlong
+ goto error;
+ if (cp < (1u << (5 * charlen - 3))) // overlong
+ goto error;
+ }
ptr += charlen;
continue;
More information about the vlc-devel
mailing list