[biTStream-devel] [Git][videolan/bitstream][master] 2 commits: Fix invalid XML characters
Christophe Massiot (@cmassiot)
gitlab at videolan.org
Wed Jun 4 14:07:53 UTC 2025
Christophe Massiot pushed to branch master at VideoLAN / bitstream
Commits:
f123c9c0 by Clément Vasseur at 2025-04-07T17:10:59+02:00
Fix invalid XML characters
Make sure characters which are not valid UTF-8 or XML characters are
properly replaced by the unicode replacement character.
- - - - -
70d6a2fe by Christophe Massiot at 2025-06-04T16:07:45+02:00
Merge branch 'nto-xml-invalid-chars'
- - - - -
1 changed file:
- common.h
Changes:
=====================================
common.h
=====================================
@@ -51,52 +51,86 @@ typedef enum print_type_t {
typedef void (*f_print)(void *, const char *, ...) __attribute__ ((format(printf, 2, 3)));
typedef char * (*f_iconv)(void *, const char *, char *, size_t);
-static inline const char *bitstream_xml_escape_char(char c)
+static inline size_t bitstream_xml_escape_chars(const char *str, char *out)
{
- switch (c) {
- case '<': return "&lt;";
- case '>': return "&gt;";
- case '"': return "&quot;";
- case '\'': return "&apos;";
- case '&': return "&amp;";
- }
- return NULL;
-}
+ size_t len = 1;
+
+ while (*str) {
+ uint32_t codepoint = 0xFFFD;
+ const char *p = str;
+ int size = 1;
+
+ unsigned char c = *str++;
+ if (c < 0x80) {
+ codepoint = c;
+ switch (c) {
+ case '<': p = "&lt;"; size = 4; break;
+ case '>': p = "&gt;"; size = 4; break;
+ case '"': p = "&quot;"; size = 6; break;
+ case '\'': p = "&apos;"; size = 6; break;
+ case '&': p = "&amp;"; size = 5; break;
+ }
+ } else if ((c & 0xE0) == 0xC0) {
+ if ((str[0] & 0xC0) == 0x80) {
+ codepoint = ((c & 0x1F) << 6) |
+ (str[0] & 0x3F);
+ size = 2;
+ str += 1;
+ }
+ } else if ((c & 0xF0) == 0xE0) {
+ if ((str[0] & 0xC0) == 0x80 &&
+ (str[1] & 0xC0) == 0x80) {
+ codepoint = ((c & 0x0F) << 12) |
+ ((str[0] & 0x3F) << 6) |
+ (str[1] & 0x3F);
+ size = 3;
+ str += 2;
+ }
+ } else if ((c & 0xF8) == 0xF0) {
+ if ((str[0] & 0xC0) == 0x80 &&
+ (str[1] & 0xC0) == 0x80 &&
+ (str[2] & 0xC0) == 0x80) {
+ codepoint = ((c & 0x07) << 18) |
+ ((str[0] & 0x3F) << 12) |
+ ((str[1] & 0x3F) << 6) |
+ (str[2] & 0x3F);
+ size = 4;
+ str += 3;
+ }
+ }
-static inline size_t bitstream_xml_escape_len(const char *str)
-{
- size_t len = str ? strlen(str) : 0;
- size_t out_len = 0;
- for (unsigned i = 0; i < len; i++) {
- const char *esc = bitstream_xml_escape_char(str[i]);
- out_len += esc ? strlen(esc) : 1;
+ // #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
+ if (codepoint == 0xFFFD ||
+ !(codepoint == 0x9 ||
+ codepoint == 0xA ||
+ codepoint == 0xD ||
+ (codepoint >= 0x20 && codepoint <= 0xD7FF) ||
+ (codepoint >= 0xE000 && codepoint <= 0xFFFD) ||
+ (codepoint >= 0x10000 && codepoint <= 0x10FFFF))) {
+ p = "\uFFFD";
+ size = 3;
+ }
+
+ if (out) {
+ memcpy(out, p, size);
+ out += size;
+ }
+ len += size;
}
- return out_len;
+
+ if (out)
+ *out = '\0';
+ return len;
}
static inline char *bitstream_xml_escape(const char *str)
{
if (!str)
return NULL;
-
- size_t len = strlen(str);
- size_t out_len = bitstream_xml_escape_len(str);
- char *out = (char *)malloc(out_len + 1);
+ char *out = (char *)malloc(bitstream_xml_escape_chars(str, NULL));
if (!out)
return NULL;
-
- char *tmp = out;
- for (unsigned i = 0; i < len; i++) {
- const char *esc = bitstream_xml_escape_char(str[i]);
- if (esc) {
- size_t esc_len = strlen(esc);
- memcpy(tmp, esc, esc_len);
- tmp += esc_len;
- }
- else
- *tmp++ = str[i];
- }
- *tmp = '\0';
+ bitstream_xml_escape_chars(str, out);
return out;
}
View it on GitLab: https://code.videolan.org/videolan/bitstream/-/compare/fc71ca6d9da88e82ada96588ebf2e121cd3ad583...70d6a2fe5d53f46d69578531c1ba63bdd986b189
--
View it on GitLab: https://code.videolan.org/videolan/bitstream/-/compare/fc71ca6d9da88e82ada96588ebf2e121cd3ad583...70d6a2fe5d53f46d69578531c1ba63bdd986b189
You're receiving this email because of your account on code.videolan.org.
VideoLAN code repository instance
More information about the biTStream-devel
mailing list