[dvblast-devel] [PATCH 6/9] Remove invalid UTF-8 characters from text output

Georgi Chorbadzhiyski gf at unixsol.org
Sat Jan 20 13:42:54 CET 2018


Sometimes broadcasters put "strange" characters in DVB strings and
when the strings are converted to UTF-8 the output contains
invalid characters.

In order to avoid this problem, this commit instructs the iconv
library to ignore characters that cannot be converted to
UTF-8 by switching the default output charset from
UTF-8 to UTF-8//IGNORE.
---
 NEWS         | 1 +
 dvblast.c    | 8 ++++----
 dvblastctl.c | 2 +-
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/NEWS b/NEWS
index 92b3926..d6da445 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,7 @@ Changes between 3.2 and -next
 -----------------------------
   * Add support for getting EIT present/following for chosen service in dvblastctl
   * Add support for getting EIT schedule for chosen service in dvblastctl
+  * Switch default string charset to UTF-8//IGNORE
 
 Changes between 3.1 and 3.2:
 ----------------------------
diff --git a/dvblast.c b/dvblast.c
index e0403f7..8424d05 100644
--- a/dvblast.c
+++ b/dvblast.c
@@ -92,7 +92,7 @@ int b_select_pmts = 0;
 int b_random_tsid = 0;
 char *psz_udp_src = NULL;
 int i_asi_adapter = 0;
-const char *psz_native_charset = "UTF-8";
+const char *psz_native_charset = "UTF-8//IGNORE";
 print_type_t i_print_type = PRINT_TEXT;
 bool b_print_enabled = false;
 FILE *print_fh;
@@ -114,7 +114,7 @@ static mtime_t i_latency_global = DEFAULT_OUTPUT_LATENCY;
 static mtime_t i_retention_global = DEFAULT_MAX_RETENTION;
 static int i_ttl_global = 64;
 
-static const char *psz_dvb_charset = "UTF-8";
+static const char *psz_dvb_charset = "UTF-8//IGNORE";
 static iconv_t conf_iconv = (iconv_t)-1;
 static uint16_t i_network_id = 0xffff;
 static dvb_string_t network_name;
@@ -697,8 +697,8 @@ void usage()
     msg_Raw( NULL, "Misc:" );
     msg_Raw( NULL, "  -h --help             display this full help" );
     msg_Raw( NULL, "  -i --priority <RT priority>" );
-    msg_Raw( NULL, "  -j --system-charset   character set used for printing messages (default UTF-8)" );
-    msg_Raw( NULL, "  -J --dvb-charset      character set used in output DVB tables (default UTF-8)" );
+    msg_Raw( NULL, "  -j --system-charset   character set used for printing messages (default UTF-8//IGNORE)" );
+    msg_Raw( NULL, "  -J --dvb-charset      character set used in output DVB tables (default UTF-8//IGNORE)" );
     msg_Raw( NULL, "  -l --logger           use syslog for logging messages instead of stderr" );
     msg_Raw( NULL, "  -g --logger-ident     program name that will be used in syslog messages" );
     msg_Raw( NULL, "  -x --print            print interesting events on stdout in a given format" );
diff --git a/dvblastctl.c b/dvblastctl.c
index d1386e8..62a379c 100644
--- a/dvblastctl.c
+++ b/dvblastctl.c
@@ -116,7 +116,7 @@ static char *iconv_append_null(const char *p_string, size_t i_length)
     return psz_string;
 }
 
-const char *psz_native_charset = "UTF-8";
+const char *psz_native_charset = "UTF-8//IGNORE";
 
 char *psi_iconv(void *_unused, const char *psz_encoding,
                   char *p_string, size_t i_length)
-- 
2.14.1



More information about the dvblast-devel mailing list