[vlc-commits] access: cdrom: support cdtext non ascii charsets

Francois Cartegnie git at videolan.org
Tue Jun 25 18:48:11 CEST 2019


vlc | branch: master | Francois Cartegnie <fcvlcdev at free.fr> | Mon Jun  3 23:26:49 2019 +0200| [68c791d16bba97b00fa55dcd2a98d6954b666dbc] | committer: Francois Cartegnie

access: cdrom: support cdtext non ascii charsets

> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=68c791d16bba97b00fa55dcd2a98d6954b666dbc
---

 NEWS                       |   1 +
 modules/access/vcd/cdrom.c | 250 ++++++++++++++++++++++++++++++++++-----------
 2 files changed, 193 insertions(+), 58 deletions(-)

diff --git a/NEWS b/NEWS
index cf75202611..54ca5daf6c 100644
--- a/NEWS
+++ b/NEWS
@@ -41,6 +41,7 @@ Access:
  * Added avaudiocapture module as a replacement for qtsound, which is removed now
  * Audio CD data tracks are now correctly detected and skipped
  * Deprecates Audio CD CDDB lookups in favor of more accurate Musicbrainz
+ * Improved CD-TEXT and added Shift-JIS encoding support
 
 Access output:
  * Added support for the RIST (Reliable Internet Stream Transport) Protocol
diff --git a/modules/access/vcd/cdrom.c b/modules/access/vcd/cdrom.c
index 04b9627fc0..2e6d38a35d 100644
--- a/modules/access/vcd/cdrom.c
+++ b/modules/access/vcd/cdrom.c
@@ -1177,31 +1177,153 @@ static int os2_vcd_open( vlc_object_t * p_this, const char *psz_dev,
 #endif
 
 /* */
-static void astrcat( char **ppsz_dst, char *psz_src )
+#define CDTEXT_MAX_BLOCKS 8
+#define CDTEXT_MAX_TRACKS 0x7f
+#define CDTEXT_PACK_SIZE 18
+#define CDTEXT_PACK_HEADER 4
+#define CDTEXT_PACK_PAYLOAD 12
+#define CDTEXT_TEXT_BUFFER 160 /* arbitrary from the sony docs,
+                                  < theoretical max 12 * (256 - 4) */
+enum cdtext_charset_e
 {
-    char *psz_old = *ppsz_dst;
+    CDTEXT_CHARSET_ISO88591 = 0x00,
+    CDTEXT_CHARSET_ASCII7BIT = 0x01,
+    CDTEXT_CHARSET_MSJIS = 0x80,
+};
 
-    if( !psz_old )
+static void CdTextAppendPayload( const char *buffer, size_t i_len,
+                                 enum cdtext_charset_e e_charset, char **ppsz_text )
+{
+    size_t i_alloc = *ppsz_text ? strlen( *ppsz_text ) : 0;
+    size_t i_extend;
+    const char *from_charset;
+    switch( e_charset )
     {
-        *ppsz_dst = strdup( psz_src );
+        case CDTEXT_CHARSET_ASCII7BIT:
+            i_extend = i_len;
+            from_charset = NULL;
+            break;
+        case CDTEXT_CHARSET_ISO88591:
+            i_extend = i_len * 2;
+            from_charset = "ISO-8859-1";
+            break;
+        case CDTEXT_CHARSET_MSJIS:
+            i_extend = i_len * 4;
+            from_charset = "SHIFT-JIS";
+            break;
+        default: /* no known conversion */
+            return;
     }
-    else if( psz_src )
+    size_t i_newsize = i_alloc + i_extend * 2 + 1;
+
+    char *psz_realloc = realloc( *ppsz_text, i_newsize );
+    if( !psz_realloc )
+        return;
+    *ppsz_text = psz_realloc;
+
+    /* copy/convert result */
+    if ( from_charset == NULL )
+    {
+        memcpy( &psz_realloc[i_alloc], buffer, i_len );
+        psz_realloc[i_alloc + i_len] = 0;
+        EnsureUTF8( psz_realloc );
+    }
+    else
     {
-        if( asprintf( ppsz_dst, "%s%s", psz_old, psz_src ) < 0 )
-            *ppsz_dst = psz_old;
+        vlc_iconv_t ic = vlc_iconv_open( "UTF-8", from_charset );
+        if( ic != (vlc_iconv_t) -1 )
+        {
+            const char *psz_in = buffer;
+            size_t i_in = i_len;
+            char *psz_out = &psz_realloc[i_alloc];
+            size_t i_out = i_extend;
+            if( VLC_ICONV_ERR != vlc_iconv( ic, &psz_in, &i_in, &psz_out, &i_out ) )
+                psz_realloc[i_alloc + i_extend - i_out] = 0;
+            vlc_iconv_close( ic );
+        }
+    }
+}
+
+/* Payload length without terminating 0 */
+static size_t CdTextPayloadLength( const char *p_buffer, size_t i_buffer,
+                                   bool b_doublebytes )
+{
+    if( b_doublebytes )
+    {
+        size_t i_len = 0;
+        for( size_t i=0; i<i_buffer/2; i++ )
+        {
+            if(p_buffer[0] == 0 && p_buffer[1] == 0)
+                break;
+            i_len += 2;
+            p_buffer += 2;
+        }
+        return i_len;
+    }
+    else return strnlen( p_buffer, i_buffer );
+}
+
+static void CdTextParsePackText( const uint8_t *p_pack,
+                                 enum cdtext_charset_e e_charset,
+                                 size_t *pi_textbuffer,
+                                 char *textbuffer,
+                                 int *pi_last_track,
+                                 char *pppsz_info[CDTEXT_MAX_TRACKS + 1][0x10] )
+{
+    const uint8_t i_pack_type = p_pack[0];
+    uint8_t i_track = p_pack[1] & 0x7f;
+    const bool b_double_byte = p_pack[3] & 0x80;
+    const uint8_t i_char_position = p_pack[3] & 0x0f;
+
+    if( i_char_position == 0 )
+        *pi_textbuffer = 0; /* not using remains */
+
+    const uint8_t *p_start = &p_pack[CDTEXT_PACK_HEADER];
+    const uint8_t *p_end = p_start + CDTEXT_PACK_PAYLOAD;
+
+    for( const uint8_t *p_readpos = p_start; p_readpos < p_end ; )
+    {
+        size_t i_payload = CdTextPayloadLength( (char *)p_readpos,
+                                                p_end - p_readpos,
+                                                b_double_byte );
+        /* update max used track # */
+        if( i_payload > 0 )
+            *pi_last_track = __MAX( *pi_last_track, i_track );
+
+        /* copy out segment to buffer */
+        size_t i_append = i_payload;
+        if( *pi_textbuffer + i_payload >= CDTEXT_TEXT_BUFFER )
+            i_append = CDTEXT_TEXT_BUFFER - *pi_textbuffer;
+        memcpy( &textbuffer[*pi_textbuffer], p_readpos, i_append );
+        *pi_textbuffer += i_append;
+
+        /* end of pack or just first split ? */
+        if( &p_readpos[i_payload] < p_end ) /* not continuing */
+        {
+            /* commit */
+            if(*pi_textbuffer > 0)
+            {
+                CdTextAppendPayload( textbuffer, *pi_textbuffer, e_charset,
+                                     &pppsz_info[i_track][i_pack_type-0x80] );
+                *pi_textbuffer = 0;
+
+                if(++i_track > CDTEXT_MAX_TRACKS) /* increment for next part of the split */
+                    break;
+            }
+            /* set read pointer for next track in same pack */
+            p_readpos = p_readpos + i_payload + (b_double_byte ? 2 : 1);
+        }
         else
-            free( psz_old );
+        {
+            p_readpos = p_end;
+        }
     }
 }
 
-/* */
-#define CDTEXT_PACK_SIZE 18
-#define CDTEXT_PACK_HEADER 4
-#define CDTEXT_PACK_PAYLOAD 12
 static int CdTextParse( vlc_meta_t ***ppp_tracks, int *pi_tracks,
                         const uint8_t *p_buffer, int i_buffer )
 {
-    char *pppsz_info[128][0x10];
+    char *pppsz_info[CDTEXT_MAX_TRACKS + 1][0x10];
     int i_track_last = -1;
     if( i_buffer < 4 )
         return -1;
@@ -1237,49 +1359,65 @@ static int CdTextParse( vlc_meta_t ***ppp_tracks, int *pi_tracks,
 
     memset( pppsz_info, 0, sizeof(pppsz_info) );
 
-    for( int i = 0; i < i_buffer/CDTEXT_PACK_SIZE; i++ )
+    enum cdtext_charset_e e_textpackcharset;
+    if( bsznfopayl[0] )
     {
-        const uint8_t *p_block = &p_buffer[CDTEXT_PACK_SIZE*i];
-        char psz_text[CDTEXT_PACK_PAYLOAD+1];
+        e_textpackcharset = bsznfopayl[0][0];
+        /* use superset to fix broken decl */
+        if( e_textpackcharset == CDTEXT_CHARSET_ASCII7BIT )
+            e_textpackcharset = CDTEXT_CHARSET_ISO88591;
+    }
+    else e_textpackcharset = CDTEXT_CHARSET_ASCII7BIT;
 
-        const int i_pack_type = p_block[0];
-        if( i_pack_type < 0x80 || i_pack_type > 0x8f )
-            continue;
-
-        const int i_track_number = (p_block[1] >> 0)&0x7f;
-        const int i_extension_flag = ( p_block[1] >> 7)& 0x01;
-        if( i_extension_flag )
-            continue;
-        const uint8_t i_block_number = (p_pack[3] >> 4) & 0x07;
-        if( i_block_number > 0 )
-            continue;
+    /* capture buffer */
+    char textbuffer[CDTEXT_TEXT_BUFFER];
+    size_t i_textbuffer = 0;
+    uint8_t i_prev_pack_type = 0x00;
 
+    for( int i = 0; i < i_buffer/CDTEXT_PACK_SIZE; i++ )
+    {
+        const uint8_t *p_pack = &p_buffer[CDTEXT_PACK_SIZE*i];
+        const uint8_t i_pack_type = p_pack[0];
         //const int i_sequence_number = p_block[2];
-        //const int i_charater_position = (p_block[3] >> 0) &0x0f;
-        //const int i_block_number = (p_block[3] >> 4) &0x07;
-        /* TODO unicode support
-         * I need a sample */
-        //const int i_unicode = ( p_block[3] >> 7)&0x01;
+        const uint8_t i_block_number = (p_pack[3] >> 4) & 0x07;
         //const int i_crc = (p_block[4+12] << 8) | (p_block[4+13] << 0);
 
-        /* */
-        memcpy( psz_text, &p_block[CDTEXT_PACK_HEADER], CDTEXT_PACK_PAYLOAD );
-        psz_text[CDTEXT_PACK_PAYLOAD] = '\0';
+        /* non flushed text buffer */
+        if(i_textbuffer && i_pack_type != i_prev_pack_type)
+            i_textbuffer = 0;
+        i_prev_pack_type = i_pack_type;
+
+        uint8_t i_track = p_pack[1] & 0x7f;
+        if( i_track > CDTEXT_MAX_TRACKS ||
+            (p_pack[1] & 0x80) /* extension flag */ ||
+            i_block_number > 0 /* support only first language */
+           )
+        {
+            i_prev_pack_type = 0x00;
+            continue;
+        }
 
         /* */
-        int i_track =  i_track_number;
-        char *psz_track = &psz_text[0];
-        while( i_track <= 127 && psz_track < &psz_text[CDTEXT_PACK_PAYLOAD] )
+        switch( i_pack_type )
         {
-            //fprintf( stderr, "t=%d psz_track=%p end=%p", i_track, (void *)psz_track, (void *)&psz_text[12] );
-            if( *psz_track )
+            case 0x80:
+            case 0x81:
+            case 0x85:
+            case 0x87:
             {
-                astrcat( &pppsz_info[i_track][i_pack_type-0x80], psz_track );
-                i_track_last = __MAX( i_track_last, i_track );
+                CdTextParsePackText( p_pack, e_textpackcharset,
+                                     &i_textbuffer, textbuffer,
+                                     &i_track_last, pppsz_info );
+                break;
             }
-
-            i_track++;
-            psz_track += 1 + strlen(psz_track);
+            case 0x82:
+            case 0x83:
+            case 0x84:
+            case 0x86:
+            case 0x8d:
+            case 0x8e:
+            default:
+                continue;
         }
     }
 
@@ -1295,10 +1433,6 @@ static int CdTextParse( vlc_meta_t ***ppp_tracks, int *pi_tracks,
         for( int i = 0; i <= i_track_last; i++ )
         {
             /* */
-            if( pppsz_info[i][j] )
-                EnsureUTF8( pppsz_info[i][j] );
-
-            /* */
             const char *psz_default = pppsz_info[0][j];
             const char *psz_value = pppsz_info[i][j];
 
@@ -1311,9 +1445,9 @@ static int CdTextParse( vlc_meta_t ***ppp_tracks, int *pi_tracks,
                 if( !p_track )
                     continue;
             }
-            switch( j )
+            switch( 0x80 + j )
             {
-            case 0x00: /* Album/Title */
+            case 0x80: /* Album/Title */
                 if( i == 0 )
                 {
                     vlc_meta_SetAlbum( p_track, psz_value );
@@ -1326,23 +1460,23 @@ static int CdTextParse( vlc_meta_t ***ppp_tracks, int *pi_tracks,
                         vlc_meta_SetAlbum( p_track, psz_default );
                 }
                 break;
-            case 0x01: /* Performer */
+            case 0x81: /* Performer */
                 vlc_meta_SetArtist( p_track,
                                     psz_value ? psz_value : psz_default );
                 break;
-            case 0x05: /* Messages */
+            case 0x85: /* Messages */
                 vlc_meta_SetDescription( p_track,
                                          psz_value ? psz_value : psz_default );
                 break;
-            case 0x07: /* Genre */
+            case 0x87: /* Genre */
                 vlc_meta_SetGenre( p_track,
                                    psz_value ? psz_value : psz_default );
                 break;
             /* FIXME unsupported:
-             * 0x02: songwriter
-             * 0x03: composer
-             * 0x04: arrenger
-             * 0x06: disc id */
+             * 0x82: songwriter
+             * 0x83: composer
+             * 0x84: arranger
+             * 0x86: disc id */
             }
         }
     }



More information about the vlc-commits mailing list