[vlc-commits] [Git][videolan/vlc][master] 5 commits: demux: mkv: add LZO1X decompression code from lzokay

Steve Lhomme (@robUx4) gitlab at videolan.org
Tue Dec 2 09:28:03 UTC 2025



Steve Lhomme pushed to branch master at VideoLAN / VLC


Commits:
235c0fef by Steve Lhomme at 2025-12-02T08:53:08+00:00
demux: mkv: add LZO1X decompression code from lzokay

lzokay [^1] is a C++ MIT-licensed library to (de)compress LZO1X.

[^1]: https://github.com/AxioDL/lzokay

- - - - -
4e82b1e8 by Steve Lhomme at 2025-12-02T08:53:08+00:00
demux: mkv: remove unused lzo1x compression code

- - - - -
1736cc96 by Steve Lhomme at 2025-12-02T08:53:08+00:00
demux: mkv: use standard way to get size_t maximum for lzo1x

- - - - -
5ea45b52 by Steve Lhomme at 2025-12-02T08:53:08+00:00
demux: mkv: use VLC-like 16-bit little endian reader for lzo1x

In C++20 we could use std::endian [^1].

[^1]: https://en.cppreference.com/w/cpp/types/endian.html

- - - - -
8af818ec by Steve Lhomme at 2025-12-02T08:53:08+00:00
demux: mkv: add support for LZO1X decompression via lzokay

lzokay [^1] is a C++ MIT-licensed library to (de)compress LZO1X.

[^1]: https://github.com/AxioDL/lzokay

- - - - -


8 changed files:

- modules/demux/Makefile.am
- modules/demux/meson.build
- + modules/demux/mkv/lzokay.cpp
- + modules/demux/mkv/lzokay.hpp
- modules/demux/mkv/matroska_segment_parse.cpp
- modules/demux/mkv/mkv.cpp
- modules/demux/mkv/util.cpp
- modules/demux/mkv/util.hpp


Changes:

=====================================
modules/demux/Makefile.am
=====================================
@@ -205,6 +205,7 @@ libmkv_plugin_la_SOURCES = \
 	demux/mkv/chapter_command_script_common.hpp demux/mkv/chapter_command_script_common.cpp \
 	demux/mkv/stream_io_callback.hpp demux/mkv/stream_io_callback.cpp \
 	demux/mkv/vlc_colors.c demux/mkv/vlc_colors.h \
+	demux/mkv/lzokay.cpp demux/mkv/lzokay.hpp \
 	demux/vobsub.h \
 	demux/mkv/mkv.hpp demux/mkv/mkv.cpp \
         demux/av1_unpack.h codec/webvtt/helpers.h \


=====================================
modules/demux/meson.build
=====================================
@@ -293,6 +293,7 @@ if libebml_dep.found() and libmatroska_dep.found()
             'mkv/chapter_command_script.cpp',
             'mkv/chapter_command_script_common.cpp',
             'mkv/stream_io_callback.cpp',
+            'mkv/lzokay.cpp',
             'mkv/vlc_colors.c',
             'mp4/libmp4.c',
             '../packetizer/dts_header.c',


=====================================
modules/demux/mkv/lzokay.cpp
=====================================
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2018 Jack Andersen
+ * SPDX-License-Identifier: MIT
+ * https://github.com/AxioDL/lzokay
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "lzokay.hpp"
+#include <cstring>
+#include <limits>
+
+/*
+ * Based on documentation from the Linux sources: Documentation/lzo.txt
+ * https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/lzo.txt
+ */
+
+namespace lzokay {
+
+/* Reads an unaligned 16-bit little-endian value stored at p. */
+static inline uint16_t get_le16(const void *p)
+{
+  const uint8_t *bytes = static_cast<const uint8_t *>(p);
+
+  /* Assemble the value byte by byte: low byte first, high byte second. */
+  return static_cast<uint16_t>(bytes[0] | (bytes[1] << 8));
+}
+
+/* Upper bound on the number of zero "length extension" bytes accepted in one
+ * instruction. Each zero byte adds 255 to a length, so capping the count keeps
+ * `count * 255 + <small constant>` from overflowing std::size_t. */
+constexpr std::size_t Max255Count = std::numeric_limits<std::size_t>::max() / 255 - 2;
+
+/* Returns InputOverrun from the enclosing function unless `count` more input
+ * bytes are available. The remaining size is compared rather than forming
+ * `inp + count`: that pointer would be out of bounds (undefined behaviour, and
+ * able to wrap past the check) for large stream-controlled counts.
+ * Relies on the invariant inp <= inp_end. */
+#define NEEDS_IN(count) \
+  if (std::size_t(inp_end - inp) < std::size_t(count)) { \
+    dst_size = outp - dst; \
+    return EResult::InputOverrun; \
+  }
+
+/* Returns OutputOverrun from the enclosing function unless `count` more bytes
+ * fit in the output buffer. Same size-based comparison as NEEDS_IN; relies on
+ * the invariant outp <= outp_end. */
+#define NEEDS_OUT(count) \
+  if (std::size_t(outp_end - outp) < std::size_t(count)) { \
+    dst_size = outp - dst; \
+    return EResult::OutputOverrun; \
+  }
+
+/* Skips a run of zero bytes and declares `offset` as the number skipped.
+ * The scan stops at inp_end so a truncated stream made of zeros cannot read
+ * past the input; callers follow up with NEEDS_IN(1), which then reports the
+ * overrun. Returns Error when the run is longer than Max255Count. */
+#define CONSUME_ZERO_BYTE_LENGTH \
+  std::size_t offset; \
+  { \
+    const uint8_t *old_inp = inp; \
+    while (inp < inp_end && *inp == 0) ++inp; \
+    offset = inp - old_inp; \
+    if (offset > Max255Count) { \
+      dst_size = outp - dst; \
+      return EResult::Error; \
+    } \
+  }
+
+// constexpr uint32_t M1Marker = 0x0;  (unused: M1 is the fall-through case when no marker bit is set)
+// constexpr uint32_t M2Marker = 0x40; (unused: decompress() tests M2 with the 0xC0 mask instead)
+constexpr uint32_t M3Marker = 0x20; // bit 5: M3 instruction (opcodes 32..63) once M2 is ruled out
+constexpr uint32_t M4Marker = 0x10; // bit 4: M4 instruction (opcodes 16..31) once M2/M3 are ruled out
+
+/**
+ * Decompresses one complete LZO1X stream.
+ *
+ * @param src            compressed input
+ * @param src_size       number of bytes readable at src
+ * @param dst            output buffer
+ * @param init_dst_size  capacity of dst in bytes
+ * @param dst_size       receives the number of bytes written to dst; it is
+ *                       set on every return path, including failures
+ * @return EResult::Success when the end-of-stream instruction is found exactly
+ *         at the end of the input, EResult::InputNotConsumed when input bytes
+ *         remain after it, or a negative EResult describing the failure.
+ */
+EResult decompress(const uint8_t* src, std::size_t src_size,
+                   uint8_t* dst, std::size_t init_dst_size,
+                   std::size_t& dst_size) {
+  dst_size = init_dst_size;
+
+  /* The shortest valid stream is the 3-byte end-of-stream instruction. */
+  if (src_size < 3) {
+    dst_size = 0;
+    return EResult::InputOverrun;
+  }
+
+  const uint8_t* inp = src;
+  const uint8_t* inp_end = src + src_size;
+  uint8_t* outp = dst;
+  uint8_t* outp_end = dst + init_dst_size;
+  std::size_t lbdist = 0; /* distance of the current back-reference */
+  std::size_t lblen = 0;  /* length of the current back-reference */
+  std::size_t state = 0;  /* literals copied by the previous instruction (4 = "4 or more") */
+  std::size_t nstate = 0; /* literals to copy after the current instruction */
+
+  /* First byte encoding */
+  if (*inp >= 22) {
+    /* 22..255 : copy literal string
+     *           length = (byte - 17) = 5..238
+     *           state = 4 [ don't copy extra literals ]
+     *           skip byte
+     */
+    std::size_t len = *inp++ - uint8_t(17);
+    NEEDS_IN(len)
+    NEEDS_OUT(len)
+    for (std::size_t i = 0; i < len; ++i)
+      *outp++ = *inp++;
+    state = 4;
+  } else if (*inp >= 18) {
+    /* 18..21 : copy (byte - 17) = 1..4 literals
+     *          state = (byte - 17)  [ 21 copies 4 literals and leaves
+     *                                 state == 4, like a literal run ]
+     *          skip byte
+     */
+    nstate = *inp++ - uint8_t(17);
+    state = nstate;
+    NEEDS_IN(nstate)
+    NEEDS_OUT(nstate)
+    for (std::size_t i = 0; i < nstate; ++i)
+      *outp++ = *inp++;
+  }
+  /* 0..17 : follow regular instruction encoding, see below. It is worth
+   *         noting that codes 16 and 17 will represent a block copy from
+   *         the dictionary which is empty, and that they will always be
+   *         invalid at this place.
+   */
+
+  while (true) {
+    NEEDS_IN(1)
+    const uint8_t inst = *inp++;
+    if (inst & 0xC0) {
+      /* [M2]
+       * 1 L L D D D S S  (128..255)
+       *   Copy 5-8 bytes from block within 2kB distance
+       *   state = S (copy S literals after this block)
+       *   length = 5 + L
+       * Always followed by exactly one byte : H H H H H H H H
+       *   distance = (H << 3) + D + 1
+       *
+       * 0 1 L D D D S S  (64..127)
+       *   Copy 3-4 bytes from block within 2kB distance
+       *   state = S (copy S literals after this block)
+       *   length = 3 + L
+       * Always followed by exactly one byte : H H H H H H H H
+       *   distance = (H << 3) + D + 1
+       */
+      NEEDS_IN(1)
+      lbdist = (std::size_t(*inp++) << 3) + ((inst >> 2) & 0x7) + 1;
+      lblen = std::size_t(inst >> 5) + 1;
+      nstate = inst & uint8_t(0x3);
+    } else if (inst & M3Marker) {
+      /* [M3]
+       * 0 0 1 L L L L L  (32..63)
+       *   Copy of small block within 16kB distance (preferably less than 34B)
+       *   length = 2 + (L ?: 31 + (zero_bytes * 255) + non_zero_byte)
+       * Always followed by exactly one LE16 :  D D D D D D D D : D D D D D D S S
+       *   distance = D + 1
+       *   state = S (copy S literals after this block)
+       */
+      lblen = std::size_t(inst & uint8_t(0x1f)) + 2;
+      if (lblen == 2) {
+        CONSUME_ZERO_BYTE_LENGTH
+        NEEDS_IN(1)
+        lblen += offset * 255 + 31 + *inp++;
+      }
+      NEEDS_IN(2)
+      nstate = get_le16(inp);
+      inp += 2;
+      lbdist = (nstate >> 2) + 1;
+      nstate &= 0x3;
+    } else if (inst & M4Marker) {
+      /* [M4]
+       * 0 0 0 1 H L L L  (16..31)
+       *   Copy of a block within 16..48kB distance (preferably less than 10B)
+       *   length = 2 + (L ?: 7 + (zero_bytes * 255) + non_zero_byte)
+       * Always followed by exactly one LE16 :  D D D D D D D D : D D D D D D S S
+       *   distance = 16384 + (H << 14) + D
+       *   state = S (copy S literals after this block)
+       *   End of stream is reached if distance == 16384
+       */
+      lblen = std::size_t(inst & uint8_t(0x7)) + 2;
+      if (lblen == 2) {
+        CONSUME_ZERO_BYTE_LENGTH
+        NEEDS_IN(1)
+        lblen += offset * 255 + 7 + *inp++;
+      }
+      NEEDS_IN(2)
+      nstate = get_le16(inp);
+      inp += 2;
+      lbdist = (std::size_t(inst & 0x8) << 11) + (nstate >> 2);
+      nstate &= 0x3;
+      if (lbdist == 0)
+        break; /* Stream finished */
+      lbdist += 16384;
+    } else {
+      /* [M1] Depends on the number of literals copied by the last instruction. */
+      if (state == 0) {
+        /* If last instruction did not copy any literal (state == 0), this
+         * encoding will be a copy of 4 or more literal, and must be interpreted
+         * like this :
+         *
+         *    0 0 0 0 L L L L  (0..15)  : copy long literal string
+         *    length = 3 + (L ?: 15 + (zero_bytes * 255) + non_zero_byte)
+         *    state = 4  (no extra literals are copied)
+         */
+        std::size_t len = inst + 3;
+        if (len == 3) {
+          CONSUME_ZERO_BYTE_LENGTH
+          NEEDS_IN(1)
+          len += offset * 255 + 15 + *inp++;
+        }
+        /* copy_literal_run */
+        NEEDS_IN(len)
+        NEEDS_OUT(len)
+        for (std::size_t i = 0; i < len; ++i)
+          *outp++ = *inp++;
+        state = 4;
+        continue;
+      } else if (state != 4) {
+        /* If last instruction used to copy between 1 to 3 literals (encoded in
+         * the instruction's opcode or distance), the instruction is a copy of a
+         * 2-byte block from the dictionary within a 1kB distance. It is worth
+         * noting that this instruction provides little savings since it uses 2
+         * bytes to encode a copy of 2 other bytes but it encodes the number of
+         * following literals for free. It must be interpreted like this :
+         *
+         *    0 0 0 0 D D S S  (0..15)  : copy 2 bytes from <= 1kB distance
+         *    length = 2
+         *    state = S (copy S literals after this block)
+         *  Always followed by exactly one byte : H H H H H H H H
+         *    distance = (H << 2) + D + 1
+         */
+        NEEDS_IN(1)
+        nstate = inst & uint8_t(0x3);
+        lbdist = std::size_t(inst >> 2) + (std::size_t(*inp++) << 2) + 1;
+        lblen = 2;
+      } else {
+        /* If last instruction used to copy 4 or more literals (as detected by
+         * state == 4), the instruction becomes a copy of a 3-byte block from the
+         * dictionary from a 2..3kB distance, and must be interpreted like this :
+         *
+         *    0 0 0 0 D D S S  (0..15)  : copy 3 bytes from 2..3 kB distance
+         *    length = 3
+         *    state = S (copy S literals after this block)
+         *  Always followed by exactly one byte : H H H H H H H H
+         *    distance = (H << 2) + D + 2049
+         */
+        NEEDS_IN(1)
+        nstate = inst & uint8_t(0x3);
+        lbdist = std::size_t(inst >> 2) + (std::size_t(*inp++) << 2) + 2049;
+        lblen = 3;
+      }
+    }
+    /* Validate the distance as an integer before forming the source pointer:
+     * computing `outp - lbdist` first would create a pointer before dst, which
+     * is undefined behaviour even if it is only compared. */
+    if (lbdist > std::size_t(outp - dst)) {
+      dst_size = outp - dst;
+      return EResult::LookbehindOverrun;
+    }
+    NEEDS_IN(nstate)
+    NEEDS_OUT(lblen + nstate)
+    /* Copy lookbehind. This must stay a forward byte-by-byte copy: source and
+     * destination overlap whenever lblen > lbdist, and the repetition that
+     * results is part of the format. */
+    const uint8_t* lbcur = outp - lbdist;
+    for (std::size_t i = 0; i < lblen; ++i)
+      *outp++ = *lbcur++;
+    state = nstate;
+    /* Copy literal */
+    for (std::size_t i = 0; i < nstate; ++i)
+      *outp++ = *inp++;
+  }
+
+  dst_size = outp - dst;
+  if (lblen != 3) /* Ensure terminating M4 was encountered */
+    return EResult::Error;
+  if (inp == inp_end)
+    return EResult::Success;
+  else if (inp < inp_end)
+    return EResult::InputNotConsumed;
+  else
+    return EResult::InputOverrun;
+}
+
+}


=====================================
modules/demux/mkv/lzokay.hpp
=====================================
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2018 Jack Andersen
+ * SPDX-License-Identifier: MIT
+ * https://github.com/AxioDL/lzokay
+ */
+
+#pragma once
+#include <cstddef>
+#include <cstdint>
+
+namespace lzokay {
+
+/* Status returned by decompress(). Failures are negative; Success is zero;
+ * InputNotConsumed is the only positive value. */
+enum class EResult {
+  LookbehindOverrun = -4, /* a back-reference points before the start of the output */
+  OutputOverrun = -3,     /* the output buffer is too small for the decoded data */
+  InputOverrun = -2,      /* the input ends before the stream is complete */
+  Error = -1,             /* malformed stream (over-long length run, bad terminator) */
+  Success = 0,            /* end of stream reached exactly at the end of the input */
+  InputNotConsumed = 1,   /* end of stream reached, but input bytes remain after it */
+};
+
+
+/* Decompresses the LZO1X stream of src_size bytes at src into dst, which can
+ * hold dst_size bytes. out_size receives the number of bytes written to dst,
+ * on success and on failure alike. */
+EResult decompress(const uint8_t* src, std::size_t src_size,
+                   uint8_t* dst, std::size_t dst_size,
+                   std::size_t& out_size);
+
+}


=====================================
modules/demux/mkv/matroska_segment_parse.cpp
=====================================
@@ -463,6 +463,7 @@ void matroska_segment_c::ParseTrackEntry( const KaxTrackEntry *m )
             vars.tk->i_compression_type = static_cast<uint32_t>( compalg );
             debug( vars, "Compression Algorithm: %i", vars.tk->i_compression_type );
             if ( ( vars.tk->i_compression_type != MATROSKA_COMPRESSION_ZLIB ) &&
+                 ( vars.tk->i_compression_type != MATROSKA_COMPRESSION_LZOX ) &&
                  ( vars.tk->i_compression_type != MATROSKA_COMPRESSION_HEADER ) )
             {
                 msg_Err( vars.p_demuxer, "Track Compression method %d not supported", vars.tk->i_compression_type );
@@ -1091,6 +1092,15 @@ void matroska_segment_c::ParseTrackEntry( const KaxTrackEntry *m )
             return;
         }
 #endif
+        if( p_track->i_compression_type == MATROSKA_COMPRESSION_LZOX &&
+            p_track->i_encoding_scope & MATROSKA_ENCODING_SCOPE_PRIVATE &&
+            p_track->i_extra_data && p_track->p_extra_data &&
+            !lzo1x_decompress_extra( &sys.demuxer, *p_track ) )
+        {
+            msg_Err(&sys.demuxer, "Couldn't handle the track %u compression", p_track->i_number );
+            delete p_track;
+            return;
+        }
         if( !TrackInit( p_track ) )
         {
             msg_Err(&sys.demuxer, "Couldn't init track %u", p_track->i_number );


=====================================
modules/demux/mkv/mkv.cpp
=====================================
@@ -624,6 +624,14 @@ static void BlockDecode( demux_t *p_demux, KaxBlock *block, KaxSimpleBlock *simp
         }
         else
 #endif
+        if( track.i_compression_type == MATROSKA_COMPRESSION_LZOX &&
+            track.i_encoding_scope & MATROSKA_ENCODING_SCOPE_ALL_FRAMES )
+        {
+            p_block = block_lzo1x_decompress( VLC_OBJECT(p_demux), p_block );
+            if( p_block == NULL )
+                break;
+        }
+        else
         if( track.i_compression_type == MATROSKA_COMPRESSION_HEADER &&
             track.i_encoding_scope & MATROSKA_ENCODING_SCOPE_ALL_FRAMES )
         {


=====================================
modules/demux/mkv/util.cpp
=====================================
@@ -26,6 +26,8 @@
 #include "virtual_segment.hpp"
 #include "../../codec/webvtt/helpers.h"
 
+#include "lzokay.hpp"
+
 namespace mkv {
 
 /*****************************************************************************
@@ -151,6 +153,76 @@ block_t *block_zlib_decompress( vlc_object_t *p_this, block_t *p_in_block ) {
 }
 #endif
 
+/**
+ * Replaces the LZO1X-compressed private data of a track with its decompressed
+ * form.
+ *
+ * @param p_demux  demuxer used for logging
+ * @param tk       track whose p_extra_data / i_extra_data are decompressed
+ * @return true on success, in which case tk owns the new buffer and the old
+ *         one has been freed; false on allocation or decompression failure,
+ *         in which case tk is left untouched.
+ */
+bool lzo1x_decompress_extra( demux_t * p_demux, mkv_track_t & tk )
+{
+    lzokay::EResult result;
+    size_t capacity = 1024, total_out = 0;
+    uint8_t * p_new_extra = nullptr;
+
+    msg_Dbg(p_demux,"Inflating private data");
+
+    for( ;; )
+    {
+        void *alloc = realloc(p_new_extra, capacity);
+        if( alloc == nullptr )
+        {
+            msg_Err( p_demux, "Couldn't allocate buffer to inflate data, ignore track %u",
+                      tk.i_number );
+            /* realloc() leaves the previous buffer allocated on failure */
+            free(p_new_extra);
+            return false;
+        }
+
+        p_new_extra = static_cast<uint8_t *>( alloc );
+        result = lzokay::decompress( tk.p_extra_data, tk.i_extra_data,
+                                     p_new_extra, capacity, total_out );
+        if( result != lzokay::EResult::OutputOverrun )
+            break;
+
+        /* Every retry decodes from the start, so grow geometrically to keep
+         * the total work linear in the output size instead of quadratic. */
+        capacity *= 2;
+    }
+
+    if( result != lzokay::EResult::Success )
+    {
+        msg_Err( p_demux, "LZO1X private data decompression failed. Result: %d",
+                 static_cast<int>( result ) );
+        free(p_new_extra);
+        return false;
+    }
+
+    free( tk.p_extra_data );
+    tk.i_extra_data = total_out;
+    tk.p_extra_data = p_new_extra;
+
+    return true;
+}
+
+/**
+ * Decompresses an LZO1X-compressed block.
+ *
+ * @param p_this      object used for logging
+ * @param p_in_block  compressed block
+ * @return a new block holding the decompressed data, p_in_block having been
+ *         released; or p_in_block itself, untouched, when allocation or
+ *         decompression fails.
+ */
+block_t *block_lzo1x_decompress( vlc_object_t *p_this, block_t *p_in_block ) {
+    lzokay::EResult result = lzokay::EResult::Error;
+    size_t dstsize = 0;
+    size_t capacity = 1000;
+    block_t *p_block = block_Alloc( capacity );
+
+    while( p_block != nullptr )
+    {
+        result = lzokay::decompress( p_in_block->p_buffer, p_in_block->i_buffer,
+                                     p_block->p_buffer, p_block->i_buffer, dstsize );
+        if( result != lzokay::EResult::OutputOverrun )
+            break;
+
+        /* Every retry decodes from the start, so grow geometrically to keep
+         * the total work linear in the output size instead of quadratic. */
+        capacity *= 2;
+        /* block_Realloc() releases the block and returns NULL on failure */
+        p_block = block_Realloc( p_block, 0, capacity );
+    }
+
+    if( p_block == nullptr )
+    {
+        msg_Err( p_this, "Couldn't allocate buffer for LZO1X decompression" );
+        return p_in_block;
+    }
+
+    if( result != lzokay::EResult::Success )
+    {
+        msg_Err( p_this, "LZO1X decompression failed. Result: %d",
+                 static_cast<int>( result ) );
+        block_Release( p_block );
+        return p_in_block;
+    }
+
+    /* Trim the block to the decoded size. */
+    p_block = block_Realloc( p_block, 0, dstsize );
+    if( p_block == nullptr )
+    {
+        msg_Err( p_this, "Couldn't allocate buffer for LZO1X decompression" );
+        return p_in_block;
+    }
+    p_block->i_buffer = dstsize;
+    block_Release( p_in_block );
+
+    return p_block;
+}
+
 /* Utility function for BlockDecode */
 block_t *MemToBlock( uint8_t *p_mem, size_t i_mem, size_t offset)
 {


=====================================
modules/demux/mkv/util.hpp
=====================================
@@ -30,6 +30,8 @@ namespace mkv {
 int32_t zlib_decompress_extra( demux_t * p_demux, mkv_track_t & tk );
 block_t *block_zlib_decompress( vlc_object_t *p_this, block_t *p_in_block );
 #endif
+/* Decompresses the LZO1X private data of tk in place; returns false (and
+ * leaves tk untouched) when allocation or decompression fails. */
+bool lzo1x_decompress_extra( demux_t * p_demux, mkv_track_t & tk );
+/* Returns a new block with the decompressed content of p_in_block, which is
+ * released; returns p_in_block itself when decompression fails. */
+block_t *block_lzo1x_decompress( vlc_object_t *p_this, block_t *p_in_block );
 
 block_t *MemToBlock( uint8_t *p_mem, size_t i_mem, size_t offset);
 void handle_real_audio(demux_t * p_demux, mkv_track_t * p_tk, block_t * p_blk, vlc_tick_t i_pts);



View it on GitLab: https://code.videolan.org/videolan/vlc/-/compare/1f75ed8f520d134be4f0799eedc5baec2a0c4f0b...8af818eca9e0e33e47bd222e18f2365e415f88af

-- 
View it on GitLab: https://code.videolan.org/videolan/vlc/-/compare/1f75ed8f520d134be4f0799eedc5baec2a0c4f0b...8af818eca9e0e33e47bd222e18f2365e415f88af
You're receiving this email because of your account on code.videolan.org.


VideoLAN code repository instance


More information about the vlc-commits mailing list