[x264-devel] Patch - Altivec Quant 4x4x4

Philipp Sibler philipp.sibler at googlemail.com
Sun Sep 29 18:23:53 CEST 2013


Hi x264,

This patch introduces an AltiVec version of the 4x4x4 quantization step.
On the current master branch, 4x4x4 quantization on PowerPC AltiVec
machines falls back to the plain scalar C routine.

The patch was tested on a PowerMac G4, where it yields an encoding speedup of
about 14 percent.

Cheers,
Philipp
-------------- next part --------------
>From 951350060c745c1c33bf814f87621e2763143a50 Mon Sep 17 00:00:00 2001
From: Philipp Sibler <philipp.sibler at gmail.com>
Date: Sun, 29 Sep 2013 17:48:36 +0200
Subject: [PATCH] Introduced Altivec version of quant 4x4x4

---
 common/ppc/quant.c |   17 +++++++++++++++++
 common/ppc/quant.h |    1 +
 common/quant.c     |    1 +
 3 files changed, 19 insertions(+), 0 deletions(-)

diff --git a/common/ppc/quant.c b/common/ppc/quant.c
index f11938a..0fff340 100644
--- a/common/ppc/quant.c
+++ b/common/ppc/quant.c
@@ -90,6 +90,23 @@ int x264_quant_4x4_altivec( int16_t dct[16], uint16_t mf[16], uint16_t bias[16]
     return vec_any_ne(nz, zero_s16v);
 }
 
+/* Quantize four consecutive 4x4 blocks with the single-block AltiVec routine.
+ * All four blocks share the same mf and bias tables.
+ * Returns a mask in which bit i is set when the call for dct[i] returned
+ * nonzero. */
+int x264_quant_4x4x4_altivec( int16_t dct[4][16], uint16_t mf[16], uint16_t bias[16] )
+{
+    int nza = 0;
+
+    for( int i = 0; i < 4; i++ )
+    {
+        /* Collapse the per-block result to 0/1 and move it to bit i. */
+        int nz = x264_quant_4x4_altivec( dct[i], mf, bias );
+        nza |= (!!nz) << i;
+    }
+    return nza;
+}
+
 // DC quant of a whole 4x4 block, unrolled 2x and "pre-scheduled"
 #define QUANT_16_U_DC( idx0, idx1 )                                 \
 {                                                                   \
diff --git a/common/ppc/quant.h b/common/ppc/quant.h
index 1f789c3..2f22d91 100644
--- a/common/ppc/quant.h
+++ b/common/ppc/quant.h
@@ -27,6 +27,7 @@
 #define X264_PPC_QUANT_H
 
 int x264_quant_4x4_altivec( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] );
+int x264_quant_4x4x4_altivec( int16_t dct[4][16], uint16_t mf[16], uint16_t bias[16] );
 int x264_quant_8x8_altivec( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] );
 
 int x264_quant_4x4_dc_altivec( int16_t dct[16], int mf, int bias );
diff --git a/common/quant.c b/common/quant.c
index 7aa851e..8101535 100644
--- a/common/quant.c
+++ b/common/quant.c
@@ -717,6 +717,7 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
         pf->quant_4x4_dc = x264_quant_4x4_dc_altivec;
         pf->quant_4x4 = x264_quant_4x4_altivec;
         pf->quant_8x8 = x264_quant_8x8_altivec;
+		pf->quant_4x4x4 = x264_quant_4x4x4_altivec;
 
         pf->dequant_4x4 = x264_dequant_4x4_altivec;
         pf->dequant_8x8 = x264_dequant_8x8_altivec;
-- 
1.7.7.4



More information about the x264-devel mailing list