[x265] [PATCH 1 of 2] psy-rd: Ported hadamard_ac transform functions to x265 for uniform blocksizes(8, 16, 32, 64)

sumalatha at multicorewareinc.com sumalatha at multicorewareinc.com
Mon Mar 17 07:31:53 CET 2014


# HG changeset patch
# User Sumalatha Polureddy
# Date 1395035947 -19800
# Node ID 6f38933d38248001389f1c5f90f97dfce3a4ae2c
# Parent  ba3ddc1848fff0fb8e96ca8f0453dd84171f213f
psy-rd: Ported hadamard_ac transform functions to x265 for uniform blocksizes(8,16,32,64)

diff -r ba3ddc1848ff -r 6f38933d3824 source/common/pixel.cpp
--- a/source/common/pixel.cpp	Fri Mar 14 12:56:01 2014 -0500
+++ b/source/common/pixel.cpp	Mon Mar 17 11:29:07 2014 +0530
@@ -852,6 +852,70 @@
         dst  += dstStride;
     }
 }
+
+uint64_t pixelHadamardAc8x8(pixel *pix, intptr_t stride) // returns two sums for one 8x8 block packed in 64 bits: sum8 in the upper 32 bits, sum4 in the lower 32 bits
+{
+    sum2_t tmp[32]; // 32 packed entries, four written per input row by the first loop
+    sum2_t a0, a1, a2, a3, dc;
+    sum2_t sum4 = 0, sum8 = 0; // sum4 accumulates after the first HADAMARD4 pass, sum8 after the second
+    for (int i = 0; i < 8; i++, pix+=stride) // eight iterations; each reads pix[0..7], then pix advances by stride
+    {
+        sum2_t *t = tmp + (i & 3) + (i & 4) * 4; // i = 0..3 -> base tmp[0..3], i = 4..7 -> base tmp[16..19]
+        a0 = (pix[0] + pix[1]) + ((sum2_t)(pix[0] - pix[1]) << BITS_PER_SUM); // pair sum in the low part, pair difference shifted up by BITS_PER_SUM
+        a1 = (pix[2] + pix[3]) + ((sum2_t)(pix[2] - pix[3]) << BITS_PER_SUM);
+        t[0] = a0 + a1; // butterfly on the packed values for pixels 0..3
+        t[4] = a0 - a1;
+        a2 = (pix[4] + pix[5]) + ((sum2_t)(pix[4] - pix[5]) << BITS_PER_SUM);
+        a3 = (pix[6] + pix[7]) + ((sum2_t)(pix[6] - pix[7]) << BITS_PER_SUM);
+        t[8] = a2 + a3; // same butterfly for pixels 4..7
+        t[12] = a2 - a3;
+    }
+    for (int i = 0; i < 8; i++) // first pass: HADAMARD4 over each group of four consecutive tmp entries
+    {
+        HADAMARD4(a0, a1, a2, a3, tmp[i * 4 + 0], tmp[i * 4 + 1], tmp[i * 4 + 2], tmp[i * 4 + 3]); // NOTE(review): HADAMARD4 is defined outside this hunk; presumably a 4-point butterfly writing a0..a3 - confirm
+        tmp[i * 4 + 0] = a0; // results are stored back because the second pass and dc read tmp again
+        tmp[i * 4 + 1] = a1;
+        tmp[i * 4 + 2] = a2;
+        tmp[i * 4 + 3] = a3;
+        sum4 += abs2(a0) + abs2(a1) + abs2(a2) + abs2(a3); // NOTE(review): abs2 is defined outside this hunk; presumably per-field absolute value of a packed sum2_t - confirm
+    }
+    for (int i = 0; i < 8; i++) // second pass: HADAMARD4 over entries i, 8+i, 16+i, 24+i (results are not stored)
+    {
+        HADAMARD4(a0, a1, a2, a3, tmp[i], tmp[8 + i], tmp[16 + i], tmp[24 + i]);
+        sum8 += abs2(a0) + abs2(a1) + abs2(a2) + abs2(a3);
+    }
+    dc = (sum_t)(tmp[0] + tmp[8] + tmp[16] + tmp[24]); // low (sum_t) part of four first-pass outputs; subtracted from both totals below
+    sum4 = (sum_t)sum4 + (sum4 >> BITS_PER_SUM) - dc; // fold low part and high part of the packed accumulator together, then remove dc
+    sum8 = (sum_t)sum8 + (sum8 >> BITS_PER_SUM) - dc;
+    return ((uint64_t)sum8 << 32) + sum4; // pack: sum8 -> bits 63..32, sum4 added into the low bits
+}
+
+uint64_t pixelHadamardAc16x16(pixel *pix, intptr_t stride) // adds the packed 8x8 results of the four quadrants, then rescales each 32-bit field
+{
+    pixel *lowerHalf = pix + 8 * stride; // first row of the two bottom quadrants
+    uint64_t packed = pixelHadamardAc8x8(pix, stride) + pixelHadamardAc8x8(pix + 8, stride);
+    packed += pixelHadamardAc8x8(lowerHalf, stride) + pixelHadamardAc8x8(lowerHalf + 8, stride);
+    uint64_t high = packed >> 34, low = (uint32_t)packed >> 1; // upper field shifted down by 2 bits, lower field by 1 bit
+    return (high << 32) + low;
+}
+uint64_t pixelHadamardAc32x32(pixel *pix, intptr_t stride) // adds the packed 16x16 results of the four quadrants, then rescales each 32-bit field
+{
+    pixel *bottom = pix + 16 * stride; // first row of the two lower quadrants
+    uint64_t acc = pixelHadamardAc16x16(pix, stride) + pixelHadamardAc16x16(pix + 16, stride);
+    acc += pixelHadamardAc16x16(bottom, stride) + pixelHadamardAc16x16(bottom + 16, stride);
+    const uint64_t upper = acc >> 34, lower = (uint32_t)acc >> 1; // upper field shifted down by 2 bits, lower field by 1 bit
+    return (upper << 32) + lower;
+}
+
+uint64_t pixelHadamardAc64x64(pixel *pix, intptr_t stride) // adds the packed 32x32 results of the four quadrants of a 64x64 block, then rescales each 32-bit field
+{
+    uint64_t sum = pixelHadamardAc32x32(pix, stride);            // top-left quadrant
+    sum += pixelHadamardAc32x32(pix + 32, stride);               // top-right quadrant
+    sum += pixelHadamardAc32x32(pix + 32 * stride, stride);      // bottom-left quadrant
+    sum += pixelHadamardAc32x32(pix + 32 * stride + 32, stride); // bottom-right quadrant: column offset must be 32 (an offset of 16 re-reads columns 16..47 and skips 48..63)
+    return ((sum >> 34) << 32) + ((uint32_t)sum >> 1);           // upper field shifted down by 2 bits, lower field by 1 bit, repacked
+}
+
 }  // end anonymous namespace
 
 namespace x265 {
@@ -1099,5 +1163,11 @@
     p.var[BLOCK_32x32] = pixel_var<32>;
     p.var[BLOCK_64x64] = pixel_var<64>;
     p.plane_copy_deinterleave_c = plane_copy_deinterleave_chroma;
+
+    p.pixelHadamardAc[LUMA_8x8] = pixelHadamardAc8x8;
+    p.pixelHadamardAc[LUMA_16x16] = pixelHadamardAc16x16;
+    p.pixelHadamardAc[LUMA_32x32] = pixelHadamardAc32x32;
+    p.pixelHadamardAc[LUMA_64x64] = pixelHadamardAc64x64;
+
 }
 }
diff -r ba3ddc1848ff -r 6f38933d3824 source/common/primitives.h
--- a/source/common/primitives.h	Fri Mar 14 12:56:01 2014 -0500
+++ b/source/common/primitives.h	Mon Mar 17 11:29:07 2014 +0530
@@ -163,6 +163,7 @@
 typedef void (*addAvg_t)(int16_t* src0, int16_t* src1, pixel* dst, intptr_t src0Stride, intptr_t src1Stride, intptr_t dstStride);
 
 typedef void (*saoCuOrgE0_t)(pixel * rec, int8_t * offsetEo, int lcuWidth, int8_t signLeft);
+typedef uint64_t (*pixelHadamardAc_t)(pixel *pix, intptr_t stride); // signature of the pixelHadamardAc* primitives: block start pointer and stride in, packed 64-bit result out
 
 /* Define a structure containing function pointers to optimized encoder
  * primitives.  Each pointer can reference either an assembly routine,
@@ -234,6 +235,8 @@
     // sao primitives
     saoCuOrgE0_t      saoCuOrgE0;
 
+    pixelHadamardAc_t pixelHadamardAc[NUM_LUMA_PARTITIONS];
+
     struct
     {
         filter_pp_t     filter_vpp[NUM_LUMA_PARTITIONS];


More information about the x265-devel mailing list