[x265] [PATCH] asm: assembly code for IntraPred_DC[8x8]

dnyaneshwar at multicorewareinc.com dnyaneshwar at multicorewareinc.com
Thu Nov 21 11:41:26 CET 2013


# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1385029557 -19800
#      Thu Nov 21 15:55:57 2013 +0530
# Node ID 5768141583e8a6a828bb1837a789b9efd2f0493c
# Parent  db1151bb4974f1288745ba39dfd6e1838113feb7
asm: assembly code for IntraPred_DC[8x8]

diff -r db1151bb4974 -r 5768141583e8 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Wed Nov 20 18:36:04 2013 -0600
+++ b/source/common/x86/asm-primitives.cpp	Thu Nov 21 15:55:57 2013 +0530
@@ -656,6 +656,7 @@
         p.calcresidual[BLOCK_32x32] = x265_getResidual32_sse4;
         p.quant = x265_quant_sse4;
         p.intra_pred_dc[BLOCK_4x4] = x265_intra_pred_dc4_sse4;
+        p.intra_pred_dc[BLOCK_8x8] = x265_intra_pred_dc8_sse4;
     }
     if (cpuMask & X265_CPU_AVX)
     {
diff -r db1151bb4974 -r 5768141583e8 source/common/x86/intrapred.asm
--- a/source/common/x86/intrapred.asm	Wed Nov 20 18:36:04 2013 -0600
+++ b/source/common/x86/intrapred.asm	Thu Nov 21 15:55:57 2013 +0530
@@ -94,3 +94,82 @@
 .end:
 
     RET
+
+
+;-------------------------------------------------------------------------------------------
+; void intra_pred_dc(pixel* above, pixel* left, pixel* dst, intptr_t dstStride, int filter)
+;-------------------------------------------------------------------------------------------
+INIT_XMM sse4
+cglobal intra_pred_dc8, 5, 7, 3, above, left, dst, dstStride, filter
+    ; 8x8 DC prediction on byte pixels: r0 = above, r1 = left, r2 = dst, r3 = dstStride, r4d = filter
+    pxor            m0,            m0    ; m0 = 0 (psadbw reference, pshufb broadcast mask)
+    movh            m1,            [r0]  ; m1 = above[0..7]
+    movh            m2,            [r1]  ; m2 = left[0..7]
+    punpcklqdq      m1,            m2    ; m1 = [above[0..7] | left[0..7]]
+    psadbw          m1,            m0    ; one byte-sum per qword: sum(above), sum(left)
+    pshufd          m2,            m1, 2 ; move the high-qword sum down to dword 0
+    paddw           m1,            m2    ; low word = sum(above) + sum(left)
+
+    movd            r5d,           m1
+    add             r5d,           8     ; rounding term
+    shr             r5d,           4     ; DC = (sum + 8) / 16
+    movd            m1,            r5d
+    pshufb          m1,            m0    ; m1 = byte [dc_val ...]
+
+    test            r4d,           r4d   ; ZF = (filter == 0); mov/movh/lea below keep flags for the jz
+
+    ; store DC 8x8
+    mov             r6,            r2    ; r6 = dst, kept for the filter pass (r2 is advanced below)
+    movh            [r2],          m1
+    movh            [r2 + r3],     m1
+    lea             r2,            [r2 + 2 * r3]
+    movh            [r2],          m1
+    movh            [r2 + r3],     m1
+    lea             r2,            [r2 + 2 * r3]
+    movh            [r2],          m1
+    movh            [r2 + r3],     m1
+    lea             r2,            [r2 + 2 * r3]
+    movh            [r2],          m1
+    movh            [r2 + r3],     m1
+
+    ; Do DC Filter (skipped when filter == 0, using the flags set by the test above)
+    jz              .end
+    lea             r4d,           [r5d * 2 + 2]  ; r4d = DC * 2 + 2
+    add             r5d,           r4d            ; r5d = DC * 3 + 2
+    movd            m1,            r5d
+    pshuflw         m1,            m1, 0          ; m1 = word [DC * 3 + 2 ...]
+    pshufd          m1,            m1, 0
+
+    ; filter top: dst[0][x] = (above[x] + DC * 3 + 2) >> 2
+    pmovzxbw        m2,            [r0]
+    paddw           m2,            m1
+    psraw           m2,            2
+    packuswb        m2,            m2
+    movh            [r6],          m2
+
+    ; filter top-left: dst[0][0] = (above[0] + left[0] + DC * 2 + 2) >> 2
+    movzx           r0d, byte      [r0]
+    add             r4d,           r0d
+    movzx           r0d, byte      [r1]
+    add             r0d,           r4d
+    shr             r0d,           2
+    mov             [r6],          r0b
+
+    ; filter left: dst[y][0] = (left[y] + DC * 3 + 2) >> 2 for y = 1..7
+    add             r6,            r3             ; r6 = row 1
+    pmovzxbw        m2,            [r1 + 1]       ; words left[1..8]; the last one is not stored
+    paddw           m2,            m1
+    psraw           m2,            2
+    packuswb        m2,            m2
+    pextrb          [r6],          m2, 0          ; row 1
+    pextrb          [r6 + r3],     m2, 1          ; row 2
+    pextrb          [r6 + 2 * r3], m2, 2          ; row 3
+    lea             r6,            [r6 + r3 * 2]  ; r6 = row 3
+    pextrb          [r6 + r3],     m2, 3          ; row 4
+    pextrb          [r6 + 2 * r3], m2, 4          ; row 5
+    pextrb          [r6 + 4 * r3], m2, 6          ; row 7
+    lea             r3,            [r3 * 3]       ; r3 = 3 * dstStride
+    pextrb          [r6 + r3],     m2, 5          ; row 6
+
+.end:
+    RET
diff -r db1151bb4974 -r 5768141583e8 source/common/x86/intrapred.h
--- a/source/common/x86/intrapred.h	Wed Nov 20 18:36:04 2013 -0600
+++ b/source/common/x86/intrapred.h	Thu Nov 21 15:55:57 2013 +0530
@@ -27,5 +27,6 @@
 #define X265_INTRAPRED_H
 
 void x265_intra_pred_dc4_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride, int filter);
+void x265_intra_pred_dc8_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride, int filter);
 
 #endif // ifndef X265_INTRAPRED_H


More information about the x265-devel mailing list