[x265] [PATCH 13 of 29] intrapred: cleanup intra_pred_dc and intra_pred_planar older code
Dnyaneshwar G
dnyaneshwar at multicorewareinc.com
Tue Jan 13 08:11:21 CET 2015
# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1419400410 -19800
# Wed Dec 24 11:23:30 2014 +0530
# Node ID 558439d703fdc46fe47ce315c33fe982f8269a79
# Parent 44a17e7764a38817869d4a58d6764b14841eb2ec
intrapred: cleanup intra_pred_dc and intra_pred_planar older code
New asm and unit test code is available for these primitives, so this older code is no longer used.
diff -r 44a17e7764a3 -r 558439d703fd source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Tue Dec 23 19:11:31 2014 +0530
+++ b/source/common/x86/asm-primitives.cpp Wed Dec 24 11:23:30 2014 +0530
@@ -1409,10 +1409,6 @@
p.quant = x265_quant_sse4;
p.nquant = x265_nquant_sse4;
p.dequant_normal = x265_dequant_normal_sse4;
- p.intra_pred[0][BLOCK_4x4] = x265_intra_pred_planar4_sse4;
- p.intra_pred[0][BLOCK_8x8] = x265_intra_pred_planar8_sse4;
- p.intra_pred[0][BLOCK_16x16] = x265_intra_pred_planar16_sse4;
- p.intra_pred[0][BLOCK_32x32] = x265_intra_pred_planar32_sse4;
p.intra_pred_new[0][BLOCK_4x4] = x265_intra_pred_planar4_new_sse4;
p.intra_pred_new[0][BLOCK_8x8] = x265_intra_pred_planar8_new_sse4;
@@ -1696,10 +1692,6 @@
p.dequant_normal = x265_dequant_normal_sse4;
p.weight_pp = x265_weight_pp_sse4;
p.weight_sp = x265_weight_sp_sse4;
- p.intra_pred[0][BLOCK_4x4] = x265_intra_pred_planar4_sse4;
- p.intra_pred[0][BLOCK_8x8] = x265_intra_pred_planar8_sse4;
- p.intra_pred[0][BLOCK_16x16] = x265_intra_pred_planar16_sse4;
- p.intra_pred[0][BLOCK_32x32] = x265_intra_pred_planar32_sse4;
p.intra_pred_new[0][BLOCK_4x4] = x265_intra_pred_planar4_new_sse4;
p.intra_pred_new[0][BLOCK_8x8] = x265_intra_pred_planar8_new_sse4;
diff -r 44a17e7764a3 -r 558439d703fd source/common/x86/intrapred.h
--- a/source/common/x86/intrapred.h Tue Dec 23 19:11:31 2014 +0530
+++ b/source/common/x86/intrapred.h Wed Dec 24 11:23:30 2014 +0530
@@ -26,21 +26,11 @@
#ifndef X265_INTRAPRED_H
#define X265_INTRAPRED_H
-void x265_intra_pred_dc4_sse4(pixel* dst, intptr_t dstStride, pixel* above, pixel* left, int, int filter);
-void x265_intra_pred_dc8_sse4(pixel* dst, intptr_t dstStride, pixel* above, pixel* left, int, int filter);
-void x265_intra_pred_dc16_sse4(pixel* dst, intptr_t dstStride, pixel* above, pixel* left, int, int filter);
-void x265_intra_pred_dc32_sse4(pixel* dst, intptr_t dstStride, pixel* above, pixel* left, int, int filter);
-
void x265_intra_pred_dc4_new_sse4 (pixel* dst, intptr_t dstStride, pixel*srcPix, int, int filter);
void x265_intra_pred_dc8_new_sse4(pixel* dst, intptr_t dstStride, pixel* above, int, int filter);
void x265_intra_pred_dc16_new_sse4(pixel* dst, intptr_t dstStride, pixel* above, int, int filter);
void x265_intra_pred_dc32_new_sse4(pixel* dst, intptr_t dstStride, pixel* above, int, int filter);
-void x265_intra_pred_planar4_sse4(pixel* dst, intptr_t dstStride, pixel* above, pixel* left, int, int);
-void x265_intra_pred_planar8_sse4(pixel* dst, intptr_t dstStride, pixel* above, pixel* left, int, int);
-void x265_intra_pred_planar16_sse4(pixel* dst, intptr_t dstStride, pixel* above, pixel* left, int, int);
-void x265_intra_pred_planar32_sse4(pixel* dst, intptr_t dstStride, pixel* above, pixel* left, int, int);
-
void x265_intra_pred_planar4_new_sse4(pixel* dst, intptr_t dstStride, pixel* above, int, int);
void x265_intra_pred_planar8_new_sse4(pixel* dst, intptr_t dstStride, pixel* above, int, int);
void x265_intra_pred_planar16_new_sse4(pixel* dst, intptr_t dstStride, pixel* above, int, int);
diff -r 44a17e7764a3 -r 558439d703fd source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm Tue Dec 23 19:11:31 2014 +0530
+++ b/source/common/x86/intrapred16.asm Wed Dec 24 11:23:30 2014 +0530
@@ -448,71 +448,6 @@
jnz .loop
RET
-;-----------------------------------------------------------------------------------------------------------
-; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-----------------------------------------------------------------------------------------------------------
-INIT_XMM sse4
-cglobal intra_pred_planar4, 4,7,5
- add r2, 2
- add r3, 2
- add r1, r1
- movh m0, [r3] ; topRow[i] = above[i];
- punpcklqdq m0, m0
-
- pxor m1, m1
- movd m2, [r2 + 8] ; bottomLeft = left[4]
- movzx r6d, word [r3 + 8] ; topRight = above[4];
- pshuflw m2, m2, 0
- pshufd m2, m2, 0
-
- psubw m2, m0 ; bottomRow[i] = bottomLeft - topRow[i]
- psllw m0, 2
- punpcklqdq m3, m2, m1
- psubw m0, m3
- paddw m2, m2
-
-%macro COMP_PRED_PLANAR_2ROW 1
- movzx r4d, word [r2 + %1]
- lea r4d, [r4d * 4 + 4]
- movd m3, r4d
- pshuflw m3, m3, 0
-
- movzx r4d, word [r2 + %1 + 2]
- lea r4d, [r4d * 4 + 4]
- movd m4, r4d
- pshuflw m4, m4, 0
- punpcklqdq m3, m4 ; horPred
-
- movzx r4d, word [r2 + %1]
- mov r5d, r6d
- sub r5d, r4d
- movd m4, r5d
- pshuflw m4, m4, 0
-
- movzx r4d, word [r2 + %1 + 2]
- mov r5d, r6d
- sub r5d, r4d
- movd m1, r5d
- pshuflw m1, m1, 0
- punpcklqdq m4, m1 ; rightColumnN
-
- pmullw m4, [multi_2Row]
- paddw m3, m4
- paddw m0, m2
- paddw m3, m0
- psraw m3, 3
-
- movh [r0], m3
- pshufd m3, m3, 0xAE
- movh [r0 + r1], m3
- lea r0, [r0 + 2 * r1]
-%endmacro
-
- COMP_PRED_PLANAR_2ROW 0
- COMP_PRED_PLANAR_2ROW 4
-%undef COMP_PRED_PLANAR_2ROW
- RET
-
;---------------------------------------------------------------------------------------
; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel*srcPix, int, int filter)
;---------------------------------------------------------------------------------------
@@ -565,69 +500,6 @@
movh [r0 + r1], m1
RET
-;-----------------------------------------------------------------------------------------------------------
-; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-----------------------------------------------------------------------------------------------------------
-INIT_XMM sse4
-cglobal intra_pred_planar8, 4,4,7
- add r2, 2
- add r3, 2
- add r1, r1
- movu m1, [r3] ; v_topRow
- movu m2, [r2] ; v_leftColumn
-
- movd m3, [r3 + 16] ; topRight = above[8];
- movd m4, [r2 + 16] ; bottomLeft = left[8];
-
- pshuflw m3, m3, 0
- pshufd m3, m3, 0
- pshuflw m4, m4, 0
- pshufd m4, m4, 0
-
- psubw m4, m1 ; v_bottomRow
- psubw m3, m2 ; v_rightColumn
-
- psllw m1, 3 ; v_topRow
- psllw m2, 3 ; v_leftColumn
-
- paddw m6, m2, [pw_8]
-
-%macro PRED_PLANAR_ROW8 1
- %if (%1 < 4)
- pshuflw m5, m6, 0x55 * %1
- pshufd m5, m5, 0
- pshuflw m2, m3, 0x55 * %1
- pshufd m2, m2, 0
- %else
- pshufhw m5, m6, 0x55 * (%1 - 4)
- pshufd m5, m5, 0xAA
- pshufhw m2, m3, 0x55 * (%1 - 4)
- pshufd m2, m2, 0xAA
- %endif
-
- pmullw m2, [multiL]
- paddw m5, m2
- paddw m1, m4
- paddw m5, m1
- psraw m5, 4
-
- movu [r0], m5
- add r0, r1
-
-%endmacro
-
- PRED_PLANAR_ROW8 0
- PRED_PLANAR_ROW8 1
- PRED_PLANAR_ROW8 2
- PRED_PLANAR_ROW8 3
- PRED_PLANAR_ROW8 4
- PRED_PLANAR_ROW8 5
- PRED_PLANAR_ROW8 6
- PRED_PLANAR_ROW8 7
-
-%undef PRED_PLANAR_ROW8
- RET
-
;---------------------------------------------------------------------------------------
; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel*srcPix, int, int filter)
;---------------------------------------------------------------------------------------
@@ -679,187 +551,6 @@
INTRA_PRED_PLANAR8 7
RET
-;-----------------------------------------------------------------------------------------------------------
-; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-----------------------------------------------------------------------------------------------------------
-INIT_XMM sse4
-%if (BIT_DEPTH == 12)
-
-%if (ARCH_X86_64 == 1)
-cglobal intra_pred_planar16, 4,7,8+3
-%define bottomRow0 m7
-%define bottomRow1 m8
-%define bottomRow2 m9
-%define bottomRow3 m10
-%else
-cglobal intra_pred_planar16, 4,7,8, 0-3*mmsize
-%define bottomRow0 [rsp + 0*mmsize]
-%define bottomRow1 [rsp + 1*mmsize]
-%define bottomRow2 [rsp + 2*mmsize]
-%define bottomRow3 m7
-%endif
-
- add r2, 2
- add r3, 2
- add r1, r1
-
- pxor m0, m0
-
- ; bottomRow
- movzx r4d, word [r2 + 16*2]
- movd m1, r4d
- pshufd m1, m1, 0 ; m1 = bottomLeft
- movu m2, [r3]
- pmovzxwd m3, m2
- punpckhwd m2, m0
- psubd m4, m1, m3
- mova bottomRow0, m4
- psubd m4, m1, m2
- mova bottomRow1, m4
- movu m2, [r3 + 16]
- pmovzxwd m3, m2
- punpckhwd m2, m0
- psubd m4, m1, m3
- mova bottomRow2, m4
- psubd m1, m2
- mova bottomRow3, m1
-
- ; topRow
- pmovzxwd m0, [r3 + 0*8]
- pslld m0, 4
- pmovzxwd m1, [r3 + 1*8]
- pslld m1, 4
- pmovzxwd m2, [r3 + 2*8]
- pslld m2, 4
- pmovzxwd m3, [r3 + 3*8]
- pslld m3, 4
-
- xor r6, r6
-.loopH:
- movzx r4d, word [r2 + r6*2]
- movzx r5d, word [r3 + 16*2] ; r5 = topRight
- sub r5d, r4d
- movd m5, r5d
- pshuflw m5, m5, 0
- pmullw m5, [multiL]
- pmovsxwd m5, m5 ; m5 = rightCol
- add r4d, r4d
- lea r4d, [r4d * 8 + 16]
- movd m4, r4d
- pshufd m4, m4, 0 ; m4 = horPred
- paddd m4, m5
- pshufd m6, m5, 0xFF ; m6 = [4 4 4 4]
-
- ; 0-3
- paddd m0, bottomRow0
- paddd m5, m0, m4
- psrad m5, 5
- packusdw m5, m5
- movh [r0 + 0*8], m5
-
- ; 4-7
- paddd m4, m6
- paddd m1, bottomRow1
- paddd m5, m1, m4
- psrad m5, 5
- packusdw m5, m5
- movh [r0 + 1*8], m5
-
- ; 8-11
- paddd m4, m6
- paddd m2, bottomRow2
- paddd m5, m2, m4
- psrad m5, 5
- packusdw m5, m5
- movh [r0 + 2*8], m5
-
- ; 12-15
- paddd m4, m6
- paddd m3, bottomRow3
- paddd m5, m3, m4
- psrad m5, 5
- packusdw m5, m5
- movh [r0 + 3*8], m5
-
- add r0, r1
- inc r6d
- cmp r6d, 16
- jnz .loopH
- RET
-
-%else ; BIT_DEPTH == 10
-INIT_XMM sse4
-cglobal intra_pred_planar16, 4,6,7
- add r2, 2
- add r3, 2
- add r1, r1
-
- movu m1, [r3] ; topRow[0-7]
- movu m2, [r3 + 16] ; topRow[8-15]
-
- movd m3, [r2 + 32]
- pshuflw m3, m3, 0
- pshufd m3, m3, 0
- movzx r4d, word [r3 + 32] ; topRight = above[16]
-
- psubw m4, m3, m1 ; v_bottomRow[0]
- psubw m3, m2 ; v_bottomRow[1]
-
- psllw m1, 4
- psllw m2, 4
-
-%macro PRED_PLANAR_ROW16 1
- movzx r5d, word [r2 + %1 * 2]
- add r5d, r5d
- lea r5d, [r5d * 8 + 16]
- movd m5, r5d
- pshuflw m5, m5, 0
- pshufd m5, m5, 0 ; horPred
-
- movzx r5d, word [r2 + %1 * 2]
- mov r3d, r4d
- sub r3d, r5d
- movd m0, r3d
- pshuflw m0, m0, 0
- pshufd m0, m0, 0
-
- pmullw m6, m0, [multiL]
- paddw m6, m5
- paddw m1, m4
- paddw m6, m1
- psraw m6, 5
-
- pmullw m0, m0, [multiH]
- paddw m5, m0
- paddw m2, m3
- paddw m5, m2
- psraw m5, 5
-
- movu [r0], m6
- movu [r0 + 16], m5
- add r0, r1
-%endmacro
-
- PRED_PLANAR_ROW16 0
- PRED_PLANAR_ROW16 1
- PRED_PLANAR_ROW16 2
- PRED_PLANAR_ROW16 3
- PRED_PLANAR_ROW16 4
- PRED_PLANAR_ROW16 5
- PRED_PLANAR_ROW16 6
- PRED_PLANAR_ROW16 7
- PRED_PLANAR_ROW16 8
- PRED_PLANAR_ROW16 9
- PRED_PLANAR_ROW16 10
- PRED_PLANAR_ROW16 11
- PRED_PLANAR_ROW16 12
- PRED_PLANAR_ROW16 13
- PRED_PLANAR_ROW16 14
- PRED_PLANAR_ROW16 15
-%undef PRED_PLANAR_ROW16
- RET
-%endif
-
;---------------------------------------------------------------------------------------
; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel*srcPix, int, int filter)
;---------------------------------------------------------------------------------------
@@ -1166,197 +857,6 @@
mov rsp, r6
RET
-;-----------------------------------------------------------------------------------------------------------
-; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-----------------------------------------------------------------------------------------------------------
-INIT_XMM sse4
-%if (ARCH_X86_64 == 1)
-cglobal intra_pred_planar32, 4,7,8+8, 0-4*mmsize
- %define bottomRow0 m8
- %define bottomRow1 m9
- %define bottomRow2 m10
- %define bottomRow3 m11
- %define bottomRow4 m12
- %define bottomRow5 m13
- %define bottomRow6 m14
- %define bottomRow7 m15
- %define tmp0 [rsp + 0*mmsize]
- %define tmp1 [rsp + 1*mmsize]
- %define tmp2 [rsp + 2*mmsize]
- %define tmp3 [rsp + 3*mmsize]
-%else
-cglobal intra_pred_planar32, 4,7,8, 0-12*mmsize
- %define bottomRow0 [rsp + 0*mmsize]
- %define bottomRow1 [rsp + 1*mmsize]
- %define bottomRow2 [rsp + 2*mmsize]
- %define bottomRow3 [rsp + 3*mmsize]
- %define bottomRow4 [rsp + 4*mmsize]
- %define bottomRow5 [rsp + 5*mmsize]
- %define bottomRow6 [rsp + 6*mmsize]
- %define bottomRow7 [rsp + 7*mmsize]
- %define tmp0 [rsp + 8*mmsize]
- %define tmp1 [rsp + 9*mmsize]
- %define tmp2 [rsp + 10*mmsize]
- %define tmp3 [rsp + 11*mmsize]
-%endif
-
- add r2, 2
- add r3, 2
- add r1, r1
-
- pxor m0, m0
-
- ; bottomRow
- movzx r4d, word [r2 + 32*2]
- movd m1, r4d
- pshufd m1, m1, 0 ; m1 = bottomLeft
- movu m2, [r3]
- pmovzxwd m3, m2
- punpckhwd m2, m0
- psubd m4, m1, m3
- mova bottomRow0, m4
- psubd m4, m1, m2
- mova bottomRow1, m4
- movu m2, [r3 + 16]
- pmovzxwd m3, m2
- punpckhwd m2, m0
- psubd m4, m1, m3
- mova bottomRow2, m4
- psubd m4, m1, m2
- mova bottomRow3, m4
-
- movu m2, [r3 + 32]
- pmovzxwd m3, m2
- punpckhwd m2, m0
- psubd m4, m1, m3
- mova bottomRow4, m4
- psubd m4, m1, m2
- mova bottomRow5, m4
- movu m2, [r3 + 48]
- pmovzxwd m3, m2
- punpckhwd m2, m0
- psubd m4, m1, m3
- mova bottomRow6, m4
- psubd m1, m2
- mova bottomRow7, m1
-
- ; topRow
- pmovzxwd m0, [r3 + 0*8]
- pslld m0, 5
- pmovzxwd m1, [r3 + 1*8]
- pslld m1, 5
- pmovzxwd m2, [r3 + 2*8]
- pslld m2, 5
- pmovzxwd m3, [r3 + 3*8]
- pslld m3, 5
-
- pmovzxwd m4, [r3 + 4*8]
- pslld m4, 5
- mova tmp0, m4
- pmovzxwd m4, [r3 + 5*8]
- pslld m4, 5
- mova tmp1, m4
- pmovzxwd m4, [r3 + 6*8]
- pslld m4, 5
- mova tmp2, m4
- pmovzxwd m4, [r3 + 7*8]
- pslld m4, 5
- mova tmp3, m4
-
- xor r6, r6
-.loopH:
- movzx r4d, word [r2 + r6*2]
- movzx r5d, word [r3 + 32*2] ; r5 = topRight
- sub r5d, r4d
- movd m5, r5d
- pshuflw m5, m5, 0
- pmullw m5, [multiL]
- pmovsxwd m5, m5 ; m5 = rightCol
- shl r4d, 5
- add r4d, 32
- movd m4, r4d
- pshufd m4, m4, 0 ; m4 = horPred
- paddd m4, m5
- pshufd m6, m5, 0xFF ; m6 = [4 4 4 4]
-
- ; 0-3
- paddd m0, bottomRow0
- paddd m5, m0, m4
- psrad m5, 6
- packusdw m5, m5
- movh [r0 + 0*8], m5
-
- ; 4-7
- paddd m4, m6
- paddd m1, bottomRow1
- paddd m5, m1, m4
- psrad m5, 6
- packusdw m5, m5
- movh [r0 + 1*8], m5
-
- ; 8-11
- paddd m4, m6
- paddd m2, bottomRow2
- paddd m5, m2, m4
- psrad m5, 6
- packusdw m5, m5
- movh [r0 + 2*8], m5
-
- ; 12-15
- paddd m4, m6
- paddd m3, bottomRow3
- paddd m5, m3, m4
- psrad m5, 6
- packusdw m5, m5
- movh [r0 + 3*8], m5
-
- ; 16-19
- paddd m4, m6
- mova m7, tmp0
- paddd m7, bottomRow4
- mova tmp0, m7
- paddd m7, m4
- psrad m7, 6
- packusdw m7, m7
- movh [r0 + 4*8], m7
-
- ; 20-23
- paddd m4, m6
- mova m7, tmp1
- paddd m7, bottomRow5
- mova tmp1, m7
- paddd m7, m4
- psrad m7, 6
- packusdw m7, m7
- movh [r0 + 5*8], m7
-
- ; 24-27
- paddd m4, m6
- mova m7, tmp2
- paddd m7, bottomRow6
- mova tmp2, m7
- paddd m7, m4
- psrad m7, 6
- packusdw m7, m7
- movh [r0 + 6*8], m7
-
- ; 28-31
- paddd m4, m6
- mova m7, tmp3
- paddd m7, bottomRow7
- mova tmp3, m7
- paddd m7, m4
- psrad m7, 6
- packusdw m7, m7
- movh [r0 + 7*8], m7
-
- add r0, r1
- inc r6d
- cmp r6d, 32
- jnz .loopH
-
- RET
-
;-----------------------------------------------------------------------------
; void intraPredAng(pixel* dst, intptr_t dstStride, pixel *refLeft, pixel *refAbove, int dirMode, int bFilter)
;-----------------------------------------------------------------------------
diff -r 44a17e7764a3 -r 558439d703fd source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm Tue Dec 23 19:11:31 2014 +0530
+++ b/source/common/x86/intrapred8.asm Wed Dec 24 11:23:30 2014 +0530
@@ -415,70 +415,6 @@
RET
-;-----------------------------------------------------------------------------------------------------------
-; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-----------------------------------------------------------------------------------------------------------
-INIT_XMM sse4
-cglobal intra_pred_planar4, 4,7,5
- inc r2
- inc r3
- pmovzxbw m0, [r3] ; topRow[i] = above[i];
- punpcklqdq m0, m0
-
- pxor m1, m1
- movd m2, [r2 + 4] ; bottomLeft = left[4]
- movzx r6d, byte [r3 + 4] ; topRight = above[4];
- pshufb m2, m1
- punpcklbw m2, m1
- psubw m2, m0 ; bottomRow[i] = bottomLeft - topRow[i]
- psllw m0, 2
- punpcklqdq m3, m2, m1
- psubw m0, m3
- paddw m2, m2
-
-%macro COMP_PRED_PLANAR_2ROW 1
- movzx r4d, byte [r2 + %1]
- lea r4d, [r4d * 4 + 4]
- movd m3, r4d
- pshuflw m3, m3, 0
-
- movzx r4d, byte [r2 + %1 + 1]
- lea r4d, [r4d * 4 + 4]
- movd m4, r4d
- pshuflw m4, m4, 0
- punpcklqdq m3, m4 ; horPred
-
- movzx r4d, byte [r2 + %1]
- mov r5d, r6d
- sub r5d, r4d
- movd m4, r5d
- pshuflw m4, m4, 0
-
- movzx r4d, byte [r2 + %1 + 1]
- mov r5d, r6d
- sub r5d, r4d
- movd m1, r5d
- pshuflw m1, m1, 0
- punpcklqdq m4, m1 ; rightColumnN
-
- pmullw m4, [multi_2Row]
- paddw m3, m4
- paddw m0, m2
- paddw m3, m0
- psraw m3, 3
- packuswb m3, m3
-
- movd [r0], m3
- pshufd m3, m3, 0x55
- movd [r0 + r1], m3
- lea r0, [r0 + 2 * r1]
-%endmacro
-
- COMP_PRED_PLANAR_2ROW 0
- COMP_PRED_PLANAR_2ROW 2
-
- RET
-
;---------------------------------------------------------------------------------------
; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel*srcPix, int, int filter)
;---------------------------------------------------------------------------------------
@@ -533,68 +469,6 @@
movd [r0 + r1], m5
RET
-;-----------------------------------------------------------------------------------------------------------
-; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-----------------------------------------------------------------------------------------------------------
-INIT_XMM sse4
-cglobal intra_pred_planar8, 4,4,7
- inc r2
- inc r3
- pxor m0, m0
- pmovzxbw m1, [r3] ; v_topRow
- pmovzxbw m2, [r2] ; v_leftColumn
-
- movd m3, [r3 + 8] ; topRight = above[8];
- movd m4, [r2 + 8] ; bottomLeft = left[8];
-
- pshufb m3, m0
- pshufb m4, m0
- punpcklbw m3, m0 ; v_topRight
- punpcklbw m4, m0 ; v_bottomLeft
-
- psubw m4, m1 ; v_bottomRow
- psubw m3, m2 ; v_rightColumn
-
- psllw m1, 3 ; v_topRow
- psllw m2, 3 ; v_leftColumn
-
- paddw m6, m2, [pw_8]
-
-%macro PRED_PLANAR_ROW8 1
- %if (%1 < 4)
- pshuflw m5, m6, 0x55 * %1
- pshufd m5, m5, 0
- pshuflw m2, m3, 0x55 * %1
- pshufd m2, m2, 0
- %else
- pshufhw m5, m6, 0x55 * (%1 - 4)
- pshufd m5, m5, 0xAA
- pshufhw m2, m3, 0x55 * (%1 - 4)
- pshufd m2, m2, 0xAA
- %endif
-
- pmullw m2, [multiL]
- paddw m5, m2
- paddw m1, m4
- paddw m5, m1
- psraw m5, 4
- packuswb m5, m5
-
- movh [r0], m5
- lea r0, [r0 + r1]
-
-%endmacro
-
- PRED_PLANAR_ROW8 0
- PRED_PLANAR_ROW8 1
- PRED_PLANAR_ROW8 2
- PRED_PLANAR_ROW8 3
- PRED_PLANAR_ROW8 4
- PRED_PLANAR_ROW8 5
- PRED_PLANAR_ROW8 6
- PRED_PLANAR_ROW8 7
- RET
-
;---------------------------------------------------------------------------------------
; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel*srcPix, int, int filter)
;---------------------------------------------------------------------------------------
@@ -647,79 +521,6 @@
INTRA_PRED_PLANAR8 7
RET
-;-----------------------------------------------------------------------------------------------------------
-; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-----------------------------------------------------------------------------------------------------------
-INIT_XMM sse4
-cglobal intra_pred_planar16, 4,6,8
- inc r2
- inc r3
- pxor m0, m0
- pmovzxbw m1, [r3] ; topRow[0-7]
- pmovzxbw m2, [r3 + 8] ; topRow[8-15]
-
- movd m3, [r2 + 16]
- pshufb m3, m0
- punpcklbw m3, m0 ; v_bottomLeft = left[16]
- movzx r4d, byte [r3 + 16] ; topRight = above[16]
-
- psubw m4, m3, m1 ; v_bottomRow[0]
- psubw m5, m3, m2 ; v_bottomRow[1]
-
- psllw m1, 4
- psllw m2, 4
-
-%macro PRED_PLANAR_ROW16 1
- movzx r5d, byte [r2 + %1]
- add r5d, r5d
- lea r5d, [r5d * 8 + 16]
- movd m3, r5d
- pshuflw m3, m3, 0
- pshufd m3, m3, 0 ; horPred
-
- movzx r5d, byte [r2 + %1]
- mov r3d, r4d
- sub r3d, r5d
- movd m6, r3d
- pshuflw m6, m6, 0
- pshufd m6, m6, 0
-
- pmullw m7, m6, [multiL]
- paddw m7, m3
- paddw m1, m4
- paddw m7, m1
- psraw m7, 5
-
- pmullw m6, m6, [multiH]
- paddw m3, m6
- paddw m2, m5
- paddw m3, m2
- psraw m3, 5
-
- packuswb m7, m3
- movu [r0], m7
- lea r0, [r0 + r1]
-%endmacro
-
- PRED_PLANAR_ROW16 0
- PRED_PLANAR_ROW16 1
- PRED_PLANAR_ROW16 2
- PRED_PLANAR_ROW16 3
- PRED_PLANAR_ROW16 4
- PRED_PLANAR_ROW16 5
- PRED_PLANAR_ROW16 6
- PRED_PLANAR_ROW16 7
- PRED_PLANAR_ROW16 8
- PRED_PLANAR_ROW16 9
- PRED_PLANAR_ROW16 10
- PRED_PLANAR_ROW16 11
- PRED_PLANAR_ROW16 12
- PRED_PLANAR_ROW16 13
- PRED_PLANAR_ROW16 14
- PRED_PLANAR_ROW16 15
-
- RET
-
;---------------------------------------------------------------------------------------
; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel*srcPix, int, int filter)
;---------------------------------------------------------------------------------------
@@ -909,114 +710,6 @@
jnz .loop
RET
-;-----------------------------------------------------------------------------------------------------------
-; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-----------------------------------------------------------------------------------------------------------
-INIT_XMM sse4
-%if ARCH_X86_64 == 1
-cglobal intra_pred_planar32, 4,7,12
- %define bottomRow0 m8
- %define bottomRow1 m9
- %define bottomRow2 m10
- %define bottomRow3 m11
-%else
-cglobal intra_pred_planar32, 4,7,8,0-(4*mmsize)
- %define bottomRow0 [rsp + 0 * mmsize]
- %define bottomRow1 [rsp + 1 * mmsize]
- %define bottomRow2 [rsp + 2 * mmsize]
- %define bottomRow3 [rsp + 3 * mmsize]
-%endif
- inc r2
- inc r3
- pxor m3, m3
- movd m0, [r2 + 32]
- pshufb m0, m3
- punpcklbw m0, m3 ; v_bottomLeft = left[32]
- movzx r4d, byte [r3 + 32] ; topRight = above[32]
-
- pmovzxbw m1, [r3 + 0] ; topRow[0]
- pmovzxbw m2, [r3 + 8] ; topRow[1]
- pmovzxbw m3, [r3 +16] ; topRow[2]
- pmovzxbw m4, [r3 +24] ; topRow[3]
-
- psubw m5, m0, m1 ; v_bottomRow[0]
- psubw m6, m0, m2 ; v_bottomRow[1]
- psubw m7, m0, m3 ; v_bottomRow[2]
- psubw m0, m4 ; v_bottomRow[3]
-
- mova bottomRow0, m5
- mova bottomRow1, m6
- mova bottomRow2, m7
- mova bottomRow3, m0
-
- psllw m1, 5
- psllw m2, 5
- psllw m3, 5
- psllw m4, 5
-
-%macro COMP_PRED_PLANAR_ROW 1
- movzx r5d, byte [r2]
- shl r5d, 5
- add r5d, 32
- movd m5, r5d
- pshuflw m5, m5, 0
- pshufd m5, m5, 0 ; horPred
-
- movzx r5d, byte [r2]
- mov r6d, r4d
- sub r6d, r5d
- movd m6, r6d
- pshuflw m6, m6, 0
- pshufd m6, m6, 0
-
-%if (%1 == 0)
- pmullw m7, m6, [multiL]
-%else
- pmullw m7, m6, [multiH2]
-%endif
-
- paddw m7, m5
-%if (%1 == 0)
- paddw m1, bottomRow0
- paddw m7, m1
-%else
- paddw m3, bottomRow2
- paddw m7, m3
-%endif
- psraw m7, 6
-
-%if (%1 == 0)
- pmullw m6, [multiH]
-%else
- pmullw m6, [multiH3]
-%endif
- paddw m6, m5
-%if (%1 == 0)
- paddw m2, bottomRow1
- paddw m6, m2
-%else
- paddw m4, bottomRow3
- paddw m6, m4
-%endif
- psraw m6, 6
-
- packuswb m7, m6
- movu [r0 + %1], m7
-%endmacro
-
- mov r3, 32
-.loop:
- COMP_PRED_PLANAR_ROW 0
- COMP_PRED_PLANAR_ROW 16
- inc r2
- lea r0, [r0 + r1]
-
- dec r3
- jnz .loop
-%undef COMP_PRED_PLANAR_ROW
-
- RET
-
;-----------------------------------------------------------------------------
; void intraPredAng(pixel* dst, intptr_t dstStride, pixel *refLeft, pixel *refAbove, int dirMode, int bFilter)
;-----------------------------------------------------------------------------
diff -r 44a17e7764a3 -r 558439d703fd source/test/intrapredharness.cpp
--- a/source/test/intrapredharness.cpp Tue Dec 23 19:11:31 2014 +0530
+++ b/source/test/intrapredharness.cpp Wed Dec 24 11:23:30 2014 +0530
@@ -33,44 +33,6 @@
pixel_buff[i] = rand() % PIXEL_MAX;
}
-bool IntraPredHarness::check_dc_primitive(intra_pred_t ref, intra_pred_t opt, int width)
-{
- int j = Predict::ADI_BUF_STRIDE;
- intptr_t stride = FENC_STRIDE;
-
-#if _DEBUG
- memset(pixel_out_vec, 0xCD, OUTPUT_SIZE);
- memset(pixel_out_c, 0xCD, OUTPUT_SIZE);
-#endif
-
- for (int i = 0; i <= 100; i++)
- {
- int rand_filter = rand() & 1;
- if (width > 16)
- rand_filter = 0;
-
- pixel left[MAX_CU_SIZE * 2 + 1];
- for (int k = 0; k < width * 2 + 1; k++)
- {
- left[k] = pixel_buff[j - 1 + k * Predict::ADI_BUF_STRIDE];
- }
-
- ref(pixel_out_c, stride, pixel_buff + j - Predict::ADI_BUF_STRIDE, left + 1, 0, rand_filter);
- checked(opt, pixel_out_vec, stride, pixel_buff + j - Predict::ADI_BUF_STRIDE, left + 1, 0, rand_filter);
-
- for (int k = 0; k < width; k++)
- {
- if (memcmp(pixel_out_vec + k * FENC_STRIDE, pixel_out_c + k * FENC_STRIDE, width * sizeof(pixel)))
- return false;
- }
-
- reportfail();
- j += FENC_STRIDE;
- }
-
- return true;
-}
-
bool IntraPredHarness::check_dc_primitive(intra_pred_new_t ref, intra_pred_new_t opt, int width)
{
int j = Predict::ADI_BUF_STRIDE;
@@ -88,41 +50,7 @@
rand_filter = 0;
ref(pixel_out_c, stride, pixel_buff + j - Predict::ADI_BUF_STRIDE, 0, rand_filter);
- opt(pixel_out_vec, stride, pixel_buff + j - Predict::ADI_BUF_STRIDE, 0, rand_filter);
-
- for (int k = 0; k < width; k++)
- {
- if (memcmp(pixel_out_vec + k * FENC_STRIDE, pixel_out_c + k * FENC_STRIDE, width * sizeof(pixel)))
- return false;
- }
-
- reportfail();
- j += FENC_STRIDE;
- }
-
- return true;
-}
-
-bool IntraPredHarness::check_planar_primitive(intra_pred_t ref, intra_pred_t opt, int width)
-{
- int j = Predict::ADI_BUF_STRIDE;
- intptr_t stride = FENC_STRIDE;
-
-#if _DEBUG
- memset(pixel_out_vec, 0xCD, OUTPUT_SIZE);
- memset(pixel_out_c, 0xCD, OUTPUT_SIZE);
-#endif
-
- for (int i = 0; i <= 100; i++)
- {
- pixel left[MAX_CU_SIZE * 2 + 1];
- for (int k = 0; k < width * 2 + 1; k++)
- {
- left[k] = pixel_buff[j - 1 + k * Predict::ADI_BUF_STRIDE];
- }
-
- ref(pixel_out_c, stride, pixel_buff + j - Predict::ADI_BUF_STRIDE, left + 1, 0, 0);
- checked(opt, pixel_out_vec, stride, pixel_buff + j - Predict::ADI_BUF_STRIDE, left + 1, 0, 0);
+ checked(opt, pixel_out_vec, stride, pixel_buff + j - Predict::ADI_BUF_STRIDE, 0, rand_filter);
for (int k = 0; k < width; k++)
{
@@ -264,15 +192,6 @@
{
for (int i = BLOCK_4x4; i <= BLOCK_32x32; i++)
{
- if (opt.intra_pred[0][i])
- {
- const int size = (1 << (i + 2));
- if (!check_planar_primitive(ref.intra_pred[0][i], opt.intra_pred[0][i], size))
- {
- printf("intra_planar %dx%d failed\n", size, size);
- return false;
- }
- }
if (opt.intra_pred_new[0][i])
{
const int size = (1 << (i + 2));
@@ -326,12 +245,6 @@
REPORT_SPEEDUP(opt.intra_pred_new[0][i], ref.intra_pred_new[0][i],
pixel_out_vec, FENC_STRIDE, pixel_buff + srcStride, 0, 0);
}
- if (opt.intra_pred[0][i])
- {
- printf("intra_planar %2dx%d", size, size);
- REPORT_SPEEDUP(opt.intra_pred[0][i], ref.intra_pred[0][i],
- pixel_out_vec, FENC_STRIDE, pixel_buff + srcStride, pixel_buff, 0, 0);
- }
if (opt.intra_pred_allangs[i])
{
bool bFilter = (size <= 16);
diff -r 44a17e7764a3 -r 558439d703fd source/test/intrapredharness.h
--- a/source/test/intrapredharness.h Tue Dec 23 19:11:31 2014 +0530
+++ b/source/test/intrapredharness.h Wed Dec 24 11:23:30 2014 +0530
@@ -41,9 +41,7 @@
pixel pixel_out_33_c[OUTPUT_SIZE_33];
pixel pixel_out_33_vec[OUTPUT_SIZE_33];
- bool check_dc_primitive(intra_pred_t ref, intra_pred_t opt, int width);
bool check_dc_primitive(intra_pred_new_t ref, intra_pred_new_t opt, int width);
- bool check_planar_primitive(intra_pred_t ref, intra_pred_t opt, int width);
bool check_planar_primitive(intra_pred_new_t ref, intra_pred_new_t opt, int width);
bool check_angular_primitive(const intra_pred_t ref[][NUM_TR_SIZE], const intra_pred_t opt[][NUM_TR_SIZE]);
bool check_allangs_primitive(const intra_allangs_t ref[], const intra_allangs_t opt[]);
More information about the x265-devel mailing list