[x265-commits] [x265] cleanup reduce condition check for getUseDQP()
Min Chen
chenm003 at 163.com
Fri Jan 10 22:09:17 CET 2014
details: http://hg.videolan.org/x265/rev/a03cc8c4d739
branches: stable
changeset: 5812:a03cc8c4d739
user: Min Chen <chenm003 at 163.com>
date: Sat Dec 28 10:22:04 2013 +0800
description:
cleanup reduce condition check for getUseDQP()
Subject: [x265] improvement interpolate_H_pp
details: http://hg.videolan.org/x265/rev/b2a0cfe4837b
branches:
changeset: 5813:b2a0cfe4837b
user: Min Chen <chenm003 at 163.com>
date: Tue Jan 07 21:04:55 2014 +0800
description:
improvement interpolate_H_pp
Subject: [x265] asm: intra_pred_ang8_2 asm code
details: http://hg.videolan.org/x265/rev/acbe568e7366
branches:
changeset: 5814:acbe568e7366
user: Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
date: Thu Jan 09 12:13:23 2014 +0550
description:
asm: intra_pred_ang8_2 asm code
Subject: [x265] asm: code for intra_pred[BLOCK_16x16] mode 2 and 34
details: http://hg.videolan.org/x265/rev/c5aa7ae59fc7
branches:
changeset: 5815:c5aa7ae59fc7
user: Murugan Vairavel <murugan at multicorewareinc.com>
date: Thu Jan 09 12:50:16 2014 +0550
description:
asm: code for intra_pred[BLOCK_16x16] mode 2 and 34
Subject: [x265] Merge with stable
details: http://hg.videolan.org/x265/rev/b2e7d8da2838
branches:
changeset: 5816:b2e7d8da2838
user: Steve Borho <steve at borho.org>
date: Fri Jan 10 15:08:40 2014 -0600
description:
Merge with stable
diffstat:
source/Lib/TLibEncoder/TEncEntropy.cpp | 5 +-
source/common/x86/asm-primitives.cpp | 10 ++-
source/common/x86/intrapred.h | 38 +++++++++++++-
source/common/x86/intrapred8.asm | 76 +++++++++++++++++++++++++++
source/common/x86/ipfilter8.asm | 94 ++++++++++++++++++++++++++-------
5 files changed, 195 insertions(+), 28 deletions(-)
diffs (truncated from 305 to 300 lines):
diff -r 0d70188e80bc -r b2e7d8da2838 source/Lib/TLibEncoder/TEncEntropy.cpp
--- a/source/Lib/TLibEncoder/TEncEntropy.cpp Wed Jan 08 13:00:53 2014 -0600
+++ b/source/Lib/TLibEncoder/TEncEntropy.cpp Fri Jan 10 15:08:40 2014 -0600
@@ -553,10 +553,7 @@ void TEncEntropy::encodeQP(TComDataCU* c
absPartIdx = 0;
}
- if (cu->getSlice()->getPPS()->getUseDQP())
- {
- m_entropyCoderIf->codeDeltaQP(cu, absPartIdx);
- }
+ m_entropyCoderIf->codeDeltaQP(cu, absPartIdx);
}
// texture
diff -r 0d70188e80bc -r b2e7d8da2838 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Wed Jan 08 13:00:53 2014 -0600
+++ b/source/common/x86/asm-primitives.cpp Fri Jan 10 15:08:40 2014 -0600
@@ -549,10 +549,13 @@ extern "C" {
#define SETUP_INTRA_ANG4(mode, fno, cpu) \
p.intra_pred[BLOCK_4x4][mode] = x265_intra_pred_ang4_ ## fno ## _ ## cpu;
+#define SETUP_INTRA_ANG8(mode, fno, cpu) \
+ p.intra_pred[BLOCK_8x8][mode] = x265_intra_pred_ang8_ ## fno ## _ ## cpu;
+#define SETUP_INTRA_ANG16(mode, fno, cpu) \
+ p.intra_pred[BLOCK_16x16][mode] = x265_intra_pred_ang16_ ## fno ## _ ## cpu;
namespace x265 {
// private x265 namespace
-
void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask)
{
#if HIGH_BIT_DEPTH
@@ -889,10 +892,13 @@ void Setup_Assembly_Primitives(EncoderPr
SETUP_INTRA_ANG4(2, 2, ssse3);
SETUP_INTRA_ANG4(34, 2, ssse3);
+ SETUP_INTRA_ANG8(2, 2, ssse3);
+ SETUP_INTRA_ANG8(34, 2, ssse3);
+ SETUP_INTRA_ANG16(2, 2, ssse3);
+ SETUP_INTRA_ANG16(34, 2, ssse3);
p.scale1D_128to64 = x265_scale1D_128to64_ssse3;
p.scale2D_64to32 = x265_scale2D_64to32_ssse3;
-
SAD_X3(ssse3);
SAD_X4(ssse3);
p.sad_x4[LUMA_8x4] = x265_pixel_sad_x4_8x4_ssse3;
diff -r 0d70188e80bc -r b2e7d8da2838 source/common/x86/intrapred.h
--- a/source/common/x86/intrapred.h Wed Jan 08 13:00:53 2014 -0600
+++ b/source/common/x86/intrapred.h Fri Jan 10 15:08:40 2014 -0600
@@ -57,9 +57,45 @@ DECL_ANG(4, 16, sse4);
DECL_ANG(4, 17, sse4);
DECL_ANG(4, 18, sse4);
DECL_ANG(4, 26, sse4);
+DECL_ANG(8, 2, ssse3);
+DECL_ANG(8, 3, sse4);
+DECL_ANG(8, 4, sse4);
+DECL_ANG(8, 5, sse4);
+DECL_ANG(8, 6, sse4);
+DECL_ANG(8, 7, sse4);
+DECL_ANG(8, 8, sse4);
+DECL_ANG(8, 9, sse4);
+DECL_ANG(8, 10, sse4);
+DECL_ANG(8, 11, sse4);
+DECL_ANG(8, 12, sse4);
+DECL_ANG(8, 13, sse4);
+DECL_ANG(8, 14, sse4);
+DECL_ANG(8, 15, sse4);
+DECL_ANG(8, 16, sse4);
+DECL_ANG(8, 17, sse4);
+DECL_ANG(8, 18, sse4);
+DECL_ANG(8, 26, sse4);
+
+DECL_ANG(16, 2, ssse3);
+DECL_ANG(16, 3, sse4);
+DECL_ANG(16, 4, sse4);
+DECL_ANG(16, 5, sse4);
+DECL_ANG(16, 6, sse4);
+DECL_ANG(16, 7, sse4);
+DECL_ANG(16, 8, sse4);
+DECL_ANG(16, 9, sse4);
+DECL_ANG(16, 10, sse4);
+DECL_ANG(16, 11, sse4);
+DECL_ANG(16, 12, sse4);
+DECL_ANG(16, 13, sse4);
+DECL_ANG(16, 14, sse4);
+DECL_ANG(16, 15, sse4);
+DECL_ANG(16, 16, sse4);
+DECL_ANG(16, 17, sse4);
+DECL_ANG(16, 18, sse4);
+DECL_ANG(16, 26, sse4);
#undef DECL_ANG
-
void x265_all_angs_pred_4x4_sse4(pixel *dest, pixel *above0, pixel *left0, pixel *above1, pixel *left1, bool bLuma);
void x265_all_angs_pred_8x8_sse4(pixel *dest, pixel *above0, pixel *left0, pixel *above1, pixel *left1, bool bLuma);
diff -r 0d70188e80bc -r b2e7d8da2838 source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm Wed Jan 08 13:00:53 2014 -0600
+++ b/source/common/x86/intrapred8.asm Fri Jan 10 15:08:40 2014 -0600
@@ -1105,6 +1105,82 @@ cglobal intra_pred_ang4_18, 4,4,1
psrldq m0, 1
movd [r0], m0
RET
+;-----------------------------------------------------------------------------
+; void intraPredAng(pixel* dst, intptr_t dstStride, pixel *refLeft, pixel *refAbove, int dirMode, int bFilter)
+;-----------------------------------------------------------------------------
+INIT_XMM ssse3
+cglobal intra_pred_ang8_2, 3,5,2
+ cmp r4m, byte 34
+ cmove r2, r3mp
+ movu m0, [r2 + 2]
+ lea r4, [r1 * 3]
+
+ movh [r0], m0
+ palignr m1, m0, 1
+ movh [r0 + r1], m1
+ palignr m1, m0, 2
+ movh [r0 + r1 * 2], m1
+ palignr m1, m0, 3
+ movh [r0 + r4], m1
+ palignr m1, m0, 4
+ lea r0, [r0 + r1 * 4]
+ movh [r0], m1
+ palignr m1, m0, 5
+ movh [r0 + r1], m1
+ palignr m1, m0, 6
+ movh [r0 + r1 * 2], m1
+ palignr m1, m0, 7
+ movh [r0 + r4], m1
+ RET
+
+;-----------------------------------------------------------------------------
+; void intraPredAng16(pixel* dst, intptr_t dstStride, pixel *refLeft, pixel *refAbove, int dirMode, int bFilter)
+;-----------------------------------------------------------------------------
+INIT_XMM ssse3
+cglobal intra_pred_ang16_2, 3,3,3
+ cmp r4m, byte 34
+ cmove r2, r3mp
+ movu m0, [r2 + 2]
+ movu m1, [r2 + 18]
+ movu [r0], m0
+ palignr m2, m1, m0, 1
+ movu [r0 + r1], m2
+ lea r0, [r0 + r1 * 2]
+ palignr m2, m1, m0, 2
+ movu [r0], m2
+ palignr m2, m1, m0, 3
+ movu [r0 + r1], m2
+ lea r0, [r0 + r1 * 2]
+ palignr m2, m1, m0, 4
+ movu [r0], m2
+ palignr m2, m1, m0, 5
+ movu [r0 + r1], m2
+ lea r0, [r0 + r1 * 2]
+ palignr m2, m1, m0, 6
+ movu [r0], m2
+ palignr m2, m1, m0, 7
+ movu [r0 + r1], m2
+ lea r0, [r0 + r1 * 2]
+ palignr m2, m1, m0, 8
+ movu [r0], m2
+ palignr m2, m1, m0, 9
+ movu [r0 + r1], m2
+ lea r0, [r0 + r1 * 2]
+ palignr m2, m1, m0, 10
+ movu [r0], m2
+ palignr m2, m1, m0, 11
+ movu [r0 + r1], m2
+ lea r0, [r0 + r1 * 2]
+ palignr m2, m1, m0, 12
+ movu [r0], m2
+ palignr m2, m1, m0, 13
+ movu [r0 + r1], m2
+ lea r0, [r0 + r1 * 2]
+ palignr m2, m1, m0, 14
+ movu [r0], m2
+ palignr m2, m1, m0, 15
+ movu [r0 + r1], m2
+ RET
;-----------------------------------------------------------------------------
; void all_angs_pred_4x4(pixel *dest, pixel *above0, pixel *left0, pixel *above1, pixel *left1, bool bLuma)
diff -r 0d70188e80bc -r b2e7d8da2838 source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm Wed Jan 08 13:00:53 2014 -0600
+++ b/source/common/x86/ipfilter8.asm Fri Jan 10 15:08:40 2014 -0600
@@ -29,6 +29,7 @@
SECTION_RODATA 32
tab_Tm: db 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6
db 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10
+ db 8, 9,10,11, 9,10,11,12,10,11,12,13,11,12,13, 14
tab_Lm: db 0, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 8
db 2, 3, 4, 5, 6, 7, 8, 9, 3, 4, 5, 6, 7, 8, 9, 10
@@ -127,6 +128,7 @@ tab_c_64_n64: times 8 db 64, -64
SECTION .text
+cextern pw_512
cextern pw_2000
%macro FILTER_H4_w2_2 3
@@ -688,30 +690,80 @@ cglobal interp_8tap_horiz_%3_%1x%2, 4,7,
; void interp_8tap_horiz_pp_%1x%2(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
;--------------------------------------------------------------------------------------------------------------
IPFILTER_LUMA 4, 4, pp
- IPFILTER_LUMA 8, 8, pp
- IPFILTER_LUMA 8, 4, pp
IPFILTER_LUMA 4, 8, pp
- IPFILTER_LUMA 16, 16, pp
- IPFILTER_LUMA 16, 8, pp
- IPFILTER_LUMA 8, 16, pp
- IPFILTER_LUMA 16, 12, pp
IPFILTER_LUMA 12, 16, pp
- IPFILTER_LUMA 16, 4, pp
IPFILTER_LUMA 4, 16, pp
- IPFILTER_LUMA 32, 32, pp
- IPFILTER_LUMA 32, 16, pp
- IPFILTER_LUMA 16, 32, pp
- IPFILTER_LUMA 32, 24, pp
- IPFILTER_LUMA 24, 32, pp
- IPFILTER_LUMA 32, 8, pp
- IPFILTER_LUMA 8, 32, pp
- IPFILTER_LUMA 64, 64, pp
- IPFILTER_LUMA 64, 32, pp
- IPFILTER_LUMA 32, 64, pp
- IPFILTER_LUMA 64, 48, pp
- IPFILTER_LUMA 48, 64, pp
- IPFILTER_LUMA 64, 16, pp
- IPFILTER_LUMA 16, 64, pp
+
+
+;--------------------------------------------------------------------------------------------------------------
+; void interp_8tap_horiz_pp_%1x%2(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
+;--------------------------------------------------------------------------------------------------------------
+%macro IPFILTER_LUMA_PP_W8 2
+INIT_XMM sse4
+cglobal interp_8tap_horiz_pp_%1x%2, 4,6,7
+ mov r4d, r4m
+
+%ifdef PIC
+ lea r5, [tab_LumaCoeff]
+ movh m3, [r5 + r4 * 8]
+%else
+ movh m3, [tab_LumaCoeff + r4 * 8]
+%endif
+ pshufd m0, m3, 0 ; m0 = coeff-L
+ pshufd m1, m3, 0x55 ; m1 = coeff-H
+ lea r5, [tab_Tm] ; r5 = shuffle
+ mova m2, [pw_512] ; m2 = 512
+
+ mov r4d, %2
+.loopH
+%assign x 0
+%rep %1 / 8
+ movu m3, [r0 - 3 + x] ; m3 = [F E D C B A 9 8 7 6 5 4 3 2 1 0]
+ pshufb m4, m3, [r5 + 0*16] ; m4 = [6 5 4 3 5 4 3 2 4 3 2 1 3 2 1 0]
+ pshufb m5, m3, [r5 + 1*16] ; m5 = [A 9 8 7 9 8 7 6 8 7 6 5 7 6 5 4]
+ pshufb m3, [r5 + 2*16] ; m3 = [E D C B D C B A C B A 9 B A 9 8]
+ pmaddubsw m4, m0
+ pmaddubsw m6, m5, m1
+ pmaddubsw m5, m0
+ pmaddubsw m3, m1
+ paddw m4, m6
+ paddw m5, m3
+ phaddw m4, m5
+ pmulhrsw m4, m2
+ packuswb m4, m4
+ movh [r2 + x], m4
+%assign x x+8
+%endrep
+
+ add r0, r1
+ add r2, r3
+
+ dec r4d
+ jnz .loopH
+ RET
+%endmacro
+
+IPFILTER_LUMA_PP_W8 8, 4
+IPFILTER_LUMA_PP_W8 8, 8
+IPFILTER_LUMA_PP_W8 8, 16
+IPFILTER_LUMA_PP_W8 8, 32
+IPFILTER_LUMA_PP_W8 16, 4
+IPFILTER_LUMA_PP_W8 16, 8
+IPFILTER_LUMA_PP_W8 16, 12
+IPFILTER_LUMA_PP_W8 16, 16
+IPFILTER_LUMA_PP_W8 16, 32
+IPFILTER_LUMA_PP_W8 16, 64
+IPFILTER_LUMA_PP_W8 24, 32
+IPFILTER_LUMA_PP_W8 32, 8
+IPFILTER_LUMA_PP_W8 32, 16
+IPFILTER_LUMA_PP_W8 32, 24
+IPFILTER_LUMA_PP_W8 32, 32
+IPFILTER_LUMA_PP_W8 32, 64
+IPFILTER_LUMA_PP_W8 48, 64
+IPFILTER_LUMA_PP_W8 64, 16
+IPFILTER_LUMA_PP_W8 64, 32
More information about the x265-commits mailing list