[x265] [PATCH] asm: intra_pred_ang16_25
Praveen Tiwari
praveen at multicorewareinc.com
Thu Mar 12 14:39:44 CET 2015
Please ignore this patch; I need to add performance data to the commit message.
Regards,
Praveen
On Thu, Mar 12, 2015 at 6:50 PM, <praveen at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Praveen Tiwari <praveen at multicorewareinc.com>
> # Date 1426165765 -19800
> # Node ID e4204ceeb011a009455cde620c346729d80ac822
> # Parent d012e125bdb1299ba29b9c0680931e148981a42e
> asm: intra_pred_ang16_25
>
> diff -r d012e125bdb1 -r e4204ceeb011 source/common/x86/asm-primitives.cpp
> --- a/source/common/x86/asm-primitives.cpp Thu Mar 12 18:40:23 2015
> +0530
> +++ b/source/common/x86/asm-primitives.cpp Thu Mar 12 18:39:25 2015
> +0530
> @@ -1504,6 +1504,7 @@
> p.cu[BLOCK_8x8].intra_pred[12] = x265_intra_pred_ang8_12_avx2;
> p.cu[BLOCK_8x8].intra_pred[24] = x265_intra_pred_ang8_24_avx2;
> p.cu[BLOCK_8x8].intra_pred[11] = x265_intra_pred_ang8_11_avx2;
> + p.cu[BLOCK_16x16].intra_pred[25] = x265_intra_pred_ang16_25_avx2;
>
> // copy_sp primitives
> p.cu[BLOCK_16x16].copy_sp = x265_blockcopy_sp_16x16_avx2;
> diff -r d012e125bdb1 -r e4204ceeb011 source/common/x86/intrapred.h
> --- a/source/common/x86/intrapred.h Thu Mar 12 18:40:23 2015 +0530
> +++ b/source/common/x86/intrapred.h Thu Mar 12 18:39:25 2015 +0530
> @@ -182,6 +182,7 @@
> void x265_intra_pred_ang8_12_avx2(pixel* dst, intptr_t dstStride, const
> pixel* srcPix, int dirMode, int bFilter);
> void x265_intra_pred_ang8_24_avx2(pixel* dst, intptr_t dstStride, const
> pixel* srcPix, int dirMode, int bFilter);
> void x265_intra_pred_ang8_11_avx2(pixel* dst, intptr_t dstStride, const
> pixel* srcPix, int dirMode, int bFilter);
> +void x265_intra_pred_ang16_25_avx2(pixel* dst, intptr_t dstStride, const
> pixel* srcPix, int dirMode, int bFilter);
> void x265_all_angs_pred_4x4_sse4(pixel *dest, pixel *refPix, pixel
> *filtPix, int bLuma);
> void x265_all_angs_pred_8x8_sse4(pixel *dest, pixel *refPix, pixel
> *filtPix, int bLuma);
> void x265_all_angs_pred_16x16_sse4(pixel *dest, pixel *refPix, pixel
> *filtPix, int bLuma);
> diff -r d012e125bdb1 -r e4204ceeb011 source/common/x86/intrapred8.asm
> --- a/source/common/x86/intrapred8.asm Thu Mar 12 18:40:23 2015 +0530
> +++ b/source/common/x86/intrapred8.asm Thu Mar 12 18:39:25 2015 +0530
> @@ -113,6 +113,17 @@
> db 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7,
> 25, 7, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2
> db 3, 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29, 3, 29,
> 3, 29, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24
>
> +ALIGN 32
> +c_ang16_mode_25: db 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30,
> 2, 30, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28
> + db 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26,
> 6, 26, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24
> + db 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22,
> 10, 22, 10, 22, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12,
> 20
> + db 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18,
> 14, 18, 14, 18, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
> 16
> + db 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14,
> 18, 14, 18, 14, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20,
> 12
> + db 22, 10, 22, 10, 22, 10, 22, 10, 22, 10, 22, 10,
> 22, 10, 22, 10, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8
> + db 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6, 26, 6,
> 26, 6, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4
> + db 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2, 30, 2,
> 30, 2, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0
> +
> +ALIGN 32
> ;; (blkSize - 1 - x)
> pw_planar4_0: dw 3, 2, 1, 0, 3, 2, 1, 0
> pw_planar4_1: dw 3, 3, 3, 3, 3, 3, 3, 3
> @@ -10368,6 +10379,47 @@
> movhps [r0 + r3], xm2
> RET
>
> +%macro INTRA_PRED_ANG16_MC0 3
> + pmaddubsw m3, m1, [r4 + %3 * mmsize]
> + pmulhrsw m3, m0
> + pmaddubsw m4, m2, [r4 + %3 * mmsize]
> + pmulhrsw m4, m0
> + packuswb m3, m4
> + movu [%1], xm3
> + vextracti128 xm4, m3, 1
> + movu [%2], xm4
> +%endmacro
> +
> +%macro INTRA_PRED_ANG16_25 1
> + INTRA_PRED_ANG16_MC0 r0, r0 + r1, %1
> + INTRA_PRED_ANG16_MC0 r0 + 2 * r1, r0 + r3, (%1 + 1)
> +%endmacro
> +
> +INIT_YMM avx2
> +cglobal intra_pred_ang16_25, 3, 5, 5
> + mova m0, [pw_1024]
> +
> + vbroadcasti128 m1, [r2]
> + pshufb m1, [intra_pred_shuff_0_8]
> + vbroadcasti128 m2, [r2 + 8]
> + pshufb m2, [intra_pred_shuff_0_8]
> +
> + lea r3, [3 * r1]
> + lea r4, [c_ang16_mode_25]
> +
> + INTRA_PRED_ANG16_25 0
> +
> + lea r0, [r0 + 4 * r1]
> + INTRA_PRED_ANG16_25 2
> +
> + lea r0, [r0 + 4 * r1]
> + INTRA_PRED_ANG16_25 4
> +
> + lea r0, [r0 + 4 * r1]
> + INTRA_PRED_ANG16_25 6
> + RET
> +
> +
>
> INIT_YMM avx2
> cglobal intra_pred_ang8_12, 3, 5, 5
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150312/c4b3848d/attachment-0001.html>
More information about the x265-devel
mailing list