[x265] [PATCH] asm code for pixeladd_ps_4x4 and testbench integration

Praveen Tiwari praveen at multicorewareinc.com
Wed Nov 20 14:17:32 CET 2013


Merged, sent implementation.

Regards,
Praveen Tiwari




On Wed, Nov 20, 2013 at 6:08 PM, chen <chenm003 at 163.com> wrote:

> At 2013-11-20 19:45:24,praveen at multicorewareinc.com wrote:
> ># HG changeset patch
> ># User Praveen Tiwari
> ># Date 1384947915 -19800
> ># Node ID c1e556f54d61422d153ff67f4830dc62dd1111d9
> ># Parent  a7fb47a7eddf18634449a5ac898f7c2d029048e9
> >asm code for pixeladd_ps_4x4 and testbench integration
> >
> >diff -r a7fb47a7eddf -r c1e556f54d61 source/common/CMakeLists.txt
> >--- a/source/common/CMakeLists.txt	Wed Nov 20 12:57:57 2013 +0530
> >+++ b/source/common/CMakeLists.txt	Wed Nov 20 17:15:15 2013 +0530
> >@@ -113,7 +113,7 @@
> >
> > if(ENABLE_PRIMITIVES_ASM)
> >     set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h blockcopy8.h)
> >-    set(A_SRCS pixel-a.asm const-a.asm cpu-a.asm sad-a.asm mc-a.asm mc-a2.asm ipfilter8.asm pixel-util.asm blockcopy8.asm)
> >+    set(A_SRCS pixel-a.asm const-a.asm cpu-a.asm sad-a.asm mc-a.asm mc-a2.asm ipfilter8.asm pixel-util.asm blockcopy8.asm pixeladd8.asm)
> >     if (NOT X64)
> >         set(A_SRCS ${A_SRCS} pixel-32.asm)
> >     endif()
> >diff -r a7fb47a7eddf -r c1e556f54d61 source/common/x86/asm-primitives.cpp
> >--- a/source/common/x86/asm-primitives.cpp	Wed Nov 20 12:57:57 2013 +0530
> >+++ b/source/common/x86/asm-primitives.cpp	Wed Nov 20 17:15:15 2013 +0530
> >@@ -633,6 +633,13 @@
> >         p.calcrecon[BLOCK_32x32] = x265_calcRecons32_sse4;
> >         p.calcresidual[BLOCK_16x16] = x265_getResidual16_sse4;
> >         p.calcresidual[BLOCK_32x32] = x265_getResidual32_sse4;
> >+
> >+        // This function pointer initialization is temporary will be removed
> >+        // later with macro definitions.  It is used to avoid linker errors
> >+        // until all partitions are coded and commit smaller patches, easier to
> >+        // review.
> >+
> >+        p.chroma_add_ps[X265_CSP_I420][CHROMA_4x4] = x265_pixel_add_ps_4x4_sse4;
> >     }
> >     if (cpuMask & X265_CPU_AVX)
> >     {
> >diff -r a7fb47a7eddf -r c1e556f54d61 source/common/x86/pixel.h
> >--- a/source/common/x86/pixel.h	Wed Nov 20 12:57:57 2013 +0530
> >+++ b/source/common/x86/pixel.h	Wed Nov 20 17:15:15 2013 +0530
> >@@ -313,7 +313,8 @@
> >     SETUP_CHROMA_PIXELSUB_PS_FUNC(8, 32, cpu);
> >
> > #define SETUP_LUMA_PIXELSUB_PS_FUNC(W, H, cpu) \
> >-    void x265_pixel_sub_ps_ ## W ## x ## H ## cpu(int16_t *dest, intptr_t destride, pixel *src0, pixel *src1, intptr_t srcstride0, intptr_t srcstride1);
> >+    void x265_pixel_sub_ps_ ## W ## x ## H ## cpu(int16_t *dest, intptr_t destride, pixel *src0, pixel *src1, intptr_t srcstride0, intptr_t srcstride1);\
> >+    void x265_pixel_add_ps_ ## W ## x ## H ## cpu(pixel *dest, int destride, pixel *src0, int16_t *scr1, int srcStride0, int srcStride1);
> >
> > #define LUMA_PIXELSUB_DEF(cpu) \
> >     SETUP_LUMA_PIXELSUB_PS_FUNC(4,   4, cpu); \
> >@@ -342,6 +343,8 @@
> >     SETUP_LUMA_PIXELSUB_PS_FUNC(64, 16, cpu); \
> >     SETUP_LUMA_PIXELSUB_PS_FUNC(16, 64, cpu);
> >
> >+//    void x265_pixeladd_ps_4x4_sse4(pixel *dest, int destride, pixel *src0, int16_t *scr1, int srcStride0, int srcStride1);
> >+
> remove unused line
>
>
>
> > CHROMA_PIXELSUB_DEF(_sse4);
> > LUMA_PIXELSUB_DEF(_sse4);
> >
> >diff -r a7fb47a7eddf -r c1e556f54d61 source/common/x86/pixeladd8.asm
> >--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
> >+++ b/source/common/x86/pixeladd8.asm	Wed Nov 20 17:15:15 2013 +0530
> >@@ -0,0 +1,79 @@
> >+;*****************************************************************************
> >+;* Copyright (C) 2013 x265 project
> >+;*
> >+;* Authors: Praveen Kumar Tiwari <praveen at multicorewareinc.com>
> >+;*
> >+;* This program is free software; you can redistribute it and/or modify
> >+;* it under the terms of the GNU General Public License as published by
> >+;* the Free Software Foundation; either version 2 of the License, or
> >+;* (at your option) any later version.
> >+;*
> >+;* This program is distributed in the hope that it will be useful,
> >+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
> >+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> >+;* GNU General Public License for more details.
> >+;*
> >+;* You should have received a copy of the GNU General Public License
> >+;* along with this program; if not, write to the Free Software
> >+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
> >+;*
> >+;* This program is also available under a commercial proprietary license.
> >+;* For more information, contact us at licensing at multicorewareinc.com.
> >+;*****************************************************************************/
> >+
> >+%include "x86inc.asm"
> >+%include "x86util.asm"
> >+
> >+SECTION_RODATA 32
> >+
> >+SECTION .text
> >+
> >+;-----------------------------------------------------------------------------
> >+; void pixel_add_ps_4x4(pixel *dest, int destride, pixel *src0, int16_t *scr1, int srcStride0, int srcStride1)
> >+;-----------------------------------------------------------------------------
> >+INIT_XMM sse4
> >+cglobal pixel_add_ps_4x4, 6, 6, 2, dest, destride, src0, scr1, srcStride0, srcStride1
> >+
> >+add         r5,            r5
> >+
> >+movd        m0,            [r2]
> >+pmovzxbw    m0,            m0
> >+movh        m1,            [r3]
> We can merge movd and pmovzxbw into one instruction; according to the Intel documentation, pmovzxbw does not require its memory operand to be aligned to a 16-byte boundary.
>
> >+
> >+paddw       m0,            m1
> >+packuswb    m0,            m0
> >+
> >+movd        [r0],          m0
> >+
> >+movd        m0,            [r2 + r4]
> >+pmovzxbw    m0,            m0
> >+movh        m1,            [r3 + r5]
> >+
> >+paddw       m0,            m1
> >+packuswb    m0,            m0
> >+
> >+movd        [r0 + r1],     m0
> >+
> >+movd        m0,            [r2 + 2 * r4]
> >+pmovzxbw    m0,            m0
> >+movh        m1,            [r3 + 2 * r5]
> >+
> >+paddw       m0,            m1
> >+packuswb    m0,            m0
> >+
> >+movd        [r0 + 2 * r1], m0
> >+
> >+lea         r0,            [r0 + 2 * r1]
> >+lea         r2,            [r2 + 2 * r4]
> >+lea         r3,            [r3 + 2 * r5]
> >+
> >+movd        m0,            [r2 + r4]
> >+pmovzxbw    m0,            m0
> >+movh        m1,            [r3 + r5]
> >+
> >+paddw       m0,            m1
> >+packuswb    m0,            m0
> >+
> >+movd        [r0 + r1],     m0
> >+
> >+RET
> >_______________________________________________
> >x265-devel mailing list
> >x265-devel at videolan.org
> >https://mailman.videolan.org/listinfo/x265-devel
>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131120/d9d529d4/attachment.html>


More information about the x265-devel mailing list