[x265] [PATCH] added csp support for blockcopy_ps

Steve Borho steve at borho.org
Mon Nov 18 21:42:21 CET 2013


On Nov 18, 2013, at 9:23 AM, praveen at multicorewareinc.com wrote:

> # HG changeset patch
> # User Praveen Tiwari
> # Date 1384788209 -19800
> # Node ID 59646d515e79b4d0f9a3a72c77c7af17a83bf3d9
> # Parent  b353d170c54f0e33a8869c413be226a48deb1f5c
> added csp support for blockcopy_ps
> 
> diff -r b353d170c54f -r 59646d515e79 source/common/pixel.cpp
> --- a/source/common/pixel.cpp	Mon Nov 18 19:34:07 2013 +0530
> +++ b/source/common/pixel.cpp	Mon Nov 18 20:53:29 2013 +0530
> @@ -837,7 +837,7 @@
> #define CHROMA(W, H) \
>  p.chroma_copy_pp[CSP_I420][CHROMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
>  p.chroma_copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
> -    p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>;\
> +    p.chroma_copy_ps[CSP_I420][CHROMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>;\
>  p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>;
> 
> #define LUMA(W, H) \
> diff -r b353d170c54f -r 59646d515e79 source/common/primitives.h
> --- a/source/common/primitives.h	Mon Nov 18 19:34:07 2013 +0530
> +++ b/source/common/primitives.h	Mon Nov 18 20:53:29 2013 +0530
> @@ -247,7 +247,7 @@
>  copy_sp_t       luma_copy_sp[NUM_LUMA_PARTITIONS];
>  copy_sp_t       chroma_copy_sp[NUM_CHROMA_PARTITIONS];
>  copy_ps_t       luma_copy_ps[NUM_LUMA_PARTITIONS];
> -    copy_ps_t       chroma_copy_ps[NUM_CHROMA_PARTITIONS];
> +    copy_ps_t       chroma_copy_ps[NUM_CSP][NUM_CHROMA_PARTITIONS];
> 
>  pixel_sub_ps_t  luma_sub_ps[NUM_LUMA_PARTITIONS];
>  pixel_sub_ps_t  chroma_sub_ps[NUM_CHROMA_PARTITIONS];
> diff -r b353d170c54f -r 59646d515e79 source/common/x86/asm-primitives.cpp
> --- a/source/common/x86/asm-primitives.cpp	Mon Nov 18 19:34:07 2013 +0530
> +++ b/source/common/x86/asm-primitives.cpp	Mon Nov 18 20:53:29 2013 +0530
> @@ -141,7 +141,6 @@
>  p.chroma_hps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu; \
>  p.chroma_vpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu; \
>  p.chroma_vps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W ## x ## H ## cpu; \
> -    p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = x265_blockcopy_ps_ ## W ## x ## H ## cpu; \
>  p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## x ## H ## cpu;
> 
> #define SETUP_CHROMA_SP_FUNC_DEF(W, H, cpu) \
> @@ -380,6 +379,36 @@
>  SETUP_LUMA_BLOCKCOPY_FUNC_DEF(64, 16, cpu); \
>  SETUP_LUMA_BLOCKCOPY_FUNC_DEF(16, 64, cpu);
> 
> +#define SETUP_CHROMA_FROM_LUMA_SSE4(W1, H1, W2, H2, cpu) \
> +    p.chroma_copy_ps[X265_CSP_I420][LUMA_ ## W1 ## x ## H1] = x265_blockcopy_ps_ ## W2 ## x ## H2 ## cpu;
> +
> +// For X265_CSP_I420 chroma width and height will be half of luma width and height
> +#define CHROMA_BLOCKCOPY_SSE4(cpu) \

When the macro accepts a cpu type argument, adding SSE4 to the name is redundant (and confusing).
There should probably be a generic I420 macro that maps luma blocks to I420 blocks, so that adding more color spaces does not multiply the amount of code in this file.

> +    SETUP_CHROMA_FROM_LUMA_SSE4(8,   8, 4,  4,  cpu); \
> +    SETUP_CHROMA_FROM_LUMA_SSE4(8,   4, 4,  2,  cpu); \
> +    SETUP_CHROMA_FROM_LUMA_SSE4(4,   8, 2,  4,  cpu); \
> +    SETUP_CHROMA_FROM_LUMA_SSE4(16, 16, 8,  8,  cpu); \
> +    SETUP_CHROMA_FROM_LUMA_SSE4(16,  8, 8,  4,  cpu); \
> +    SETUP_CHROMA_FROM_LUMA_SSE4(8,  16, 4,  8,  cpu); \
> +    SETUP_CHROMA_FROM_LUMA_SSE4(16, 12, 8,  6,  cpu); \
> +    SETUP_CHROMA_FROM_LUMA_SSE4(12, 16, 6,  8,  cpu); \
> +    SETUP_CHROMA_FROM_LUMA_SSE4(16,  4, 8,  2,  cpu); \
> +    SETUP_CHROMA_FROM_LUMA_SSE4(4,  16, 2,  8,  cpu); \
> +    SETUP_CHROMA_FROM_LUMA_SSE4(32, 32, 16, 16, cpu); \
> +    SETUP_CHROMA_FROM_LUMA_SSE4(32, 16, 16, 8,  cpu); \
> +    SETUP_CHROMA_FROM_LUMA_SSE4(16, 32, 8,  16, cpu); \
> +    SETUP_CHROMA_FROM_LUMA_SSE4(32, 24, 16, 12, cpu); \
> +    SETUP_CHROMA_FROM_LUMA_SSE4(24, 32, 12, 16, cpu); \
> +    SETUP_CHROMA_FROM_LUMA_SSE4(32,  8, 16, 4,  cpu); \
> +    SETUP_CHROMA_FROM_LUMA_SSE4(8,  32, 4,  16, cpu); \
> +    SETUP_CHROMA_FROM_LUMA_SSE4(64, 64, 32, 32, cpu); \
> +    SETUP_CHROMA_FROM_LUMA_SSE4(64, 32, 32, 16, cpu); \
> +    SETUP_CHROMA_FROM_LUMA_SSE4(32, 64, 16, 32, cpu); \
> +    SETUP_CHROMA_FROM_LUMA_SSE4(64, 48, 32, 24, cpu); \
> +    SETUP_CHROMA_FROM_LUMA_SSE4(48, 64, 24, 32, cpu); \
> +    SETUP_CHROMA_FROM_LUMA_SSE4(64, 16, 32, 8,  cpu); \
> +    SETUP_CHROMA_FROM_LUMA_SSE4(16, 64, 8,  32, cpu);
> +
> using namespace x265;
> 
> namespace {
> @@ -591,6 +620,7 @@
>      CHROMA_FILTERS(_sse4);
>      LUMA_FILTERS(_sse4);
>      HEVC_SATD(sse4);
> +        CHROMA_BLOCKCOPY_SSE4(_sse4);
>      p.chroma_copy_sp[CHROMA_2x4] = x265_blockcopy_sp_2x4_sse4;
>      p.chroma_copy_sp[CHROMA_2x8] = x265_blockcopy_sp_2x8_sse4;
>      p.chroma_copy_sp[CHROMA_6x8] = x265_blockcopy_sp_6x8_sse4;
> diff -r b353d170c54f -r 59646d515e79 source/test/pixelharness.cpp
> --- a/source/test/pixelharness.cpp	Mon Nov 18 19:34:07 2013 +0530
> +++ b/source/test/pixelharness.cpp	Mon Nov 18 20:53:29 2013 +0530
> @@ -763,12 +763,15 @@
>      }
>  }
> 
> -    if (opt.chroma_copy_ps[part])
> +    for(int i = 0; i < NUM_CSP; i++)

white-space: add a space after `for` (i.e. `for (int i = 0; ...)`)

>  {
> -        if (!check_block_copy_ps(ref.chroma_copy_ps[part], opt.chroma_copy_ps[part]))
> +        if (opt.chroma_copy_ps[i][part])
>      {
> -            printf("chroma_copy_ps[%s] failed\n", chromaPartStr[part]);
> -            return false;
> +            if (!check_block_copy_ps(ref.chroma_copy_ps[i][part], opt.chroma_copy_ps[i][part]))
> +            {
> +                 printf("chroma_copy_ps[%s][%s] failed\n", colorSpaceNames[i], chromaPartStr[part]);
> +                 return false;
> +            }
>      }
>  }
> 
> @@ -1051,10 +1054,13 @@
>      REPORT_SPEEDUP(opt.luma_copy_ps[part], ref.luma_copy_ps[part], sbuf1, 64, pbuf1, 128);
>  }
> 
> -    if (opt.chroma_copy_ps[part])
> +    for (int i = 0; i < NUM_CSP; i++)
>  {
> -        printf("ccpy_ps[%s]", chromaPartStr[part]);
> -        REPORT_SPEEDUP(opt.chroma_copy_ps[part], ref.chroma_copy_ps[part], sbuf1, 64, pbuf1, 128);
> +        if (opt.chroma_copy_ps[i][part])
> +        {
> +            printf("ccpy_ps[%s][%s]", colorSpaceNames[i], chromaPartStr[part]);
> +            REPORT_SPEEDUP(opt.chroma_copy_ps[i][part], ref.chroma_copy_ps[i][part], sbuf1, 64, pbuf1, 128);
> +        }
>  }
> 
>  if (opt.luma_sub_ps[part])
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 842 bytes
Desc: Message signed with OpenPGP using GPGMail
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131118/29b6e860/attachment.sig>


More information about the x265-devel mailing list