[x265] asm: fix dequant_normal

Steve Borho steve at borho.org
Sat Aug 30 09:43:25 CEST 2014


On 08/30, Satoshi Nakagawa wrote:
> > How about removing the '#if...'?
> > The asm code doesn't check it.
> 
> Added '%if...' to the asm code :)
> 
> 
> # HG changeset patch
> # User Satoshi Nakagawa <nakagawa424 at oki.com>
> # Date 1409378187 -32400
> #      Sat Aug 30 14:56:27 2014 +0900
> # Node ID c4f15840feb443f8c38ba58b52ef5ba6d518e626
> # Parent  4e2d9ac6d489e82e70544d626c89964ee653c452
> asm: fix dequant_normal

Queued for stable, thanks

> diff -r 4e2d9ac6d489 -r c4f15840feb4 source/common/dct.cpp
> --- a/source/common/dct.cpp	Fri Aug 29 11:12:49 2014 +0200
> +++ b/source/common/dct.cpp	Sat Aug 30 14:56:27 2014 +0900
> @@ -720,7 +720,9 @@
>  
>  void dequant_normal_c(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift)
>  {
> -#if !HIGH_BIT_DEPTH
> +#if HIGH_BIT_DEPTH
> +    X265_CHECK(scale < 32768 || ((scale & 3) == 0 && shift > 2), "dequant invalid scale %d\n", scale);
> +#else
>      // NOTE: maximum of scale is (72 * 256)
>      X265_CHECK(scale < 32768, "dequant invalid scale %d\n", scale);
>  #endif
> diff -r 4e2d9ac6d489 -r c4f15840feb4 source/common/x86/pixel-util8.asm
> --- a/source/common/x86/pixel-util8.asm	Fri Aug 29 11:12:49 2014 +0200
> +++ b/source/common/x86/pixel-util8.asm	Sat Aug 30 14:56:27 2014 +0900
> @@ -1005,23 +1005,23 @@
>  ; void dequant_normal(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift)
>  ;-----------------------------------------------------------------------------
>  INIT_XMM sse4
> -cglobal dequant_normal, 4,5,5
> -    movd        m1, r3             ; m1 = word [scale]
> +cglobal dequant_normal, 5,5,5
> +    movd        m1, r3              ; m1 = word [scale]
> +    mova        m2, [pw_1]
> +%if HIGH_BIT_DEPTH
>      cmp         r3d, 32767
>      jle         .skip
> -
>      psrld       m1, 2
> -    mov         r4d, r4m
> +    sub         r4d, 2
> +.skip:
> +%endif
>      movd        m0, r4d             ; m0 = shift
>      xor         r3d, r3d
>      dec         r4d
>      bts         r3d, r4d
> -    movd        m2, r3d
> -    punpcklwd   m1, m2
> +    movd        m3, r3d
> +    punpcklwd   m1, m3
>      pshufd      m1, m1, 0           ; m1 = dword [add scale]
> -    mova        m2, [pw_1]
> -    mov         r2d, r2m
> -
>      ; m0 = shift
>      ; m1 = scale
>      ; m2 = word [1]
> @@ -1029,45 +1029,6 @@
>      movu        m3, [r0]
>      movu        m4, [r0 + 16]
>      packssdw    m3, m4              ; m3 = clipQCoef
> -    psllw       m3, 2
> -    punpckhwd   m4, m3, m2
> -    punpcklwd   m3, m2
> -    pmaddwd     m3, m1              ; m3 = dword (clipQCoef * scale + add)
> -    pmaddwd     m4, m1
> -    psrad       m3, m0
> -    psrad       m4, m0
> -    packssdw    m3, m3              ; OPT_ME: store must be 32 bits
> -    pmovsxwd    m3, m3
> -    packssdw    m4, m4
> -    pmovsxwd    m4, m4
> -    movu        [r1], m3
> -    movu        [r1 + 16], m4
> -
> -    add         r0, 32
> -    add         r1, 32
> -
> -    sub         r2d, 8
> -    jnz        .loop
> -    jz         .end
> -
> -.skip:
> -    mov         r4d, r4m
> -    movd        m0, r4d             ; m0 = shift
> -    xor         r3d, r3d
> -    dec         r4d
> -    bts         r3d, r4d
> -    movd        m2, r3d
> -    punpcklwd   m1, m2
> -    pshufd      m1, m1, 0           ; m1 = dword [add scale]
> -    mova        m2, [pw_1]
> -    mov         r2d, r2m
> -    ; m0 = shift
> -    ; m1 = scale
> -    ; m2 = word [1]
> -.sloop:
> -    movu        m3, [r0]
> -    movu        m4, [r0 + 16]
> -    packssdw    m3, m4              ; m3 = clipQCoef
>      punpckhwd   m4, m3, m2
>      punpcklwd   m3, m2
>      pmaddwd     m3, m1              ; m3 = dword (clipQCoef * scale + add)
> @@ -1085,8 +1046,7 @@
>      add         r1, 32
>  
>      sub         r2d, 8
> -    jnz        .sloop
> -.end:
> +    jnz        .loop
>      RET
>  
>  
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho


More information about the x265-devel mailing list