[x265-commits] [x265] api: add error message and checks in api_get()

Deepthi Nandakumar deepthi at multicorewareinc.com
Mon May 4 21:14:36 CEST 2015


details:   http://hg.videolan.org/x265/rev/4e94cc04f64c
branches:  
changeset: 10355:4e94cc04f64c
user:      Deepthi Nandakumar <deepthi at multicorewareinc.com>
date:      Thu Apr 30 13:57:44 2015 +0530
description:
api: add error message and checks in api_get()
Subject: [x265] asm: chroma_hps[48x64] for i444 - improved 16378c->11853c

details:   http://hg.videolan.org/x265/rev/322f57fddd64
branches:  
changeset: 10356:322f57fddd64
user:      Aasaipriya Chandran <aasaipriya at multicorewareinc.com>
date:      Mon May 04 09:58:06 2015 +0530
description:
asm: chroma_hps[48x64] for i444 - improved 16378c->11853c
Subject: [x265] asm: chroma_hps[24x64] for i422 - improved 8428c->6562c

details:   http://hg.videolan.org/x265/rev/b8b005852aba
branches:  
changeset: 10357:b8b005852aba
user:      Aasaipriya Chandran <aasaipriya at multicorewareinc.com>
date:      Mon May 04 10:09:46 2015 +0530
description:
asm: chroma_hps[24x64] for i422 - improved 8428c->6562c
Subject: [x265] asm: chroma_hps[2x16] for i422 - improved 796c->559c

details:   http://hg.videolan.org/x265/rev/f1ec8c787d6d
branches:  
changeset: 10358:f1ec8c787d6d
user:      Aasaipriya Chandran <aasaipriya at multicorewareinc.com>
date:      Mon May 04 10:12:40 2015 +0530
description:
asm: chroma_hps[2x16] for i422 - improved 796c->559c
Subject: [x265] asm: use x264 code for sse2 sad[16xN] except for 16x64

details:   http://hg.videolan.org/x265/rev/600ac3a306ee
branches:  
changeset: 10359:600ac3a306ee
user:      Sumalatha Polureddy
date:      Mon May 04 14:08:10 2015 +0530
description:
asm: use x264 code for sse2 sad[16xN] except for 16x64
Subject: [x265] asm: use x264 code for sse2 sad[16x64]

details:   http://hg.videolan.org/x265/rev/269c8ec81c4f
branches:  
changeset: 10360:269c8ec81c4f
user:      Sumalatha Polureddy
date:      Mon May 04 15:28:27 2015 +0530
description:
asm: use x264 code for sse2 sad[16x64]
Subject: [x265] doc: give a more useful master-display example

details:   http://hg.videolan.org/x265/rev/0825962a357e
branches:  
changeset: 10361:0825962a357e
user:      Steve Borho <steve at borho.org>
date:      Sat May 02 11:11:04 2015 -0500
description:
doc: give a more useful master-display example
Subject: [x265] sei: add content light level info SEI

details:   http://hg.videolan.org/x265/rev/5d1b7ed7ad18
branches:  
changeset: 10362:5d1b7ed7ad18
user:      Steve Borho <steve at borho.org>
date:      Sat May 02 14:03:26 2015 -0500
description:
sei: add content light level info SEI
Subject: [x265] api: fixup master-display docs

details:   http://hg.videolan.org/x265/rev/48364a429386
branches:  
changeset: 10363:48364a429386
user:      Steve Borho <steve at borho.org>
date:      Sat May 02 14:03:47 2015 -0500
description:
api: fixup master-display docs
Subject: [x265] search: move up selectMVP() in file, no behavior change

details:   http://hg.videolan.org/x265/rev/4e97d12a3a94
branches:  
changeset: 10364:4e97d12a3a94
user:      Steve Borho <steve at borho.org>
date:      Sat May 02 11:37:41 2015 -0500
description:
search: move up selectMVP() in file, no behavior change
Subject: [x265] cudata: add comment for clipMv

details:   http://hg.videolan.org/x265/rev/1b0425662a64
branches:  
changeset: 10365:1b0425662a64
user:      Steve Borho <steve at borho.org>
date:      Sat May 02 11:47:01 2015 -0500
description:
cudata: add comment for clipMv
Subject: [x265] cli: header nit

details:   http://hg.videolan.org/x265/rev/c75df34cf90d
branches:  
changeset: 10366:c75df34cf90d
user:      Steve Borho <steve at borho.org>
date:      Mon May 04 12:53:03 2015 -0500
description:
cli: header nit

Our include policy is that any file which includes any headers from common/
or encoder/ should include common.h first.  Most of our internal headers also
include common.h first (as they should), but we should not rely upon that in
x265.cpp.
Subject: [x265] cli: nit

details:   http://hg.videolan.org/x265/rev/28d7e7582028
branches:  
changeset: 10367:28d7e7582028
user:      Steve Borho <steve at borho.org>
date:      Mon May 04 12:57:08 2015 -0500
description:
cli: nit

diffstat:

 doc/reST/cli.rst                     |   22 ++-
 source/CMakeLists.txt                |    2 +-
 source/common/cudata.cpp             |    4 +
 source/common/param.cpp              |    1 +
 source/common/x86/asm-primitives.cpp |    3 +
 source/common/x86/ipfilter8.asm      |  259 +++++++++++++++++++++++++++++++++++
 source/common/x86/sad16-a.asm        |   76 ++++++++-
 source/encoder/api.cpp               |   17 ++
 source/encoder/encoder.cpp           |   15 ++
 source/encoder/search.cpp            |   56 +++---
 source/encoder/sei.h                 |   27 +++
 source/x265.cpp                      |    5 +-
 source/x265.h                        |   11 +-
 source/x265cli.h                     |    4 +-
 14 files changed, 455 insertions(+), 47 deletions(-)

diffs (truncated from 739 to 300 lines):

diff -r 4cf55e54fe3e -r 28d7e7582028 doc/reST/cli.rst
--- a/doc/reST/cli.rst	Sat May 02 10:58:05 2015 -0500
+++ b/doc/reST/cli.rst	Mon May 04 12:57:08 2015 -0500
@@ -1496,12 +1496,28 @@ VUI fields must be manually specified.
 	string format is "G(%hu,%hu)B(%hu,%hu)R(%hu,%hu)WP(%hu,%hu)L(%u,%u)"
 	where %hu are unsigned 16bit integers and %u are unsigned 32bit
 	integers. The SEI includes X,Y display primaries for RGB channels,
-	white point X,Y and max,min luminance values.
+	white point X,Y and max,min luminance values. (HDR)
 
-	Example: G(10,12)B(5,13)R(5,13)WP(100,100)L(1000,100)
+	Example for P3D65 1000-nits:
+
+		G(13200,34500)B(7500,3000)R(34000,16000)WP(15635,16450)L(10000000,1)
 
 	Note that this string value will need to be escaped or quoted to
-	protect against shell expansion on many platforms
+	protect against shell expansion on many platforms. No default.
+
+.. option:: --max-cll <string>
+
+	Maximum content light level and maximum frame average light level as
+	required by the Consumer Electronics Association 861.3 specification.
+
+	Specified as a string which is parsed when the stream header SEI are
+	emitted. The string format is "%hu,%hu" where %hu are unsigned 16bit
+	integers. The first value is the max content light level (or 0 if no
+	maximum is indicated), the second value is the maximum picture
+	average light level (or 0). (HDR)
+
+	Note that this string value will need to be escaped or quoted to
+	protect against shell expansion on many platforms. No default.
 
 Bitstream options
 =================
diff -r 4cf55e54fe3e -r 28d7e7582028 source/CMakeLists.txt
--- a/source/CMakeLists.txt	Sat May 02 10:58:05 2015 -0500
+++ b/source/CMakeLists.txt	Mon May 04 12:57:08 2015 -0500
@@ -30,7 +30,7 @@ option(STATIC_LINK_CRT "Statically link 
 mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
 
 # X265_BUILD must be incremented each time the public API is changed
-set(X265_BUILD 56)
+set(X265_BUILD 57)
 configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
                "${PROJECT_BINARY_DIR}/x265.def")
 configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
diff -r 4cf55e54fe3e -r 28d7e7582028 source/common/cudata.cpp
--- a/source/common/cudata.cpp	Sat May 02 10:58:05 2015 -0500
+++ b/source/common/cudata.cpp	Mon May 04 12:57:08 2015 -0500
@@ -1830,6 +1830,10 @@ void CUData::getInterNeighbourMV(InterNe
     }
 }
 
+/* Clip motion vector to within slightly padded boundary of picture (the
+ * MV may reference a block that is completely within the padded area).
+ * Note this function is unaware of how much of this picture is actually
+ * available for use (re: frame parallelism) */
 void CUData::clipMv(MV& outMV) const
 {
     const uint32_t mvshift = 2;
diff -r 4cf55e54fe3e -r 28d7e7582028 source/common/param.cpp
--- a/source/common/param.cpp	Sat May 02 10:58:05 2015 -0500
+++ b/source/common/param.cpp	Mon May 04 12:57:08 2015 -0500
@@ -852,6 +852,7 @@ int x265_param_parse(x265_param* p, cons
     OPT("analysis-file") p->analysisFileName = strdup(value);
     OPT("qg-size") p->rc.qgSize = atoi(value);
     OPT("master-display") p->masteringDisplayColorVolume = strdup(value);
+    OPT("max-cll") p->contentLightLevelInfo = strdup(value);
     else
         return X265_PARAM_BAD_NAME;
 #undef OPT
diff -r 4cf55e54fe3e -r 28d7e7582028 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Sat May 02 10:58:05 2015 -0500
+++ b/source/common/x86/asm-primitives.cpp	Mon May 04 12:57:08 2015 -0500
@@ -2442,6 +2442,8 @@ void setupAssemblyPrimitives(EncoderPrim
         p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].filter_hps = x265_interp_4tap_horiz_ps_32x48_avx2;
 
         p.chroma[X265_CSP_I422].pu[CHROMA_422_2x8].filter_hps = x265_interp_4tap_horiz_ps_2x8_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].filter_hps = x265_interp_4tap_horiz_ps_24x64_avx2;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_2x16].filter_hps = x265_interp_4tap_horiz_ps_2x16_avx2;
 
         //i444 chroma_hps
         p.chroma[X265_CSP_I444].pu[LUMA_64x32].filter_hps = x265_interp_4tap_horiz_ps_64x32_avx2;
@@ -2468,6 +2470,7 @@ void setupAssemblyPrimitives(EncoderPrim
         p.chroma[X265_CSP_I444].pu[LUMA_16x64].filter_hps = x265_interp_4tap_horiz_ps_16x64_avx2;
 
         p.chroma[X265_CSP_I444].pu[LUMA_24x32].filter_hps = x265_interp_4tap_horiz_ps_24x32_avx2;
+        p.chroma[X265_CSP_I444].pu[LUMA_48x64].filter_hps = x265_interp_4tap_horiz_ps_48x64_avx2;
 
         p.chroma[X265_CSP_I444].pu[LUMA_32x16].filter_hps = x265_interp_4tap_horiz_ps_32x16_avx2;
         p.chroma[X265_CSP_I444].pu[LUMA_32x64].filter_hps = x265_interp_4tap_horiz_ps_32x64_avx2;
diff -r 4cf55e54fe3e -r 28d7e7582028 source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm	Sat May 02 10:58:05 2015 -0500
+++ b/source/common/x86/ipfilter8.asm	Mon May 04 12:57:08 2015 -0500
@@ -291,6 +291,9 @@ const interp4_hpp_shuf,     times 2 db 0
 
 const interp8_hps_shuf,     dd 0, 4, 1, 5, 2, 6, 3, 7
 
+ALIGN 32
+interp4_hps_shuf: times 2 db 0, 1, 2, 3, 1, 2, 3, 4, 8, 9, 10, 11, 9, 10, 11, 12
+
 SECTION .text
 
 cextern pb_128
@@ -24076,3 +24079,259 @@ cglobal interp_4tap_horiz_pp_48x64, 4,6,
     dec               r4d
     jnz               .loop
     RET
+
+;-----------------------------------------------------------------------------------------------------------------------------
+; void interp_4tap_horiz_ps_48x64(pixel *src, intptr_t srcStride, int16_t *dst, intptr_t dstStride, int coeffIdx, int isRowExt)
+;-----------------------------------------------------------------------------------------------------------------------------;
+
+INIT_YMM avx2
+cglobal interp_4tap_horiz_ps_48x64, 4,7,6
+    mov             r4d, r4m
+    mov             r5d, r5m
+    add             r3d, r3d
+
+%ifdef PIC
+    lea               r6,           [tab_ChromaCoeff]
+    vpbroadcastd      m0,           [r6 + r4 * 4]
+%else
+    vpbroadcastd      m0,           [tab_ChromaCoeff + r4 * 4]
+%endif
+
+    vbroadcasti128     m2,          [pw_1]
+    vbroadcasti128     m5,          [pw_2000]
+    mova               m1,          [tab_Tm]
+
+    ; register map
+    ; m0 - interpolate coeff
+    ; m1 - shuffle order table
+    ; m2 - constant word 1
+    mov               r6d,          64
+    dec               r0
+    test              r5d,          r5d
+    je                .loop
+    sub               r0 ,          r1
+    add               r6d ,         3
+
+.loop
+    ; Row 0
+    vbroadcasti128    m3,           [r0]                           ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
+    pshufb            m3,           m1
+    pmaddubsw         m3,           m0
+    pmaddwd           m3,           m2
+    vbroadcasti128    m4,           [r0 + 8]                       ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
+    pshufb            m4,           m1
+    pmaddubsw         m4,           m0
+    pmaddwd           m4,           m2
+
+    packssdw          m3,           m4
+    psubw             m3,           m5
+    vpermq            m3,           m3,          q3120
+    movu              [r2],         m3
+
+    vbroadcasti128    m3,           [r0 + 16]                      ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
+    pshufb            m3,           m1
+    pmaddubsw         m3,           m0
+    pmaddwd           m3,           m2
+    vbroadcasti128    m4,           [r0 + 24]                      ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
+    pshufb            m4,           m1
+    pmaddubsw         m4,           m0
+    pmaddwd           m4,           m2
+
+    packssdw          m3,           m4
+    psubw             m3,           m5
+    vpermq            m3,           m3,          q3120
+    movu              [r2 + 32],    m3
+
+    vbroadcasti128    m3,           [r0 + 32]                      ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
+    pshufb            m3,           m1
+    pmaddubsw         m3,           m0
+    pmaddwd           m3,           m2
+    vbroadcasti128    m4,           [r0 + 40]                      ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
+    pshufb            m4,           m1
+    pmaddubsw         m4,           m0
+    pmaddwd           m4,           m2
+
+    packssdw          m3,           m4
+    psubw             m3,           m5
+    vpermq            m3,           m3,          q3120
+    movu              [r2 + 64],    m3
+
+    add               r2,          r3
+    add               r0,          r1
+    dec               r6d
+    jnz               .loop
+    RET
+
+;-----------------------------------------------------------------------------------------------------------------------------
+; void interp_4tap_horiz_ps_24x64(pixel *src, intptr_t srcStride, int16_t *dst, intptr_t dstStride, int coeffIdx, int isRowExt)
+;-----------------------------------------------------------------------------------------------------------------------------
+INIT_YMM avx2
+cglobal interp_4tap_horiz_ps_24x64, 4,7,6
+    mov                r4d,            r4m
+    mov                r5d,            r5m
+    add                r3d,            r3d
+%ifdef PIC
+    lea                r6,             [tab_ChromaCoeff]
+    vpbroadcastd       m0,             [r6 + r4 * 4]
+%else
+    vpbroadcastd       m0,             [tab_ChromaCoeff + r4 * 4]
+%endif
+    vbroadcasti128     m2,             [pw_1]
+    vbroadcasti128     m5,             [pw_2000]
+    mova               m1,             [tab_Tm]
+
+    ; register map
+    ; m0 - interpolate coeff
+    ; m1 - shuffle order table
+    ; m2 - constant word 1
+    mov                r6d,            64
+    dec                r0
+    test               r5d,            r5d
+    je                 .loop
+    sub                r0 ,            r1
+    add                r6d ,           3
+
+.loop
+    ; Row 0
+    vbroadcasti128     m3,             [r0]                          ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
+    pshufb             m3,             m1
+    pmaddubsw          m3,             m0
+    pmaddwd            m3,             m2
+    vbroadcasti128     m4,             [r0 + 8]                      ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
+    pshufb             m4,             m1
+    pmaddubsw          m4,             m0
+    pmaddwd            m4,             m2
+    packssdw           m3,             m4
+    psubw              m3,             m5
+    vpermq             m3,             m3,          q3120
+    movu               [r2],           m3
+
+    vbroadcasti128     m3,             [r0 + 16]                     ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
+    pshufb             m3,             m1
+    pmaddubsw          m3,             m0
+    pmaddwd            m3,             m2
+    packssdw           m3,             m3
+    psubw              m3,             m5
+    vpermq             m3,             m3,          q3120
+    movu               [r2 + 32],      xm3
+
+    add                r2,             r3
+    add                r0,             r1
+    dec                r6d
+    jnz                .loop
+    RET
+
+INIT_YMM avx2
+cglobal interp_4tap_horiz_ps_2x16, 4, 7, 7
+    mov               r4d,           r4m
+    mov               r5d,           r5m
+    add               r3d,           r3d
+
+%ifdef PIC
+    lea               r6,            [tab_ChromaCoeff]
+    vpbroadcastd      m0,            [r6 + r4 * 4]
+%else
+    vpbroadcastd      m0,            [tab_ChromaCoeff + r4 * 4]
+%endif
+    vbroadcasti128    m6,            [pw_2000]
+    test              r5d,            r5d
+    jz                .label
+    sub               r0,             r1
+
+.label
+    mova              m4,            [interp4_hps_shuf]
+    mova              m5,            [pw_1]
+    dec               r0
+    lea               r4,            [r1 * 3]
+    movq              xm1,           [r0]                                   ;row 0
+    movhps            xm1,           [r0 + r1]
+    movq              xm2,           [r0 + r1 * 2]
+    movhps            xm2,           [r0 + r4]
+    vinserti128       m1,            m1,           xm2,          1
+    lea               r0,            [r0 + r1 * 4]
+    movq              xm3,           [r0]
+    movhps            xm3,           [r0 + r1]
+    movq              xm2,           [r0 + r1 * 2]
+    movhps            xm2,           [r0 + r4]
+    vinserti128       m3,            m3,           xm2,          1
+
+    pshufb            m1,            m4
+    pshufb            m3,            m4
+    pmaddubsw         m1,            m0
+    pmaddubsw         m3,            m0
+    pmaddwd           m1,            m5
+    pmaddwd           m3,            m5
+    packssdw          m1,            m3
+    psubw             m1,            m6
+
+    lea               r4,            [r3 * 3]
+    vextracti128      xm2,           m1,           1
+
+    movd              [r2],          xm1
+    pextrd            [r2 + r3],     xm1,          1
+    movd              [r2 + r3 * 2], xm2


More information about the x265-commits mailing list