[x265-commits] [x265] asm: avx2 code for sse_ss[32x32] for 10 bpp (1935 -> 1030)

Sumalatha at videolan.org Sumalatha at videolan.org
Thu May 21 16:54:11 CEST 2015


details:   http://hg.videolan.org/x265/rev/6b9ce45b2693
branches:  
changeset: 10510:6b9ce45b2693
user:      Sumalatha Polureddy
date:      Thu May 21 11:58:50 2015 +0530
description:
asm: avx2 code for sse_ss[32x32] for 10 bpp (1935 -> 1030)

sse2:
sse_ss[32x32]  4.71x    1935.46         9113.11

avx2:
sse_ss[32x32]  8.85x    1030.08         9112.05
Subject: [x265] asm: avx2 code for sse_ss[64x64] for 10 bpp (7039 -> 4051)

details:   http://hg.videolan.org/x265/rev/8c3bf404906e
branches:  
changeset: 10511:8c3bf404906e
user:      Sumalatha Polureddy
date:      Thu May 21 12:04:46 2015 +0530
description:
asm: avx2 code for sse_ss[64x64] for 10 bpp (7039 -> 4051)

sse2:
sse_ss[64x64]  4.63x    7039.23         32592.51

avx2:
sse_ss[64x64]  8.58x    4051.57         34751.46
Subject: [x265] Regression Test: added new command line option --limit-refs in regression-tests.txt

details:   http://hg.videolan.org/x265/rev/b7d3c37cf388
branches:  
changeset: 10512:b7d3c37cf388
user:      Ashok Kumar Mishra<ashok at multicorewareinc.com>
date:      Wed May 20 18:16:40 2015 +0530
description:
Regression Test: added new command line option --limit-refs in regression-tests.txt
Subject: [x265] encoder: fix line feed issue

details:   http://hg.videolan.org/x265/rev/234bc93bd516
branches:  
changeset: 10513:234bc93bd516
user:      Gopu Govindaswamy <gopu at multicorewareinc.com>
date:      Thu May 21 16:34:48 2015 +0530
description:
encoder: fix line feed issue

diffstat:

 source/common/x86/asm-primitives.cpp |   2 ++
 source/common/x86/ssd-a.asm          |   2 ++
 source/encoder/encoder.cpp           |   2 +-
 source/test/regression-tests.txt     |  26 +++++++++++++-------------
 4 files changed, 18 insertions(+), 14 deletions(-)

diffs (130 lines):

diff -r dc4fcfc574ad -r 234bc93bd516 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Wed May 20 12:17:44 2015 -0500
+++ b/source/common/x86/asm-primitives.cpp	Thu May 21 16:34:48 2015 +0530
@@ -1259,6 +1259,8 @@ void setupAssemblyPrimitives(EncoderPrim
 
         p.cu[BLOCK_32x32].ssd_s = x265_pixel_ssd_s_32_avx2;
         p.cu[BLOCK_16x16].sse_ss = x265_pixel_ssd_ss_16x16_avx2;
+        p.cu[BLOCK_32x32].sse_ss = x265_pixel_ssd_ss_32x32_avx2;
+        p.cu[BLOCK_64x64].sse_ss = x265_pixel_ssd_ss_64x64_avx2;
 
         p.quant = x265_quant_avx2;
         p.nquant = x265_nquant_avx2;
diff -r dc4fcfc574ad -r 234bc93bd516 source/common/x86/ssd-a.asm
--- a/source/common/x86/ssd-a.asm	Wed May 20 12:17:44 2015 -0500
+++ b/source/common/x86/ssd-a.asm	Thu May 21 16:34:48 2015 +0530
@@ -347,6 +347,8 @@ SSD_TWO    64, 64
 INIT_YMM avx2
 SSD_ONE    16,  8
 SSD_ONE    16, 16
+SSD_ONE    32, 32
+SSD_ONE    64, 64
 %endif ; HIGH_BIT_DEPTH
 
 ;-----------------------------------------------------------------------------
diff -r dc4fcfc574ad -r 234bc93bd516 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Wed May 20 12:17:44 2015 -0500
+++ b/source/encoder/encoder.cpp	Thu May 21 16:34:48 2015 +0530
@@ -1795,7 +1795,7 @@ void Encoder::configure(x265_param *p)
 
     if (p->analysisMode && (p->bDistributeModeAnalysis || p->bDistributeMotionEstimation))
     {
-        x265_log(p, X265_LOG_ERROR, "Analysis load/save options incompatible with pmode/pme");
+        x265_log(p, X265_LOG_WARNING, "Analysis load/save options incompatible with pmode/pme Disabling pmod/pme\n");
         p->bDistributeMotionEstimation = p->bDistributeModeAnalysis = 0;
     }
 
diff -r dc4fcfc574ad -r 234bc93bd516 source/test/regression-tests.txt
--- a/source/test/regression-tests.txt	Wed May 20 12:17:44 2015 -0500
+++ b/source/test/regression-tests.txt	Thu May 21 16:34:48 2015 +0530
@@ -14,18 +14,18 @@
 BasketballDrive_1920x1080_50.y4m,--preset faster --aq-strength 2 --merange 190
 BasketballDrive_1920x1080_50.y4m,--preset medium --ctu 16 --max-tu-size 8 --subme 7 --qg-size 32
 BasketballDrive_1920x1080_50.y4m,--preset medium --keyint -1 --nr-inter 100 -F4 --no-sao
-BasketballDrive_1920x1080_50.y4m,--preset slow --nr-intra 100 -F4 --aq-strength 3 --qg-size 16
+BasketballDrive_1920x1080_50.y4m,--preset slow --nr-intra 100 -F4 --aq-strength 3 --qg-size 16 --limit-refs 1
 BasketballDrive_1920x1080_50.y4m,--preset slower --lossless --chromaloc 3 --subme 0
 BasketballDrive_1920x1080_50.y4m,--preset superfast --psy-rd 1 --ctu 16 --no-wpp
 BasketballDrive_1920x1080_50.y4m,--preset ultrafast --signhide --colormatrix bt709
 BasketballDrive_1920x1080_50.y4m,--preset veryfast --tune zerolatency --no-temporal-mvp
 BasketballDrive_1920x1080_50.y4m,--preset veryslow --crf 4 --cu-lossless --pmode
 Coastguard-4k.y4m,--preset medium --rdoq-level 1 --tune ssim --no-signhide --me umh
-Coastguard-4k.y4m,--preset slow --tune psnr --cbqpoffs -1 --crqpoffs 1
+Coastguard-4k.y4m,--preset slow --tune psnr --cbqpoffs -1 --crqpoffs 1 --limit-refs 1
 Coastguard-4k.y4m,--preset superfast --tune grain --overscan=crop
 CrowdRun_1920x1080_50_10bit_422.yuv,--preset fast --aq-mode 0 --sar 2 --range full
 CrowdRun_1920x1080_50_10bit_422.yuv,--preset faster --max-tu-size 4 --min-cu-size 32
-CrowdRun_1920x1080_50_10bit_422.yuv,--preset medium --no-wpp --no-cutree --no-strong-intra-smoothing
+CrowdRun_1920x1080_50_10bit_422.yuv,--preset medium --no-wpp --no-cutree --no-strong-intra-smoothing --limit-refs 1
 CrowdRun_1920x1080_50_10bit_422.yuv,--preset slow --no-wpp --tune ssim --transfer smpte240m
 CrowdRun_1920x1080_50_10bit_422.yuv,--preset slower --tune ssim --tune fastdecode
 CrowdRun_1920x1080_50_10bit_422.yuv,--preset superfast --weightp --no-wpp --sao
@@ -34,10 +34,10 @@ CrowdRun_1920x1080_50_10bit_422.yuv,--pr
 CrowdRun_1920x1080_50_10bit_444.yuv,--preset medium --dither --keyint -1 --rdoq-level 1
 CrowdRun_1920x1080_50_10bit_444.yuv,--preset superfast --weightp --dither --no-psy-rd
 CrowdRun_1920x1080_50_10bit_444.yuv,--preset ultrafast --weightp --no-wpp --no-open-gop
-CrowdRun_1920x1080_50_10bit_444.yuv,--preset veryfast --temporal-layers --repeat-headers
+CrowdRun_1920x1080_50_10bit_444.yuv,--preset veryfast --temporal-layers --repeat-headers --limit-refs 2
 CrowdRun_1920x1080_50_10bit_444.yuv,--preset veryslow --tskip --tskip-fast --no-scenecut
 DucksAndLegs_1920x1080_60_10bit_422.yuv,--preset medium --tune psnr --bframes 16
-DucksAndLegs_1920x1080_60_10bit_422.yuv,--preset slow --temporal-layers --no-psy-rd --qg-size 32
+DucksAndLegs_1920x1080_60_10bit_422.yuv,--preset slow --temporal-layers --no-psy-rd --qg-size 32 --limit-refs 0
 DucksAndLegs_1920x1080_60_10bit_422.yuv,--preset superfast --weightp --qg-size 16
 DucksAndLegs_1920x1080_60_10bit_444.yuv,--preset medium --nr-inter 500 -F4 --no-psy-rdoq
 DucksAndLegs_1920x1080_60_10bit_444.yuv,--preset slower --no-weightp --rdoq-level 0
@@ -51,9 +51,9 @@ Kimono1_1920x1080_24_10bit_444.yuv,--pre
 Kimono1_1920x1080_24_10bit_444.yuv,--preset superfast --weightb
 KristenAndSara_1280x720_60.y4m,--preset medium --no-cutree --max-tu-size 16
 KristenAndSara_1280x720_60.y4m,--preset slower --pmode --max-tu-size 8
-KristenAndSara_1280x720_60.y4m,--preset superfast --min-cu-size 16 --qg-size 16
+KristenAndSara_1280x720_60.y4m,--preset superfast --min-cu-size 16 --qg-size 16 --limit-refs 1
 KristenAndSara_1280x720_60.y4m,--preset ultrafast --strong-intra-smoothing
-NebutaFestival_2560x1600_60_10bit_crop.yuv,--preset medium --tune grain
+NebutaFestival_2560x1600_60_10bit_crop.yuv,--preset medium --tune grain --limit-refs 2
 NebutaFestival_2560x1600_60_10bit_crop.yuv,--preset superfast --tune psnr
 News-4k.y4m,--preset medium --tune ssim --no-sao --qg-size 32
 News-4k.y4m,--preset superfast --lookahead-slices 6 --aq-mode 0
@@ -67,15 +67,15 @@ RaceHorses_416x240_30.y4m,--preset mediu
 RaceHorses_416x240_30.y4m,--preset slower --keyint -1 --rdoq-level 0
 RaceHorses_416x240_30.y4m,--preset superfast --no-cutree
 RaceHorses_416x240_30.y4m,--preset veryslow --tskip-fast --tskip
-RaceHorses_416x240_30_10bit.yuv,--preset fast --lookahead-slices 2 --b-intra
+RaceHorses_416x240_30_10bit.yuv,--preset fast --lookahead-slices 2 --b-intra --limit-refs 1
 RaceHorses_416x240_30_10bit.yuv,--preset faster --rdoq-level 0 --dither
 RaceHorses_416x240_30_10bit.yuv,--preset slow --tune grain
-RaceHorses_416x240_30_10bit.yuv,--preset ultrafast --tune psnr
+RaceHorses_416x240_30_10bit.yuv,--preset ultrafast --tune psnr --limit-refs 1
 RaceHorses_416x240_30_10bit.yuv,--preset veryfast --weightb
 RaceHorses_416x240_30_10bit.yuv,--preset placebo
 SteamLocomotiveTrain_2560x1600_60_10bit_crop.yuv,--preset medium --dither
 big_buck_bunny_360p24.y4m,--preset faster --keyint 240 --min-keyint 60 --rc-lookahead 200
-big_buck_bunny_360p24.y4m,--preset medium --keyint 60 --min-keyint 48 --weightb
+big_buck_bunny_360p24.y4m,--preset medium --keyint 60 --min-keyint 48 --weightb --limit-refs 3
 big_buck_bunny_360p24.y4m,--preset slow --psy-rdoq 2.0 --rdoq-level 1 --no-b-intra
 big_buck_bunny_360p24.y4m,--preset superfast --psy-rdoq 2.0
 big_buck_bunny_360p24.y4m,--preset ultrafast --deblock=2
@@ -85,13 +85,13 @@ city_4cif_60fps.y4m,--preset superfast -
 city_4cif_60fps.y4m,--preset slower --scaling-list default
 city_4cif_60fps.y4m,--preset veryslow --rdpenalty 2 --sao-non-deblock --no-b-intra
 ducks_take_off_420_720p50.y4m,--preset fast --deblock 6 --bframes 16 --rc-lookahead 40
-ducks_take_off_420_720p50.y4m,--preset faster --qp 24 --deblock -6
+ducks_take_off_420_720p50.y4m,--preset faster --qp 24 --deblock -6 --limit-refs 2
 ducks_take_off_420_720p50.y4m,--preset medium --tskip --tskip-fast --constrained-intra
 ducks_take_off_420_720p50.y4m,--preset slow --scaling-list default --qp 40
 ducks_take_off_420_720p50.y4m,--preset ultrafast --constrained-intra --rd 1
 ducks_take_off_420_720p50.y4m,--preset veryslow --constrained-intra --bframes 2
 ducks_take_off_444_720p50.y4m,--preset medium --qp 38 --no-scenecut
-ducks_take_off_444_720p50.y4m,--preset superfast --weightp --rd 0
+ducks_take_off_444_720p50.y4m,--preset superfast --weightp --rd 0 --limit-refs 2
 ducks_take_off_444_720p50.y4m,--preset slower --psy-rd 1 --psy-rdoq 2.0 --rdoq-level 1
 mobile_calendar_422_ntsc.y4m,--preset medium --bitrate 500 -F4
 mobile_calendar_422_ntsc.y4m,--preset slower --tskip --tskip-fast
@@ -113,7 +113,7 @@ vtc1nw_422_ntsc.y4m,--preset medium --sc
 vtc1nw_422_ntsc.y4m,--preset slower --nr-inter 1000 -F4 --tune fast-decode --qg-size 16
 vtc1nw_422_ntsc.y4m,--preset superfast --weightp --nr-intra 100 -F4
 washdc_422_ntsc.y4m,--preset faster --rdoq-level 1 --max-merge 5
-washdc_422_ntsc.y4m,--preset medium --no-weightp --max-tu-size 4
+washdc_422_ntsc.y4m,--preset medium --no-weightp --max-tu-size 4 --limit-refs 1
 washdc_422_ntsc.y4m,--preset slower --psy-rdoq 2.0 --rdoq-level 2 --qg-size 32
 washdc_422_ntsc.y4m,--preset superfast --psy-rd 1 --tune zerolatency
 washdc_422_ntsc.y4m,--preset ultrafast --weightp --tu-intra-depth 4


More information about the x265-commits mailing list