[x265-commits] [x265] x265: remove X265_TYPE_KEYFRAME.
Deepthi Nandakumar
deepthi at multicorewareinc.com
Fri Feb 7 19:42:59 CET 2014
details: http://hg.videolan.org/x265/rev/c1cea0534e6b
branches:
changeset: 6052:c1cea0534e6b
user: Deepthi Nandakumar <deepthi at multicorewareinc.com>
date: Fri Feb 07 13:14:45 2014 +0530
description:
x265: remove X265_TYPE_KEYFRAME.
Not used, and not required. IDR/I-slice can be chosen at the outset based on openGOP.
Subject: [x265] asm: fix sad_x4 stress case failure on AVX2
details: http://hg.videolan.org/x265/rev/cfbe679e73dc
branches:
changeset: 6053:cfbe679e73dc
user: Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
date: Fri Feb 07 12:39:17 2014 +0530
description:
asm: fix sad_x4 stress case failure on AVX2
Subject: [x265] asm: correction of function declaration to sse4
details: http://hg.videolan.org/x265/rev/95fc15598e3e
branches:
changeset: 6054:95fc15598e3e
user: Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
date: Fri Feb 07 15:52:20 2014 +0530
description:
asm: correction of function declaration to sse4
Subject: [x265] asm: intra_pred_ang16 code for all remaining modes
details: http://hg.videolan.org/x265/rev/990dbb374285
branches:
changeset: 6055:990dbb374285
user: Murugan Vairavel <murugan at multicorewareinc.com>
date: Fri Feb 07 17:57:39 2014 +0530
description:
asm: intra_pred_ang16 code for all remaining modes
Subject: [x265] vec: remove intra-ssse3.cpp, full assembly coverage
details: http://hg.videolan.org/x265/rev/7da1a8d3bbbe
branches:
changeset: 6056:7da1a8d3bbbe
user: Steve Borho <steve at borho.org>
date: Fri Feb 07 12:23:39 2014 -0600
description:
vec: remove intra-ssse3.cpp, full assembly coverage
Subject: [x265] asm: cleanup unused registers interp_4tap_horiz_pp_2xN and 4xN
details: http://hg.videolan.org/x265/rev/fa9f7b56d4d8
branches:
changeset: 6057:fa9f7b56d4d8
user: Murugan Vairavel <murugan at multicorewareinc.com>
date: Fri Feb 07 21:37:17 2014 +0530
description:
asm: cleanup unused registers interp_4tap_horiz_pp_2xN and 4xN
diffstat:
source/common/CMakeLists.txt | 2 +-
source/common/vec/intra-ssse3.cpp | 573 -----
source/common/vec/vec-primitives.cpp | 2 -
source/common/x86/asm-primitives.cpp | 40 +-
source/common/x86/blockcopy8.asm | 2 +-
source/common/x86/blockcopy8.h | 2 +-
source/common/x86/intrapred.h | 14 +-
source/common/x86/intrapred8.asm | 3324 ++++++++++++++++++++++++++++++++++
source/common/x86/ipfilter8.asm | 48 +-
source/common/x86/mc-a.asm | 22 +-
source/common/x86/pixel.h | 1 -
source/common/x86/sad-a.asm | 18 +-
source/encoder/slicetype.cpp | 2 -
source/x265.h | 2 +-
14 files changed, 3415 insertions(+), 637 deletions(-)
diffs (truncated from 4395 to 300 lines):
diff -r d2d181f1881a -r fa9f7b56d4d8 source/common/CMakeLists.txt
--- a/source/common/CMakeLists.txt Fri Feb 07 00:32:32 2014 -0600
+++ b/source/common/CMakeLists.txt Fri Feb 07 21:37:17 2014 +0530
@@ -60,7 +60,7 @@ if(MSVC)
endif(MSVC)
set(SSE3 vec/dct-sse3.cpp vec/blockcopy-sse3.cpp)
-set(SSSE3 vec/dct-ssse3.cpp vec/intra-ssse3.cpp)
+set(SSSE3 vec/dct-ssse3.cpp)
set(SSE41 vec/dct-sse41.cpp vec/intra-sse41.cpp)
if(MSVC AND X86)
diff -r d2d181f1881a -r fa9f7b56d4d8 source/common/vec/intra-ssse3.cpp
--- a/source/common/vec/intra-ssse3.cpp Fri Feb 07 00:32:32 2014 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,573 +0,0 @@
-/*****************************************************************************
- * Copyright (C) 2013 x265 project
- *
- * Authors: Min Chen <chenm003 at 163.com>
- * Deepthi Devaki <deepthidevaki at multicorewareinc.com>
- * Steve Borho <steve at borho.org>
- * ShinYee Chung <shinyee at multicorewareinc.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
- *
- * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing at multicorewareinc.com.
- *****************************************************************************/
-
-#include "primitives.h"
-#include "TLibCommon/TComRom.h"
-#include <assert.h>
-#include <xmmintrin.h> // SSE
-#include <pmmintrin.h> // SSE3
-#include <tmmintrin.h> // SSSE3
-
-using namespace x265;
-
-namespace {
-#if !HIGH_BIT_DEPTH
-const int angAP[17][64] =
-{
- {
- 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64
- },
- {
- 0, 1, 2, 3, 4, 4, 5, 6, 7, 8, 8, 9, 10, 11, 12, 13, 13, 14, 15, 16, 17, 17, 18, 19, 20, 21, 21, 22, 23, 24, 25, 26, 26, 27, 28, 29, 30, 30, 31, 32, 33, 34, 34, 35, 36, 37, 38, 39, 39, 40, 41, 42, 43, 43, 44, 45, 46, 47, 47, 48, 49, 50, 51, 52
- },
- {
- 0, 1, 1, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12, 13, 13, 14, 15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23, 24, 24, 25, 26, 26, 27, 28, 28, 29, 30, 30, 31, 32, 32, 33, 34, 34, 35, 36, 36, 37, 38, 38, 39, 40, 40, 41, 42
- },
- {
- 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30, 30, 31, 31, 32, 32, 33, 34
- },
- {
- 0, 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 6, 6, 6, 7, 7, 8, 8, 8, 9, 9, 10, 10, 10, 11, 11, 12, 12, 13, 13, 13, 14, 14, 15, 15, 15, 16, 16, 17, 17, 17, 18, 18, 19, 19, 19, 20, 20, 21, 21, 21, 22, 22, 23, 23, 23, 24, 24, 25, 25, 26
- },
- {
- 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 12, 12, 12, 12, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 16, 16, 16, 16, 17, 17, 17, 18
- },
- {
- 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10
- },
- {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
- },
- { // 0th virtual index; never used; just to help indexing
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4
- },
- {
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4
- },
- {
- -1, -1, -1, -1, -1, -1, -2, -2, -2, -2, -2, -2, -3, -3, -3, -3, -3, -3, -3, -4, -4, -4, -4, -4, -4, -5, -5, -5, -5, -5, -5, -5, -6, -6, -6, -6, -6, -6, -7, -7, -7, -7, -7, -7, -8, -8, -8, -8, -8, -8, -8, -9, -9, -9, -9, -9, -9, -10, -10, -10, -10, -10, -10, -10
- },
- {
- -1, -1, -1, -2, -2, -2, -2, -3, -3, -3, -4, -4, -4, -4, -5, -5, -5, -6, -6, -6, -6, -7, -7, -7, -8, -8, -8, -8, -9, -9, -9, -9, -10, -10, -10, -11, -11, -11, -11, -12, -12, -12, -13, -13, -13, -13, -14, -14, -14, -15, -15, -15, -15, -16, -16, -16, -17, -17, -17, -17, -18, -18, -18, -18
- },
- {
- -1, -1, -2, -2, -3, -3, -3, -4, -4, -5, -5, -5, -6, -6, -7, -7, -7, -8, -8, -9, -9, -9, -10, -10, -11, -11, -11, -12, -12, -13, -13, -13, -14, -14, -15, -15, -16, -16, -16, -17, -17, -18, -18, -18, -19, -19, -20, -20, -20, -21, -21, -22, -22, -22, -23, -23, -24, -24, -24, -25, -25, -26, -26, -26
- },
- {
- -1, -2, -2, -3, -3, -4, -4, -5, -5, -6, -6, -7, -7, -8, -8, -9, -10, -10, -11, -11, -12, -12, -13, -13, -14, -14, -15, -15, -16, -16, -17, -17, -18, -19, -19, -20, -20, -21, -21, -22, -22, -23, -23, -24, -24, -25, -25, -26, -27, -27, -28, -28, -29, -29, -30, -30, -31, -31, -32, -32, -33, -33, -34, -34
- },
- {
- -1, -2, -2, -3, -4, -4, -5, -6, -6, -7, -8, -8, -9, -10, -10, -11, -12, -12, -13, -14, -14, -15, -16, -16, -17, -18, -18, -19, -20, -20, -21, -21, -22, -23, -23, -24, -25, -25, -26, -27, -27, -28, -29, -29, -30, -31, -31, -32, -33, -33, -34, -35, -35, -36, -37, -37, -38, -39, -39, -40, -41, -41, -42, -42
- },
- {
- -1, -2, -3, -4, -5, -5, -6, -7, -8, -9, -9, -10, -11, -12, -13, -13, -14, -15, -16, -17, -18, -18, -19, -20, -21, -22, -22, -23, -24, -25, -26, -26, -27, -28, -29, -30, -31, -31, -32, -33, -34, -35, -35, -36, -37, -38, -39, -39, -40, -41, -42, -43, -44, -44, -45, -46, -47, -48, -48, -49, -50, -51, -52, -52
- },
- {
- -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, -38, -39, -40, -41, -42, -43, -44, -45, -46, -47, -48, -49, -50, -51, -52, -53, -54, -55, -56, -57, -58, -59, -60, -61, -62, -63, -64
- }
-};
-
-#define GETAP(X, Y) angAP[8 - (X)][(Y)]
-
-// 16x16
-#define PREDANG_CALCROW_VER(X) \
- LOADROW(row11L, row11H, GETAP(lookIdx, X)); \
- LOADROW(row12L, row12H, GETAP(lookIdx, X) + 1); \
- CALCROW(row11L, row11H, row11L, row11H, row12L, row12H); \
- itmp = _mm_packus_epi16(row11L, row11H); \
- _mm_storeu_si128((__m128i*)(dst + ((X)*dstStride)), itmp);
-
-#define PREDANG_CALCROW_HOR(X, rowx) \
- LOADROW(row11L, row11H, GETAP(lookIdx, (X))); \
- LOADROW(row12L, row12H, GETAP(lookIdx, (X)) + 1); \
- CALCROW(row11L, row11H, row11L, row11H, row12L, row12H); \
- rowx = _mm_packus_epi16(row11L, row11H);
-
-#define LOADROW(ROWL, ROWH, X) \
- itmp = _mm_loadu_si128((__m128i const*)(refMain + 1 + (X))); \
- ROWL = _mm_unpacklo_epi8(itmp, _mm_setzero_si128()); \
- ROWH = _mm_unpackhi_epi8(itmp, _mm_setzero_si128());
-
-#define CALCROW(RESL, RESH, ROW1L, ROW1H, ROW2L, ROW2H) \
- v_deltaPos = _mm_add_epi16(v_deltaPos, v_ipAngle); \
- v_deltaFract = _mm_and_si128(v_deltaPos, thirty1); \
- it1 = _mm_sub_epi16(thirty2, v_deltaFract); \
- it2 = _mm_mullo_epi16(it1, ROW1L); \
- it3 = _mm_mullo_epi16(v_deltaFract, ROW2L); \
- it2 = _mm_add_epi16(it2, it3); \
- i16 = _mm_set1_epi16(16); \
- it2 = _mm_add_epi16(it2, i16); \
- RESL = _mm_srai_epi16(it2, 5); \
- it2 = _mm_mullo_epi16(it1, ROW1H); \
- it3 = _mm_mullo_epi16(v_deltaFract, ROW2H); \
- it2 = _mm_add_epi16(it2, it3); \
- it2 = _mm_add_epi16(it2, i16); \
- RESH = _mm_srai_epi16(it2, 5);
-
-#define BLND2_16(R1, R2) \
- itmp1 = _mm_unpacklo_epi8(R1, R2); \
- itmp2 = _mm_unpackhi_epi8(R1, R2); \
- R1 = itmp1; \
- R2 = itmp2;
-
-#define MB4(R1, R2, R3, R4) \
- BLND2_16(R1, R2) \
- BLND2_16(R3, R4) \
- itmp1 = _mm_unpacklo_epi16(R1, R3); \
- itmp2 = _mm_unpackhi_epi16(R1, R3); \
- R1 = itmp1; \
- R3 = itmp2; \
- itmp1 = _mm_unpacklo_epi16(R2, R4); \
- itmp2 = _mm_unpackhi_epi16(R2, R4); \
- R2 = itmp1; \
- R4 = itmp2;
-
-#define BLND2_4(R1, R2) \
- itmp1 = _mm_unpacklo_epi32(R1, R2); \
- itmp2 = _mm_unpackhi_epi32(R1, R2); \
- R1 = itmp1; \
- R2 = itmp2;
-
-#define BLND2_2(R1, R2) \
- itmp1 = _mm_unpacklo_epi64(R1, R2); \
- itmp2 = _mm_unpackhi_epi64(R1, R2); \
- _mm_storeu_si128((__m128i*)dst, itmp1); \
- dst += dstStride; \
- _mm_storeu_si128((__m128i*)dst, itmp2); \
- dst += dstStride;
-
-#define CALC_BLND_8ROWS(R1, R2, R3, R4, R5, R6, R7, R8, X) \
- PREDANG_CALCROW_HOR(0 + X, R1) \
- PREDANG_CALCROW_HOR(1 + X, R2) \
- PREDANG_CALCROW_HOR(2 + X, R3) \
- PREDANG_CALCROW_HOR(3 + X, R4) \
- PREDANG_CALCROW_HOR(4 + X, R5) \
- PREDANG_CALCROW_HOR(5 + X, R6) \
- PREDANG_CALCROW_HOR(6 + X, R7) \
- PREDANG_CALCROW_HOR(7 + X, R8) \
- MB4(R1, R2, R3, R4) \
- MB4(R5, R6, R7, R8) \
- BLND2_4(R1, R5); \
- BLND2_4(R2, R6); \
- BLND2_4(R3, R7); \
- BLND2_4(R4, R8);
-
-void intraPredAng16x16(pixel* dst, intptr_t dstStride, pixel *refLeft, pixel *refAbove, int dirMode, int bFilter)
-{
- int k;
- int blkSize = 16;
-
- // Map the mode index to main prediction direction and angle
- assert(dirMode > 1); //no planar and dc
- bool modeHor = (dirMode < 18);
- bool modeVer = !modeHor;
- int intraPredAngle = modeVer ? (int)dirMode - VER_IDX : modeHor ? -((int)dirMode - HOR_IDX) : 0;
- int lookIdx = intraPredAngle;
- int absAng = abs(intraPredAngle);
- int signAng = intraPredAngle < 0 ? -1 : 1;
-
- // Set bitshifts and scale the angle parameter to block size
- int angTable[9] = { 0, 2, 5, 9, 13, 17, 21, 26, 32 };
- int invAngTable[9] = { 0, 4096, 1638, 910, 630, 482, 390, 315, 256 }; // (256 * 32) / Angle
- int invAngle = invAngTable[absAng];
- absAng = angTable[absAng];
- intraPredAngle = signAng * absAng;
-
- // Do angular predictions
-
- pixel* refMain;
- pixel* refSide;
-
- // Initialise the Main and Left reference array.
- if (intraPredAngle < 0)
- {
- refMain = (modeVer ? refAbove : refLeft); // + (blkSize - 1);
- refSide = (modeVer ? refLeft : refAbove); // + (blkSize - 1);
-
- // Extend the Main reference to the left.
- int invAngleSum = 128; // rounding for (shift by 8)
- if (intraPredAngle != -32)
- for (k = -1; k > blkSize * intraPredAngle >> 5; k--)
- {
- invAngleSum += invAngle;
- refMain[k] = refSide[invAngleSum >> 8];
- }
- }
- else
- {
- refMain = modeVer ? refAbove : refLeft;
- refSide = modeVer ? refLeft : refAbove;
- }
-
- // bfilter will always be true for blocksize 8
- if (intraPredAngle == 0) // Exactly hotizontal/vertical angles
- {
- if (modeHor)
- {
- __m128i v_temp;
- __m128i tmp1;
- v_temp = _mm_loadu_si128((__m128i*)(refMain + 1));
-
- if (bFilter)
- {
- __m128i v_side_0 = _mm_set1_epi16(refSide[0]); // refSide[0] value in a vector
- __m128i v_temp16;
- v_temp16 = _mm_loadu_si128((__m128i*)(refSide + 1));
- __m128i v_side;
- v_side = _mm_unpacklo_epi8(v_temp16, _mm_setzero_si128());
-
- __m128i row01, row02, ref;
- ref = _mm_set1_epi16(refMain[1]);
- v_side = _mm_sub_epi16(v_side, v_side_0);
- v_side = _mm_srai_epi16(v_side, 1);
- row01 = _mm_add_epi16(ref, v_side);
- row01 = _mm_min_epi16(_mm_max_epi16(_mm_setzero_si128(), row01), _mm_set1_epi16((1 << X265_DEPTH) - 1));
-
- v_side = _mm_unpackhi_epi8(v_temp16, _mm_setzero_si128());
- v_side = _mm_sub_epi16(v_side, v_side_0);
- v_side = _mm_srai_epi16(v_side, 1);
- row02 = _mm_add_epi16(ref, v_side);
- row02 = _mm_min_epi16(_mm_max_epi16(_mm_setzero_si128(), row02), _mm_set1_epi16((1 << X265_DEPTH) - 1));
-
- tmp1 = _mm_packus_epi16(row01, row02);
- _mm_storeu_si128((__m128i*)dst, tmp1); //row0
- }
- else
- {
- tmp1 = _mm_shuffle_epi8(v_temp, _mm_set1_epi8(0));
- _mm_storeu_si128((__m128i*)dst, tmp1); //row0
- }
-
- tmp1 = _mm_shuffle_epi8(v_temp, _mm_set1_epi8(1));
- _mm_storeu_si128((__m128i*)(dst + (1 * dstStride)), tmp1); //row1
-
- tmp1 = _mm_shuffle_epi8(v_temp, _mm_set1_epi8(2));
- _mm_storeu_si128((__m128i*)(dst + (2 * dstStride)), tmp1); //row2
-
- tmp1 = _mm_shuffle_epi8(v_temp, _mm_set1_epi8(3));
- _mm_storeu_si128((__m128i*)(dst + (3 * dstStride)), tmp1); //row3
-
- tmp1 = _mm_shuffle_epi8(v_temp, _mm_set1_epi8(4));
- _mm_storeu_si128((__m128i*)(dst + (4 * dstStride)), tmp1); //row4
-
- tmp1 = _mm_shuffle_epi8(v_temp, _mm_set1_epi8(5));
- _mm_storeu_si128((__m128i*)(dst + (5 * dstStride)), tmp1); //row5
-
- tmp1 = _mm_shuffle_epi8(v_temp, _mm_set1_epi8(6));
- _mm_storeu_si128((__m128i*)(dst + (6 * dstStride)), tmp1); //row6
-
- tmp1 = _mm_shuffle_epi8(v_temp, _mm_set1_epi8(7));
- _mm_storeu_si128((__m128i*)(dst + (7 * dstStride)), tmp1); //row7
-
More information about the x265-commits
mailing list