[x265] [PATCH] asm-primitives.cpp, fundef creation and function pointer table setup for luma asm primitives

praveen at multicorewareinc.com praveen at multicorewareinc.com
Thu Oct 17 17:37:05 CEST 2013


# HG changeset patch
# User Praveen Tiwari
# Date 1382024214 -19800
# Node ID 9b1a816dd291e0e0fdca9117ecb50bd9b720e3a6
# Parent  93bef87604902f0ba3b07faf836fdd74d4e847c6
asm-primitives.cpp, fundef creation and function pointer table setup for luma asm primitives

diff -r 93bef8760490 -r 9b1a816dd291 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Thu Oct 17 21:03:28 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp	Thu Oct 17 21:06:54 2013 +0530
@@ -2,6 +2,7 @@
  * Copyright (C) 2013 x265 project
  *
  * Authors: Steve Borho <steve at borho.org>
+ *          Praveen Kumar Tiwari <praveen at multicorewareinc.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -121,6 +122,39 @@
 SETUP_CHROMA_FUNC_DEF(32, 16);
 SETUP_CHROMA_FUNC_DEF(32, 24);
 SETUP_CHROMA_FUNC_DEF(32, 32);
+
+#define SET_LUMA_PRIMITIVE_TABLE_F3(W, H) /* Emits three prototypes for sse4 8-tap horizontal pixel-to-pixel luma interpolation routines, keyed on block size (W, H). */ \
+    void x265_interp_8tap_horiz_pp_ ## W ## x ## H ## _sse4(pixel * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int coeffIdx); /* exact W x H variant */ \
+    void x265_interp_8tap_horiz_pp_ ## W ## x ## (H + 1) ## _sse4(pixel * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int coeffIdx); /* NOTE(review): pasting 'x' with '(' does not form a valid preprocessing token, and the preprocessor never evaluates H + 1, so this cannot yield a W x N identifier; intended symbol name needs confirming. */ \
+    void x265_interp_8tap_horiz_pp_ ## (W + 1) ## x ## H ## _sse4(pixel * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int coeffIdx); /* NOTE(review): same invalid paste with (W + 1); intended symbol name needs confirming. */
+
+#define SET_LUMA_PRIMITIVE_TABLE_F1(W, H) /* Emits a single prototype: the sse4 8-tap horizontal pixel-to-pixel luma interpolation routine whose name embeds exactly W x H. */ \
+    void x265_interp_8tap_horiz_pp_ ## W ## x ## H ## _sse4(pixel * src, intptr_t srcStride, pixel * dst, intptr_t dstStride, int coeffIdx);
+
+#define SET_LUMA_PRIMITIVE_TABLE /* Expands to the luma interpolation prototypes for every size listed below; 16x64, 48x64, 64x16 and 64x48 use the single-prototype _F1 form, all others the three-prototype _F3 form. NOTE(review): no invocation of this macro is visible in this hunk - confirm it is expanded somewhere, otherwise nothing is declared. */ \
+    SET_LUMA_PRIMITIVE_TABLE_F3(4,   8); \
+    SET_LUMA_PRIMITIVE_TABLE_F3(4,  16); \
+    SET_LUMA_PRIMITIVE_TABLE_F3(8,   4); \
+    SET_LUMA_PRIMITIVE_TABLE_F3(8,   8); \
+    SET_LUMA_PRIMITIVE_TABLE_F3(8,  16); \
+    SET_LUMA_PRIMITIVE_TABLE_F3(8,  32); \
+    SET_LUMA_PRIMITIVE_TABLE_F3(12, 16); \
+    SET_LUMA_PRIMITIVE_TABLE_F3(16,  4); \
+    SET_LUMA_PRIMITIVE_TABLE_F3(16,  8); \
+    SET_LUMA_PRIMITIVE_TABLE_F3(16, 12); \
+    SET_LUMA_PRIMITIVE_TABLE_F3(16, 16); \
+    SET_LUMA_PRIMITIVE_TABLE_F3(16, 32); \
+    SET_LUMA_PRIMITIVE_TABLE_F1(16, 64); \
+    SET_LUMA_PRIMITIVE_TABLE_F3(32,  8); \
+    SET_LUMA_PRIMITIVE_TABLE_F3(32, 16); \
+    SET_LUMA_PRIMITIVE_TABLE_F3(32, 24); \
+    SET_LUMA_PRIMITIVE_TABLE_F3(32, 32); \
+    SET_LUMA_PRIMITIVE_TABLE_F3(32, 64); \
+    SET_LUMA_PRIMITIVE_TABLE_F1(48, 64); \
+    SET_LUMA_PRIMITIVE_TABLE_F1(64, 16); \
+    SET_LUMA_PRIMITIVE_TABLE_F3(64, 32); \
+    SET_LUMA_PRIMITIVE_TABLE_F1(64, 48); \
+    SET_LUMA_PRIMITIVE_TABLE_F3(64, 64);
 }
 
 using namespace x265;
@@ -271,6 +305,39 @@
     SETUP_CHROMA_PARTITION(32, 24, cpu); \
     SETUP_CHROMA_PARTITION(32, 32, cpu);
 
+#define SETUP_LUMA_PARTITION_F3(W, H, cpu) /* Stores three function pointers into p.luma_hpp, at index LUMA_PARTITION_WxH and the two slots after it; cpu is pasted on as the function-name suffix. */ \
+    p.luma_hpp[LUMA_PARTITION_ ## W ## x ## H] = x265_interp_8tap_horiz_pp_ ## W ## x ## H ## cpu; /* exact W x H variant */ \
+    p.luma_hpp[(LUMA_PARTITION_ ## W ## x ## H) + 1] = x265_interp_8tap_horiz_pp_ ## W ## x ## (H + 1) ## cpu; /* NOTE(review): pasting 'x' with '(' does not form a valid preprocessing token, and H + 1 is never evaluated by the preprocessor; intended function name needs confirming. */ \
+    p.luma_hpp[(LUMA_PARTITION_ ## W ## x ## H) + 2] = x265_interp_8tap_horiz_pp_ ## (W + 1) ## x ## H ## cpu; /* NOTE(review): same invalid paste with (W + 1); also presumes the enum slots at +1 and +2 are the matching partitions - verify against the LUMA_PARTITION_ enum order. */
+
+#define SETUP_LUMA_PARTITION_F1(W, H, cpu) /* Stores one function pointer into p.luma_hpp at index LUMA_PARTITION_WxH; cpu is pasted on as the function-name suffix. */ \
+    p.luma_hpp[LUMA_PARTITION_ ## W ## x ## H] = x265_interp_8tap_horiz_pp_ ## W ## x ## H ## cpu;
+
+#define SETUP_LUMA_PARTITION(cpu) /* Fills p.luma_hpp for every luma size listed below, forwarding cpu (the function-name suffix) to the per-size helpers. No space may precede the parameter list: with one, this becomes an object-like macro and cpu is never substituted. */ \
+    SETUP_LUMA_PARTITION_F3(4,  8,  cpu); \
+    SETUP_LUMA_PARTITION_F3(4,  16, cpu); \
+    SETUP_LUMA_PARTITION_F3(8,  4,  cpu); \
+    SETUP_LUMA_PARTITION_F3(8,  8,  cpu); \
+    SETUP_LUMA_PARTITION_F3(8,  16, cpu); \
+    SETUP_LUMA_PARTITION_F3(8,  32, cpu); \
+    SETUP_LUMA_PARTITION_F3(12, 16, cpu); \
+    SETUP_LUMA_PARTITION_F3(16, 4,  cpu); \
+    SETUP_LUMA_PARTITION_F3(16, 8,  cpu); \
+    SETUP_LUMA_PARTITION_F3(16, 12, cpu); \
+    SETUP_LUMA_PARTITION_F3(16, 16, cpu); \
+    SETUP_LUMA_PARTITION_F3(16, 32, cpu); \
+    SETUP_LUMA_PARTITION_F1(16, 64, cpu); \
+    SETUP_LUMA_PARTITION_F3(32, 8,  cpu); \
+    SETUP_LUMA_PARTITION_F3(32, 16, cpu); \
+    SETUP_LUMA_PARTITION_F3(32, 24, cpu); \
+    SETUP_LUMA_PARTITION_F3(32, 32, cpu); \
+    SETUP_LUMA_PARTITION_F3(32, 64, cpu); \
+    SETUP_LUMA_PARTITION_F1(48, 64, cpu); \
+    SETUP_LUMA_PARTITION_F1(64, 16, cpu); \
+    SETUP_LUMA_PARTITION_F3(64, 32, cpu); \
+    SETUP_LUMA_PARTITION_F1(64, 48, cpu); \
+    SETUP_LUMA_PARTITION_F3(64, 64, cpu);
+
 void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask)
 {
 #if HIGH_BIT_DEPTH


More information about the x265-devel mailing list