[x265] [PATCH 1 of 2] Vectorized WeightUni
deepthidevaki at multicorewareinc.com
deepthidevaki at multicorewareinc.com
Tue Jul 16 10:58:02 CEST 2013
# HG changeset patch
# User Deepthi Devaki
# Date 1373959114 -19800
# Node ID 9c5fc824649816f4c54b769e04ee6071cdb70d5d
# Parent c9bb72e8cb8effc0d1d0e99f0b9abc8d341c652a
Vectorized WeightUni
diff -r c9bb72e8cb8e -r 9c5fc8246498 source/common/vec/pixel.inc
--- a/source/common/vec/pixel.inc Mon Jul 15 23:41:11 2013 -0500
+++ b/source/common/vec/pixel.inc Tue Jul 16 12:48:34 2013 +0530
@@ -382,6 +382,7 @@
p.transpose[2] = transpose16;
p.transpose[3] = transpose32;
p.transpose[4] = transpose<64>;
+ p.weightpUni = weightUnidir;
#endif
}
}
diff -r c9bb72e8cb8e -r 9c5fc8246498 source/common/vec/pixel8.inc
--- a/source/common/vec/pixel8.inc Mon Jul 15 23:41:11 2013 -0500
+++ b/source/common/vec/pixel8.inc Tue Jul 16 12:48:34 2013 +0530
@@ -26,6 +26,9 @@
// Vector class versions of pixel comparison performance primitives
/* intrinsics for when pixel type is uint8_t */
+#define IF_INTERNAL_PREC 14 ///< Number of bits of internal (intermediate) sample precision
+#define IF_INTERNAL_OFFS (1 << (IF_INTERNAL_PREC - 1)) ///< Internal offset, 1 << 13 = 8192; weightUnidir adds it to every source sample before weighting
+
#if defined(_MSC_VER)
#undef ALWAYSINLINE
#define ALWAYSINLINE __forceinline
@@ -1935,6 +1938,32 @@
}
}
+// Uni-directional weighted prediction over a width x height block.
+// Every sample is computed as
+//     ((w0 * (src + IF_INTERNAL_OFFS) + round) >> shift) + offset
+// and narrowed to the pixel type via compress_saturated followed by compress_unsafe.
+//   src, dst             first row of the input / output block
+//   srcStride, dstStride added to src / dst after each row
+//   width, height        block dimensions in samples
+//   w0, round, shift, offset  weighting parameters used in the formula above
+//   bitDepth             not read by this implementation
+// NOTE(review): the tail branch always stores exactly 2 pixels, so it presumably
+// relies on width being even -- confirm against callers.
+void weightUnidir(short *src, pixel *dst, int srcStride, int dstStride, int width, int height, int w0, int round, int shift, int offset, int bitDepth)
+{
+    Vec8s packed;
+    Vec4i widened, weighted;
+    Vec4i weight(w0);
+    Vec4i internalOffset(IF_INTERNAL_OFFS);
+    Vec4i roundTerm(round);
+    Vec4i outOffset(offset);
+
+    // The row index is never used inside the loop, so rows are simply counted.
+    for (int row = 0; row < height; row++)
+    {
+        int col = 0;
+
+        // Main part of the row: advance four samples per iteration.
+        while (col <= width - 4)
+        {
+            packed = load_partial(const_int(8), src + col);
+            widened = extend_low(packed);
+            weighted = ((weight * (widened + internalOffset) + roundTerm) >> shift) + outOffset;
+            store_partial(const_int(4), dst + col, compress_unsafe(compress_saturated(weighted, weighted), 0));
+            col += 4;
+        }
+
+        // Leftover samples when width is not a multiple of four.
+        if (col < width)
+        {
+            packed = load_partial(const_int(4), src + col);
+            widened = extend_low(packed);
+            weighted = ((weight * (widened + internalOffset) + roundTerm) >> shift) + outOffset;
+            compress_unsafe(compress_saturated(weighted, weighted), 0).store_partial(2, dst + col);
+        }
+
+        src += srcStride;
+        dst += dstStride;
+    }
+}
+
#if INSTRSET >= 8
template<int size>
ALWAYSINLINE void unrollFunc_32_avx2(pixel *fenc, intptr_t fencstride, pixel *fref, intptr_t frefstride, Vec16us& sad)
diff -r c9bb72e8cb8e -r 9c5fc8246498 source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp Mon Jul 15 23:41:11 2013 -0500
+++ b/source/test/pixelharness.cpp Tue Jul 16 12:48:34 2013 +0530
@@ -333,7 +333,7 @@
memset(ref_dest, 0, 64 * 64 * sizeof(pixel));
memset(opt_dest, 0, 64 * 64 * sizeof(pixel));
int j = 0;
- int width = 8;
+ int width = 2 * (rand() % 32); // even width in [0, 62]; reduce before doubling so 2 * rand() cannot overflow int when RAND_MAX == INT_MAX
int height = 8;
int w0 = rand() % 256;
int shift = rand() % 12;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: xhevc_deepthid-1.patch
Type: text/x-patch
Size: 2810 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20130716/3f9a2086/attachment.bin>
More information about the x265-devel mailing list