<div dir="ltr"><div>Hi,</div><div><br></div><div>This regression has been picked up by the Ubuntu 18.04 team and is therefore affecting a large number of people. On a machine with an i7-8700K, you cannot even decently play a Full HD video using vaapi (over half of the frames are dropped). The proposed fix restores the expected performance (low CPU usage and no frame drops for 4K60 HDR videos using vaapi).<br></div><div><br></div><div>If there is anything wrong with my patch, please tell me and I'll fix it.</div><div><br></div><div>Quentin C.<br></div></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">Le ven. 1 mars 2019 à 23:36, Quentin Chateau <<a href="mailto:quentin.chateau@gmail.com">quentin.chateau@gmail.com</a>> a écrit :<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">regression 09d421a20851e1c49aa98e117957dd118620fae4<br>
---<br>
modules/video_chroma/copy.c | 8 ++++----<br>
1 file changed, 4 insertions(+), 4 deletions(-)<br>
<br>
diff --git a/modules/video_chroma/copy.c b/modules/video_chroma/copy.c<br>
index e9250b948e..51498f4a06 100644<br>
--- a/modules/video_chroma/copy.c<br>
+++ b/modules/video_chroma/copy.c<br>
@@ -468,7 +468,7 @@ static void SSE_CopyPlane(uint8_t *dst, size_t dst_pitch,<br>
const size_t copy_pitch = __MIN(src_pitch, dst_pitch);<br>
const unsigned w16 = (copy_pitch+15) & ~15;<br>
const unsigned hstep = cache_size / w16;<br>
- const unsigned cache_width = __MIN(src_pitch, hstep);<br>
+ const unsigned cache_width = __MIN(src_pitch, cache_size);<br>
assert(hstep > 0);<br>
<br>
/* If SSE4.1: CopyFromUswc is faster than memcpy */<br>
@@ -501,8 +501,8 @@ SSE_InterleavePlanes(uint8_t *dst, size_t dst_pitch,<br>
size_t copy_pitch = __MIN(dst_pitch / 2, srcu_pitch);<br>
unsigned int const w16 = (srcu_pitch+15) & ~15;<br>
unsigned int const hstep = (cache_size) / (2*w16);<br>
- const unsigned cacheu_width = __MIN(srcu_pitch, hstep);<br>
- const unsigned cachev_width = __MIN(srcv_pitch, hstep);<br>
+ const unsigned cacheu_width = __MIN(srcu_pitch, cache_size);<br>
+ const unsigned cachev_width = __MIN(srcv_pitch, cache_size);<br>
assert(hstep > 0);<br>
<br>
for (unsigned int y = 0; y < height; y += hstep)<br>
@@ -535,7 +535,7 @@ static void SSE_SplitPlanes(uint8_t *dstu, size_t dstu_pitch,<br>
size_t copy_pitch = __MIN(__MIN(src_pitch / 2, dstu_pitch), dstv_pitch);<br>
const unsigned w16 = (src_pitch+15) & ~15;<br>
const unsigned hstep = cache_size / w16;<br>
- const unsigned cache_width = __MIN(src_pitch, hstep);<br>
+ const unsigned cache_width = __MIN(src_pitch, cache_size);<br>
assert(hstep > 0);<br>
<br>
for (unsigned y = 0; y < height; y += hstep) {<br>
-- <br>
2.19.1<br>
<br>
</blockquote></div>