[x265] [PATCH] Add VMAF support to report per frame and aggregate VMAF score

indumathi at multicorewareinc.com indumathi at multicorewareinc.com
Thu Apr 12 13:13:59 CEST 2018


# HG changeset patch
# User IndumathiR<indumathi at multicorewareinc.com>
# Date 1518528290 -19800
#      Tue Feb 13 18:54:50 2018 +0530
# Node ID 27e3b161cd8b59ad1cae67a96e11e3e0506d5017
# Parent  04a337abd70de269cef7d9655365f3a3ebde02aa
Add VMAF support to report per frame and aggregate VMAF score

diff -r 04a337abd70d -r 27e3b161cd8b doc/reST/api.rst
--- a/doc/reST/api.rst	Thu Apr 12 15:10:59 2018 +0530
+++ b/doc/reST/api.rst	Tue Feb 13 18:54:50 2018 +0530
@@ -398,7 +398,30 @@
 	 *     release library static allocations, reset configured CTU size */
 	void x265_cleanup(void);
 
+VMAF (Video Multi-Method Assessment Fusion)
+===========================================
 
+If you set the ENABLE_LIBVMAF cmake option to ON, then x265 will report the per-frame
+and aggregate VMAF scores for the given input and write the scores to a CSV file.
+The user also needs to specify :option:`--recon` on the command line to get the VMAF scores::
+ 
+    /* x265_calculate_vmafscore:
+     *    returns VMAF score for the input video.
+     *    This api must be called only after encoding was done. */
+    double x265_calculate_vmafscore(x265_param*, x265_vmaf_data*);
+
+    /* x265_calculate_vmaf_framelevelscore:
+     *    returns VMAF score for each frame in a given input video. */
+    double x265_calculate_vmaf_framelevelscore(x265_vmaf_framedata*);
+    
+.. Note::
+
+    When setting ENABLE_LIBVMAF cmake option to ON, it is recommended to
+    also set ENABLE_SHARED to OFF to prevent build problems.  
+    We only need the static library from these builds.
+    
+    Binaries built on Windows will not have VMAF support.
+      
 Multi-library Interface
 =======================
 
diff -r 04a337abd70d -r 27e3b161cd8b source/CMakeLists.txt
--- a/source/CMakeLists.txt	Thu Apr 12 15:10:59 2018 +0530
+++ b/source/CMakeLists.txt	Tue Feb 13 18:54:50 2018 +0530
@@ -29,7 +29,7 @@
 option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
 mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
 # X265_BUILD must be incremented each time the public API is changed
-set(X265_BUILD 157)
+set(X265_BUILD 158)
 configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
                "${PROJECT_BINARY_DIR}/x265.def")
 configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
@@ -109,6 +109,19 @@
     if(NO_ATOMICS)
         add_definitions(-DNO_ATOMICS=1)
     endif(NO_ATOMICS)
+    # Optional VMAF support (UNIX only): links libvmaf into x265-static and
+    # compiles the VMAF API and CSV columns in through the ENABLE_LIBVMAF define.
+    option(ENABLE_LIBVMAF "Enable VMAF" OFF)
+    if(ENABLE_LIBVMAF)
+        # Search only when requested, and stop at configure time instead of
+        # handing VMAF-NOTFOUND to target_link_libraries() later on.
+        find_library(VMAF vmaf)
+        if(NOT VMAF)
+            message(FATAL_ERROR "ENABLE_LIBVMAF is ON but libvmaf was not found; install libvmaf or set VMAF to the library path")
+        endif()
+        add_definitions(-DENABLE_LIBVMAF)
+    endif()
 endif(UNIX)
 
 if(X64 AND NOT WIN32)
@@ -536,6 +541,9 @@
 if(EXTRA_LIB)
     target_link_libraries(x265-static ${EXTRA_LIB})
 endif()
+if(ENABLE_LIBVMAF)
+    target_link_libraries(x265-static ${VMAF})
+endif()
 install(TARGETS x265-static
     LIBRARY DESTINATION ${LIB_INSTALL_DIR}
     ARCHIVE DESTINATION ${LIB_INSTALL_DIR})
diff -r 04a337abd70d -r 27e3b161cd8b source/common/picyuv.h
--- a/source/common/picyuv.h	Thu Apr 12 15:10:59 2018 +0530
+++ b/source/common/picyuv.h	Tue Feb 13 18:54:50 2018 +0530
@@ -72,6 +72,7 @@
     pixel   m_maxChromaVLevel;
     pixel   m_minChromaVLevel;
     double  m_avgChromaVLevel;
+    double  m_vmafScore;
     x265_param *m_param;
 
     PicYuv();
diff -r 04a337abd70d -r 27e3b161cd8b source/encoder/api.cpp
--- a/source/encoder/api.cpp	Thu Apr 12 15:10:59 2018 +0530
+++ b/source/encoder/api.cpp	Tue Feb 13 18:54:50 2018 +0530
@@ -31,6 +31,10 @@
 #include "nal.h"
 #include "bitcost.h"
 
+#if ENABLE_LIBVMAF
+#include "libvmaf.h"
+#endif
+
 /* multilib namespace reflectors */
 #if LINKED_8BIT
 namespace x265_8bit {
@@ -302,13 +306,45 @@
         encoder->fetchStats(outputStats, statsSizeBytes);
     }
 }
+#if ENABLE_LIBVMAF
+/* Write the end-of-encode summary line to the CSV file, including the
+ * aggregate VMAF score computed from the two files held in vmafdata.
+ * When enc is non-NULL, both files are closed and vmafdata is freed here.
+ * The score is reported as 0 when param, vmafdata or either file is NULL. */
+void x265_vmaf_encoder_log(x265_encoder* enc, int argc, char **argv, x265_param *param, x265_vmaf_data *vmafdata)
+{
+    if (enc)
+    {
+        Encoder *encoder = static_cast<Encoder*>(enc);
+        x265_stats stats;
+        stats.aggregateVmafScore = 0;
+        if (vmafdata)
+        {
+            /* validate before use: the score is computed by reading both files */
+            if (param && vmafdata->reference_file && vmafdata->distorted_file)
+                stats.aggregateVmafScore = x265_calculate_vmafscore(param, vmafdata);
+            else
+                x265_log(NULL, X265_LOG_ERROR, "unable to compute aggregate VMAF score: missing param or unopened file\n");
+            if (vmafdata->reference_file)
+                fclose(vmafdata->reference_file);
+            if (vmafdata->distorted_file)
+                fclose(vmafdata->distorted_file);
+            x265_free(vmafdata);
+        }
+        encoder->fetchStats(&stats, sizeof(stats));
+        int padx = encoder->m_sps.conformanceWindow.rightOffset;
+        int pady = encoder->m_sps.conformanceWindow.bottomOffset;
+        x265_csvlog_encode(encoder->m_param, &stats, padx, pady, argc, argv);
+    }
+}
+#endif
 
 void x265_encoder_log(x265_encoder* enc, int argc, char **argv)
 {
     if (enc)
     {
         Encoder *encoder = static_cast<Encoder*>(enc);
-        x265_stats stats;
+        x265_stats stats;       
         encoder->fetchStats(&stats, sizeof(stats));
         int padx = encoder->m_sps.conformanceWindow.rightOffset;
         int pady = encoder->m_sps.conformanceWindow.bottomOffset;
@@ -457,7 +482,13 @@
     &x265_csvlog_frame,
     &x265_csvlog_encode,
     &x265_dither_image,
-    &x265_set_analysis_data
+    &x265_set_analysis_data,
+#if ENABLE_LIBVMAF
+    &x265_calculate_vmafscore,
+    &x265_calculate_vmaf_framelevelscore,
+    &x265_vmaf_encoder_log
+#endif
+
 };
 
 typedef const x265_api* (*api_get_func)(int bitDepth);
@@ -751,6 +782,9 @@
                     /* detailed performance statistics */
                     fprintf(csvfp, ", DecideWait (ms), Row0Wait (ms), Wall time (ms), Ref Wait Wall (ms), Total CTU time (ms),"
                         "Stall Time (ms), Total frame time (ms), Avg WPP, Row Blocks");
+#if ENABLE_LIBVMAF
+                    fprintf(csvfp, ", VMAF Frame Score");
+#endif
                 }
                 fprintf(csvfp, "\n");
             }
@@ -759,6 +793,9 @@
                 fputs(summaryCSVHeader, csvfp);
                 if (param->csvLogLevel >= 2 || param->maxCLL || param->maxFALL)
                     fputs("MaxCLL, MaxFALL,", csvfp);
+#if ENABLE_LIBVMAF
+                fputs(" Aggregate VMAF Score,", csvfp);
+#endif
                 fputs(" Version\n", csvfp);
             }
         }
@@ -868,6 +905,9 @@
                                                                                      frameStats->totalFrameTime);
 
         fprintf(param->csvfpt, " %.3lf, %d", frameStats->avgWPP, frameStats->countRowBlocks);
+#if ENABLE_LIBVMAF
+        fprintf(param->csvfpt, ", %lf", frameStats->vmafFrameScore);
+#endif
     }
     fprintf(param->csvfpt, "\n");
     fflush(stderr);
@@ -886,7 +926,11 @@
             fputs(summaryCSVHeader, p->csvfpt);
             if (p->csvLogLevel >= 2 || p->maxCLL || p->maxFALL)
                 fputs("MaxCLL, MaxFALL,", p->csvfpt);
+#if ENABLE_LIBVMAF
+            fputs(" Aggregate VMAF Score,", p->csvfpt); /* same column title as the other summary header */
+#endif
             fputs(" Version\n",p->csvfpt);
+
         }
         // CLI arguments or other
         if (argc)
@@ -919,7 +963,6 @@
         char buffer[200];
         strftime(buffer, 128, "%c", timeinfo);
         fprintf(p->csvfpt, ", %s, ", buffer);
-
         // elapsed time, fps, bitrate
         fprintf(p->csvfpt, "%.2f, %.2f, %.2f,",
             stats->elapsedEncodeTime, stats->encodedPictureCount / stats->elapsedEncodeTime, stats->bitrate);
@@ -981,7 +1024,11 @@
             fprintf(p->csvfpt, " -, -, -, -, -, -, -,");
         if (p->csvLogLevel >= 2 || p->maxCLL || p->maxFALL)
             fprintf(p->csvfpt, " %-6u, %-6u,", stats->maxCLL, stats->maxFALL);
+#if ENABLE_LIBVMAF
+        fprintf(p->csvfpt, " %lf,", stats->aggregateVmafScore);
+#endif
         fprintf(p->csvfpt, " %s\n", api->version_str);
+
     }
 }
 
@@ -1072,4 +1119,318 @@
     }
 }
 
+#if ENABLE_LIBVMAF
+/* Read y values of single frame for 8-bit input: height rows of width bytes are read
+ * from file and stored in buf as floats; stride is the byte distance between rows of buf.
+ * Returns 0 on success, 1 on bad dimensions, allocation failure or a short read. */
+int read_image_byte(FILE *file, float *buf, int width, int height, int stride)
+{
+    char *byte_ptr = (char *)buf;
+    unsigned char *tmp_buf = 0;
+    int i, j;
+    int ret = 1;
+    if (width <= 0 || height <= 0)
+    {
+        goto fail_or_end;
+    }
+
+    if (!(tmp_buf = (unsigned char*)malloc(width)))
+    {
+        goto fail_or_end;
+    }
+    /* one file row per iteration, staged in tmp_buf */
+    for (i = 0; i < height; ++i)
+    {
+        float *row_ptr = (float *)byte_ptr;
+        if (fread(tmp_buf, 1, width, file) != (size_t)width)
+        {
+            goto fail_or_end;
+        }
+        /* widen each byte sample to float */
+        for (j = 0; j < width; ++j)
+        {
+            row_ptr[j] = tmp_buf[j];
+        }
+
+        byte_ptr += stride;
+    }
+
+    ret = 0;
+    /* tmp_buf is still 0 when the dimension check or malloc failed */
+fail_or_end:
+    free(tmp_buf);
+    return ret;
+}
+/* Read y values of single frame for 10-bit input: height rows of width 2-byte words are read
+ * from file, divided by 4 and stored in buf as floats; stride is the byte distance between rows.
+ * Returns 0 on success, 1 on bad dimensions, allocation failure or a short read. */
+int read_image_word(FILE *file, float *buf, int width, int height, int stride)
+{
+    char *byte_ptr = (char *)buf;
+    unsigned short *tmp_buf = 0;
+    int i, j;
+    int ret = 1;
+    if (width <= 0 || height <= 0)
+    {
+        goto fail_or_end;
+    }
+
+    if (!(tmp_buf = (unsigned short*)malloc(width * 2))) // '*2' to accommodate words
+    {
+        goto fail_or_end;
+    }
+    /* one file row per iteration, staged in tmp_buf */
+    for (i = 0; i < height; ++i)
+    {
+        float *row_ptr = (float *)byte_ptr;
+        if (fread(tmp_buf, 2, width, file) != (size_t)width) // '2' for word
+        {
+            goto fail_or_end;
+        }
+        /* scale each word sample and store it as float */
+        for (j = 0; j < width; ++j)
+        {
+            row_ptr[j] = tmp_buf[j] / 4.0; // '/4' to convert from 10 to 8-bit
+        }
+
+        byte_ptr += stride;
+    }
+
+    ret = 0;
+    /* tmp_buf is still 0 when the dimension check or malloc failed */
+fail_or_end:
+    free(tmp_buf);
+    return ret;
+}
+
+/* Frame callback passed to compute_vmaf(): reads the next y plane of the reference and distorted files
+ * and reads past their u and v.  Returns 0 on success, 2 at end of file, 1 on a y read error or bad bit depth */
+int read_frame(float *reference_data, float *distorted_data, float *temp_data, int stride_byte, void *s)
+{
+    x265_vmaf_data *user_data = (x265_vmaf_data *)s;
+    int ret;
+    // read reference y
+    if (user_data->internalBitDepth == 8)
+    {
+        ret = read_image_byte(user_data->reference_file, reference_data, user_data->width, user_data->height, stride_byte);
+    }
+    else if (user_data->internalBitDepth == 10)
+    {
+        ret = read_image_word(user_data->reference_file, reference_data, user_data->width, user_data->height, stride_byte);
+    }
+    else
+    {
+        x265_log(NULL, X265_LOG_ERROR, "Invalid bitdepth\n");
+        return 1;
+    }
+    if (ret)
+    {
+        if (feof(user_data->reference_file))
+        {
+            ret = 2; // OK if end of file
+        }
+        return ret;
+    }
+
+    // read distorted y
+    if (user_data->internalBitDepth == 8)
+    {
+        ret = read_image_byte(user_data->distorted_file, distorted_data, user_data->width, user_data->height, stride_byte);
+    }
+    else if (user_data->internalBitDepth == 10)
+    {
+        ret = read_image_word(user_data->distorted_file, distorted_data, user_data->width, user_data->height, stride_byte);
+    }
+    else
+    {
+        x265_log(NULL, X265_LOG_ERROR, "Invalid bitdepth\n");
+        return 1;
+    }
+    if (ret)
+    {
+        if (feof(user_data->distorted_file))
+        {
+            ret = 2; // OK if end of file
+        }
+        return ret;
+    }
+
+    // reference skip u and v (offset samples are read into temp_data and not used)
+    if (user_data->internalBitDepth == 8)
+    {
+        if (fread(temp_data, 1, user_data->offset, user_data->reference_file) != (size_t)user_data->offset)
+        {
+            x265_log(NULL, X265_LOG_ERROR, "reference fread to skip u and v failed.\n");
+            goto fail_or_end;
+        }
+    }
+    else if (user_data->internalBitDepth == 10)
+    {
+        if (fread(temp_data, 2, user_data->offset, user_data->reference_file) != (size_t)user_data->offset)
+        {
+            x265_log(NULL, X265_LOG_ERROR, "reference fread to skip u and v failed.\n");
+            goto fail_or_end;
+        }
+    }
+    else
+    {
+        x265_log(NULL, X265_LOG_ERROR, "Invalid format\n");
+        goto fail_or_end;
+    }
+
+    // distorted skip u and v (offset samples are read into temp_data and not used)
+    if (user_data->internalBitDepth == 8)
+    {
+        if (fread(temp_data, 1, user_data->offset, user_data->distorted_file) != (size_t)user_data->offset)
+        {
+            x265_log(NULL, X265_LOG_ERROR, "distorted fread to skip u and v failed.\n");
+            goto fail_or_end;
+        }
+    }
+    else if (user_data->internalBitDepth == 10)
+    {
+        if (fread(temp_data, 2, user_data->offset, user_data->distorted_file) != (size_t)user_data->offset)
+        {
+            x265_log(NULL, X265_LOG_ERROR, "distorted fread to skip u and v failed.\n");
+            goto fail_or_end;
+        }
+    }
+    else
+    {
+        x265_log(NULL, X265_LOG_ERROR, "Invalid format\n");
+        goto fail_or_end;
+    }
+    // NOTE(review): ret is 0 on every path that reaches here, so a failed u/v skip still returns 0 - confirm intended
+fail_or_end:
+    return ret;
+}
+
+/* Aggregate VMAF of data->distorted_file against data->reference_file; returns 0 for an unsupported csp */
+double x265_calculate_vmafscore(x265_param *param, x265_vmaf_data *data)
+{
+    double score = 0; /* defined result even if compute_vmaf() never stores a score */
+    data->width = param->sourceWidth;
+    data->height = param->sourceHeight;
+    data->internalBitDepth = param->internalBitDepth;
+    if (param->internalCsp == X265_CSP_I420)
+    {
+        if ((param->sourceWidth * param->sourceHeight) % 2 != 0)
+            x265_log(NULL, X265_LOG_ERROR, "Invalid file size\n");
+        data->offset = param->sourceWidth * param->sourceHeight / 2; /* u+v samples read_frame skips per frame */
+    }
+    else if (param->internalCsp == X265_CSP_I422)
+        data->offset = param->sourceWidth * param->sourceHeight;
+    else if (param->internalCsp == X265_CSP_I444)
+        data->offset = param->sourceWidth * param->sourceHeight * 2;
+    else
+    {
+        x265_log(NULL, X265_LOG_ERROR, "Invalid format\n");
+        return score; /* data->offset was never set: do not read frames with an undefined skip size */
+    }
+    compute_vmaf(&score, vcd->format, data->width, data->height, read_frame, data, vcd->model_path, vcd->log_path, vcd->log_fmt, vcd->disable_clip, vcd->disable_avx, vcd->enable_transform, vcd->phone_model, vcd->psnr, vcd->ssim, vcd->ms_ssim, vcd->pool);
+    return score;
+}
+
+/* Frame callback used for the frame level score when pixels are read as uint16_t: the first call
+ * copies the plane at m_picOrg[0] of the reference and distorted PicYuv into the float buffers
+ * (each value divided by 4) and returns 0; every later call returns 2. */
+int read_frame_10bit(float *reference_data, float *distorted_data, float *temp_data, int stride, void *s)
+{
+    x265_vmaf_framedata *user_data = (x265_vmaf_framedata *)s;
+    PicYuv *reference_frame = (PicYuv *)user_data->reference_frame;
+    PicYuv *distorted_frame = (PicYuv *)user_data->distorted_frame;
+
+    if(!user_data->frame_set) {
+        int reference_stride = reference_frame->m_stride;
+        int distorted_stride = distorted_frame->m_stride;
+
+        const uint16_t *reference_ptr = (const uint16_t *)reference_frame->m_picOrg[0]; 
+        const uint16_t *distorted_ptr = (const uint16_t *)distorted_frame->m_picOrg[0];
+        /* temp_data is reused as the output row cursor; its incoming value is not read */
+        temp_data = reference_data;
+
+        int height = user_data->height;
+        int width = user_data->width; 
+        int i,j;
+        for (i = 0; i < height; i++) {
+            for ( j = 0; j < width; j++) {
+                temp_data[j] = ((float)reference_ptr[j] / 4.0);
+            }
+            reference_ptr += reference_stride;
+            temp_data += stride / sizeof(*temp_data); /* stride is divided by sizeof(float) to advance one row */
+        }
+        
+        temp_data = distorted_data;
+        for (i = 0; i < height; i++) {
+            for (j = 0; j < width; j++) {
+                 temp_data[j] = ((float)distorted_ptr[j] / 4.0);
+            }
+            distorted_ptr += distorted_stride;
+            temp_data += stride / sizeof(*temp_data);
+        }
+        /* remember that the frame was delivered so the next call returns 2 */
+        user_data->frame_set = 1;
+        return 0;
+    }                                                             
+    return 2;                                                               
+}
+
+/* Frame callback used for the frame level score when pixels are read as uint8_t: the first call
+ * copies the plane at m_picOrg[0] of the reference and distorted PicYuv into the float buffers
+ * and returns 0; every later call returns 2. */
+int read_frame_8bit(float *reference_data, float *distorted_data, float *temp_data, int stride, void *s)
+{
+    x265_vmaf_framedata *user_data = (x265_vmaf_framedata *)s;
+    PicYuv *reference_frame = (PicYuv *)user_data->reference_frame;
+    PicYuv *distorted_frame = (PicYuv *)user_data->distorted_frame;
+
+    if(!user_data->frame_set) {
+        int reference_stride = reference_frame->m_stride;
+        int distorted_stride = distorted_frame->m_stride;
+
+        const uint8_t *reference_ptr = (const uint8_t *)reference_frame->m_picOrg[0]; 
+        const uint8_t *distorted_ptr = (const uint8_t *)distorted_frame->m_picOrg[0];
+        /* temp_data is reused as the output row cursor; its incoming value is not read */
+        temp_data = reference_data;
+
+        int height = user_data->height;
+        int width = user_data->width; 
+        int i,j;
+        for (i = 0; i < height; i++) {
+            for ( j = 0; j < width; j++) {
+                temp_data[j] = (float)reference_ptr[j];
+            }
+            reference_ptr += reference_stride;
+            temp_data += stride / sizeof(*temp_data); /* stride is divided by sizeof(float) to advance one row */
+        }
+        
+        temp_data = distorted_data;
+        for (i = 0; i < height; i++) {
+            for (j = 0; j < width; j++) {
+                 temp_data[j] = (float)distorted_ptr[j];
+            }
+            distorted_ptr += distorted_stride;
+            temp_data += stride / sizeof(*temp_data);
+        }
+        /* remember that the frame was delivered so the next call returns 2 */
+        user_data->frame_set = 1;
+        return 0;
+    }                                                             
+    return 2;                                                               
+}
+
+/* VMAF score of one frame: vmafframedata->distorted_frame against vmafframedata->reference_frame.
+ * Any internalBitDepth other than 8 is read through read_frame_10bit. */
+double x265_calculate_vmaf_framelevelscore(x265_vmaf_framedata *vmafframedata)
+{
+    double score = 0; /* defined result even if compute_vmaf() never stores a score */
+    int (*read_frame)(float *reference_data, float *distorted_data, float *temp_data, int stride, void *s);
+    if (vmafframedata->internalBitDepth == 8)
+        read_frame = read_frame_8bit;
+    else
+        read_frame = read_frame_10bit;
+    compute_vmaf(&score, vcd->format, vmafframedata->width, vmafframedata->height, read_frame, vmafframedata, vcd->model_path, vcd->log_path, vcd->log_fmt, vcd->disable_clip, vcd->disable_avx, vcd->enable_transform, vcd->phone_model, vcd->psnr, vcd->ssim, vcd->ms_ssim, vcd->pool);
+    return score;
+}
+#endif
 } /* end namespace or extern "C" */
diff -r 04a337abd70d -r 27e3b161cd8b source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Thu Apr 12 15:10:59 2018 +0530
+++ b/source/encoder/encoder.cpp	Tue Feb 13 18:54:50 2018 +0530
@@ -2127,6 +2127,9 @@
 #define ELAPSED_MSEC(start, end) (((double)(end) - (start)) / 1000)
         if (m_param->csvLogLevel >= 2)
         {
+#if ENABLE_LIBVMAF
+            frameStats->vmafFrameScore = curFrame->m_fencPic->m_vmafScore;
+#endif
             frameStats->decideWaitTime = ELAPSED_MSEC(0, curEncoder->m_slicetypeWaitTime);
             frameStats->row0WaitTime = ELAPSED_MSEC(curEncoder->m_startCompressTime, curEncoder->m_row0WaitTime);
             frameStats->wallTime = ELAPSED_MSEC(curEncoder->m_row0WaitTime, curEncoder->m_endCompressTime);
diff -r 04a337abd70d -r 27e3b161cd8b source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Thu Apr 12 15:10:59 2018 +0530
+++ b/source/encoder/frameencoder.cpp	Tue Feb 13 18:54:50 2018 +0530
@@ -864,6 +864,9 @@
                 m_frameFilter.processRow(i - m_filterRowDelay);
         }
     }
+#if ENABLE_LIBVMAF
+    vmafFrameLevelScore();
+#endif
 
     if (m_param->maxSlices > 1)
     {
@@ -932,7 +935,7 @@
                 updateChecksum(reconPic->m_picOrg[1], m_checksum[1], height, width, stride, 0, cuHeight);
                 updateChecksum(reconPic->m_picOrg[2], m_checksum[2], height, width, stride, 0, cuHeight);
             }
-        }
+        }  
     } // end of (m_param->maxSlices > 1)
 
     if (m_param->rc.bStatWrite)
@@ -1189,7 +1192,7 @@
         m_cuStats.accumulate(m_tld[i].analysis.m_stats[m_jpId], *m_param);
 #endif
 
-    m_endFrameTime = x265_mdate();
+    m_endFrameTime = x265_mdate();  
 }
 
 void FrameEncoder::encodeSlice(uint32_t sliceAddr)
@@ -2058,11 +2061,36 @@
         m_nr->nrOffsetDenoise[cat][0] = 0;
     }
 }
+#if ENABLE_LIBVMAF
+/* Score the current frame's reconstructed picture against its source picture with VMAF and
+ * store the result in the source picture's m_vmafScore (0 when the allocation fails). */
+void FrameEncoder::vmafFrameLevelScore()
+{
+    PicYuv *fenc = m_frame->m_fencPic;
+    PicYuv *recon = m_frame->m_reconPic;
+    x265_vmaf_framedata *vmafframedata = (x265_vmaf_framedata*)x265_malloc(sizeof(x265_vmaf_framedata));
+    if (!vmafframedata)
+    {
+        x265_log(NULL, X265_LOG_ERROR, "vmaf frame data alloc failed\n");
+        fenc->m_vmafScore = 0;
+        return; /* nothing to fill in or score: do not dereference the NULL pointer */
+    }
+    vmafframedata->height = fenc->m_picHeight;
+    vmafframedata->width = fenc->m_picWidth;
+    vmafframedata->frame_set = 0; /* the read callback has not delivered this frame yet */
+    vmafframedata->internalBitDepth = m_param->internalBitDepth;
+    vmafframedata->reference_frame = fenc;
+    vmafframedata->distorted_frame = recon;
+
+    fenc->m_vmafScore = x265_calculate_vmaf_framelevelscore(vmafframedata);
+    x265_free(vmafframedata);
+}
+#endif
 
 Frame *FrameEncoder::getEncodedPicture(NALList& output)
 {
     if (m_frame)
-    {
+    {    
         /* block here until worker thread completes */
         m_done.wait();
 
diff -r 04a337abd70d -r 27e3b161cd8b source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h	Thu Apr 12 15:10:59 2018 +0530
+++ b/source/encoder/frameencoder.h	Tue Feb 13 18:54:50 2018 +0530
@@ -240,6 +240,9 @@
     void enqueueRowFilter(int row)  { WaveFront::enqueueRow(row * 2 + 1); }
     void enableRowEncoder(int row)  { WaveFront::enableRow(row * 2 + 0); }
     void enableRowFilter(int row)   { WaveFront::enableRow(row * 2 + 1); }
+#if ENABLE_LIBVMAF
+    void vmafFrameLevelScore();
+#endif
 };
 }
 
diff -r 04a337abd70d -r 27e3b161cd8b source/x265.cpp
--- a/source/x265.cpp	Thu Apr 12 15:10:59 2018 +0530
+++ b/source/x265.cpp	Tue Feb 13 18:54:50 2018 +0530
@@ -75,6 +75,7 @@
     const char* reconPlayCmd;
     const x265_api* api;
     x265_param* param;
+    x265_vmaf_data* vmafData;
     bool bProgress;
     bool bForceY4m;
     bool bDither;
@@ -96,6 +97,7 @@
         reconPlayCmd = NULL;
         api = NULL;
         param = NULL;
+        vmafData = NULL;
         framesToBeEncoded = seek = 0;
         totalbytes = 0;
         bProgress = true;
@@ -216,6 +218,14 @@
         x265_log(NULL, X265_LOG_ERROR, "param alloc failed\n");
         return true;
     }
+#if ENABLE_LIBVMAF
+    vmafData = (x265_vmaf_data*)x265_malloc(sizeof(x265_vmaf_data));
+    if(!vmafData)
+    {
+        x265_log(NULL, X265_LOG_ERROR, "vmaf data alloc failed\n");
+        return true;
+    }
+#endif
 
     if (api->param_default_preset(param, preset, tune) < 0)
     {
@@ -363,6 +373,7 @@
     info.frameCount = 0;
     getParamAspectRatio(param, info.sarWidth, info.sarHeight);
 
+
     this->input = InputFile::open(info, this->bForceY4m);
     if (!this->input || this->input->isFail())
     {
@@ -439,7 +450,33 @@
                     param->sourceWidth, param->sourceHeight, param->fpsNum, param->fpsDenom,
                     x265_source_csp_names[param->internalCsp]);
     }
+#if ENABLE_LIBVMAF
+    /* The aggregate VMAF score is computed by reading the input file and the recon
+     * file back, so a recon file must be named and the input must not be y4m. */
+    if (!reconfn)
+    {
+        x265_log(param, X265_LOG_ERROR, "recon file must be specified to get VMAF score, try --help for help\n");
+        return true;
+    }
+    const char *str = strrchr(info.filename, '.');
 
+    /* strrchr() returns NULL when the file name has no '.', so test it before strcmp() */
+    if (str && !strcmp(str, ".y4m"))
+    {
+        x265_log(param, X265_LOG_ERROR, "VMAF supports YUV file format only.\n");
+        return true;
+    }
+    if(param->internalCsp == X265_CSP_I420 || param->internalCsp == X265_CSP_I422 || param->internalCsp == X265_CSP_I444)
+    {
+        vmafData->reference_file = x265_fopen(inputfn, "rb");
+        vmafData->distorted_file = x265_fopen(reconfn, "rb");
+    }
+    else
+    {
+        x265_log(param, X265_LOG_ERROR, "VMAF will support only yuv420p, yuv422p, yuv444p, yuv420p10le, yuv422p10le, yuv444p10le formats.\n");
+        return true;
+    }
+#endif
     this->output = OutputFile::open(outputfn, info);
     if (this->output->isFail())
     {
@@ -555,7 +589,9 @@
 
     x265_param* param = cliopt.param;
     const x265_api* api = cliopt.api;
-
+#if ENABLE_LIBVMAF
+    x265_vmaf_data* vmafdata = cliopt.vmafData;
+#endif
     /* This allows muxers to modify bitstream format */
     cliopt.output->setParam(param);
 
@@ -712,7 +748,7 @@
         if (!numEncoded)
             break;
     }
-
+  
     /* clear progress report */
     if (cliopt.bProgress)
         fprintf(stderr, "%*s\r", 80, " ");
@@ -723,7 +759,11 @@
 
     api->encoder_get_stats(encoder, &stats, sizeof(stats));
     if (param->csvfn && !b_ctrl_c)
+#if ENABLE_LIBVMAF
+        api->vmaf_encoder_log(encoder, argc, argv, param, vmafdata);
+#else
         api->encoder_log(encoder, argc, argv);
+#endif
     api->encoder_close(encoder);
 
     int64_t second_largest_pts = 0;
diff -r 04a337abd70d -r 27e3b161cd8b source/x265.h
--- a/source/x265.h	Thu Apr 12 15:10:59 2018 +0530
+++ b/source/x265.h	Tue Feb 13 18:54:50 2018 +0530
@@ -209,6 +209,7 @@
     x265_cu_stats    cuStats;
     x265_pu_stats    puStats;
     double           totalFrameTime;
+    double           vmafFrameScore;
 } x265_frame_stats;
 
 typedef struct x265_ctu_info_t
@@ -536,6 +537,7 @@
     double                elapsedEncodeTime;    /* wall time since encoder was opened */
     double                elapsedVideoTime;     /* encoded picture count / frame rate */
     double                bitrate;              /* accBits / elapsed video time */
+    double                aggregateVmafScore;   /* aggregate VMAF score for input video*/
     uint64_t              accBits;              /* total bits output thus far */
     uint32_t              encodedPictureCount;  /* number of output pictures thus far */
     uint32_t              totalWPFrames;        /* number of uni-directional weighted frames used */
@@ -572,6 +574,47 @@
     float bitrateFactor;
 } x265_zone;
     
+/* data to calculate aggregate VMAF score */
+typedef struct x265_vmaf_data
+{   
+    int width;            /* set from param->sourceWidth by x265_calculate_vmafscore */
+    int height;           /* set from param->sourceHeight by x265_calculate_vmafscore */
+    size_t offset;        /* number of u and v samples read past after each y plane */
+    int internalBitDepth; /* 8 or 10; selects the byte or word file reader */
+    FILE *reference_file; /* FILE pointer for input file */
+    FILE *distorted_file; /* FILE pointer for recon file generated*/
+}x265_vmaf_data;
+
+/* data to calculate frame level VMAF score */
+typedef struct x265_vmaf_framedata
+{
+    int width;
+    int height;
+    int frame_set;         /* 0 until the read callback has delivered the frame, then 1 */
+    int internalBitDepth;  /* 8 selects the 8-bit reader, anything else the 10-bit reader */
+    void *reference_frame; /* points to fenc of particular frame */
+    void *distorted_frame; /* points to recon of particular frame */
+}x265_vmaf_framedata;
+
+/* common data needed to calculate both frame level and video level VMAF scores;
+ * every field is passed unchanged as an argument to compute_vmaf() */
+typedef struct x265_vmaf_commondata
+{
+    char *format;
+    char *model_path;
+    char *log_path;
+    char *log_fmt;
+    int disable_clip;
+    int disable_avx;
+    int enable_transform;
+    int phone_model;
+    int psnr;
+    int ssim;
+    int ms_ssim;
+    char *pool;
+}x265_vmaf_commondata;
+/* defaults used by both score functions; only model_path is set. NOTE(review): absolute path is hard-coded */
+static const x265_vmaf_commondata vcd[] = {NULL, (char *)"/usr/local/share/model/vmaf_v0.6.1.pkl", NULL, NULL, 0, 0, 0, 0, 0, 0, 0, NULL};
+
 /* x265 input parameters
  *
  * For version safety you may use x265_param_alloc/free() to manage the
@@ -1811,6 +1854,22 @@
 /* In-place downshift from a bit-depth greater than 8 to a bit-depth of 8, using
  * the residual bits to dither each row. */
 void x265_dither_image(x265_picture *, int picWidth, int picHeight, int16_t *errorBuf, int bitDepth);
+#if ENABLE_LIBVMAF
+/* x265_calculate_vmafscore:
+ *    returns VMAF score for the input video.
+ *    This api must be called only after encoding was done. */
+double x265_calculate_vmafscore(x265_param*, x265_vmaf_data*);
+
+/* x265_calculate_vmaf_framelevelscore:
+ *    returns the VMAF score of the single frame described by the argument. */
+double x265_calculate_vmaf_framelevelscore(x265_vmaf_framedata*);
+/* x265_vmaf_encoder_log:
+ *       computes the aggregate VMAF score and writes the summary line to the CSV file.
+ *       The files in x265_vmaf_data are closed and the struct is freed by this call.
+ *       This api will be called only when ENABLE_LIBVMAF cmake option is set */
+void x265_vmaf_encoder_log(x265_encoder *encoder, int argc, char **argv, x265_param*, x265_vmaf_data*);
+
+#endif
 
 #define X265_MAJOR_VERSION 1
 
@@ -1864,6 +1923,11 @@
     void          (*csvlog_encode)(const x265_param*, const x265_stats *, int, int, int, char**);
     void          (*dither_image)(x265_picture*, int, int, int16_t*, int);
     int           (*set_analysis_data)(x265_encoder *encoder, x265_analysis_data *analysis_data, int poc, uint32_t cuBytes);
+#if ENABLE_LIBVMAF
+    double        (*calculate_vmafscore)(x265_param *, x265_vmaf_data *);
+    double        (*calculate_vmaf_framelevelscore)(x265_vmaf_framedata *);
+    void          (*vmaf_encoder_log)(x265_encoder*, int, char**, x265_param *, x265_vmaf_data *);
+#endif
     /* add new pointers to the end, or increment X265_MAJOR_VERSION */
 } x265_api;
 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: x265.patch
Type: text/x-patch
Size: 28100 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20180412/eae1ea0e/attachment-0001.bin>


More information about the x265-devel mailing list