Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 92 additions & 28 deletions libvmaf/src/feature/integer_adm.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@

#if ARCH_X86
#include "x86/adm_avx2.h"
#if HAVE_AVX512
#include "x86/adm_avx512.h"
#endif
#elif ARCH_AARCH64
#include "arm64/adm_neon.h"
#include <arm_neon.h>
Expand All @@ -41,6 +44,31 @@ typedef struct AdmState {
void (*dwt2_8)(const uint8_t *src, const adm_dwt_band_t *dst,
AdmBuffer *buf, int w, int h, int src_stride,
int dst_stride);
void (*dwt2_16)(const uint16_t *src, const adm_dwt_band_t *dst,
AdmBuffer *buf, int w, int h, int src_stride,
int dst_stride, int inp_size_bits);
void (*adm_decouple)(AdmBuffer *buf, int w, int h, int stride,
double adm_enhn_gain_limit, int32_t* adm_div_lookup);
void (*adm_decouple_s123)(AdmBuffer *buf, int w, int h, int stride,
double adm_enhn_gain_limit, int32_t* adm_div_lookup);
float (*adm_csf_den_scale)(const adm_dwt_band_t *src, int w, int h,
int src_stride, double adm_norm_view_dist,
int adm_ref_display_height);
void (*adm_csf)(AdmBuffer *buf, int w, int h, int stride,
double adm_norm_view_dist, int adm_ref_display_height);
float (*adm_cm)(AdmBuffer *buf, int w, int h, int src_stride, int csf_a_stride,
double adm_norm_view_dist, int adm_ref_display_height);
void (*adm_dwt2_s123_combined)(const int32_t *i4_ref_scale, const int32_t *i4_curr_dis,
AdmBuffer *buf, int w, int h, int ref_stride,
int dis_stride, int dst_stride, int scale);
float (*adm_csf_den_s123)(const i4_adm_dwt_band_t *src, int scale, int w, int h,
int src_stride, double adm_norm_view_dist,
int adm_ref_display_height);
void (*i4_adm_csf)(AdmBuffer *buf, int scale, int w, int h, int stride,
double adm_norm_view_dist, int adm_ref_display_height);
float (*i4_adm_cm)(AdmBuffer *buf, int w, int h, int src_stride,
int csf_a_stride, int scale, double adm_norm_view_dist,
int adm_ref_display_height);
VmafDictionary *feature_name_dict;
} AdmState;

Expand Down Expand Up @@ -657,8 +685,8 @@ static void dwt2_src_indices_filt(int **src_ind_y, int **src_ind_x, int w, int h
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#define MAX(x, y) (((x) > (y)) ? (x) : (y))

static void adm_decouple(AdmBuffer *buf, int w, int h, int stride,
double adm_enhn_gain_limit)
static inline void adm_decouple(AdmBuffer *buf, int w, int h, int stride,
double adm_enhn_gain_limit, int32_t* adm_div_lookup)
{
const float cos_1deg_sq = cos(1.0 * M_PI / 180.0) * cos(1.0 * M_PI / 180.0);

Expand All @@ -684,7 +712,6 @@ static void adm_decouple(AdmBuffer *buf, int w, int h, int stride,
if (bottom > h) {
bottom = h;
}

int64_t ot_dp, o_mag_sq, t_mag_sq;

for (int i = top; i < bottom; ++i) {
Expand Down Expand Up @@ -722,6 +749,7 @@ static void adm_decouple(AdmBuffer *buf, int w, int h, int stride,
o_mag_sq = (int64_t)oh * oh + (int64_t)ov * ov;
t_mag_sq = (int64_t)th * th + (int64_t)tv * tv;


/**
* angle_flag is calculated in floating-point by converting fixed-point variables back to
* floating-point
Expand All @@ -735,16 +763,17 @@ static void adm_decouple(AdmBuffer *buf, int w, int h, int stride,
*/

int32_t tmp_kh = (oh == 0) ?
32768 : (((int64_t)div_lookup[oh + 32768] * th) + 16384) >> 15;
32768 : (((int64_t)adm_div_lookup[oh + 32768] * th) + 16384) >> 15;
int32_t tmp_kv = (ov == 0) ?
32768 : (((int64_t)div_lookup[ov + 32768] * tv) + 16384) >> 15;
32768 : (((int64_t)adm_div_lookup[ov + 32768] * tv) + 16384) >> 15;
int32_t tmp_kd = (od == 0) ?
32768 : (((int64_t)div_lookup[od + 32768] * td) + 16384) >> 15;
32768 : (((int64_t)adm_div_lookup[od + 32768] * td) + 16384) >> 15;

int32_t kh = tmp_kh < 0 ? 0 : (tmp_kh > 32768 ? 32768 : tmp_kh);
int32_t kv = tmp_kv < 0 ? 0 : (tmp_kv > 32768 ? 32768 : tmp_kv);
int32_t kd = tmp_kd < 0 ? 0 : (tmp_kd > 32768 ? 32768 : tmp_kd);


/**
* kh, kv, kd are in Q15 format and oh, ov, od are in Q16 format, so the
* Q31 product is shifted right by 15 to bring the result back to Q16
Expand Down Expand Up @@ -787,7 +816,7 @@ static inline uint16_t get_best15_from32(uint32_t temp, int *x)
}

static void adm_decouple_s123(AdmBuffer *buf, int w, int h, int stride,
double adm_enhn_gain_limit)
double adm_enhn_gain_limit, int32_t* adm_div_lookup)
{
const float cos_1deg_sq = cos(1.0 * M_PI / 180.0) * cos(1.0 * M_PI / 180.0);

Expand Down Expand Up @@ -890,11 +919,11 @@ static void adm_decouple_s123(AdmBuffer *buf, int w, int h, int stride,
uint16_t kv_msb = (abs_ov < (32768) ? abs_ov : get_best15_from32(abs_ov, &kv_shift));
uint16_t kd_msb = (abs_od < (32768) ? abs_od : get_best15_from32(abs_od, &kd_shift));

int64_t tmp_kh = (oh == 0) ? 32768 : (((int64_t)div_lookup[kh_msb + 32768] * th)*(kh_sign) +
int64_t tmp_kh = (oh == 0) ? 32768 : (((int64_t)adm_div_lookup[kh_msb + 32768] * th)*(kh_sign) +
(1 << (14 + kh_shift))) >> (15 + kh_shift);
int64_t tmp_kv = (ov == 0) ? 32768 : (((int64_t)div_lookup[kv_msb + 32768] * tv)*(kv_sign) +
int64_t tmp_kv = (ov == 0) ? 32768 : (((int64_t)adm_div_lookup[kv_msb + 32768] * tv)*(kv_sign) +
(1 << (14 + kv_shift))) >> (15 + kv_shift);
int64_t tmp_kd = (od == 0) ? 32768 : (((int64_t)div_lookup[kd_msb + 32768] * td)*(kd_sign) +
int64_t tmp_kd = (od == 0) ? 32768 : (((int64_t)adm_div_lookup[kd_msb + 32768] * td)*(kd_sign) +
(1 << (14 + kd_shift))) >> (15 + kd_shift);

int64_t kh = tmp_kh < 0 ? 0 : (tmp_kh > 32768 ? 32768 : tmp_kh);
Expand All @@ -903,12 +932,12 @@ static void adm_decouple_s123(AdmBuffer *buf, int w, int h, int stride,

rst_h = ((kh * oh) + 16384) >> 15;
rst_v = ((kv * ov) + 16384) >> 15;
rst_d = ((kd * od) + 16384) >> 15;
rst_d = ((kd * od) + 16384) >> 15;

const float rst_h_f = ((float)kh / 32768) * ((float)oh / 64);
const float rst_v_f = ((float)kv / 32768) * ((float)ov / 64);
const float rst_d_f = ((float)kd / 32768) * ((float)od / 64);

if (angle_flag && (rst_h_f > 0.)) rst_h = MIN((rst_h * adm_enhn_gain_limit), th);
if (angle_flag && (rst_h_f < 0.)) rst_h = MAX((rst_h * adm_enhn_gain_limit), th);

Expand Down Expand Up @@ -1416,6 +1445,7 @@ static float adm_cm(AdmBuffer *buf, int w, int h, int src_stride, int csf_a_stri
accum_inner_h = 0;
accum_inner_v = 0;
accum_inner_d = 0;

for (j = start_col; j < end_col; ++j) {
xh = src->band_h[i * src_stride + j] * i_rfactor[0];
xv = src->band_v[i * src_stride + j] * i_rfactor[1];
Expand Down Expand Up @@ -2429,7 +2459,6 @@ void integer_compute_adm(AdmState *s, VmafPicture *ref_pic, VmafPicture *dis_pic
{
int w = ref_pic->w[0];
int h = ref_pic->h[0];

const double numden_limit = 1e-10 * (w * h) / (1920.0 * 1080.0);

size_t curr_ref_stride;
Expand Down Expand Up @@ -2463,9 +2492,9 @@ void integer_compute_adm(AdmState *s, VmafPicture *ref_pic, VmafPicture *dis_pic
curr_dis_stride, buf_stride);
}
else {
adm_dwt2_16(ref_pic->data[0], &buf->ref_dwt2, buf, w, h,
s->dwt2_16(ref_pic->data[0], &buf->ref_dwt2, buf, w, h,
curr_ref_stride, buf_stride, ref_pic->bpc);
adm_dwt2_16(dis_pic->data[0], &buf->dis_dwt2, buf, w, h,
s->dwt2_16(dis_pic->data[0], &buf->dis_dwt2, buf, w, h,
curr_dis_stride, buf_stride, dis_pic->bpc);
}

Expand All @@ -2474,35 +2503,35 @@ void integer_compute_adm(AdmState *s, VmafPicture *ref_pic, VmafPicture *dis_pic

w = (w + 1) / 2;
h = (h + 1) / 2;
s->adm_decouple(buf, w, h, buf_stride, adm_enhn_gain_limit, div_lookup);

adm_decouple(buf, w, h, buf_stride, adm_enhn_gain_limit);

den_scale = adm_csf_den_scale(&buf->ref_dwt2, w, h, buf_stride,
den_scale = s->adm_csf_den_scale(&buf->ref_dwt2, w, h, buf_stride,
adm_norm_view_dist, adm_ref_display_height);

adm_csf(buf, w, h, buf_stride, adm_norm_view_dist, adm_ref_display_height);
s->adm_csf(buf, w, h, buf_stride, adm_norm_view_dist, adm_ref_display_height);

num_scale = adm_cm(buf, w, h, buf_stride, buf_stride,
num_scale = s->adm_cm(buf, w, h, buf_stride, buf_stride,
adm_norm_view_dist, adm_ref_display_height);
}
else {
adm_dwt2_s123_combined(i4_curr_ref_scale, i4_curr_dis_scale, buf, w, h, curr_ref_stride,
curr_dis_stride, buf_stride, scale);
s->adm_dwt2_s123_combined(i4_curr_ref_scale, i4_curr_dis_scale, buf, w, h, curr_ref_stride,
curr_dis_stride, buf_stride, scale);

w = (w + 1) / 2;
h = (h + 1) / 2;

adm_decouple_s123(buf, w, h, buf_stride, adm_enhn_gain_limit);
s->adm_decouple_s123(buf, w, h, buf_stride, adm_enhn_gain_limit, div_lookup);

den_scale = adm_csf_den_s123(
den_scale = s->adm_csf_den_s123(
&buf->i4_ref_dwt2, scale, w, h, buf_stride,
adm_norm_view_dist, adm_ref_display_height);

s->i4_adm_csf(buf, scale, w, h, buf_stride,
adm_norm_view_dist, adm_ref_display_height);

i4_adm_csf(buf, scale, w, h, buf_stride,
adm_norm_view_dist, adm_ref_display_height);
num_scale = s->i4_adm_cm(buf, w, h, buf_stride, buf_stride, scale,
adm_norm_view_dist, adm_ref_display_height);

num_scale = i4_adm_cm(buf, w, h, buf_stride, buf_stride, scale,
adm_norm_view_dist, adm_ref_display_height);
}

num += num_scale;
Expand Down Expand Up @@ -2593,12 +2622,47 @@ static int init(VmafFeatureExtractor *fex, enum VmafPixelFormat pix_fmt,
}

s->dwt2_8 = adm_dwt2_8;
s->dwt2_16 = adm_dwt2_16;
s->adm_csf_den_scale = adm_csf_den_scale;
s->adm_csf = adm_csf;
s->adm_cm = adm_cm;
s->adm_dwt2_s123_combined = adm_dwt2_s123_combined;
s->adm_csf_den_s123 = adm_csf_den_s123;
s->i4_adm_csf = i4_adm_csf;
s->i4_adm_cm = i4_adm_cm;
s->adm_decouple = adm_decouple;
s->adm_decouple_s123 = adm_decouple_s123;

#if ARCH_X86
unsigned flags = vmaf_get_cpu_flags();
if (flags & VMAF_X86_CPU_FLAG_AVX2) {
if (!(w % 8)) s->dwt2_8 = adm_dwt2_8_avx2;
s->dwt2_16 = adm_dwt2_16_avx2;
s->adm_csf_den_scale = adm_csf_den_scale_avx2;
s->adm_csf = adm_csf_avx2;
s->adm_cm = adm_cm_avx2;
s->adm_csf_den_s123 = adm_csf_den_s123_avx2;
s->adm_dwt2_s123_combined = adm_dwt2_s123_combined_avx2;
s->i4_adm_csf = i4_adm_csf_avx2;
s->i4_adm_cm = i4_adm_cm_avx2;
s->adm_decouple = adm_decouple_avx2;
s->adm_decouple_s123 = adm_decouple_s123_avx2;
}
#if HAVE_AVX512
if (flags & VMAF_X86_CPU_FLAG_AVX512) {
s->dwt2_8 = adm_dwt2_8_avx512;
s->dwt2_16 = adm_dwt2_16_avx512;
s->adm_csf_den_scale = adm_csf_den_scale_avx512;
s->adm_csf = adm_csf_avx512;
s->adm_cm = adm_cm_avx512;
s->adm_csf_den_s123 = adm_csf_den_s123_avx512;
s->adm_dwt2_s123_combined = adm_dwt2_s123_combined_avx512;
s->i4_adm_csf = i4_adm_csf_avx512;
s->i4_adm_cm = i4_adm_cm_avx512;
s->adm_decouple = adm_decouple_avx512;
s->adm_decouple_s123 = adm_decouple_s123_avx512;
}
#endif
#elif ARCH_AARCH64
unsigned flags = vmaf_get_cpu_flags();
if (flags & VMAF_ARM_CPU_FLAG_NEON) {
Expand Down
20 changes: 18 additions & 2 deletions libvmaf/src/feature/integer_motion.c
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ static int init(VmafFeatureExtractor *fex, enum VmafPixelFormat pix_fmt,

MotionState *s = fex->priv;
int err = 0;
unsigned flags = vmaf_get_cpu_flags();

s->feature_name_dict =
vmaf_feature_name_dict_from_provided_features(fex->provided_features,
Expand All @@ -303,10 +304,17 @@ static int init(VmafFeatureExtractor *fex, enum VmafPixelFormat pix_fmt,
if (err) goto fail;

s->y_convolution = bpc == 8 ? y_convolution_8 : y_convolution_16;
s->x_convolution = x_convolution_16;
#if ARCH_X86
if (flags & VMAF_X86_CPU_FLAG_AVX2)
s->y_convolution = bpc == 8 ? y_convolution_8_avx2 : y_convolution_16_avx2;
#if HAVE_AVX512
if (flags & VMAF_X86_CPU_FLAG_AVX512)
s->y_convolution = bpc == 8 ? y_convolution_8_avx512 : y_convolution_16_avx512;
#endif
#endif

s->x_convolution = x_convolution_16;
#if ARCH_X86
unsigned flags = vmaf_get_cpu_flags();
if (flags & VMAF_X86_CPU_FLAG_AVX2)
s->x_convolution = x_convolution_16_avx2;
#if HAVE_AVX512
Expand All @@ -316,6 +324,14 @@ static int init(VmafFeatureExtractor *fex, enum VmafPixelFormat pix_fmt,
#endif

s->sad = sad_c;
#if ARCH_X86
if (flags & VMAF_X86_CPU_FLAG_AVX2)
s->sad = sad_avx2;
#if HAVE_AVX512
if (flags & VMAF_X86_CPU_FLAG_AVX512)
s->sad = sad_avx512;
#endif
#endif
s->score = 0.;

return 0;
Expand Down
1 change: 1 addition & 0 deletions libvmaf/src/feature/integer_motion.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

#include <stdbool.h>
#include <stdint.h>
#include "cpu.h"

static const uint16_t filter[5] = { 3571, 16004, 26386, 16004, 3571 };
static const int filter_width = sizeof(filter) / sizeof(filter[0]);
Expand Down
1 change: 1 addition & 0 deletions libvmaf/src/feature/integer_vif.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <stdint.h>
#include <stdbool.h>
#include <assert.h>
#include "cpu.h"

/* Enhancement gain limit imposed on VIF; must be >= 1.0, where 1.0 means the enhancement gain is completely disabled */
#ifndef DEFAULT_VIF_ENHN_GAIN_LIMIT
Expand Down
Loading