perf: improve vectorization optimizations and add function multi-versioning (#1271)

Signed-off-by: k4yt3x <i@k4yt3x.com>
This commit is contained in:
K4YT3X
2024-12-22 05:53:09 -05:00
committed by GitHub
parent f68939c478
commit e1e8ed864d
11 changed files with 116 additions and 52 deletions

View File

@@ -135,6 +135,7 @@ AVPixelFormat get_encoder_default_pix_fmt(const AVCodec *encoder, AVPixelFormat
return best_pix_fmt;
}
[[gnu::target_clones("default", "avx2", "avx512f")]]
float get_frame_diff(AVFrame *frame1, AVFrame *frame2) {
if (!frame1 || !frame2) {
logger()->error("Invalid frame(s) provided for comparison");

View File

@@ -11,6 +11,7 @@ namespace video2x {
namespace conversions {
// Convert AVFrame format
[[gnu::target_clones("default", "avx2", "avx512f")]]
AVFrame *convert_avframe_pix_fmt(AVFrame *src_frame, AVPixelFormat pix_fmt) {
AVFrame *dst_frame = av_frame_alloc();
if (dst_frame == nullptr) {
@@ -67,6 +68,7 @@ AVFrame *convert_avframe_pix_fmt(AVFrame *src_frame, AVPixelFormat pix_fmt) {
}
// Convert AVFrame to ncnn::Mat by copying the data
[[gnu::target_clones("default", "avx2", "avx512f")]]
ncnn::Mat avframe_to_ncnn_mat(AVFrame *frame) {
AVFrame *converted_frame = nullptr;
@@ -106,6 +108,7 @@ ncnn::Mat avframe_to_ncnn_mat(AVFrame *frame) {
}
// Convert ncnn::Mat to AVFrame with a specified pixel format (this part is unchanged)
[[gnu::target_clones("default", "avx2", "avx512f")]]
AVFrame *ncnn_mat_to_avframe(const ncnn::Mat &mat, AVPixelFormat pix_fmt) {
int ret;

View File

@@ -22,7 +22,7 @@ Decoder::~Decoder() {
}
}
AVPixelFormat Decoder::get_hw_format(AVCodecContext *_, const AVPixelFormat *pix_fmts) {
AVPixelFormat Decoder::get_hw_format(AVCodecContext *, const AVPixelFormat *pix_fmts) {
for (const AVPixelFormat *p = pix_fmts; *p != AV_PIX_FMT_NONE; p++) {
if (*p == hw_pix_fmt_) {
return *p;

View File

@@ -254,6 +254,7 @@ int Encoder::init(
return 0;
}
[[gnu::target_clones("default", "avx2", "avx512f")]]
int Encoder::write_frame(AVFrame *frame, int64_t frame_idx) {
AVFrame *converted_frame = nullptr;
int ret;
@@ -325,6 +326,7 @@ int Encoder::write_frame(AVFrame *frame, int64_t frame_idx) {
return 0;
}
[[gnu::target_clones("default", "avx2", "avx512f")]]
int Encoder::flush() {
int ret;
AVPacket *enc_pkt = av_packet_alloc();

View File

@@ -28,6 +28,7 @@ VideoProcessor::VideoProcessor(
hw_device_type_(hw_device_type),
benchmark_(benchmark) {}
[[gnu::target_clones("default", "avx2", "avx512f")]]
int VideoProcessor::process(
const std::filesystem::path in_fname,
const std::filesystem::path out_fname