perf: improve vectorization optimizations and add function multi-versioning (#1271)

Signed-off-by: k4yt3x <i@k4yt3x.com>
This commit is contained in:
K4YT3X
2024-12-22 05:53:09 -05:00
committed by GitHub
parent f68939c478
commit e1e8ed864d
11 changed files with 116 additions and 52 deletions

View File

@@ -11,6 +11,7 @@ namespace video2x {
namespace conversions {
// Convert AVFrame format
[[gnu::target_clones("default", "avx2", "avx512f")]]
AVFrame *convert_avframe_pix_fmt(AVFrame *src_frame, AVPixelFormat pix_fmt) {
AVFrame *dst_frame = av_frame_alloc();
if (dst_frame == nullptr) {
@@ -67,6 +68,7 @@ AVFrame *convert_avframe_pix_fmt(AVFrame *src_frame, AVPixelFormat pix_fmt) {
}
// Convert AVFrame to ncnn::Mat by copying the data
[[gnu::target_clones("default", "avx2", "avx512f")]]
ncnn::Mat avframe_to_ncnn_mat(AVFrame *frame) {
AVFrame *converted_frame = nullptr;
@@ -106,6 +108,7 @@ ncnn::Mat avframe_to_ncnn_mat(AVFrame *frame) {
}
// Convert ncnn::Mat to AVFrame with a specified pixel format (this part is unchanged)
[[gnu::target_clones("default", "avx2", "avx512f")]]
AVFrame *ncnn_mat_to_avframe(const ncnn::Mat &mat, AVPixelFormat pix_fmt) {
int ret;