refactor(libvideo2x): convert the video processor into a class (#1246)

Signed-off-by: k4yt3x <i@k4yt3x.com>
2026-02-04 03:22:07 +08:00 · 2024-12-03 05:22:07 +00:00
parent a379c7481e
commit d4d1e58f8d
16 changed files with 612 additions and 665 deletions
--- a/include/libvideo2x/avutils.h
+++ b/include/libvideo2x/avutils.h
@@ -5,15 +5,18 @@ extern "C" {
 #include <libavformat/avformat.h>
 }

-#define CALC_FFMPEG_VERSION(a, b, c) (a << 16 | b << 8 | c)
-
 AVRational get_video_frame_rate(AVFormatContext *ifmt_ctx, int in_vstream_idx);

 int64_t get_video_frame_count(AVFormatContext *ifmt_ctx, int in_vstream_idx);

-AVPixelFormat
-get_encoder_default_pix_fmt(const AVCodec *encoder, AVPixelFormat target_pix_fmt);
+AVPixelFormat get_encoder_default_pix_fmt(const AVCodec *encoder, AVPixelFormat target_pix_fmt);

 float get_frame_diff(AVFrame *frame1, AVFrame *frame2);

+void av_bufferref_deleter(AVBufferRef *bufferref);  // presumably releases an AVBufferRef (body not in this header) -- confirm
+
+void av_frame_deleter(AVFrame *frame);  // used as the deleter type of std::unique_ptr<AVFrame, ...> in libvideo2x.h
+
+void av_packet_deleter(AVPacket *packet);  // presumably releases an AVPacket (body not in this header) -- confirm
+
 #endif  // AVUTILS_H
--- a/include/libvideo2x/encoder.h
+++ b/include/libvideo2x/encoder.h
@@ -3,13 +3,51 @@

 #include <cstdint>
 #include <filesystem>
+#include <vector>

 extern "C" {
+#include <libavcodec/avcodec.h>
 #include <libavformat/avformat.h>
 #include <libavutil/pixdesc.h>
 }

-#include "libvideo2x/libvideo2x.h"
+#include "fsutils.h"
+
+// Encoder configuration: aggregate of encoder settings; class Encoder below takes it as `EncoderConfig &enc_cfg`
+struct EncoderConfig {
+    // Non-AVCodecContext options
+    AVCodecID codec;
+    bool copy_streams;  // presumably: also copy the input's other streams to the output -- confirm against the implementation
+
+    // Basic video options
+    int width;
+    int height;
+    int frm_rate_mul;  // new here (absent from the copy removed from libvideo2x.h); ProcessorConfig declares one too
+    AVPixelFormat pix_fmt;
+
+    // Rate control and compression
+    int64_t bit_rate;
+    int rc_buffer_size;
+    int rc_min_rate;
+    int rc_max_rate;
+    int qmin;
+    int qmax;
+
+    // GOP and frame structure
+    int gop_size;
+    int max_b_frames;
+    int keyint_min;
+    int refs;
+
+    // Performance and threading
+    int thread_count;
+
+    // Latency and buffering
+    int delay;
+
+    // Extra AVOptions
+    std::vector<std::pair<StringType, StringType>> extra_opts;  // NOTE(review): std::pair needs <utility>; not among the includes shown in this hunk
+};

 class Encoder {
   public:
@@ -22,7 +60,6 @@ class Encoder {
        AVFormatContext *ifmt_ctx,
        AVCodecContext *dec_ctx,
        EncoderConfig &enc_cfg,
-        const ProcessorConfig &proc_cfg,
        int in_vstream_idx
    );

--- a/include/libvideo2x/filter_libplacebo.h
+++ b/include/libvideo2x/filter_libplacebo.h
@@ -13,17 +13,6 @@ extern "C" {

 // FilterLibplacebo class definition
 class FilterLibplacebo : public Filter {
-   private:
-    AVFilterGraph *filter_graph_;
-    AVFilterContext *buffersrc_ctx_;
-    AVFilterContext *buffersink_ctx_;
-    uint32_t vk_device_index_;
-    const std::filesystem::path shader_path_;
-    int width_;
-    int height_;
-    AVRational in_time_base_;
-    AVRational out_time_base_;
-
   public:
    // Constructor
    FilterLibplacebo(
@@ -56,6 +45,17 @@ class FilterLibplacebo : public Filter {
        int &out_width,
        int &out_height
    ) const override;
+
+   private:
+    AVFilterGraph *filter_graph_;
+    AVFilterContext *buffersrc_ctx_;
+    AVFilterContext *buffersink_ctx_;
+    uint32_t vk_device_index_;
+    const std::filesystem::path shader_path_;
+    int width_;
+    int height_;
+    AVRational in_time_base_;
+    AVRational out_time_base_;
 };

 #endif  // FILTER_LIBPLACEBO_H
--- a/include/libvideo2x/filter_realesrgan.h
+++ b/include/libvideo2x/filter_realesrgan.h
@@ -10,16 +10,6 @@ extern "C" {

 // FilterRealesrgan class definition
 class FilterRealesrgan : public Filter {
-   private:
-    RealESRGAN *realesrgan_;
-    int gpuid_;
-    bool tta_mode_;
-    int scaling_factor_;
-    const StringType model_name_;
-    AVRational in_time_base_;
-    AVRational out_time_base_;
-    AVPixelFormat out_pix_fmt_;
-
   public:
    // Constructor
    FilterRealesrgan(
@@ -49,6 +39,16 @@ class FilterRealesrgan : public Filter {
        int &out_width,
        int &out_height
    ) const override;
+
+   private:
+    RealESRGAN *realesrgan_;
+    int gpuid_;
+    bool tta_mode_;
+    int scaling_factor_;
+    const StringType model_name_;
+    AVRational in_time_base_;
+    AVRational out_time_base_;
+    AVPixelFormat out_pix_fmt_;
 };

 #endif  // FILTER_REALESRGAN_H
--- a/include/libvideo2x/frames_processor.h
+++ b/include/libvideo2x/frames_processor.h
@@ -1,19 +0,0 @@
-#ifndef FRAMES_PROCESSOR_H
-#define FRAMES_PROCESSOR_H
-
-#include "decoder.h"
-#include "encoder.h"
-#include "libvideo2x.h"
-#include "processor.h"
-
-int process_frames(
-    const EncoderConfig &enc_cfg,
-    const ProcessorConfig &proc_cfg,
-    VideoProcessingContext *proc_ctx,
-    Decoder &decoder,
-    Encoder &encoder,
-    Processor *processor,
-    bool benchmark = false
-);
-
-#endif  // FRAMES_PROCESSOR_H
--- a/include/libvideo2x/interpolator_rife.h
+++ b/include/libvideo2x/interpolator_rife.h
@@ -10,18 +10,6 @@ extern "C" {

 // InterpolatorRIFE class definition
 class InterpolatorRIFE : public Interpolator {
-   private:
-    RIFE *rife_;
-    int gpuid_;
-    bool tta_mode_;
-    bool tta_temporal_mode_;
-    bool uhd_mode_;
-    int num_threads_;
-    const StringType model_name_;
-    AVRational in_time_base_;
-    AVRational out_time_base_;
-    AVPixelFormat out_pix_fmt_;
-
   public:
    // Constructor
    InterpolatorRIFE(
@@ -54,6 +42,18 @@ class InterpolatorRIFE : public Interpolator {
        int &out_width,
        int &out_height
    ) const override;
+
+   private:
+    RIFE *rife_;
+    int gpuid_;
+    bool tta_mode_;
+    bool tta_temporal_mode_;
+    bool uhd_mode_;
+    int num_threads_;
+    const StringType model_name_;
+    AVRational in_time_base_;
+    AVRational out_time_base_;
+    AVPixelFormat out_pix_fmt_;
 };

 #endif  // INTERPOLATOR_RIFE_H
--- a/include/libvideo2x/libvideo2x.h
+++ b/include/libvideo2x/libvideo2x.h
@@ -1,17 +1,20 @@
 #ifndef LIBVIDEO2X_H
 #define LIBVIDEO2X_H

-#include <filesystem>
-#include <variant>
-#include <vector>
+#include <atomic>
+#include <cstdint>
+#include <memory>

 extern "C" {
 #include <libavcodec/avcodec.h>
 #include <libavformat/avformat.h>
 }

-#include "fsutils.h"
+#include "avutils.h"
+#include "decoder.h"
+#include "encoder.h"
 #include "logging.h"
+#include "processor.h"

 #ifdef _WIN32
 #ifdef LIBVIDEO2X_EXPORTS
@@ -23,105 +26,75 @@ extern "C" {
 #define LIBVIDEO2X_API
 #endif

-enum class ProcessingMode {
-    Filter,
-    Interpolate,
-};
-
-enum class ProcessorType {
-    Libplacebo,
-    RealESRGAN,
-    RIFE,
-};
-
-struct LibplaceboConfig {
-    StringType shader_path;
-};
-
-struct RealESRGANConfig {
-    bool tta_mode;
-    StringType model_name;
-};
-
-struct RIFEConfig {
-    bool tta_mode;
-    bool tta_temporal_mode;
-    bool uhd_mode;
-    int num_threads;
-    StringType model_name;
-};
-
-// Unified filter configuration
-struct ProcessorConfig {
-    ProcessorType processor_type;
-    int width;
-    int height;
-    int scaling_factor;
-    int frm_rate_mul;
-    float scn_det_thresh;
-    std::variant<LibplaceboConfig, RealESRGANConfig, RIFEConfig> config;
-};
-
-// Encoder configurations
-struct EncoderConfig {
-    // Non-AVCodecContext options
-    AVCodecID codec;
-    bool copy_streams;
-
-    // Basic video options
-    int width;
-    int height;
-    AVPixelFormat pix_fmt;
-
-    // Rate control and compression
-    int64_t bit_rate;
-    int rc_buffer_size;
-    int rc_min_rate;
-    int rc_max_rate;
-    int qmin;
-    int qmax;
-
-    // GOP and frame structure
-    int gop_size;
-    int max_b_frames;
-    int keyint_min;
-    int refs;
-
-    // Performance and threading
-    int thread_count;
-
-    // Latency and buffering
-    int delay;
-
-    // Extra AVOptions
-    std::vector<std::pair<StringType, StringType>> extra_opts;
-};
-
 struct HardwareConfig {
    uint32_t vk_device_index;
    AVHWDeviceType hw_device_type;
 };

-// Video processing context
-struct VideoProcessingContext {
-    int64_t processed_frames;
-    int64_t total_frames;
-    std::time_t start_time;
-    bool pause;
-    bool abort;
-    bool completed;
+class LIBVIDEO2X_API VideoProcessor {  // added where process_video() and VideoProcessingContext are removed
+   public:
+    VideoProcessor(
+        const HardwareConfig hw_cfg,     // NOTE(review): by value; top-level const does not change the signature
+        const ProcessorConfig proc_cfg,  // by value; a matching proc_cfg_ member is declared below
+        EncoderConfig enc_cfg,           // by value; a matching enc_cfg_ member is declared below
+        Video2xLogLevel = Video2xLogLevel::Info,  // unnamed here; the removed process_video() called it log_level
+        bool benchmark = false
+    );
+
+    virtual ~VideoProcessor() = default;  // NOTE(review): no other virtual member is declared in this class
+
+    [[nodiscard]] int  // int status; its meaning is set by the implementation, which is not in this header
+    process(const std::filesystem::path in_fname, const std::filesystem::path out_fname);  // NOTE(review): <filesystem> is no longer included directly; it arrives via encoder.h
+
+    void pause() { paused_.store(true); }    // sets paused_
+    void resume() { paused_.store(false); }  // clears paused_
+    void abort() { aborted_.store(true); }   // sets aborted_; this header declares nothing that clears it
+
+    int64_t get_processed_frames() const { return frame_index_.load(); }  // atomic read of frame_index_
+    int64_t get_total_frames() const { return total_frames_.load(); }     // atomic read of total_frames_
+
+    bool is_paused() const { return paused_.load(); }
+    bool is_aborted() const { return aborted_.load(); }
+    bool is_completed() const { return completed_.load(); }  // completed_ has no setter in this header; presumably set by process() -- confirm
+
+   private:
+    [[nodiscard]] int  // presumably the main decode/process/encode loop -- confirm in the implementation
+    process_frames(Decoder &decoder, Encoder &encoder, std::unique_ptr<Processor> &processor);
+
+    [[nodiscard]] int write_frame(AVFrame *frame, Encoder &encoder);
+
+    [[nodiscard]] inline int write_raw_packet(  // NOTE(review): inline with no body here; the definition must be visible in every TU that calls it
+        AVPacket *packet,
+        AVFormatContext *ifmt_ctx,
+        AVFormatContext *ofmt_ctx,
+        int *stream_map
+    );
+
+    [[nodiscard]] inline int process_filtering(  // same inline caveat as write_raw_packet
+        std::unique_ptr<Processor> &processor,
+        Encoder &encoder,
+        AVFrame *frame,
+        AVFrame *proc_frame
+    );
+
+    [[nodiscard]] inline int process_interpolation(  // same inline caveat as write_raw_packet
+        std::unique_ptr<Processor> &processor,
+        Encoder &encoder,
+        std::unique_ptr<AVFrame, decltype(&av_frame_deleter)> &prev_frame,  // non-const reference to the caller's owning pointer
+        AVFrame *frame,
+        AVFrame *proc_frame
+    );
+
+    HardwareConfig hw_cfg_;
+    ProcessorConfig proc_cfg_;
+    EncoderConfig enc_cfg_;
+    bool benchmark_ = false;
+
+    std::atomic<int64_t> frame_index_ = 0;   // NOTE(review): std::atomic members delete the implicit copy/move operations of this class
+    std::atomic<int64_t> total_frames_ = 0;  // returned by get_total_frames()
+    std::atomic<bool> paused_ = false;       // written by pause()/resume(), read by is_paused()
+    std::atomic<bool> aborted_ = false;      // written by abort(), read by is_aborted()
+    std::atomic<bool> completed_ = false;    // read by is_completed()
 };

-// Process a video file using the specified configurations
-[[nodiscard]] LIBVIDEO2X_API int process_video(
-    const std::filesystem::path in_fname,
-    const std::filesystem::path out_fname,
-    const HardwareConfig hw_cfg,
-    const ProcessorConfig proc_cfg,
-    EncoderConfig enc_cfg,
-    VideoProcessingContext *proc_ctx,
-    Libvideo2xLogLevel log_level,
-    bool benchmark
-);
-
 #endif  // LIBVIDEO2X_H
--- a/include/libvideo2x/logging.h
+++ b/include/libvideo2x/logging.h
@@ -5,7 +5,7 @@

 #include "fsutils.h"

-enum class Libvideo2xLogLevel {
+enum class Video2xLogLevel {
    Unknown,
    Trace,
    Debug,
@@ -16,8 +16,8 @@ enum class Libvideo2xLogLevel {
    Off
 };

-void set_log_level(Libvideo2xLogLevel log_level);
+void set_log_level(Video2xLogLevel log_level);

-std::optional<Libvideo2xLogLevel> find_log_level_by_name(const StringType &log_level_name);
+std::optional<Video2xLogLevel> find_log_level_by_name(const StringType &log_level_name);

 #endif  // LOGGING_H
--- a/include/libvideo2x/processor.h
+++ b/include/libvideo2x/processor.h
@@ -1,6 +1,7 @@
 #ifndef PROCESSOR_H
 #define PROCESSOR_H

+#include <variant>
 #include <vector>

 extern "C" {
@@ -9,7 +10,46 @@ extern "C" {
 #include <libavutil/buffer.h>
 }

-#include "libvideo2x.h"
+#include "fsutils.h"
+
+enum class ProcessingMode {  // moved here from libvideo2x.h by this change
+    Filter,       // presumably handled by VideoProcessor::process_filtering -- confirm
+    Interpolate,  // presumably handled by VideoProcessor::process_interpolation -- confirm
+};
+
+enum class ProcessorType {  // one enumerator per alternative of ProcessorConfig::config, listed in the same order
+    Libplacebo,
+    RealESRGAN,
+    RIFE,
+};
+
+struct LibplaceboConfig {  // libplacebo-specific settings; one alternative of ProcessorConfig::config
+    StringType shader_path;  // FilterLibplacebo keeps a std::filesystem::path shader_path_; presumably built from this -- confirm
+};
+
+struct RealESRGANConfig {  // Real-ESRGAN-specific settings; one alternative of ProcessorConfig::config
+    bool tta_mode;          // same name as FilterRealesrgan::tta_mode_
+    StringType model_name;  // same name and type as FilterRealesrgan::model_name_
+};
+
+struct RIFEConfig {  // RIFE-specific settings; fields share names with InterpolatorRIFE's private members
+    bool tta_mode;
+    bool tta_temporal_mode;
+    bool uhd_mode;
+    int num_threads;
+    StringType model_name;
+};
+
+// Unified processor configuration (filters and the RIFE interpolator): shared fields plus one processor-specific config
+struct ProcessorConfig {
+    ProcessorType processor_type;  // NOTE(review): nothing in this struct ties this tag to the active alternative of `config`
+    int width;
+    int height;
+    int scaling_factor;
+    int frm_rate_mul;      // EncoderConfig (encoder.h) declares a field with the same name
+    float scn_det_thresh;  // presumably a scene-detection threshold -- confirm
+    std::variant<LibplaceboConfig, RealESRGANConfig, RIFEConfig> config;  // holds exactly one of the three config structs
+};

 class Processor {
   public: