refactor(libvideo2x): convert the video processor into a class (#1246)

Signed-off-by: k4yt3x <i@k4yt3x.com>
This commit is contained in:
K4YT3X
2024-12-03 05:22:07 +00:00
committed by GitHub
parent a379c7481e
commit d4d1e58f8d
16 changed files with 612 additions and 665 deletions

View File

@@ -5,15 +5,18 @@ extern "C" {
#include <libavformat/avformat.h>
}
// Packs a three-part version number into one integer: `a` shifted left by 16,
// `b` shifted left by 8, OR-ed together with `c`.
// Every argument and every shift is parenthesized so that expression
// arguments (for example `x | y` or `cond ? 1 : 2`) expand with the intended
// precedence instead of binding to the neighbouring `<<` / `|` operators.
#define CALC_FFMPEG_VERSION(a, b, c) (((a) << 16) | ((b) << 8) | (c))
// Helper declarations built on FFmpeg types. Only the prototypes are visible
// here; the behavioral notes below are inferred from names and signatures and
// should be confirmed against the definitions.

// Returns an AVRational for the stream at index `in_vstream_idx` of `ifmt_ctx`
// (presumably the video frame rate — confirm).
AVRational get_video_frame_rate(AVFormatContext *ifmt_ctx, int in_vstream_idx);
// Returns a 64-bit count for the stream at index `in_vstream_idx` of `ifmt_ctx`
// (presumably the number of video frames — confirm).
int64_t get_video_frame_count(AVFormatContext *ifmt_ctx, int in_vstream_idx);
// NOTE(review): get_encoder_default_pix_fmt is declared twice below (a wrapped
// and a single-line spelling of the same prototype). Redeclaration is legal,
// but one of the two is redundant.
AVPixelFormat
get_encoder_default_pix_fmt(const AVCodec *encoder, AVPixelFormat target_pix_fmt);
AVPixelFormat get_encoder_default_pix_fmt(const AVCodec *encoder, AVPixelFormat target_pix_fmt);
// Returns a float computed from two frames (presumably a difference metric;
// range and units are not visible here — confirm).
float get_frame_diff(AVFrame *frame1, AVFrame *frame2);
// Deleter-shaped functions taking a single raw pointer; av_frame_deleter is
// used as the deleter type of a std::unique_ptr<AVFrame, ...> elsewhere in
// this project. Presumably each releases the object it is given — confirm.
void av_bufferref_deleter(AVBufferRef *bufferref);
void av_frame_deleter(AVFrame *frame);
void av_packet_deleter(AVPacket *packet);
#endif // AVUTILS_H

View File

@@ -3,13 +3,51 @@
#include <cstdint>
#include <filesystem>
#include <vector>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/pixdesc.h>
}
#include "libvideo2x/libvideo2x.h"
#include "fsutils.h"
// Encoder configurations
// Plain aggregate of encoder settings, grouped by purpose. Apart from the
// first group, the field names mirror AVCodecContext field names; presumably
// they are forwarded to the codec context as-is — confirm in the encoder
// implementation. Member order matters for aggregate initialization.
struct EncoderConfig {
// Non-AVCodecContext options
AVCodecID codec;
bool copy_streams;
// Basic video options
int width;
int height;
// presumably a frame rate multiplier — TODO confirm
int frm_rate_mul;
AVPixelFormat pix_fmt;
// Rate control and compression
int64_t bit_rate;
int rc_buffer_size;
int rc_min_rate;
int rc_max_rate;
int qmin;
int qmax;
// GOP and frame structure
int gop_size;
int max_b_frames;
int keyint_min;
int refs;
// Performance and threading
int thread_count;
// Latency and buffering
int delay;
// Extra AVOptions
// Stored as pairs of StringType; presumably (option name, option value) —
// TODO confirm the pair order against the code that consumes it.
std::vector<std::pair<StringType, StringType>> extra_opts;
};
class Encoder {
public:
@@ -22,7 +60,6 @@ class Encoder {
AVFormatContext *ifmt_ctx,
AVCodecContext *dec_ctx,
EncoderConfig &enc_cfg,
const ProcessorConfig &proc_cfg,
int in_vstream_idx
);

View File

@@ -13,17 +13,6 @@ extern "C" {
// FilterLibplacebo class definition
class FilterLibplacebo : public Filter {
private:
AVFilterGraph *filter_graph_;
AVFilterContext *buffersrc_ctx_;
AVFilterContext *buffersink_ctx_;
uint32_t vk_device_index_;
const std::filesystem::path shader_path_;
int width_;
int height_;
AVRational in_time_base_;
AVRational out_time_base_;
public:
// Constructor
FilterLibplacebo(
@@ -56,6 +45,17 @@ class FilterLibplacebo : public Filter {
int &out_width,
int &out_height
) const override;
private:
AVFilterGraph *filter_graph_;
AVFilterContext *buffersrc_ctx_;
AVFilterContext *buffersink_ctx_;
uint32_t vk_device_index_;
const std::filesystem::path shader_path_;
int width_;
int height_;
AVRational in_time_base_;
AVRational out_time_base_;
};
#endif // FILTER_LIBPLACEBO_H

View File

@@ -10,16 +10,6 @@ extern "C" {
// FilterRealesrgan class definition
class FilterRealesrgan : public Filter {
private:
RealESRGAN *realesrgan_;
int gpuid_;
bool tta_mode_;
int scaling_factor_;
const StringType model_name_;
AVRational in_time_base_;
AVRational out_time_base_;
AVPixelFormat out_pix_fmt_;
public:
// Constructor
FilterRealesrgan(
@@ -49,6 +39,16 @@ class FilterRealesrgan : public Filter {
int &out_width,
int &out_height
) const override;
private:
RealESRGAN *realesrgan_;
int gpuid_;
bool tta_mode_;
int scaling_factor_;
const StringType model_name_;
AVRational in_time_base_;
AVRational out_time_base_;
AVPixelFormat out_pix_fmt_;
};
#endif // FILTER_REALESRGAN_H

View File

@@ -1,19 +0,0 @@
#ifndef FRAMES_PROCESSOR_H
#define FRAMES_PROCESSOR_H
#include "decoder.h"
#include "encoder.h"
#include "libvideo2x.h"
#include "processor.h"
// Free-function frame processing entry point (declaration only).
// Takes the encoder and processor configurations by const reference, a pointer
// to the shared VideoProcessingContext, the decoder and encoder by reference,
// and a raw pointer to the Processor to apply. `benchmark` defaults to false.
// NOTE(review): the meaning of the int result is not visible here — presumably
// 0 on success and a negative error code otherwise; confirm in the definition.
int process_frames(
const EncoderConfig &enc_cfg,
const ProcessorConfig &proc_cfg,
VideoProcessingContext *proc_ctx,
Decoder &decoder,
Encoder &encoder,
Processor *processor,
bool benchmark = false
);
#endif // FRAMES_PROCESSOR_H

View File

@@ -10,18 +10,6 @@ extern "C" {
// InterpolatorRIFE class definition
class InterpolatorRIFE : public Interpolator {
private:
RIFE *rife_;
int gpuid_;
bool tta_mode_;
bool tta_temporal_mode_;
bool uhd_mode_;
int num_threads_;
const StringType model_name_;
AVRational in_time_base_;
AVRational out_time_base_;
AVPixelFormat out_pix_fmt_;
public:
// Constructor
InterpolatorRIFE(
@@ -54,6 +42,18 @@ class InterpolatorRIFE : public Interpolator {
int &out_width,
int &out_height
) const override;
private:
RIFE *rife_;
int gpuid_;
bool tta_mode_;
bool tta_temporal_mode_;
bool uhd_mode_;
int num_threads_;
const StringType model_name_;
AVRational in_time_base_;
AVRational out_time_base_;
AVPixelFormat out_pix_fmt_;
};
#endif // INTERPOLATOR_RIFE_H

View File

@@ -1,17 +1,20 @@
#ifndef LIBVIDEO2X_H
#define LIBVIDEO2X_H
#include <filesystem>
#include <variant>
#include <vector>
#include <atomic>
#include <cstdint>
#include <memory>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
}
#include "fsutils.h"
#include "avutils.h"
#include "decoder.h"
#include "encoder.h"
#include "logging.h"
#include "processor.h"
#ifdef _WIN32
#ifdef LIBVIDEO2X_EXPORTS
@@ -23,105 +26,75 @@ extern "C" {
#define LIBVIDEO2X_API
#endif
// The two processing modes. Enumerator values are the implicit defaults
// (0, 1) spelled out explicitly.
enum class ProcessingMode {
    Filter = 0,
    Interpolate = 1,
};
// Identifies the processor backend. Enumerator values are the implicit
// defaults (0, 1, 2) spelled out explicitly.
enum class ProcessorType {
    Libplacebo = 0,
    RealESRGAN = 1,
    RIFE = 2,
};
// Settings specific to the libplacebo processor.
struct LibplaceboConfig {
// Shader location as a StringType; FilterLibplacebo keeps a
// std::filesystem::path member named shader_path_, so this is presumably
// converted to a path when the filter is created — confirm.
StringType shader_path;
};
// Settings specific to the RealESRGAN processor. The fields correspond by name
// to the tta_mode_ and model_name_ members of FilterRealesrgan.
struct RealESRGANConfig {
// presumably enables test-time augmentation — TODO confirm
bool tta_mode;
StringType model_name;
};
// Settings specific to the RIFE processor. The fields correspond by name to
// the tta_mode_, tta_temporal_mode_, uhd_mode_, num_threads_ and model_name_
// members of InterpolatorRIFE.
struct RIFEConfig {
// presumably test-time augmentation switches — TODO confirm
bool tta_mode;
bool tta_temporal_mode;
// presumably a mode intended for very high resolution input — TODO confirm
bool uhd_mode;
int num_threads;
StringType model_name;
};
// Unified processor configuration
// `processor_type` names the backend and `config` holds one of the
// backend-specific structs (LibplaceboConfig, RealESRGANConfig, RIFEConfig).
// NOTE(review): nothing in this struct ties `processor_type` to the active
// variant alternative — presumably callers keep the two in sync; confirm.
struct ProcessorConfig {
ProcessorType processor_type;
// presumably the requested output dimensions — TODO confirm
int width;
int height;
int scaling_factor;
// presumably a frame rate multiplier — TODO confirm
int frm_rate_mul;
// presumably a scene-detection threshold; range not visible here — confirm
float scn_det_thresh;
std::variant<LibplaceboConfig, RealESRGANConfig, RIFEConfig> config;
};
// Encoder configurations
// Plain aggregate of encoder settings, grouped by purpose. Apart from the
// first group, the field names mirror AVCodecContext field names; presumably
// they are forwarded to the codec context as-is — confirm in the encoder
// implementation. Member order matters for aggregate initialization.
struct EncoderConfig {
// Non-AVCodecContext options
AVCodecID codec;
bool copy_streams;
// Basic video options
int width;
int height;
AVPixelFormat pix_fmt;
// Rate control and compression
int64_t bit_rate;
int rc_buffer_size;
int rc_min_rate;
int rc_max_rate;
int qmin;
int qmax;
// GOP and frame structure
int gop_size;
int max_b_frames;
int keyint_min;
int refs;
// Performance and threading
int thread_count;
// Latency and buffering
int delay;
// Extra AVOptions
// Stored as pairs of StringType; presumably (option name, option value) —
// TODO confirm the pair order against the code that consumes it.
std::vector<std::pair<StringType, StringType>> extra_opts;
};
// Hardware selection settings.
struct HardwareConfig {
// presumably the index of the Vulkan device to use (FilterLibplacebo keeps a
// vk_device_index_ member of the same type) — TODO confirm
uint32_t vk_device_index;
// FFmpeg hardware device type; how it is applied is not visible here.
AVHWDeviceType hw_device_type;
};
// Video processing context
struct VideoProcessingContext {
int64_t processed_frames;
int64_t total_frames;
std::time_t start_time;
bool pause;
bool abort;
bool completed;
// Object-oriented entry point for processing a video.
//
// An instance carries the hardware, processor and encoder configurations plus
// a benchmark flag, and exposes its progress counters and pause/abort/completed
// state through std::atomic members. The inline accessors below only read or
// write those atomics (sequentially consistent, same as the default
// load()/store() ordering); everything else is defined out of line.
class LIBVIDEO2X_API VideoProcessor {
public:
    // The log level parameter is unnamed in this declaration and defaults to
    // Video2xLogLevel::Info; `benchmark` defaults to false.
    // NOTE(review): presumably the arguments are copied into the matching
    // *_cfg_ / benchmark_ members — confirm in the definition.
    VideoProcessor(
        const HardwareConfig hw_cfg,
        const ProcessorConfig proc_cfg,
        EncoderConfig enc_cfg,
        Video2xLogLevel = Video2xLogLevel::Info,
        bool benchmark = false
    );

    virtual ~VideoProcessor() = default;

    // Processes `in_fname` into `out_fname`. The int result must not be
    // discarded; its meaning is not visible here (presumably 0 on success —
    // confirm in the definition).
    [[nodiscard]] int
    process(const std::filesystem::path in_fname, const std::filesystem::path out_fname);

    // Control flags: each call writes a single atomic boolean.
    void pause() {
        paused_.store(true, std::memory_order_seq_cst);
    }
    void resume() {
        paused_.store(false, std::memory_order_seq_cst);
    }
    void abort() {
        aborted_.store(true, std::memory_order_seq_cst);
    }

    // Progress counters: the processed-frame count is read from frame_index_.
    int64_t get_processed_frames() const {
        return frame_index_.load(std::memory_order_seq_cst);
    }
    int64_t get_total_frames() const {
        return total_frames_.load(std::memory_order_seq_cst);
    }

    // State queries: each call reads a single atomic boolean.
    bool is_paused() const {
        return paused_.load(std::memory_order_seq_cst);
    }
    bool is_aborted() const {
        return aborted_.load(std::memory_order_seq_cst);
    }
    bool is_completed() const {
        return completed_.load(std::memory_order_seq_cst);
    }

private:
    // Internal helpers, all defined out of line. Each returns an int that must
    // not be discarded.
    [[nodiscard]] int
    process_frames(Decoder &decoder, Encoder &encoder, std::unique_ptr<Processor> &processor);

    [[nodiscard]] int write_frame(AVFrame *frame, Encoder &encoder);

    [[nodiscard]] inline int write_raw_packet(
        AVPacket *packet,
        AVFormatContext *ifmt_ctx,
        AVFormatContext *ofmt_ctx,
        int *stream_map
    );

    [[nodiscard]] inline int process_filtering(
        std::unique_ptr<Processor> &processor,
        Encoder &encoder,
        AVFrame *frame,
        AVFrame *proc_frame
    );

    // `prev_frame` is an owning pointer passed by reference, so the callee may
    // replace the frame it holds.
    [[nodiscard]] inline int process_interpolation(
        std::unique_ptr<Processor> &processor,
        Encoder &encoder,
        std::unique_ptr<AVFrame, decltype(&av_frame_deleter)> &prev_frame,
        AVFrame *frame,
        AVFrame *proc_frame
    );

    // Configuration held by the instance.
    HardwareConfig hw_cfg_;
    ProcessorConfig proc_cfg_;
    EncoderConfig enc_cfg_;
    bool benchmark_{false};

    // Progress and control state; atomic so the accessors above are safe to
    // call while another thread updates the values.
    std::atomic<int64_t> frame_index_{0};
    std::atomic<int64_t> total_frames_{0};
    std::atomic<bool> paused_{false};
    std::atomic<bool> aborted_{false};
    std::atomic<bool> completed_{false};
};
// Process a video file using the specified configurations
// Takes the input and output paths, the hardware/processor/encoder
// configurations (all by value), a pointer to a VideoProcessingContext, a log
// level and a benchmark flag. The int result must not be discarded.
// NOTE(review): the meaning of the int result is not visible here — presumably
// 0 on success and non-zero on failure; confirm in the definition.
[[nodiscard]] LIBVIDEO2X_API int process_video(
const std::filesystem::path in_fname,
const std::filesystem::path out_fname,
const HardwareConfig hw_cfg,
const ProcessorConfig proc_cfg,
EncoderConfig enc_cfg,
VideoProcessingContext *proc_ctx,
Libvideo2xLogLevel log_level,
bool benchmark
);
#endif // LIBVIDEO2X_H

View File

@@ -5,7 +5,7 @@
#include "fsutils.h"
enum class Libvideo2xLogLevel {
enum class Video2xLogLevel {
Unknown,
Trace,
Debug,
@@ -16,8 +16,8 @@ enum class Libvideo2xLogLevel {
Off
};
// Logging helpers (declarations only). Each function appears twice: once
// taking or returning Libvideo2xLogLevel and once with Video2xLogLevel.
// Applies the given log level (what it is applied to is not visible here).
void set_log_level(Libvideo2xLogLevel log_level);
void set_log_level(Video2xLogLevel log_level);
// Looks up a log level from its name; the std::optional result is presumably
// empty when the name is not recognized — confirm in the definition.
std::optional<Libvideo2xLogLevel> find_log_level_by_name(const StringType &log_level_name);
std::optional<Video2xLogLevel> find_log_level_by_name(const StringType &log_level_name);
#endif // LOGGING_H

View File

@@ -1,6 +1,7 @@
#ifndef PROCESSOR_H
#define PROCESSOR_H
#include <variant>
#include <vector>
extern "C" {
@@ -9,7 +10,46 @@ extern "C" {
#include <libavutil/buffer.h>
}
#include "libvideo2x.h"
#include "fsutils.h"
// The two processing modes. Enumerator values are the implicit defaults
// (0, 1) spelled out explicitly.
enum class ProcessingMode {
    Filter = 0,
    Interpolate = 1,
};
// Identifies the processor backend. Enumerator values are the implicit
// defaults (0, 1, 2) spelled out explicitly.
enum class ProcessorType {
    Libplacebo = 0,
    RealESRGAN = 1,
    RIFE = 2,
};
// Settings specific to the libplacebo processor.
struct LibplaceboConfig {
// Shader location as a StringType; FilterLibplacebo keeps a
// std::filesystem::path member named shader_path_, so this is presumably
// converted to a path when the filter is created — confirm.
StringType shader_path;
};
// Settings specific to the RealESRGAN processor. The fields correspond by name
// to the tta_mode_ and model_name_ members of FilterRealesrgan.
struct RealESRGANConfig {
// presumably enables test-time augmentation — TODO confirm
bool tta_mode;
StringType model_name;
};
// Settings specific to the RIFE processor. The fields correspond by name to
// the tta_mode_, tta_temporal_mode_, uhd_mode_, num_threads_ and model_name_
// members of InterpolatorRIFE.
struct RIFEConfig {
// presumably test-time augmentation switches — TODO confirm
bool tta_mode;
bool tta_temporal_mode;
// presumably a mode intended for very high resolution input — TODO confirm
bool uhd_mode;
int num_threads;
StringType model_name;
};
// Unified processor configuration
// `processor_type` names the backend and `config` holds one of the
// backend-specific structs (LibplaceboConfig, RealESRGANConfig, RIFEConfig).
// NOTE(review): nothing in this struct ties `processor_type` to the active
// variant alternative — presumably callers keep the two in sync; confirm.
struct ProcessorConfig {
ProcessorType processor_type;
// presumably the requested output dimensions — TODO confirm
int width;
int height;
int scaling_factor;
// presumably a frame rate multiplier — TODO confirm
int frm_rate_mul;
// presumably a scene-detection threshold; range not visible here — confirm
float scn_det_thresh;
std::variant<LibplaceboConfig, RealESRGANConfig, RIFEConfig> config;
};
class Processor {
public: