mirror of
https://github.com/k4yt3x/video2x.git
synced 2026-02-04 11:24:41 +08:00
feat(rife): add support for frame interpolation and RIFE (#1244)
* feat: add RIFE files and processor/interpolator abstractions
* feat: add `rife` as processor option
* feat: add frame interpolation math except first frame
* feat: complete motion interpolation and add scene detection
* feat: improve Vulkan device validation
* fix: fix casting issues and variable names
* refactor: improve error-checking; add abstractions and factories
* refactor: improve readability of the frames processor
* docs: update changelog

Signed-off-by: k4yt3x <i@k4yt3x.com>
This commit is contained in:
@@ -7,9 +7,13 @@ extern "C" {
|
||||
|
||||
// Packs three version components into one integer: `a` shifted left by 16,
// `b` shifted left by 8, and `c` in the low bits, OR-ed together.
// Each argument and each shift is parenthesized so that arguments which are
// themselves expressions (e.g. `x | y` or `cond ? 1 : 2`) expand correctly.
#define CALC_FFMPEG_VERSION(a, b, c) (((a) << 16) | ((b) << 8) | (c))
|
||||
|
||||
AVRational get_video_frame_rate(AVFormatContext *ifmt_ctx, int in_vstream_idx);
|
||||
|
||||
int64_t get_video_frame_count(AVFormatContext *ifmt_ctx, int in_vstream_idx);
|
||||
|
||||
enum AVPixelFormat
|
||||
get_encoder_default_pix_fmt(const AVCodec *encoder, AVPixelFormat target_pix_fmt);
|
||||
|
||||
float get_frame_diff(AVFrame *frame1, AVFrame *frame2);
|
||||
|
||||
#endif // AVUTILS_H
|
||||
|
||||
@@ -22,6 +22,7 @@ class Encoder {
|
||||
AVFormatContext *ifmt_ctx,
|
||||
AVCodecContext *dec_ctx,
|
||||
EncoderConfig *encoder_config,
|
||||
const ProcessorConfig *processor_config,
|
||||
int in_vstream_idx
|
||||
);
|
||||
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
#ifndef FILTER_H
|
||||
#define FILTER_H
|
||||
|
||||
#include <vector>
|
||||
|
||||
extern "C" {
|
||||
#include <libavcodec/avcodec.h>
|
||||
#include <libavfilter/avfilter.h>
|
||||
#include <libavutil/buffer.h>
|
||||
}
|
||||
|
||||
// Abstract base class for filters
|
||||
class Filter {
|
||||
public:
|
||||
virtual ~Filter() = default;
|
||||
virtual int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) = 0;
|
||||
virtual int process_frame(AVFrame *in_frame, AVFrame **out_frame) = 0;
|
||||
virtual int flush(std::vector<AVFrame *> &_) { return 0; }
|
||||
};
|
||||
|
||||
#endif // FILTER_H
|
||||
61
include/libvideo2x/filter_libplacebo.h
Normal file
61
include/libvideo2x/filter_libplacebo.h
Normal file
@@ -0,0 +1,61 @@
|
||||
#ifndef FILTER_LIBPLACEBO_H
|
||||
#define FILTER_LIBPLACEBO_H
|
||||
|
||||
#include <filesystem>
|
||||
|
||||
extern "C" {
|
||||
#include <libavcodec/avcodec.h>
|
||||
#include <libavfilter/buffersink.h>
|
||||
#include <libavfilter/buffersrc.h>
|
||||
}
|
||||
|
||||
#include "processor.h"
|
||||
|
||||
// FilterLibplacebo: a Filter subclass that reports PROCESSOR_LIBPLACEBO as its
// processor type. It holds an FFmpeg filter graph with buffer source/sink
// contexts plus the values passed to its constructor. All member functions
// except get_processor_type() are only declared here; their definitions live
// outside this header.
|
||||
class FilterLibplacebo : public Filter {
|
||||
private:
|
||||
// FFmpeg filter graph and its source/sink endpoints.
AVFilterGraph *filter_graph_;
|
||||
AVFilterContext *buffersrc_ctx_;
|
||||
AVFilterContext *buffersink_ctx_;
|
||||
// Device index (presumably a Vulkan device, going by the name — confirm).
uint32_t vk_device_index_;
|
||||
// Path to the shader file; const, so fixed at construction.
const std::filesystem::path shader_path_;
|
||||
// NOTE(review): presumably the requested output width/height — confirm in the .cpp.
int width_;
|
||||
int height_;
|
||||
// Input and output time bases.
AVRational in_time_base_;
|
||||
AVRational out_time_base_;
|
||||
|
||||
public:
|
||||
// Constructor: takes the device index, the shader path, and a width/height pair.
// None of the parameters has a default value.
|
||||
FilterLibplacebo(
|
||||
uint32_t vk_device_index,
|
||||
const std::filesystem::path &shader_path,
|
||||
int width,
|
||||
int height
|
||||
);
|
||||
|
||||
// Destructor
|
||||
virtual ~FilterLibplacebo() override;
|
||||
|
||||
// Initializes the filter with decoder and encoder contexts and a hardware
// context buffer reference. Overrides Processor::init.
|
||||
int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) override;
|
||||
|
||||
// Processes an input frame; the processed frame is handed back through out_frame.
// NOTE(review): the meaning of the int return value is not visible here — confirm.
|
||||
int filter(AVFrame *in_frame, AVFrame **out_frame) override;
|
||||
|
||||
// Flushes any remaining frames into flushed_frames. Overrides the base-class
// flush(), whose default implementation just returns 0.
|
||||
int flush(std::vector<AVFrame *> &flushed_frames) override;
|
||||
|
||||
// Returns the filter's type (always PROCESSOR_LIBPLACEBO)
|
||||
ProcessorType get_processor_type() const override { return PROCESSOR_LIBPLACEBO; }
|
||||
|
||||
// Returns the filter's output dimensions through the out_width/out_height
// reference parameters, given the processor configuration and input size.
|
||||
void get_output_dimensions(
|
||||
const ProcessorConfig *processor_config,
|
||||
int in_width,
|
||||
int in_height,
|
||||
int &out_width,
|
||||
int &out_height
|
||||
) const override;
|
||||
};
|
||||
|
||||
#endif // FILTER_LIBPLACEBO_H
|
||||
55
include/libvideo2x/filter_realesrgan.h
Normal file
55
include/libvideo2x/filter_realesrgan.h
Normal file
@@ -0,0 +1,55 @@
|
||||
#ifndef FILTER_REALESRGAN_H
|
||||
#define FILTER_REALESRGAN_H
|
||||
|
||||
extern "C" {
|
||||
#include <libavcodec/avcodec.h>
|
||||
}
|
||||
|
||||
#include "char_defs.h"
|
||||
#include "processor.h"
|
||||
#include "realesrgan.h"
|
||||
|
||||
// FilterRealesrgan: a Filter subclass that reports PROCESSOR_REALESRGAN as its
// processor type. It holds a pointer to a RealESRGAN object together with the
// settings passed to its constructor. All member functions except
// get_processor_type() are only declared here.
|
||||
class FilterRealesrgan : public Filter {
|
||||
private:
|
||||
// Raw pointer to the RealESRGAN object.
// NOTE(review): ownership is not visible in this header — confirm in the .cpp.
RealESRGAN *realesrgan_;
|
||||
int gpuid_;
|
||||
bool tta_mode_;
|
||||
int scaling_factor_;
|
||||
// Model name; const, so fixed at construction.
const StringType model_name_;
|
||||
// Input and output time bases.
AVRational in_time_base_;
|
||||
AVRational out_time_base_;
|
||||
// Pixel format of output frames.
AVPixelFormat out_pix_fmt_;
|
||||
|
||||
public:
|
||||
// Constructor. Every parameter has a default: gpuid 0, TTA mode off,
// scaling factor 4, model name "realesr-animevideov3".
|
||||
FilterRealesrgan(
|
||||
int gpuid = 0,
|
||||
bool tta_mode = false,
|
||||
int scaling_factor = 4,
|
||||
const StringType model_name = STR("realesr-animevideov3")
|
||||
);
|
||||
|
||||
// Destructor
|
||||
virtual ~FilterRealesrgan() override;
|
||||
|
||||
// Initializes the filter with decoder and encoder contexts and a hardware
// context buffer reference. Overrides Processor::init.
|
||||
int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) override;
|
||||
|
||||
// Processes an input frame; the processed frame is handed back through out_frame.
// NOTE(review): the meaning of the int return value is not visible here — confirm.
|
||||
int filter(AVFrame *in_frame, AVFrame **out_frame) override;
|
||||
|
||||
// Returns the filter's type (always PROCESSOR_REALESRGAN)
|
||||
ProcessorType get_processor_type() const override { return PROCESSOR_REALESRGAN; }
|
||||
|
||||
// Returns the filter's output dimensions through the out_width/out_height
// reference parameters, given the processor configuration and input size.
|
||||
void get_output_dimensions(
|
||||
const ProcessorConfig *processor_config,
|
||||
int in_width,
|
||||
int in_height,
|
||||
int &out_width,
|
||||
int &out_height
|
||||
) const override;
|
||||
};
|
||||
|
||||
#endif // FILTER_REALESRGAN_H
|
||||
19
include/libvideo2x/frames_processor.h
Normal file
19
include/libvideo2x/frames_processor.h
Normal file
@@ -0,0 +1,19 @@
|
||||
#ifndef FRAMES_PROCESSOR_H
|
||||
#define FRAMES_PROCESSOR_H
|
||||
|
||||
#include "decoder.h"
|
||||
#include "encoder.h"
|
||||
#include "libvideo2x.h"
|
||||
#include "processor.h"
|
||||
|
||||
// Declaration of the frame-processing entry point; the definition is not in
// this header. Takes the encoder and processor configurations, the shared
// VideoProcessingContext, a Decoder and an Encoder by reference, and the
// Processor to run. `benchmark` defaults to false.
// NOTE(review): presumably returns 0 on success and a negative error code on
// failure, and skips writing output when `benchmark` is set — confirm against
// the definition.
int process_frames(
|
||||
const EncoderConfig *encoder_config,
|
||||
const ProcessorConfig *processor_config,
|
||||
VideoProcessingContext *proc_ctx,
|
||||
Decoder &decoder,
|
||||
Encoder &encoder,
|
||||
Processor *processor,
|
||||
bool benchmark = false
|
||||
);
|
||||
|
||||
#endif // FRAMES_PROCESSOR_H
|
||||
64
include/libvideo2x/interpolator_rife.h
Normal file
64
include/libvideo2x/interpolator_rife.h
Normal file
@@ -0,0 +1,64 @@
|
||||
#ifndef INTERPOLATOR_RIFE_H
|
||||
#define INTERPOLATOR_RIFE_H
|
||||
|
||||
extern "C" {
|
||||
#include <libavcodec/avcodec.h>
|
||||
}
|
||||
|
||||
#include "char_defs.h"
|
||||
#include "processor.h"
|
||||
#include "rife.h"
|
||||
|
||||
// InterpolatorRIFE: an Interpolator subclass that reports PROCESSOR_RIFE as its
// processor type. It holds a pointer to a RIFE object together with the
// settings passed to its constructor. All member functions except
// get_processor_type() are only declared here.
|
||||
class InterpolatorRIFE : public Interpolator {
|
||||
private:
|
||||
// Raw pointer to the RIFE object.
// NOTE(review): ownership is not visible in this header — confirm in the .cpp.
RIFE *rife_;
|
||||
int gpuid_;
|
||||
bool tta_mode_;
|
||||
bool tta_temporal_mode_;
|
||||
bool uhd_mode_;
|
||||
int num_threads_;
|
||||
// NOTE(review): rife_v2_/rife_v4_ presumably select the model generation — confirm.
bool rife_v2_;
|
||||
bool rife_v4_;
|
||||
// Model name; const, so fixed at construction.
const StringType model_name_;
|
||||
// Input and output time bases.
AVRational in_time_base_;
|
||||
AVRational out_time_base_;
|
||||
// Pixel format of output frames.
AVPixelFormat out_pix_fmt_;
|
||||
|
||||
public:
|
||||
// Constructor. Every parameter has a default: gpuid 0, TTA / temporal TTA /
// UHD modes off, 1 thread, rife_v2 false, rife_v4 true, model name "rife-v4.6".
|
||||
InterpolatorRIFE(
|
||||
int gpuid = 0,
|
||||
bool tta_mode = false,
|
||||
bool tta_temporal_mode = false,
|
||||
bool uhd_mode = false,
|
||||
int num_threads = 1,
|
||||
bool rife_v2 = false,
|
||||
bool rife_v4 = true,
|
||||
const StringType model_name = STR("rife-v4.6")
|
||||
);
|
||||
|
||||
// Destructor
|
||||
virtual ~InterpolatorRIFE() override;
|
||||
|
||||
// Initializes the interpolator with decoder and encoder contexts and a
// hardware context buffer reference. Overrides Processor::init.
|
||||
int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) override;
|
||||
|
||||
// Produces an output frame from two input frames (prev_frame and in_frame);
// the result is handed back through out_frame.
// NOTE(review): time_step presumably selects the temporal position between the
// two frames, and the int return is presumably an error code — confirm.
|
||||
int interpolate(AVFrame *prev_frame, AVFrame *in_frame, AVFrame **out_frame, float time_step)
|
||||
override;
|
||||
|
||||
// Returns the interpolator's type (always PROCESSOR_RIFE)
|
||||
ProcessorType get_processor_type() const override { return PROCESSOR_RIFE; }
|
||||
|
||||
// Returns the interpolator's output dimensions through the out_width/out_height
// reference parameters, given the processor configuration and input size.
|
||||
void get_output_dimensions(
|
||||
const ProcessorConfig *processor_config,
|
||||
int in_width,
|
||||
int in_height,
|
||||
int &out_width,
|
||||
int &out_height
|
||||
) const override;
|
||||
};
|
||||
|
||||
#endif // INTERPOLATOR_RIFE_H
|
||||
@@ -1,49 +0,0 @@
|
||||
#ifndef LIBPLACEBO_FILTER_H
|
||||
#define LIBPLACEBO_FILTER_H
|
||||
|
||||
#include <filesystem>
|
||||
|
||||
extern "C" {
|
||||
#include <libavcodec/avcodec.h>
|
||||
#include <libavfilter/buffersink.h>
|
||||
#include <libavfilter/buffersrc.h>
|
||||
}
|
||||
|
||||
#include "filter.h"
|
||||
|
||||
// LibplaceboFilter class definition
|
||||
class LibplaceboFilter : public Filter {
|
||||
private:
|
||||
AVFilterGraph *filter_graph;
|
||||
AVFilterContext *buffersrc_ctx;
|
||||
AVFilterContext *buffersink_ctx;
|
||||
uint32_t vk_device_index;
|
||||
const std::filesystem::path shader_path;
|
||||
int out_width;
|
||||
int out_height;
|
||||
AVRational in_time_base;
|
||||
AVRational out_time_base;
|
||||
|
||||
public:
|
||||
// Constructor
|
||||
LibplaceboFilter(
|
||||
uint32_t vk_device_index,
|
||||
const std::filesystem::path &shader_path,
|
||||
int width,
|
||||
int height
|
||||
);
|
||||
|
||||
// Destructor
|
||||
virtual ~LibplaceboFilter() override;
|
||||
|
||||
// Initializes the filter with decoder and encoder contexts
|
||||
int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) override;
|
||||
|
||||
// Processes an input frame and returns the processed frame
|
||||
int process_frame(AVFrame *in_frame, AVFrame **out_frame) override;
|
||||
|
||||
// Flushes any remaining frames
|
||||
int flush(std::vector<AVFrame *> &flushed_frames) override;
|
||||
};
|
||||
|
||||
#endif // LIBPLACEBO_FILTER_H
|
||||
@@ -30,13 +30,17 @@ extern "C" {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Enum to specify filter type
|
||||
enum FilterType {
|
||||
FILTER_LIBPLACEBO,
|
||||
FILTER_REALESRGAN
|
||||
enum ProcessingMode {
|
||||
PROCESSING_MODE_FILTER,
|
||||
PROCESSING_MODE_INTERPOLATE,
|
||||
};
|
||||
|
||||
enum ProcessorType {
|
||||
PROCESSOR_LIBPLACEBO,
|
||||
PROCESSOR_REALESRGAN,
|
||||
PROCESSOR_RIFE,
|
||||
};
|
||||
|
||||
// Enum to specify log level
|
||||
enum Libvideo2xLogLevel {
|
||||
LIBVIDEO2X_LOG_LEVEL_TRACE,
|
||||
LIBVIDEO2X_LOG_LEVEL_DEBUG,
|
||||
@@ -47,26 +51,37 @@ enum Libvideo2xLogLevel {
|
||||
LIBVIDEO2X_LOG_LEVEL_OFF
|
||||
};
|
||||
|
||||
// Configuration for Libplacebo filter
|
||||
struct LibplaceboConfig {
|
||||
int out_width;
|
||||
int out_height;
|
||||
const CharType *shader_path;
|
||||
};
|
||||
|
||||
// Configuration for RealESRGAN filter
|
||||
struct RealESRGANConfig {
|
||||
bool tta_mode;
|
||||
int scaling_factor;
|
||||
const CharType *model_name;
|
||||
};
|
||||
|
||||
// Configuration for RIFE interpolator
struct RIFEConfig {
|
||||
bool tta_mode;
|
||||
bool tta_temporal_mode;
|
||||
bool uhd_mode;
|
||||
int num_threads;
|
||||
// NOTE(review): rife_v2/rife_v4 presumably select the model generation — confirm.
bool rife_v2;
|
||||
bool rife_v4;
|
||||
// Model name as a CharType string; not owned by this struct as far as this
// header shows (plain pointer, no destructor).
const CharType *model_name;
|
||||
};
|
||||
|
||||
// Unified filter configuration
|
||||
struct FilterConfig {
|
||||
enum FilterType filter_type;
|
||||
struct ProcessorConfig {
|
||||
enum ProcessorType processor_type;
|
||||
int width;
|
||||
int height;
|
||||
int scaling_factor;
|
||||
int frm_rate_mul;
|
||||
float scn_det_thresh;
|
||||
union {
|
||||
struct LibplaceboConfig libplacebo;
|
||||
struct RealESRGANConfig realesrgan;
|
||||
struct RIFEConfig rife;
|
||||
} config;
|
||||
};
|
||||
|
||||
@@ -140,7 +155,7 @@ LIBVIDEO2X_API int process_video(
|
||||
bool benchmark,
|
||||
uint32_t vk_device_index,
|
||||
enum AVHWDeviceType hw_device_type,
|
||||
const struct FilterConfig *filter_config,
|
||||
const struct ProcessorConfig *filter_config,
|
||||
struct EncoderConfig *encoder_config,
|
||||
struct VideoProcessingContext *proc_ctx
|
||||
);
|
||||
|
||||
45
include/libvideo2x/processor.h
Normal file
45
include/libvideo2x/processor.h
Normal file
@@ -0,0 +1,45 @@
|
||||
#ifndef PROCESSOR_H
|
||||
#define PROCESSOR_H
|
||||
|
||||
#include <vector>
|
||||
|
||||
extern "C" {
|
||||
#include <libavcodec/avcodec.h>
|
||||
#include <libavfilter/avfilter.h>
|
||||
#include <libavutil/buffer.h>
|
||||
}
|
||||
|
||||
#include "libvideo2x.h"
|
||||
|
||||
// Abstract base class for all frame processors. Everything except the
// destructor and flush() is pure virtual and must be supplied by subclasses.
class Processor {
|
||||
public:
|
||||
// Virtual destructor so subclasses can be destroyed through a Processor pointer.
virtual ~Processor() = default;
|
||||
// Initializes the processor with decoder and encoder contexts and a hardware
// context buffer reference.
// NOTE(review): return-value convention is not visible here — confirm.
virtual int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) = 0;
|
||||
// Default flush: ignores its argument and returns 0. Subclasses may override.
virtual int flush(std::vector<AVFrame *> &_) { return 0; }
|
||||
// Reports the processing mode (a ProcessingMode value).
virtual ProcessingMode get_processing_mode() const = 0;
|
||||
// Reports the concrete processor kind (a ProcessorType value).
virtual ProcessorType get_processor_type() const = 0;
|
||||
// Writes the output dimensions into the `width`/`height` reference parameters,
// given the processor configuration and the input width/height.
virtual void get_output_dimensions(
|
||||
const ProcessorConfig *processor_config,
|
||||
int in_width,
|
||||
int in_height,
|
||||
int &width,
|
||||
int &height
|
||||
) const = 0;
|
||||
};
|
||||
|
||||
// Abstract base class for filters: a Processor whose processing mode is fixed
// to PROCESSING_MODE_FILTER and which adds the pure-virtual filter() operation.
|
||||
class Filter : public Processor {
|
||||
public:
|
||||
// Always reports PROCESSING_MODE_FILTER.
ProcessingMode get_processing_mode() const override { return PROCESSING_MODE_FILTER; }
|
||||
// Takes one input frame and hands a frame back through out_frame.
// NOTE(review): return-value convention is not visible here — confirm.
virtual int filter(AVFrame *in_frame, AVFrame **out_frame) = 0;
|
||||
};
|
||||
|
||||
// Abstract base class for interpolators: a Processor whose processing mode is
// fixed to PROCESSING_MODE_INTERPOLATE and which adds the pure-virtual
// interpolate() operation.
|
||||
class Interpolator : public Processor {
|
||||
public:
|
||||
// Always reports PROCESSING_MODE_INTERPOLATE.
ProcessingMode get_processing_mode() const override { return PROCESSING_MODE_INTERPOLATE; }
|
||||
// Takes two input frames (prev_frame, in_frame) plus a time_step and hands a
// frame back through out_frame.
// NOTE(review): time_step presumably selects the temporal position between the
// two frames; return-value convention is not visible here — confirm.
virtual int
|
||||
interpolate(AVFrame *prev_frame, AVFrame *in_frame, AVFrame **out_frame, float time_step) = 0;
|
||||
};
|
||||
|
||||
#endif // PROCESSOR_H
|
||||
36
include/libvideo2x/processor_factory.h
Normal file
36
include/libvideo2x/processor_factory.h
Normal file
@@ -0,0 +1,36 @@
|
||||
#ifndef PROCESSOR_FACTORY_H
|
||||
#define PROCESSOR_FACTORY_H
|
||||
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "processor.h"
|
||||
|
||||
// Processor Factory Class: maps ProcessorType values to creation functions and
// builds Processor instances from a ProcessorConfig. Only declarations are
// given here; the member definitions live outside this header.
|
||||
class ProcessorFactory {
|
||||
public:
|
||||
// Signature of a creation function: receives the processor configuration and
// a uint32_t (the same pair create_processor() receives, where the second
// argument is named vk_device_index) and returns an owning pointer.
using Creator = std::function<std::unique_ptr<Processor>(const ProcessorConfig *, uint32_t)>;
|
||||
|
||||
// Singleton instance accessor
|
||||
static ProcessorFactory &instance();
|
||||
|
||||
// Register a processor type with its creation function
// NOTE(review): behavior when `type` is already registered is not visible here.
|
||||
void register_processor(ProcessorType type, Creator creator);
|
||||
|
||||
// Create a processor instance based on configuration
// NOTE(review): presumably returns a null pointer when no creator is registered
// for the configured type or creation fails — confirm against the definition.
|
||||
std::unique_ptr<Processor>
|
||||
create_processor(const ProcessorConfig *processor_config, uint32_t vk_device_index) const;
|
||||
|
||||
private:
|
||||
// Private constructor for Singleton
|
||||
ProcessorFactory() = default;
|
||||
|
||||
// Map of processor types to their creation functions
|
||||
std::unordered_map<ProcessorType, Creator> creators;
|
||||
|
||||
// Static initializer for default processors
// NOTE(review): presumably invoked from instance() on first use — confirm.
|
||||
static void init_default_processors(ProcessorFactory &factory);
|
||||
};
|
||||
|
||||
#endif // PROCESSOR_FACTORY_H
|
||||
@@ -1,43 +0,0 @@
|
||||
#ifndef REALSRGAN_FILTER_H
|
||||
#define REALSRGAN_FILTER_H
|
||||
|
||||
extern "C" {
|
||||
#include <libavcodec/avcodec.h>
|
||||
}
|
||||
|
||||
#include "char_defs.h"
|
||||
#include "filter.h"
|
||||
#include "realesrgan.h"
|
||||
|
||||
// RealesrganFilter class definition
|
||||
class RealesrganFilter : public Filter {
|
||||
private:
|
||||
RealESRGAN *realesrgan;
|
||||
int gpuid;
|
||||
bool tta_mode;
|
||||
int scaling_factor;
|
||||
const StringType model_name;
|
||||
AVRational in_time_base;
|
||||
AVRational out_time_base;
|
||||
AVPixelFormat out_pix_fmt;
|
||||
|
||||
public:
|
||||
// Constructor
|
||||
RealesrganFilter(
|
||||
int gpuid = 0,
|
||||
bool tta_mode = false,
|
||||
int scaling_factor = 4,
|
||||
const StringType model_name = STR("realesr-animevideov3")
|
||||
);
|
||||
|
||||
// Destructor
|
||||
virtual ~RealesrganFilter() override;
|
||||
|
||||
// Initializes the filter with decoder and encoder contexts
|
||||
int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) override;
|
||||
|
||||
// Processes an input frame and returns the processed frame
|
||||
int process_frame(AVFrame *in_frame, AVFrame **out_frame) override;
|
||||
};
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user