feat(rife): add support for frame interpolation and RIFE (#1244)

* feat: add RIFE files and processor/interpolator abstractions
* feat: add `rife` as processor option
* feat: add frame interpolation math except first frame
* feat: complete motion interpolation and add scene detection
* feat: improve Vulkan device validation
* fix: fix casting issues and variable names
* refactor: improve error-checking; add abstractions and factories
* refactor: improve readability of the frames processor
* docs: update changelog

Signed-off-by: k4yt3x <i@k4yt3x.com>
This commit is contained in:
K4YT3X
2024-12-01 09:55:56 +00:00
committed by GitHub
parent 2fc89e3883
commit 627f3d84a4
84 changed files with 4914 additions and 615 deletions

View File

@@ -7,9 +7,13 @@ extern "C" {
// Packs three version components into a single integer: `a` in bits 16 and up,
// `b` in bits 8-15, `c` in the low byte (the same layout FFmpeg's
// AV_VERSION_INT uses). Every argument is parenthesized so that expression
// arguments such as `x | y` or `cond ? 1 : 2` expand with the intended
// precedence instead of binding to the neighbouring shift/or operators.
#define CALC_FFMPEG_VERSION(a, b, c) (((a) << 16) | ((b) << 8) | (c))
// NOTE(review): the definitions of these helpers are not part of this header;
// the descriptions below are inferred from the names and signatures only.

// Frame rate lookup for the stream `in_vstream_idx` of `ifmt_ctx`.
AVRational get_video_frame_rate(AVFormatContext *ifmt_ctx, int in_vstream_idx);

// Frame count lookup for the stream `in_vstream_idx` of `ifmt_ctx`.
int64_t get_video_frame_count(AVFormatContext *ifmt_ctx, int in_vstream_idx);

// Picks a pixel format for `encoder`, given the desired `target_pix_fmt`.
enum AVPixelFormat get_encoder_default_pix_fmt(const AVCodec *encoder, AVPixelFormat target_pix_fmt);

// Computes a difference metric between two frames, returned as a float.
float get_frame_diff(AVFrame *frame1, AVFrame *frame2);
#endif // AVUTILS_H

View File

@@ -22,6 +22,7 @@ class Encoder {
AVFormatContext *ifmt_ctx,
AVCodecContext *dec_ctx,
EncoderConfig *encoder_config,
const ProcessorConfig *processor_config,
int in_vstream_idx
);

View File

@@ -1,21 +0,0 @@
#ifndef FILTER_H
#define FILTER_H
#include <vector>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavfilter/avfilter.h>
#include <libavutil/buffer.h>
}
// Abstract base class for filters
class Filter {
public:
virtual ~Filter() = default;
virtual int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) = 0;
virtual int process_frame(AVFrame *in_frame, AVFrame **out_frame) = 0;
virtual int flush(std::vector<AVFrame *> &_) { return 0; }
};
#endif // FILTER_H

View File

@@ -0,0 +1,61 @@
#ifndef FILTER_LIBPLACEBO_H
#define FILTER_LIBPLACEBO_H
#include <filesystem>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
}
#include "processor.h"
// FilterLibplacebo class definition
class FilterLibplacebo : public Filter {
private:
AVFilterGraph *filter_graph_;
AVFilterContext *buffersrc_ctx_;
AVFilterContext *buffersink_ctx_;
uint32_t vk_device_index_;
const std::filesystem::path shader_path_;
int width_;
int height_;
AVRational in_time_base_;
AVRational out_time_base_;
public:
// Constructor
FilterLibplacebo(
uint32_t vk_device_index,
const std::filesystem::path &shader_path,
int width,
int height
);
// Destructor
virtual ~FilterLibplacebo() override;
// Initializes the filter with decoder and encoder contexts
int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) override;
// Processes an input frame and returns the processed frame
int filter(AVFrame *in_frame, AVFrame **out_frame) override;
// Flushes any remaining frames
int flush(std::vector<AVFrame *> &flushed_frames) override;
// Returns the filter's type
ProcessorType get_processor_type() const override { return PROCESSOR_LIBPLACEBO; }
// Returns the filter's output dimensions
void get_output_dimensions(
const ProcessorConfig *processor_config,
int in_width,
int in_height,
int &out_width,
int &out_height
) const override;
};
#endif // FILTER_LIBPLACEBO_H

View File

@@ -0,0 +1,55 @@
#ifndef FILTER_REALESRGAN_H
#define FILTER_REALESRGAN_H
extern "C" {
#include <libavcodec/avcodec.h>
}
#include "char_defs.h"
#include "processor.h"
#include "realesrgan.h"
// FilterRealesrgan class definition
class FilterRealesrgan : public Filter {
private:
RealESRGAN *realesrgan_;
int gpuid_;
bool tta_mode_;
int scaling_factor_;
const StringType model_name_;
AVRational in_time_base_;
AVRational out_time_base_;
AVPixelFormat out_pix_fmt_;
public:
// Constructor
FilterRealesrgan(
int gpuid = 0,
bool tta_mode = false,
int scaling_factor = 4,
const StringType model_name = STR("realesr-animevideov3")
);
// Destructor
virtual ~FilterRealesrgan() override;
// Initializes the filter with decoder and encoder contexts
int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) override;
// Processes an input frame and returns the processed frame
int filter(AVFrame *in_frame, AVFrame **out_frame) override;
// Returns the filter's type
ProcessorType get_processor_type() const override { return PROCESSOR_REALESRGAN; }
// Returns the filter's output dimensions
void get_output_dimensions(
const ProcessorConfig *processor_config,
int in_width,
int in_height,
int &out_width,
int &out_height
) const override;
};
#endif // FILTER_REALESRGAN_H

View File

@@ -0,0 +1,19 @@
#ifndef FRAMES_PROCESSOR_H
#define FRAMES_PROCESSOR_H
#include "decoder.h"
#include "encoder.h"
#include "libvideo2x.h"
#include "processor.h"
// Runs frames through `processor`, using `decoder` and `encoder` together
// with their configurations and the shared processing context `proc_ctx`.
// `benchmark` defaults to false.
// NOTE(review): the definition is not in this header; the exact meaning of
// the return value and of `benchmark` should be confirmed there.
int process_frames(
    const EncoderConfig *encoder_config, const ProcessorConfig *processor_config,
    VideoProcessingContext *proc_ctx,
    Decoder &decoder, Encoder &encoder, Processor *processor,
    bool benchmark = false
);
#endif // FRAMES_PROCESSOR_H

View File

@@ -0,0 +1,64 @@
#ifndef INTERPOLATOR_RIFE_H
#define INTERPOLATOR_RIFE_H
extern "C" {
#include <libavcodec/avcodec.h>
}
#include "char_defs.h"
#include "processor.h"
#include "rife.h"
// InterpolatorRIFE class definition
class InterpolatorRIFE : public Interpolator {
private:
RIFE *rife_;
int gpuid_;
bool tta_mode_;
bool tta_temporal_mode_;
bool uhd_mode_;
int num_threads_;
bool rife_v2_;
bool rife_v4_;
const StringType model_name_;
AVRational in_time_base_;
AVRational out_time_base_;
AVPixelFormat out_pix_fmt_;
public:
// Constructor
InterpolatorRIFE(
int gpuid = 0,
bool tta_mode = false,
bool tta_temporal_mode = false,
bool uhd_mode = false,
int num_threads = 1,
bool rife_v2 = false,
bool rife_v4 = true,
const StringType model_name = STR("rife-v4.6")
);
// Destructor
virtual ~InterpolatorRIFE() override;
// Initializes the interpolator with decoder and encoder contexts
int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) override;
// Processes an input frame and returns the processed frame
int interpolate(AVFrame *prev_frame, AVFrame *in_frame, AVFrame **out_frame, float time_step)
override;
// Returns the interpolator's type
ProcessorType get_processor_type() const override { return PROCESSOR_RIFE; }
// Returns the interpolator's output dimensions
void get_output_dimensions(
const ProcessorConfig *processor_config,
int in_width,
int in_height,
int &out_width,
int &out_height
) const override;
};
#endif // INTERPOLATOR_RIFE_H

View File

@@ -1,49 +0,0 @@
#ifndef LIBPLACEBO_FILTER_H
#define LIBPLACEBO_FILTER_H
#include <filesystem>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
}
#include "filter.h"
// LibplaceboFilter class definition
class LibplaceboFilter : public Filter {
private:
AVFilterGraph *filter_graph;
AVFilterContext *buffersrc_ctx;
AVFilterContext *buffersink_ctx;
uint32_t vk_device_index;
const std::filesystem::path shader_path;
int out_width;
int out_height;
AVRational in_time_base;
AVRational out_time_base;
public:
// Constructor
LibplaceboFilter(
uint32_t vk_device_index,
const std::filesystem::path &shader_path,
int width,
int height
);
// Destructor
virtual ~LibplaceboFilter() override;
// Initializes the filter with decoder and encoder contexts
int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) override;
// Processes an input frame and returns the processed frame
int process_frame(AVFrame *in_frame, AVFrame **out_frame) override;
// Flushes any remaining frames
int flush(std::vector<AVFrame *> &flushed_frames) override;
};
#endif // LIBPLACEBO_FILTER_H

View File

@@ -30,13 +30,17 @@ extern "C" {
extern "C" {
#endif
// Enum to specify filter type
enum FilterType {
FILTER_LIBPLACEBO,
FILTER_REALESRGAN
/* How a processor consumes frames: filtering vs. interpolating.
 * Values are spelled out so the numbering is explicit; they match the
 * implicit numbering. */
enum ProcessingMode {
    PROCESSING_MODE_FILTER = 0,
    PROCESSING_MODE_INTERPOLATE = 1
};
/* Identifies the concrete processor implementation.
 * Values are spelled out so the numbering is explicit; they match the
 * implicit numbering. */
enum ProcessorType {
    PROCESSOR_LIBPLACEBO = 0,
    PROCESSOR_REALESRGAN = 1,
    PROCESSOR_RIFE = 2
};
// Enum to specify log level
enum Libvideo2xLogLevel {
LIBVIDEO2X_LOG_LEVEL_TRACE,
LIBVIDEO2X_LOG_LEVEL_DEBUG,
@@ -47,26 +51,37 @@ enum Libvideo2xLogLevel {
LIBVIDEO2X_LOG_LEVEL_OFF
};
// Configuration for Libplacebo filter
// NOTE(review): field descriptions are inferred from the names; confirm
// against the code that consumes this struct.
struct LibplaceboConfig {
// Requested output width (presumably in pixels)
int out_width;
// Requested output height (presumably in pixels)
int out_height;
// Shader path as a CharType string; the struct only stores the pointer
const CharType *shader_path;
};
// Configuration for RealESRGAN filter
// NOTE(review): field descriptions are inferred from the names; confirm
// against the code that consumes this struct.
struct RealESRGANConfig {
// Enables TTA mode
bool tta_mode;
// Upscaling factor
int scaling_factor;
// Model name as a CharType string; the struct only stores the pointer
const CharType *model_name;
};
// Configuration for the RIFE interpolator
// NOTE(review): field descriptions are inferred from the names; confirm
// against the code that consumes this struct.
struct RIFEConfig {
// Enables TTA mode
bool tta_mode;
// Enables temporal TTA mode
bool tta_temporal_mode;
// Enables UHD mode
bool uhd_mode;
// Number of threads to use
int num_threads;
// Model family flags (presumably selecting the v2 / v4 model format)
bool rife_v2;
bool rife_v4;
// Model name as a CharType string; the struct only stores the pointer
const CharType *model_name;
};
// Unified filter configuration
struct FilterConfig {
enum FilterType filter_type;
// Processor configuration: common fields plus the processor-specific
// settings stored in the `config` union.
// NOTE(review): field descriptions are inferred from the names; confirm
// against the code that consumes this struct.
struct ProcessorConfig {
// Which processor this configuration is for; presumably also selects the
// valid member of `config`
enum ProcessorType processor_type;
// Output width / height
int width;
int height;
// Scaling factor
int scaling_factor;
// Frame rate multiplier (presumably used when interpolating)
int frm_rate_mul;
// Scene detection threshold (presumably used when interpolating)
float scn_det_thresh;
// Per-processor settings; only one member is meaningful at a time
union {
struct LibplaceboConfig libplacebo;
struct RealESRGANConfig realesrgan;
struct RIFEConfig rife;
} config;
};
@@ -140,7 +155,7 @@ LIBVIDEO2X_API int process_video(
bool benchmark,
uint32_t vk_device_index,
enum AVHWDeviceType hw_device_type,
const struct FilterConfig *filter_config,
const struct ProcessorConfig *filter_config,
struct EncoderConfig *encoder_config,
struct VideoProcessingContext *proc_ctx
);

View File

@@ -0,0 +1,45 @@
#ifndef PROCESSOR_H
#define PROCESSOR_H
#include <vector>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavfilter/avfilter.h>
#include <libavutil/buffer.h>
}
#include "libvideo2x.h"
class Processor {
public:
virtual ~Processor() = default;
virtual int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) = 0;
virtual int flush(std::vector<AVFrame *> &_) { return 0; }
virtual ProcessingMode get_processing_mode() const = 0;
virtual ProcessorType get_processor_type() const = 0;
virtual void get_output_dimensions(
const ProcessorConfig *processor_config,
int in_width,
int in_height,
int &width,
int &height
) const = 0;
};
// Base class for processors that work in PROCESSING_MODE_FILTER.
class Filter : public Processor {
  public:
    // Always PROCESSING_MODE_FILTER.
    ProcessingMode get_processing_mode() const override { return PROCESSING_MODE_FILTER; }

    // Processes `in_frame`; the processed frame is handed back via `out_frame`.
    virtual int filter(AVFrame *in_frame, AVFrame **out_frame) = 0;
};
// Base class for processors that work in PROCESSING_MODE_INTERPOLATE.
class Interpolator : public Processor {
  public:
    // Always PROCESSING_MODE_INTERPOLATE.
    ProcessingMode get_processing_mode() const override { return PROCESSING_MODE_INTERPOLATE; }

    // Produces a frame from `prev_frame` and `in_frame`, handed back via
    // `out_frame`.
    // NOTE(review): `time_step` presumably selects the position between the
    // two input frames; confirm against the implementations.
    virtual int interpolate(AVFrame *prev_frame, AVFrame *in_frame, AVFrame **out_frame, float time_step) = 0;
};
#endif // PROCESSOR_H

View File

@@ -0,0 +1,36 @@
#ifndef PROCESSOR_FACTORY_H
#define PROCESSOR_FACTORY_H
#include <functional>
#include <memory>
#include <unordered_map>
#include "processor.h"
// Singleton factory that maps each ProcessorType to a creation function.
class ProcessorFactory {
  public:
    // Creation function: receives the processor configuration and a uint32_t
    // (the Vulkan device index passed to create_processor, presumably) and
    // returns an owning pointer to the new processor.
    using Creator = std::function<std::unique_ptr<Processor>(const ProcessorConfig *, uint32_t)>;

    // Access to the single factory instance.
    static ProcessorFactory &instance();

    // Associates `type` with the creation function `creator`.
    void register_processor(ProcessorType type, Creator creator);

    // Builds a processor instance according to `processor_config`.
    std::unique_ptr<Processor> create_processor(const ProcessorConfig *processor_config, uint32_t vk_device_index) const;

  private:
    // Construction is private so that instance() is the only access point.
    ProcessorFactory() = default;

    // Registered creation functions, keyed by processor type.
    std::unordered_map<ProcessorType, Creator> creators;

    // Registers the default processors on `factory`.
    static void init_default_processors(ProcessorFactory &factory);
};
#endif // PROCESSOR_FACTORY_H

View File

@@ -1,43 +0,0 @@
#ifndef REALSRGAN_FILTER_H
#define REALSRGAN_FILTER_H
extern "C" {
#include <libavcodec/avcodec.h>
}
#include "char_defs.h"
#include "filter.h"
#include "realesrgan.h"
// RealesrganFilter class definition
class RealesrganFilter : public Filter {
private:
RealESRGAN *realesrgan;
int gpuid;
bool tta_mode;
int scaling_factor;
const StringType model_name;
AVRational in_time_base;
AVRational out_time_base;
AVPixelFormat out_pix_fmt;
public:
// Constructor
RealesrganFilter(
int gpuid = 0,
bool tta_mode = false,
int scaling_factor = 4,
const StringType model_name = STR("realesr-animevideov3")
);
// Destructor
virtual ~RealesrganFilter() override;
// Initializes the filter with decoder and encoder contexts
int init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *hw_ctx) override;
// Processes an input frame and returns the processed frame
int process_frame(AVFrame *in_frame, AVFrame **out_frame) override;
};
#endif