mirror of
https://github.com/k4yt3x/video2x.git
synced 2026-02-13 08:34:44 +08:00
feat(rife): add support for frame interpolation and RIFE (#1244)
* feat: add RIFE files and processor/interpolator abstractions * feat: add `rife` as processor option * feat: add frame interpolation math except first frame * feat: complete motion interpolation and add scene detection * feat: improve Vulkan device validation * fix: fix casting issues and variable names * refactor: improve error-checking; add abstractions and factories * refactor: improve readability of the frames processor * docs: update changelog Signed-off-by: k4yt3x <i@k4yt3x.com>
This commit is contained in:
@@ -1,5 +1,7 @@
|
||||
#include "avutils.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
extern "C" {
|
||||
#include <libavcodec/avcodec.h>
|
||||
#include <libavutil/pixdesc.h>
|
||||
@@ -7,6 +9,25 @@ extern "C" {
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include "conversions.h"
|
||||
|
||||
// Determine the frame rate of the video stream at index `in_vstream_idx`.
//
// Candidates are tried in order: the stream's `avg_frame_rate`, its `r_frame_rate`, the value
// returned by `av_guess_frame_rate`, and finally the stream's `time_base`. The first candidate
// whose numerator and denominator are both positive is returned. If no candidate qualifies, a
// warning is logged and the last candidate tried is returned unchanged, so callers still need to
// validate the result before dividing by it.
AVRational get_video_frame_rate(AVFormatContext *ifmt_ctx, int in_vstream_idx) {
    // A rational is only usable as a rate when both terms are positive. Testing for exactly 0/0
    // is not enough: values such as 0/1 or N/0 are just as unusable and must also trigger the
    // next fallback.
    const auto is_usable = [](const AVRational &rate) { return rate.num > 0 && rate.den > 0; };

    AVStream *stream = ifmt_ctx->streams[in_vstream_idx];

    AVRational frame_rate = stream->avg_frame_rate;
    if (!is_usable(frame_rate)) {
        frame_rate = stream->r_frame_rate;
    }
    if (!is_usable(frame_rate)) {
        frame_rate = av_guess_frame_rate(ifmt_ctx, stream, nullptr);
    }
    if (!is_usable(frame_rate)) {
        // NOTE(review): time_base is used here as a rate without being inverted; this is kept
        // as the last-resort fallback for compatibility - confirm this is intended
        frame_rate = stream->time_base;
    }
    if (!is_usable(frame_rate)) {
        spdlog::warn("Unable to determine the video's frame rate");
    }
    return frame_rate;
}
|
||||
|
||||
int64_t get_video_frame_count(AVFormatContext *ifmt_ctx, int in_vstream_idx) {
|
||||
// Use the 'nb_frames' field if it is available
|
||||
int64_t nb_frames = ifmt_ctx->streams[in_vstream_idx]->nb_frames;
|
||||
@@ -31,19 +52,7 @@ int64_t get_video_frame_count(AVFormatContext *ifmt_ctx, int in_vstream_idx) {
|
||||
spdlog::debug("Video duration: {}s", duration_secs);
|
||||
|
||||
// Calculate average FPS
|
||||
double fps = av_q2d(ifmt_ctx->streams[in_vstream_idx]->avg_frame_rate);
|
||||
if (fps <= 0) {
|
||||
spdlog::debug("Unable to read the average frame rate from 'avg_frame_rate'");
|
||||
fps = av_q2d(ifmt_ctx->streams[in_vstream_idx]->r_frame_rate);
|
||||
}
|
||||
if (fps <= 0) {
|
||||
spdlog::debug("Unable to read the average frame rate from 'r_frame_rate'");
|
||||
fps = av_q2d(av_guess_frame_rate(ifmt_ctx, ifmt_ctx->streams[in_vstream_idx], nullptr));
|
||||
}
|
||||
if (fps <= 0) {
|
||||
spdlog::debug("Unable to estimate the average frame rate with 'av_guess_frame_rate'");
|
||||
fps = av_q2d(ifmt_ctx->streams[in_vstream_idx]->time_base);
|
||||
}
|
||||
double fps = av_q2d(get_video_frame_rate(ifmt_ctx, in_vstream_idx));
|
||||
if (fps <= 0) {
|
||||
spdlog::warn("Unable to estimate the video's average frame rate");
|
||||
return -1;
|
||||
@@ -122,3 +131,58 @@ get_encoder_default_pix_fmt(const AVCodec *encoder, AVPixelFormat target_pix_fmt
|
||||
|
||||
return best_pix_fmt;
|
||||
}
|
||||
|
||||
// Compute how different two frames are, as a percentage in the range [0, 100].
//
// Both frames are converted to packed RGB24 and the absolute per-byte differences are summed and
// divided by the largest possible sum (255 per byte).
//
// Returns -1.0f if either frame is null, the dimensions differ or are not positive, or the pixel
// format conversion fails.
float get_frame_diff(AVFrame *frame1, AVFrame *frame2) {
    if (!frame1 || !frame2) {
        spdlog::error("Invalid frame(s) provided for comparison");
        return -1.0f;
    }

    if (frame1->width != frame2->width || frame1->height != frame2->height) {
        spdlog::error("Frame dimensions do not match");
        return -1.0f;
    }

    const int width = frame1->width;
    const int height = frame1->height;

    // An empty frame has no pixels to compare and would otherwise produce a 0/0 division below
    if (width <= 0 || height <= 0) {
        spdlog::error("Invalid frame dimensions provided for comparison");
        return -1.0f;
    }

    // Convert both frames to the target pixel format using the provided function
    const AVPixelFormat target_pix_fmt = AV_PIX_FMT_RGB24;
    AVFrame *rgb_frame1 = convert_avframe_pix_fmt(frame1, target_pix_fmt);
    AVFrame *rgb_frame2 = convert_avframe_pix_fmt(frame2, target_pix_fmt);

    if (!rgb_frame1 || !rgb_frame2) {
        spdlog::error("Failed to convert frames to target pixel format");
        if (rgb_frame1) {
            av_frame_free(&rgb_frame1);
        }
        if (rgb_frame2) {
            av_frame_free(&rgb_frame2);
        }
        return -1.0f;
    }

    // RGB24 stores three bytes per pixel
    const int row_bytes = width * 3;

    // The largest possible sum is known up front; no need to accumulate it inside the loop
    const uint64_t max_diff =
        static_cast<uint64_t>(height) * static_cast<uint64_t>(row_bytes) * 255u;

    // Calculate the difference byte by byte
    uint64_t sum_diff = 0;
    for (int y = 0; y < height; y++) {
        const uint8_t *row1 = rgb_frame1->data[0] + static_cast<int64_t>(y) * rgb_frame1->linesize[0];
        const uint8_t *row2 = rgb_frame2->data[0] + static_cast<int64_t>(y) * rgb_frame2->linesize[0];
        for (int x = 0; x < row_bytes; x++) {
            const int delta = static_cast<int>(row1[x]) - static_cast<int>(row2[x]);
            sum_diff += static_cast<uint64_t>(delta < 0 ? -delta : delta);
        }
    }

    // Clean up
    av_frame_free(&rgb_frame1);
    av_frame_free(&rgb_frame2);

    // Calculate percentage difference
    return (static_cast<float>(sum_diff) / static_cast<float>(max_diff)) * 100.0f;
}
|
||||
|
||||
@@ -33,6 +33,7 @@ int Encoder::init(
|
||||
AVFormatContext *ifmt_ctx,
|
||||
AVCodecContext *dec_ctx,
|
||||
EncoderConfig *encoder_config,
|
||||
const ProcessorConfig *processor_config,
|
||||
int in_vstream_idx
|
||||
) {
|
||||
int ret;
|
||||
@@ -121,18 +122,26 @@ int Encoder::init(
|
||||
spdlog::debug("Auto-selected pixel format: {}", av_get_pix_fmt_name(enc_ctx_->pix_fmt));
|
||||
}
|
||||
|
||||
// Set the output video's time base
|
||||
if (dec_ctx->time_base.num > 0 && dec_ctx->time_base.den > 0) {
|
||||
enc_ctx_->time_base = dec_ctx->time_base;
|
||||
if (processor_config->frm_rate_mul > 0) {
|
||||
AVRational in_frame_rate = get_video_frame_rate(ifmt_ctx, in_vstream_idx);
|
||||
enc_ctx_->framerate = {
|
||||
in_frame_rate.num * processor_config->frm_rate_mul, in_frame_rate.den
|
||||
};
|
||||
enc_ctx_->time_base = av_inv_q(enc_ctx_->framerate);
|
||||
} else {
|
||||
enc_ctx_->time_base = av_inv_q(av_guess_frame_rate(ifmt_ctx, out_vstream, nullptr));
|
||||
}
|
||||
// Set the output video's time base
|
||||
if (dec_ctx->time_base.num > 0 && dec_ctx->time_base.den > 0) {
|
||||
enc_ctx_->time_base = dec_ctx->time_base;
|
||||
} else {
|
||||
enc_ctx_->time_base = av_inv_q(av_guess_frame_rate(ifmt_ctx, out_vstream, nullptr));
|
||||
}
|
||||
|
||||
// Set the output video's frame rate
|
||||
if (dec_ctx->framerate.num > 0 && dec_ctx->framerate.den > 0) {
|
||||
enc_ctx_->framerate = dec_ctx->framerate;
|
||||
} else {
|
||||
enc_ctx_->framerate = av_guess_frame_rate(ifmt_ctx, out_vstream, nullptr);
|
||||
// Set the output video's frame rate
|
||||
if (dec_ctx->framerate.num > 0 && dec_ctx->framerate.den > 0) {
|
||||
enc_ctx_->framerate = dec_ctx->framerate;
|
||||
} else {
|
||||
enc_ctx_->framerate = av_guess_frame_rate(ifmt_ctx, out_vstream, nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
// Set extra AVOptions
|
||||
@@ -230,6 +239,13 @@ int Encoder::init(
|
||||
}
|
||||
}
|
||||
|
||||
// Write the output file header
|
||||
ret = avformat_write_header(ofmt_ctx_, nullptr);
|
||||
if (ret < 0) {
|
||||
spdlog::error("Error writing output file header");
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#include "libplacebo_filter.h"
|
||||
#include "filter_libplacebo.h"
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
@@ -8,81 +8,81 @@
|
||||
#include "fsutils.h"
|
||||
#include "libplacebo.h"
|
||||
|
||||
LibplaceboFilter::LibplaceboFilter(
|
||||
FilterLibplacebo::FilterLibplacebo(
|
||||
uint32_t vk_device_index,
|
||||
const std::filesystem::path &shader_path,
|
||||
int out_width,
|
||||
int out_height
|
||||
int width,
|
||||
int height
|
||||
)
|
||||
: filter_graph(nullptr),
|
||||
buffersrc_ctx(nullptr),
|
||||
buffersink_ctx(nullptr),
|
||||
vk_device_index(vk_device_index),
|
||||
shader_path(std::move(shader_path)),
|
||||
out_width(out_width),
|
||||
out_height(out_height) {}
|
||||
: filter_graph_(nullptr),
|
||||
buffersrc_ctx_(nullptr),
|
||||
buffersink_ctx_(nullptr),
|
||||
vk_device_index_(vk_device_index),
|
||||
shader_path_(std::move(shader_path)),
|
||||
width_(width),
|
||||
height_(height) {}
|
||||
|
||||
LibplaceboFilter::~LibplaceboFilter() {
|
||||
if (buffersrc_ctx) {
|
||||
avfilter_free(buffersrc_ctx);
|
||||
buffersrc_ctx = nullptr;
|
||||
FilterLibplacebo::~FilterLibplacebo() {
|
||||
if (buffersrc_ctx_) {
|
||||
avfilter_free(buffersrc_ctx_);
|
||||
buffersrc_ctx_ = nullptr;
|
||||
}
|
||||
if (buffersink_ctx) {
|
||||
avfilter_free(buffersink_ctx);
|
||||
buffersink_ctx = nullptr;
|
||||
if (buffersink_ctx_) {
|
||||
avfilter_free(buffersink_ctx_);
|
||||
buffersink_ctx_ = nullptr;
|
||||
}
|
||||
if (filter_graph) {
|
||||
avfilter_graph_free(&filter_graph);
|
||||
filter_graph = nullptr;
|
||||
if (filter_graph_) {
|
||||
avfilter_graph_free(&filter_graph_);
|
||||
filter_graph_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
int LibplaceboFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *_) {
|
||||
int FilterLibplacebo::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *) {
|
||||
// Construct the shader path
|
||||
std::filesystem::path shader_full_path;
|
||||
if (filepath_is_readable(shader_path)) {
|
||||
if (filepath_is_readable(shader_path_)) {
|
||||
// If the shader path is directly readable, use it
|
||||
shader_full_path = shader_path;
|
||||
shader_full_path = shader_path_;
|
||||
} else {
|
||||
// Construct the fallback path using std::filesystem
|
||||
shader_full_path = find_resource_file(
|
||||
std::filesystem::path(STR("models")) / STR("libplacebo") /
|
||||
(path_to_string_type(shader_path) + STR(".glsl"))
|
||||
(path_to_string_type(shader_path_) + STR(".glsl"))
|
||||
);
|
||||
}
|
||||
|
||||
// Check if the shader file exists
|
||||
if (!std::filesystem::exists(shader_full_path)) {
|
||||
spdlog::error("libplacebo shader file not found: '{}'", shader_path.u8string());
|
||||
spdlog::error("libplacebo shader file not found: '{}'", shader_path_.u8string());
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Save the output time base
|
||||
in_time_base = dec_ctx->time_base;
|
||||
out_time_base = enc_ctx->time_base;
|
||||
in_time_base_ = dec_ctx->time_base;
|
||||
out_time_base_ = enc_ctx->time_base;
|
||||
|
||||
// Initialize the libplacebo filter
|
||||
int ret = init_libplacebo(
|
||||
&filter_graph,
|
||||
&buffersrc_ctx,
|
||||
&buffersink_ctx,
|
||||
&filter_graph_,
|
||||
&buffersrc_ctx_,
|
||||
&buffersink_ctx_,
|
||||
dec_ctx,
|
||||
out_width,
|
||||
out_height,
|
||||
vk_device_index,
|
||||
width_,
|
||||
height_,
|
||||
vk_device_index_,
|
||||
shader_full_path
|
||||
);
|
||||
|
||||
// Set these resources to nullptr since they are already freed by `avfilter_graph_free`
|
||||
if (ret < 0) {
|
||||
buffersrc_ctx = nullptr;
|
||||
buffersink_ctx = nullptr;
|
||||
filter_graph = nullptr;
|
||||
buffersrc_ctx_ = nullptr;
|
||||
buffersink_ctx_ = nullptr;
|
||||
filter_graph_ = nullptr;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int LibplaceboFilter::process_frame(AVFrame *in_frame, AVFrame **out_frame) {
|
||||
int FilterLibplacebo::filter(AVFrame *in_frame, AVFrame **out_frame) {
|
||||
int ret;
|
||||
|
||||
// Get the filtered frame
|
||||
@@ -93,28 +93,28 @@ int LibplaceboFilter::process_frame(AVFrame *in_frame, AVFrame **out_frame) {
|
||||
}
|
||||
|
||||
// Feed the frame to the filter graph
|
||||
ret = av_buffersrc_add_frame(buffersrc_ctx, in_frame);
|
||||
ret = av_buffersrc_add_frame(buffersrc_ctx_, in_frame);
|
||||
if (ret < 0) {
|
||||
spdlog::error("Error while feeding the filter graph");
|
||||
av_frame_free(out_frame);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = av_buffersink_get_frame(buffersink_ctx, *out_frame);
|
||||
ret = av_buffersink_get_frame(buffersink_ctx_, *out_frame);
|
||||
if (ret < 0) {
|
||||
av_frame_free(out_frame);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Rescale PTS to encoder's time base
|
||||
(*out_frame)->pts = av_rescale_q((*out_frame)->pts, in_time_base, out_time_base);
|
||||
(*out_frame)->pts = av_rescale_q((*out_frame)->pts, in_time_base_, out_time_base_);
|
||||
|
||||
// Return the processed frame to the caller
|
||||
return 0;
|
||||
}
|
||||
|
||||
int LibplaceboFilter::flush(std::vector<AVFrame *> &flushed_frames) {
|
||||
int ret = av_buffersrc_add_frame(buffersrc_ctx, nullptr);
|
||||
int FilterLibplacebo::flush(std::vector<AVFrame *> &flushed_frames) {
|
||||
int ret = av_buffersrc_add_frame(buffersrc_ctx_, nullptr);
|
||||
if (ret < 0) {
|
||||
spdlog::error("Error while flushing filter graph");
|
||||
return ret;
|
||||
@@ -127,7 +127,7 @@ int LibplaceboFilter::flush(std::vector<AVFrame *> &flushed_frames) {
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
|
||||
ret = av_buffersink_get_frame(buffersink_ctx, filt_frame);
|
||||
ret = av_buffersink_get_frame(buffersink_ctx_, filt_frame);
|
||||
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
|
||||
av_frame_free(&filt_frame);
|
||||
break;
|
||||
@@ -138,7 +138,7 @@ int LibplaceboFilter::flush(std::vector<AVFrame *> &flushed_frames) {
|
||||
}
|
||||
|
||||
// Rescale PTS to encoder's time base
|
||||
filt_frame->pts = av_rescale_q(filt_frame->pts, in_time_base, out_time_base);
|
||||
filt_frame->pts = av_rescale_q(filt_frame->pts, in_time_base_, out_time_base_);
|
||||
|
||||
// Add to processed frames
|
||||
flushed_frames.push_back(filt_frame);
|
||||
@@ -146,3 +146,14 @@ int LibplaceboFilter::flush(std::vector<AVFrame *> &flushed_frames) {
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void FilterLibplacebo::get_output_dimensions(
|
||||
const ProcessorConfig *processor_config,
|
||||
int,
|
||||
int,
|
||||
int &out_width,
|
||||
int &out_height
|
||||
) const {
|
||||
out_width = processor_config->width;
|
||||
out_height = processor_config->height;
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
#include "realesrgan_filter.h"
|
||||
#include "filter_realesrgan.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
@@ -9,34 +9,34 @@
|
||||
#include "conversions.h"
|
||||
#include "fsutils.h"
|
||||
|
||||
RealesrganFilter::RealesrganFilter(
|
||||
FilterRealesrgan::FilterRealesrgan(
|
||||
int gpuid,
|
||||
bool tta_mode,
|
||||
int scaling_factor,
|
||||
const StringType model_name
|
||||
)
|
||||
: realesrgan(nullptr),
|
||||
gpuid(gpuid),
|
||||
tta_mode(tta_mode),
|
||||
scaling_factor(scaling_factor),
|
||||
model_name(std::move(model_name)) {}
|
||||
: realesrgan_(nullptr),
|
||||
gpuid_(gpuid),
|
||||
tta_mode_(tta_mode),
|
||||
scaling_factor_(scaling_factor),
|
||||
model_name_(std::move(model_name)) {}
|
||||
|
||||
RealesrganFilter::~RealesrganFilter() {
|
||||
if (realesrgan) {
|
||||
delete realesrgan;
|
||||
realesrgan = nullptr;
|
||||
FilterRealesrgan::~FilterRealesrgan() {
|
||||
if (realesrgan_) {
|
||||
delete realesrgan_;
|
||||
realesrgan_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
int RealesrganFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *_) {
|
||||
int FilterRealesrgan::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *_) {
|
||||
// Construct the model paths using std::filesystem
|
||||
std::filesystem::path model_param_path;
|
||||
std::filesystem::path model_bin_path;
|
||||
|
||||
StringType param_file_name =
|
||||
model_name + STR("-x") + to_string_type(scaling_factor) + STR(".param");
|
||||
model_name_ + STR("-x") + to_string_type(scaling_factor_) + STR(".param");
|
||||
StringType bin_file_name =
|
||||
model_name + STR("-x") + to_string_type(scaling_factor) + STR(".bin");
|
||||
model_name_ + STR("-x") + to_string_type(scaling_factor_) + STR(".bin");
|
||||
|
||||
// Find the model paths by model name if provided
|
||||
model_param_path = std::filesystem::path(STR("models")) / STR("realesrgan") / param_file_name;
|
||||
@@ -57,39 +57,39 @@ int RealesrganFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVB
|
||||
}
|
||||
|
||||
// Create a new RealESRGAN instance
|
||||
realesrgan = new RealESRGAN(gpuid, tta_mode);
|
||||
realesrgan_ = new RealESRGAN(gpuid_, tta_mode_);
|
||||
|
||||
// Store the time bases
|
||||
in_time_base = dec_ctx->time_base;
|
||||
out_time_base = enc_ctx->time_base;
|
||||
out_pix_fmt = enc_ctx->pix_fmt;
|
||||
in_time_base_ = dec_ctx->time_base;
|
||||
out_time_base_ = enc_ctx->time_base;
|
||||
out_pix_fmt_ = enc_ctx->pix_fmt;
|
||||
|
||||
// Load the model
|
||||
if (realesrgan->load(model_param_full_path, model_bin_full_path) != 0) {
|
||||
if (realesrgan_->load(model_param_full_path, model_bin_full_path) != 0) {
|
||||
spdlog::error("Failed to load RealESRGAN model");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Set RealESRGAN parameters
|
||||
realesrgan->scale = scaling_factor;
|
||||
realesrgan->prepadding = 10;
|
||||
realesrgan_->scale = scaling_factor_;
|
||||
realesrgan_->prepadding = 10;
|
||||
|
||||
// Calculate tilesize based on GPU heap budget
|
||||
uint32_t heap_budget = ncnn::get_gpu_device(gpuid)->get_heap_budget();
|
||||
uint32_t heap_budget = ncnn::get_gpu_device(gpuid_)->get_heap_budget();
|
||||
if (heap_budget > 1900) {
|
||||
realesrgan->tilesize = 200;
|
||||
realesrgan_->tilesize = 200;
|
||||
} else if (heap_budget > 550) {
|
||||
realesrgan->tilesize = 100;
|
||||
realesrgan_->tilesize = 100;
|
||||
} else if (heap_budget > 190) {
|
||||
realesrgan->tilesize = 64;
|
||||
realesrgan_->tilesize = 64;
|
||||
} else {
|
||||
realesrgan->tilesize = 32;
|
||||
realesrgan_->tilesize = 32;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int RealesrganFilter::process_frame(AVFrame *in_frame, AVFrame **out_frame) {
|
||||
int FilterRealesrgan::filter(AVFrame *in_frame, AVFrame **out_frame) {
|
||||
int ret;
|
||||
|
||||
// Convert the input frame to RGB24
|
||||
@@ -99,23 +99,34 @@ int RealesrganFilter::process_frame(AVFrame *in_frame, AVFrame **out_frame) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Allocate space for ouptut ncnn::Mat
|
||||
int output_width = in_mat.w * realesrgan->scale;
|
||||
int output_height = in_mat.h * realesrgan->scale;
|
||||
// Allocate space for output ncnn::Mat
|
||||
int output_width = in_mat.w * realesrgan_->scale;
|
||||
int output_height = in_mat.h * realesrgan_->scale;
|
||||
ncnn::Mat out_mat = ncnn::Mat(output_width, output_height, static_cast<size_t>(3), 3);
|
||||
|
||||
ret = realesrgan->process(in_mat, out_mat);
|
||||
ret = realesrgan_->process(in_mat, out_mat);
|
||||
if (ret != 0) {
|
||||
spdlog::error("RealESRGAN processing failed");
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Convert ncnn::Mat to AVFrame
|
||||
*out_frame = ncnn_mat_to_avframe(out_mat, out_pix_fmt);
|
||||
*out_frame = ncnn_mat_to_avframe(out_mat, out_pix_fmt_);
|
||||
|
||||
// Rescale PTS to encoder's time base
|
||||
(*out_frame)->pts = av_rescale_q(in_frame->pts, in_time_base, out_time_base);
|
||||
(*out_frame)->pts = av_rescale_q(in_frame->pts, in_time_base_, out_time_base_);
|
||||
|
||||
// Return the processed frame to the caller
|
||||
return ret;
|
||||
}
|
||||
|
||||
void FilterRealesrgan::get_output_dimensions(
|
||||
const ProcessorConfig *,
|
||||
int in_width,
|
||||
int in_height,
|
||||
int &out_width,
|
||||
int &out_height
|
||||
) const {
|
||||
out_width = in_width * scaling_factor_;
|
||||
out_height = in_height * scaling_factor_;
|
||||
}
|
||||
371
src/frames_processor.cpp
Normal file
371
src/frames_processor.cpp
Normal file
@@ -0,0 +1,371 @@
|
||||
#include "frames_processor.h"
|
||||
|
||||
extern "C" {
|
||||
#include <libavutil/avutil.h>
|
||||
}
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include "avutils.h"
|
||||
|
||||
// Deleter for AVFrame unique_ptr
|
||||
auto av_frame_deleter = [](AVFrame *frame) {
|
||||
if (frame != nullptr) {
|
||||
av_frame_free(&frame);
|
||||
frame = nullptr;
|
||||
}
|
||||
};
|
||||
|
||||
// Deleter for AVPacket unique_ptr
|
||||
auto av_packet_deleter = [](AVPacket *packet) {
|
||||
if (packet != nullptr) {
|
||||
av_packet_unref(packet);
|
||||
av_packet_free(&packet);
|
||||
packet = nullptr;
|
||||
}
|
||||
};
|
||||
|
||||
// Sets the total number of frames to process in the VideoProcessingContext
|
||||
void set_total_frames(
|
||||
const ProcessorConfig *processor_config,
|
||||
VideoProcessingContext *proc_ctx,
|
||||
AVFormatContext *ifmt_ctx,
|
||||
int in_vstream_idx,
|
||||
Processor *processor
|
||||
) {
|
||||
spdlog::debug("Estimating the total number of frames to process");
|
||||
proc_ctx->total_frames = get_video_frame_count(ifmt_ctx, in_vstream_idx);
|
||||
|
||||
if (proc_ctx->total_frames <= 0) {
|
||||
spdlog::warn("Unable to determine the total number of frames");
|
||||
proc_ctx->total_frames = 0;
|
||||
} else {
|
||||
spdlog::debug("{} frames to process", proc_ctx->total_frames);
|
||||
}
|
||||
|
||||
// Set total frames for interpolation
|
||||
if (processor->get_processing_mode() == PROCESSING_MODE_INTERPOLATE) {
|
||||
proc_ctx->total_frames *= processor_config->frm_rate_mul;
|
||||
}
|
||||
}
|
||||
|
||||
// Hand a processed frame to the encoder.
//
// In benchmark mode nothing is encoded and 0 is returned. Otherwise the frame's picture type is
// cleared so the encoder picks it, and the frame is written together with the current
// processed-frame counter. Returns the encoder's result; failures are logged.
int write_frame(
    AVFrame *frame,
    VideoProcessingContext *proc_ctx,
    Encoder &encoder,
    bool benchmark
) {
    if (benchmark) {
        return 0;
    }

    // Set the frame type to none to let the encoder decide
    frame->pict_type = AV_PICTURE_TYPE_NONE;

    const int ret = encoder.write_frame(frame, proc_ctx->processed_frames);
    if (ret < 0) {
        char errbuf[AV_ERROR_MAX_STRING_SIZE];
        av_strerror(ret, errbuf, sizeof(errbuf));
        spdlog::critical("Error encoding/writing frame: {}", errbuf);
    }
    return ret;
}
|
||||
|
||||
// Copy a packet from the input container to the output container without re-encoding.
//
// The packet's timestamps are rescaled from the input stream's time base to the mapped output
// stream's time base, its stream index is rewritten, and it is passed to the muxer. Returns the
// muxer's result; failures are logged.
//
// NOTE(review): no bounds or sign check is done on `stream_map[packet->stream_index]` here -
// presumably the caller only passes packets whose stream is mapped; confirm against callers.
int write_raw_packet(
    AVPacket *packet,
    AVFormatContext *ifmt_ctx,
    AVFormatContext *ofmt_ctx,
    int *stream_map
) {
    const int in_index = packet->stream_index;
    const int out_index = stream_map[in_index];

    const AVStream *source_stream = ifmt_ctx->streams[in_index];
    const AVStream *target_stream = ofmt_ctx->streams[out_index];

    av_packet_rescale_ts(packet, source_stream->time_base, target_stream->time_base);
    packet->stream_index = out_index;

    const int ret = av_interleaved_write_frame(ofmt_ctx, packet);
    if (ret < 0) {
        char errbuf[AV_ERROR_MAX_STRING_SIZE];
        av_strerror(ret, errbuf, sizeof(errbuf));
        spdlog::critical("Error muxing audio/subtitle packet: {}", errbuf);
    }
    return ret;
}
|
||||
|
||||
// Run one decoded frame through a filter-type processor and write the result.
//
// `processor` is cast to `Filter` without a runtime check, so the caller must only use this
// function for filter processors. Errors other than EAGAIN are logged and returned. When the
// filter returns 0 and produces a frame, that frame is written and then freed; in every other
// case the filter's return code is passed back unchanged.
int process_filtering(
    Processor *processor,
    VideoProcessingContext *proc_ctx,
    Encoder &encoder,
    bool benchmark,
    AVFrame *frame,
    AVFrame *raw_processed_frame
) {
    Filter *filter = static_cast<Filter *>(processor);

    // Process the frame using the filter
    const int filter_ret = filter->filter(frame, &raw_processed_frame);

    // Real failure: report it and stop here
    if (filter_ret < 0 && filter_ret != AVERROR(EAGAIN)) {
        char errbuf[AV_ERROR_MAX_STRING_SIZE];
        av_strerror(filter_ret, errbuf, sizeof(errbuf));
        spdlog::critical("Error filtering frame: {}", errbuf);
        return filter_ret;
    }

    // Nothing to write (e.g. the filter needs more input)
    if (filter_ret != 0 || raw_processed_frame == nullptr) {
        return filter_ret;
    }

    // Take ownership so the frame is freed once it has been written
    std::unique_ptr<AVFrame, decltype(av_frame_deleter)> owned_frame(
        raw_processed_frame, av_frame_deleter
    );
    return write_frame(owned_frame.get(), proc_ctx, encoder, benchmark);
}
|
||||
|
||||
// Run one decoded frame through an interpolator-type processor.
//
// For every frame except the first, `frm_rate_mul - 1` intermediate frames are generated between
// `prev_frame` and `frame` and written before the original frame. If the difference between the
// two frames exceeds the scene detection threshold, copies of the previous frame are written in
// place of interpolated frames. `proc_ctx->processed_frames` is incremented once per
// intermediate frame and is used as the PTS of every frame written here. On return,
// `prev_frame` holds a clone of `frame`.
//
// `processor` is cast to `Interpolator` without a runtime check, so the caller must only use
// this function for interpolator processors.
//
// Returns 0 or the result of writing the original frame on success, or a negative AVERROR code
// if interpolation, frame cloning or writing fails.
int process_interpolation(
    Processor *processor,
    const ProcessorConfig *processor_config,
    VideoProcessingContext *proc_ctx,
    Encoder &encoder,
    bool benchmark,
    std::unique_ptr<AVFrame, decltype(av_frame_deleter)> &prev_frame,
    AVFrame *frame,
    AVFrame *raw_processed_frame
) {
    char errbuf[AV_ERROR_MAX_STRING_SIZE];
    int ret = 0;

    // Cast the processor to an Interpolator
    Interpolator *interpolator = static_cast<Interpolator *>(processor);

    // Interpolation needs two frames, so the very first frame is only passed through
    if (prev_frame != nullptr) {
        // Calculate the time step for each frame
        const float time_step = 1.0f / static_cast<float>(processor_config->frm_rate_mul);
        float current_time_step = time_step;

        // Check if a scene change is detected
        bool skip_frame = false;
        const float frame_diff = get_frame_diff(prev_frame.get(), frame);
        if (frame_diff > processor_config->scn_det_thresh) {
            spdlog::debug(
                "Scene change detected ({:.2f}%), skipping frame {}",
                frame_diff,
                proc_ctx->processed_frames
            );
            skip_frame = true;
        }

        // Write the interpolated frames
        for (int i = 0; i < processor_config->frm_rate_mul - 1; i++) {
            // The frame from the previous iteration has already been freed by its owning
            // unique_ptr; never hand that stale pointer to the next call
            raw_processed_frame = nullptr;

            // Get the interpolated frame from the interpolator
            if (!skip_frame) {
                ret = interpolator->interpolate(
                    prev_frame.get(), frame, &raw_processed_frame, current_time_step
                );
            } else {
                ret = 0;
                raw_processed_frame = av_frame_clone(prev_frame.get());
                if (raw_processed_frame == nullptr) {
                    spdlog::critical("Error cloning the previous frame");
                    return AVERROR(ENOMEM);
                }
            }

            // Write the interpolated frame
            if (ret < 0 && ret != AVERROR(EAGAIN)) {
                av_strerror(ret, errbuf, sizeof(errbuf));
                spdlog::critical("Error interpolating frame: {}", errbuf);
                return ret;
            } else if (ret == 0 && raw_processed_frame != nullptr) {
                auto processed_frame = std::unique_ptr<AVFrame, decltype(av_frame_deleter)>(
                    raw_processed_frame, av_frame_deleter
                );

                processed_frame->pts = proc_ctx->processed_frames;
                ret = write_frame(processed_frame.get(), proc_ctx, encoder, benchmark);
                if (ret < 0) {
                    return ret;
                }
            }
            proc_ctx->processed_frames++;
            current_time_step += time_step;
        }
    }

    // Write the original frame
    frame->pts = proc_ctx->processed_frames;
    ret = write_frame(frame, proc_ctx, encoder, benchmark);

    // Update the previous frame with the current frame
    prev_frame.reset(av_frame_clone(frame));
    if (prev_frame == nullptr && ret >= 0) {
        // Without a previous frame the next call would silently emit no interpolated frames
        spdlog::critical("Error cloning the current frame");
        return AVERROR(ENOMEM);
    }
    return ret;
}
|
||||
|
||||
// Process frames using the selected filter.
|
||||
int process_frames(
|
||||
const EncoderConfig *encoder_config,
|
||||
const ProcessorConfig *processor_config,
|
||||
VideoProcessingContext *proc_ctx,
|
||||
Decoder &decoder,
|
||||
Encoder &encoder,
|
||||
Processor *processor,
|
||||
bool benchmark
|
||||
) {
|
||||
char errbuf[AV_ERROR_MAX_STRING_SIZE];
|
||||
int ret = 0;
|
||||
|
||||
// Get required objects
|
||||
AVFormatContext *ifmt_ctx = decoder.get_format_context();
|
||||
AVCodecContext *dec_ctx = decoder.get_codec_context();
|
||||
int in_vstream_idx = decoder.get_video_stream_index();
|
||||
AVFormatContext *ofmt_ctx = encoder.get_format_context();
|
||||
int *stream_map = encoder.get_stream_map();
|
||||
|
||||
// Reference to the previous frame does not require allocation
|
||||
// It will be cloned from the current frame
|
||||
std::unique_ptr<AVFrame, decltype(av_frame_deleter)> prev_frame(nullptr, av_frame_deleter);
|
||||
|
||||
// Allocate space for the decoded frames
|
||||
std::unique_ptr<AVFrame, decltype(av_frame_deleter)> frame(av_frame_alloc(), av_frame_deleter);
|
||||
if (frame == nullptr) {
|
||||
spdlog::critical("Error allocating frame");
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
|
||||
// Allocate space for the decoded packets
|
||||
std::unique_ptr<AVPacket, decltype(av_packet_deleter)> packet(
|
||||
av_packet_alloc(), av_packet_deleter
|
||||
);
|
||||
if (packet == nullptr) {
|
||||
spdlog::critical("Error allocating packet");
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
|
||||
// Set the total number of frames in the VideoProcessingContext
|
||||
set_total_frames(processor_config, proc_ctx, ifmt_ctx, in_vstream_idx, processor);
|
||||
|
||||
// Read frames from the input file
|
||||
while (!proc_ctx->abort) {
|
||||
ret = av_read_frame(ifmt_ctx, packet.get());
|
||||
if (ret < 0) {
|
||||
if (ret == AVERROR_EOF) {
|
||||
spdlog::debug("Reached end of file");
|
||||
break;
|
||||
}
|
||||
av_strerror(ret, errbuf, sizeof(errbuf));
|
||||
spdlog::critical("Error reading packet: {}", errbuf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (packet->stream_index == in_vstream_idx) {
|
||||
// Send the packet to the decoder for decoding
|
||||
ret = avcodec_send_packet(dec_ctx, packet.get());
|
||||
if (ret < 0) {
|
||||
av_strerror(ret, errbuf, sizeof(errbuf));
|
||||
spdlog::critical("Error sending packet to decoder: {}", errbuf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Process frames decoded from the packet
|
||||
while (!proc_ctx->abort) {
|
||||
// Sleep for 100 ms if processing is paused
|
||||
if (proc_ctx->pause) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
continue;
|
||||
}
|
||||
|
||||
// Receive the decoded frame from the decoder
|
||||
ret = avcodec_receive_frame(dec_ctx, frame.get());
|
||||
if (ret == AVERROR(EAGAIN)) {
|
||||
// No more frames from this packet
|
||||
break;
|
||||
} else if (ret < 0) {
|
||||
av_strerror(ret, errbuf, sizeof(errbuf));
|
||||
spdlog::critical("Error decoding video frame: {}", errbuf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
AVFrame *raw_processed_frame = nullptr;
|
||||
|
||||
// Process the frame based on the selected processing mode
|
||||
switch (processor->get_processing_mode()) {
|
||||
case PROCESSING_MODE_FILTER: {
|
||||
ret = process_filtering(
|
||||
processor,
|
||||
proc_ctx,
|
||||
encoder,
|
||||
benchmark,
|
||||
frame.get(),
|
||||
raw_processed_frame
|
||||
);
|
||||
break;
|
||||
}
|
||||
case PROCESSING_MODE_INTERPOLATE: {
|
||||
ret = process_interpolation(
|
||||
processor,
|
||||
processor_config,
|
||||
proc_ctx,
|
||||
encoder,
|
||||
benchmark,
|
||||
prev_frame,
|
||||
frame.get(),
|
||||
raw_processed_frame
|
||||
);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
spdlog::critical("Unknown processing mode");
|
||||
return -1;
|
||||
}
|
||||
if (ret < 0 && ret != AVERROR(EAGAIN)) {
|
||||
return ret;
|
||||
}
|
||||
av_frame_unref(frame.get());
|
||||
proc_ctx->processed_frames++;
|
||||
spdlog::debug(
|
||||
"Processed frame {}/{}", proc_ctx->processed_frames, proc_ctx->total_frames
|
||||
);
|
||||
}
|
||||
} else if (encoder_config->copy_streams && stream_map[packet->stream_index] >= 0) {
|
||||
write_raw_packet(packet.get(), ifmt_ctx, ofmt_ctx, stream_map);
|
||||
}
|
||||
av_packet_unref(packet.get());
|
||||
}
|
||||
|
||||
// Flush the filter
|
||||
std::vector<AVFrame *> raw_flushed_frames;
|
||||
ret = processor->flush(raw_flushed_frames);
|
||||
if (ret < 0) {
|
||||
av_strerror(ret, errbuf, sizeof(errbuf));
|
||||
spdlog::critical("Error flushing filter: {}", errbuf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Wrap flushed frames in unique_ptrs
|
||||
std::vector<std::unique_ptr<AVFrame, decltype(av_frame_deleter)>> flushed_frames;
|
||||
for (AVFrame *raw_frame : raw_flushed_frames) {
|
||||
flushed_frames.emplace_back(raw_frame, av_frame_deleter);
|
||||
}
|
||||
|
||||
// Encode and write all flushed frames
|
||||
for (auto &flushed_frame : flushed_frames) {
|
||||
ret = write_frame(flushed_frame.get(), proc_ctx, encoder, benchmark);
|
||||
if (ret < 0) {
|
||||
return ret;
|
||||
}
|
||||
proc_ctx->processed_frames++;
|
||||
}
|
||||
|
||||
// Flush the encoder
|
||||
ret = encoder.flush();
|
||||
if (ret < 0) {
|
||||
av_strerror(ret, errbuf, sizeof(errbuf));
|
||||
spdlog::critical("Error flushing encoder: {}", errbuf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
121
src/interpolator_rife.cpp
Normal file
121
src/interpolator_rife.cpp
Normal file
@@ -0,0 +1,121 @@
|
||||
#include "interpolator_rife.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <filesystem>
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include "conversions.h"
|
||||
#include "fsutils.h"
|
||||
|
||||
// Stores the RIFE configuration. No RIFE instance is created here: rife_ starts out null
// and is only allocated by init().
//
// model_name is taken by non-const value so that std::move can actually move it into the
// member; std::move on a const object silently degrades to a copy. Top-level const on a
// by-value parameter is not part of the function signature, so a declaration that still
// spells the parameter as `const StringType` remains compatible with this definition.
InterpolatorRIFE::InterpolatorRIFE(
    int gpuid,
    bool tta_mode,
    bool tta_temporal_mode,
    bool uhd_mode,
    int num_threads,
    bool rife_v2,
    bool rife_v4,
    StringType model_name
)
    : rife_(nullptr),
      gpuid_(gpuid),
      tta_mode_(tta_mode),
      tta_temporal_mode_(tta_temporal_mode),
      uhd_mode_(uhd_mode),
      num_threads_(num_threads),
      rife_v2_(rife_v2),
      rife_v4_(rife_v4),
      model_name_(std::move(model_name)) {}
|
||||
|
||||
// Releases the RIFE instance held by this interpolator, if one was created
InterpolatorRIFE::~InterpolatorRIFE() {
    // Deleting a null pointer is a no-op, so no explicit guard is needed
    delete rife_;
    rife_ = nullptr;
}
|
||||
|
||||
// Locates the RIFE model directory, creates the RIFE instance and loads the model into it.
//
// dec_ctx: decoder context; its time base is stored as the input time base
// enc_ctx: encoder context; its time base and pixel format are stored for output frames
// The third parameter is unnamed and not used by this function.
//
// Returns 0 on success and -1 on failure.
int InterpolatorRIFE::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *) {
    // Both contexts are dereferenced below; reject null pointers instead of crashing
    if (dec_ctx == nullptr || enc_ctx == nullptr) {
        spdlog::error("Invalid decoder or encoder context provided to the RIFE interpolator");
        return -1;
    }

    // Model files are expected under models/rife/<model_name>
    const std::filesystem::path model_param_dir =
        std::filesystem::path(STR("models")) / STR("rife") / model_name_;

    // Resolve the directory to a full path; find_resource_file presumably searches the
    // known resource locations (TODO confirm against fsutils)
    const std::filesystem::path model_param_full_path = find_resource_file(model_param_dir);

    // Check if the model files exist
    if (!std::filesystem::exists(model_param_full_path)) {
        spdlog::error("RIFE model param directory not found: {}", model_param_dir.u8string());
        return -1;
    }

    // Release any instance left over from a previous init() call so it is not leaked
    delete rife_;
    rife_ = nullptr;

    // Create a new RIFE instance
    rife_ = new RIFE(
        gpuid_, tta_mode_, tta_temporal_mode_, uhd_mode_, num_threads_, rife_v2_, rife_v4_
    );

    // Store the time bases and the output pixel format used when building output frames
    in_time_base_ = dec_ctx->time_base;
    out_time_base_ = enc_ctx->time_base;
    out_pix_fmt_ = enc_ctx->pix_fmt;

    // Load the model
    if (rife_->load(model_param_full_path) != 0) {
        spdlog::error("Failed to load RIFE model");
        return -1;
    }

    return 0;
}
|
||||
|
||||
// Produces one interpolated frame from prev_frame and in_frame using RIFE.
//
// prev_frame: first input frame handed to RIFE
// in_frame:   second input frame handed to RIFE; its PTS (rescaled to the encoder time
//             base) becomes the PTS of the output frame
// out_frame:  receives the frame built from the RIFE output on success
//             (presumably owned by the caller afterwards; confirm against
//             ncnn_mat_to_avframe)
// time_step:  forwarded unchanged to RIFE::process()
//
// Returns 0 on success and a non-zero value on failure.
int InterpolatorRIFE::interpolate(
    AVFrame *prev_frame,
    AVFrame *in_frame,
    AVFrame **out_frame,
    float time_step
) {
    // init() must have created the RIFE instance before frames can be processed
    if (rife_ == nullptr) {
        spdlog::error("RIFE interpolator has not been initialized");
        return -1;
    }
    if (prev_frame == nullptr || in_frame == nullptr || out_frame == nullptr) {
        spdlog::error("Invalid frame pointer passed to the RIFE interpolator");
        return -1;
    }

    ncnn::Mat in_mat1 = avframe_to_ncnn_mat(prev_frame);
    if (in_mat1.empty()) {
        spdlog::error("Failed to convert AVFrame to ncnn::Mat");
        return -1;
    }

    ncnn::Mat in_mat2 = avframe_to_ncnn_mat(in_frame);
    if (in_mat2.empty()) {
        spdlog::error("Failed to convert AVFrame to ncnn::Mat");
        return -1;
    }

    // Allocate space for output ncnn::Mat with the same width and height as the input
    ncnn::Mat out_mat = ncnn::Mat(in_mat2.w, in_mat2.h, static_cast<size_t>(3), 3);

    const int ret = rife_->process(in_mat1, in_mat2, time_step, out_mat);
    if (ret != 0) {
        spdlog::error("RIFE processing failed");
        return ret;
    }

    // Convert ncnn::Mat to AVFrame; guard against a failed conversion before the frame
    // is dereferenced to set its PTS
    AVFrame *converted = ncnn_mat_to_avframe(out_mat, out_pix_fmt_);
    if (converted == nullptr) {
        spdlog::error("Failed to convert ncnn::Mat to AVFrame");
        return -1;
    }

    // Rescale PTS to encoder's time base
    converted->pts = av_rescale_q(in_frame->pts, in_time_base_, out_time_base_);

    // Return the processed frame to the caller
    *out_frame = converted;
    return 0;
}
|
||||
|
||||
void InterpolatorRIFE::get_output_dimensions(
|
||||
const ProcessorConfig *,
|
||||
int in_width,
|
||||
int in_height,
|
||||
int &out_width,
|
||||
int &out_height
|
||||
) const {
|
||||
out_width = in_width;
|
||||
out_height = in_height;
|
||||
}
|
||||
@@ -3,7 +3,6 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <thread>
|
||||
|
||||
extern "C" {
|
||||
#include <libavutil/avutil.h>
|
||||
@@ -11,199 +10,13 @@ extern "C" {
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
|
||||
#include "avutils.h"
|
||||
#include "decoder.h"
|
||||
#include "encoder.h"
|
||||
#include "filter.h"
|
||||
#include "libplacebo_filter.h"
|
||||
#include "realesrgan_filter.h"
|
||||
#include "frames_processor.h"
|
||||
#include "processor.h"
|
||||
#include "processor_factory.h"
|
||||
|
||||
// Process frames using the selected filter.
|
||||
static int process_frames(
|
||||
EncoderConfig *encoder_config,
|
||||
VideoProcessingContext *proc_ctx,
|
||||
Decoder &decoder,
|
||||
Encoder &encoder,
|
||||
Filter *filter,
|
||||
bool benchmark = false
|
||||
) {
|
||||
char errbuf[AV_ERROR_MAX_STRING_SIZE];
|
||||
int ret = 0;
|
||||
|
||||
// Get required objects
|
||||
AVFormatContext *ifmt_ctx = decoder.get_format_context();
|
||||
AVCodecContext *dec_ctx = decoder.get_codec_context();
|
||||
int in_vstream_idx = decoder.get_video_stream_index();
|
||||
AVFormatContext *ofmt_ctx = encoder.get_format_context();
|
||||
int *stream_map = encoder.get_stream_map();
|
||||
|
||||
// Get total number of frames
|
||||
spdlog::debug("Reading total number of frames");
|
||||
proc_ctx->total_frames = get_video_frame_count(ifmt_ctx, in_vstream_idx);
|
||||
|
||||
if (proc_ctx->total_frames <= 0) {
|
||||
spdlog::warn("Unable to determine the total number of frames");
|
||||
} else {
|
||||
spdlog::debug("{} frames to process", proc_ctx->total_frames);
|
||||
}
|
||||
|
||||
// Allocate frame and packet
|
||||
auto av_frame_deleter = [](AVFrame *frame) { av_frame_free(&frame); };
|
||||
std::unique_ptr<AVFrame, decltype(av_frame_deleter)> frame(av_frame_alloc(), av_frame_deleter);
|
||||
if (!frame) {
|
||||
ret = AVERROR(ENOMEM);
|
||||
return ret;
|
||||
}
|
||||
|
||||
auto av_packet_deleter = [](AVPacket *packet) { av_packet_free(&packet); };
|
||||
std::unique_ptr<AVPacket, decltype(av_packet_deleter)> packet(
|
||||
av_packet_alloc(), av_packet_deleter
|
||||
);
|
||||
if (!packet) {
|
||||
spdlog::critical("Could not allocate AVPacket");
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
|
||||
// Read frames from the input file
|
||||
while (!proc_ctx->abort) {
|
||||
ret = av_read_frame(ifmt_ctx, packet.get());
|
||||
if (ret < 0) {
|
||||
if (ret == AVERROR_EOF) {
|
||||
spdlog::debug("Reached end of file");
|
||||
break;
|
||||
}
|
||||
av_strerror(ret, errbuf, sizeof(errbuf));
|
||||
spdlog::critical("Error reading packet: {}", errbuf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (packet->stream_index == in_vstream_idx) {
|
||||
ret = avcodec_send_packet(dec_ctx, packet.get());
|
||||
if (ret < 0) {
|
||||
av_strerror(ret, errbuf, sizeof(errbuf));
|
||||
spdlog::critical("Error sending packet to decoder: {}", errbuf);
|
||||
av_packet_unref(packet.get());
|
||||
return ret;
|
||||
}
|
||||
|
||||
while (!proc_ctx->abort) {
|
||||
if (proc_ctx->pause) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
continue;
|
||||
}
|
||||
|
||||
ret = avcodec_receive_frame(dec_ctx, frame.get());
|
||||
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
|
||||
spdlog::debug("Frame not ready");
|
||||
break;
|
||||
} else if (ret < 0) {
|
||||
av_strerror(ret, errbuf, sizeof(errbuf));
|
||||
spdlog::critical("Error decoding video frame: {}", errbuf);
|
||||
av_packet_unref(packet.get());
|
||||
return ret;
|
||||
}
|
||||
|
||||
AVFrame *raw_processed_frame = nullptr;
|
||||
ret = filter->process_frame(frame.get(), &raw_processed_frame);
|
||||
|
||||
if (ret < 0 && ret != AVERROR(EAGAIN)) {
|
||||
av_strerror(ret, errbuf, sizeof(errbuf));
|
||||
av_packet_unref(packet.get());
|
||||
return ret;
|
||||
} else if (ret == 0 && raw_processed_frame != nullptr) {
|
||||
auto processed_frame = std::unique_ptr<AVFrame, decltype(av_frame_deleter)>(
|
||||
raw_processed_frame, av_frame_deleter
|
||||
);
|
||||
|
||||
if (!benchmark) {
|
||||
ret =
|
||||
encoder.write_frame(processed_frame.get(), proc_ctx->processed_frames);
|
||||
if (ret < 0) {
|
||||
av_strerror(ret, errbuf, sizeof(errbuf));
|
||||
spdlog::critical("Error encoding/writing frame: {}", errbuf);
|
||||
av_packet_unref(packet.get());
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
proc_ctx->processed_frames++;
|
||||
}
|
||||
|
||||
av_frame_unref(frame.get());
|
||||
spdlog::debug(
|
||||
"Processed frame {}/{}", proc_ctx->processed_frames, proc_ctx->total_frames
|
||||
);
|
||||
}
|
||||
} else if (encoder_config->copy_streams && stream_map[packet->stream_index] >= 0) {
|
||||
AVStream *in_stream = ifmt_ctx->streams[packet->stream_index];
|
||||
int out_stream_index = stream_map[packet->stream_index];
|
||||
AVStream *out_stream = ofmt_ctx->streams[out_stream_index];
|
||||
|
||||
av_packet_rescale_ts(packet.get(), in_stream->time_base, out_stream->time_base);
|
||||
packet->stream_index = out_stream_index;
|
||||
|
||||
ret = av_interleaved_write_frame(ofmt_ctx, packet.get());
|
||||
if (ret < 0) {
|
||||
av_strerror(ret, errbuf, sizeof(errbuf));
|
||||
spdlog::critical("Error muxing audio/subtitle packet: {}", errbuf);
|
||||
av_packet_unref(packet.get());
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
av_packet_unref(packet.get());
|
||||
}
|
||||
|
||||
// Flush the filter
|
||||
std::vector<AVFrame *> raw_flushed_frames;
|
||||
ret = filter->flush(raw_flushed_frames);
|
||||
if (ret < 0) {
|
||||
av_strerror(ret, errbuf, sizeof(errbuf));
|
||||
spdlog::critical("Error flushing filter: {}", errbuf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Wrap flushed frames in unique_ptrs
|
||||
std::vector<std::unique_ptr<AVFrame, decltype(av_frame_deleter)>> flushed_frames;
|
||||
for (AVFrame *raw_frame : raw_flushed_frames) {
|
||||
flushed_frames.emplace_back(raw_frame, av_frame_deleter);
|
||||
}
|
||||
|
||||
// Encode and write all flushed frames
|
||||
for (auto &flushed_frame : flushed_frames) {
|
||||
ret = encoder.write_frame(flushed_frame.get(), proc_ctx->processed_frames);
|
||||
if (ret < 0) {
|
||||
av_strerror(ret, errbuf, sizeof(errbuf));
|
||||
spdlog::critical("Error encoding/writing flushed frame: {}", errbuf);
|
||||
return ret;
|
||||
}
|
||||
proc_ctx->processed_frames++;
|
||||
}
|
||||
|
||||
// Flush the encoder
|
||||
ret = encoder.flush();
|
||||
if (ret < 0) {
|
||||
av_strerror(ret, errbuf, sizeof(errbuf));
|
||||
spdlog::critical("Error flushing encoder: {}", errbuf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
extern "C" int process_video(
|
||||
const CharType *in_fname,
|
||||
const CharType *out_fname,
|
||||
Libvideo2xLogLevel log_level,
|
||||
bool benchmark,
|
||||
uint32_t vk_device_index,
|
||||
AVHWDeviceType hw_type,
|
||||
const FilterConfig *filter_config,
|
||||
EncoderConfig *encoder_config,
|
||||
VideoProcessingContext *proc_ctx
|
||||
) {
|
||||
char errbuf[AV_ERROR_MAX_STRING_SIZE];
|
||||
int ret = 0;
|
||||
|
||||
// Set the log level for FFmpeg and spdlog
|
||||
static void set_log_level(Libvideo2xLogLevel log_level) {
|
||||
switch (log_level) {
|
||||
case LIBVIDEO2X_LOG_LEVEL_TRACE:
|
||||
av_log_set_level(AV_LOG_TRACE);
|
||||
@@ -238,13 +51,32 @@ extern "C" int process_video(
|
||||
spdlog::set_level(spdlog::level::info);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int process_video(
|
||||
const CharType *in_fname,
|
||||
const CharType *out_fname,
|
||||
Libvideo2xLogLevel log_level,
|
||||
bool benchmark,
|
||||
uint32_t vk_device_index,
|
||||
AVHWDeviceType hw_type,
|
||||
const ProcessorConfig *processor_config,
|
||||
EncoderConfig *encoder_config,
|
||||
VideoProcessingContext *proc_ctx
|
||||
) {
|
||||
char errbuf[AV_ERROR_MAX_STRING_SIZE];
|
||||
int ret = 0;
|
||||
|
||||
// Set the log level for FFmpeg and spdlog
|
||||
set_log_level(log_level);
|
||||
|
||||
// Convert the file names to std::filesystem::path
|
||||
std::filesystem::path in_fpath(in_fname);
|
||||
std::filesystem::path out_fpath(out_fname);
|
||||
|
||||
// Create a smart pointer to manage the hardware device context
|
||||
auto hw_ctx_deleter = [](AVBufferRef *ref) {
|
||||
if (ref) {
|
||||
if (ref != nullptr) {
|
||||
av_buffer_unref(&ref);
|
||||
}
|
||||
};
|
||||
@@ -275,22 +107,24 @@ extern "C" int process_video(
|
||||
AVCodecContext *dec_ctx = decoder.get_codec_context();
|
||||
int in_vstream_idx = decoder.get_video_stream_index();
|
||||
|
||||
// Create and initialize the appropriate filter
|
||||
std::unique_ptr<Processor> processor(
|
||||
ProcessorFactory::instance().create_processor(processor_config, vk_device_index)
|
||||
);
|
||||
if (processor == nullptr) {
|
||||
spdlog::critical("Failed to create filter instance");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Initialize output dimensions based on filter configuration
|
||||
int output_width = 0, output_height = 0;
|
||||
switch (filter_config->filter_type) {
|
||||
case FILTER_LIBPLACEBO:
|
||||
output_width = filter_config->config.libplacebo.out_width;
|
||||
output_height = filter_config->config.libplacebo.out_height;
|
||||
break;
|
||||
case FILTER_REALESRGAN:
|
||||
output_width = dec_ctx->width * filter_config->config.realesrgan.scaling_factor;
|
||||
output_height = dec_ctx->height * filter_config->config.realesrgan.scaling_factor;
|
||||
break;
|
||||
default:
|
||||
spdlog::critical("Unknown filter type");
|
||||
return -1;
|
||||
processor->get_output_dimensions(
|
||||
processor_config, dec_ctx->width, dec_ctx->height, output_width, output_height
|
||||
);
|
||||
if (output_width <= 0 || output_height <= 0) {
|
||||
spdlog::critical("Failed to determine the output dimensions");
|
||||
return -1;
|
||||
}
|
||||
spdlog::debug("Output video dimensions: {}x{}", output_width, output_height);
|
||||
|
||||
// Update encoder configuration with output dimensions
|
||||
encoder_config->width = output_width;
|
||||
@@ -298,67 +132,26 @@ extern "C" int process_video(
|
||||
|
||||
// Initialize the encoder
|
||||
Encoder encoder;
|
||||
ret = encoder.init(hw_ctx.get(), out_fpath, ifmt_ctx, dec_ctx, encoder_config, in_vstream_idx);
|
||||
ret = encoder.init(
|
||||
hw_ctx.get(), out_fpath, ifmt_ctx, dec_ctx, encoder_config, processor_config, in_vstream_idx
|
||||
);
|
||||
if (ret < 0) {
|
||||
av_strerror(ret, errbuf, sizeof(errbuf));
|
||||
spdlog::critical("Failed to initialize encoder: {}", errbuf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Write the output file header
|
||||
ret = avformat_write_header(encoder.get_format_context(), NULL);
|
||||
if (ret < 0) {
|
||||
av_strerror(ret, errbuf, sizeof(errbuf));
|
||||
spdlog::critical("Error occurred when opening output file: {}", errbuf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Create and initialize the appropriate filter
|
||||
std::unique_ptr<Filter> filter;
|
||||
if (filter_config->filter_type == FILTER_LIBPLACEBO) {
|
||||
const auto &config = filter_config->config.libplacebo;
|
||||
if (!config.shader_path) {
|
||||
spdlog::critical("Shader path must be provided for the libplacebo filter");
|
||||
return -1;
|
||||
}
|
||||
filter = std::make_unique<LibplaceboFilter>(
|
||||
vk_device_index,
|
||||
std::filesystem::path(config.shader_path),
|
||||
config.out_width,
|
||||
config.out_height
|
||||
);
|
||||
} else if (filter_config->filter_type == FILTER_REALESRGAN) {
|
||||
const auto &config = filter_config->config.realesrgan;
|
||||
if (!config.model_name) {
|
||||
spdlog::critical("Model name must be provided for the RealESRGAN filter");
|
||||
return -1;
|
||||
}
|
||||
filter = std::make_unique<RealesrganFilter>(
|
||||
static_cast<int>(vk_device_index),
|
||||
config.tta_mode,
|
||||
config.scaling_factor,
|
||||
config.model_name
|
||||
);
|
||||
} else {
|
||||
spdlog::critical("Unknown filter type");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Check if the filter instance was created successfully
|
||||
if (filter == nullptr) {
|
||||
spdlog::critical("Failed to create filter instance");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Initialize the filter
|
||||
ret = filter->init(dec_ctx, encoder.get_encoder_context(), hw_ctx.get());
|
||||
ret = processor->init(dec_ctx, encoder.get_encoder_context(), hw_ctx.get());
|
||||
if (ret < 0) {
|
||||
spdlog::critical("Failed to initialize filter");
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Process frames using the encoder and decoder
|
||||
ret = process_frames(encoder_config, proc_ctx, decoder, encoder, filter.get(), benchmark);
|
||||
ret = process_frames(
|
||||
encoder_config, processor_config, proc_ctx, decoder, encoder, processor.get(), benchmark
|
||||
);
|
||||
if (ret < 0) {
|
||||
av_strerror(ret, errbuf, sizeof(errbuf));
|
||||
spdlog::critical("Error processing frames: {}", errbuf);
|
||||
|
||||
112
src/processor_factory.cpp
Normal file
112
src/processor_factory.cpp
Normal file
@@ -0,0 +1,112 @@
|
||||
#include "processor_factory.h"
|
||||
|
||||
#include <spdlog/spdlog.h>
|
||||
#include <utility>
|
||||
|
||||
#include "filter_libplacebo.h"
|
||||
#include "filter_realesrgan.h"
|
||||
#include "interpolator_rife.h"
|
||||
|
||||
// Access the singleton instance
|
||||
ProcessorFactory &ProcessorFactory::instance() {
|
||||
static ProcessorFactory factory;
|
||||
|
||||
// Ensure default processors are registered only once
|
||||
static bool initialized = false;
|
||||
if (!initialized) {
|
||||
ProcessorFactory::init_default_processors(factory);
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
return factory;
|
||||
}
|
||||
|
||||
// Register a processor type and its creator
|
||||
void ProcessorFactory::register_processor(ProcessorType type, Creator creator) {
|
||||
creators[type] = std::move(creator);
|
||||
}
|
||||
|
||||
// Create a processor instance
|
||||
std::unique_ptr<Processor> ProcessorFactory::create_processor(
|
||||
const ProcessorConfig *processor_config,
|
||||
uint32_t vk_device_index
|
||||
) const {
|
||||
auto it = creators.find(processor_config->processor_type);
|
||||
if (it == creators.end()) {
|
||||
spdlog::critical(
|
||||
"Processor type not registered: {}", static_cast<int>(processor_config->processor_type)
|
||||
);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Call the corresponding creator function
|
||||
return it->second(processor_config, vk_device_index);
|
||||
}
|
||||
|
||||
// Initialize default processors
|
||||
void ProcessorFactory::init_default_processors(ProcessorFactory &factory) {
|
||||
factory.register_processor(
|
||||
PROCESSOR_LIBPLACEBO,
|
||||
[](const ProcessorConfig *config, uint32_t vk_device_index) -> std::unique_ptr<Processor> {
|
||||
const auto &cfg = config->config.libplacebo;
|
||||
if (!cfg.shader_path) {
|
||||
spdlog::critical("Shader path must be provided for the libplacebo filter");
|
||||
return nullptr;
|
||||
}
|
||||
if (config->width <= 0 || config->height <= 0) {
|
||||
spdlog::critical(
|
||||
"Output width and height must be provided for the libplacebo filter"
|
||||
);
|
||||
return nullptr;
|
||||
}
|
||||
return std::make_unique<FilterLibplacebo>(
|
||||
vk_device_index,
|
||||
std::filesystem::path(cfg.shader_path),
|
||||
config->width,
|
||||
config->height
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
factory.register_processor(
|
||||
PROCESSOR_REALESRGAN,
|
||||
[](const ProcessorConfig *config, uint32_t vk_device_index) -> std::unique_ptr<Processor> {
|
||||
const auto &cfg = config->config.realesrgan;
|
||||
if (config->scaling_factor <= 0) {
|
||||
spdlog::critical("Scaling factor must be provided for the RealESRGAN filter");
|
||||
return nullptr;
|
||||
}
|
||||
if (!cfg.model_name) {
|
||||
spdlog::critical("Model name must be provided for the RealESRGAN filter");
|
||||
return nullptr;
|
||||
}
|
||||
return std::make_unique<FilterRealesrgan>(
|
||||
static_cast<int>(vk_device_index),
|
||||
cfg.tta_mode,
|
||||
config->scaling_factor,
|
||||
cfg.model_name
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
factory.register_processor(
|
||||
PROCESSOR_RIFE,
|
||||
[](const ProcessorConfig *config, uint32_t vk_device_index) -> std::unique_ptr<Processor> {
|
||||
const auto &cfg = config->config.rife;
|
||||
if (!cfg.model_name) {
|
||||
spdlog::critical("Model name must be provided for the RIFE filter");
|
||||
return nullptr;
|
||||
}
|
||||
return std::make_unique<InterpolatorRIFE>(
|
||||
static_cast<int>(vk_device_index),
|
||||
cfg.tta_mode,
|
||||
cfg.tta_temporal_mode,
|
||||
cfg.uhd_mode,
|
||||
cfg.num_threads,
|
||||
cfg.rife_v2,
|
||||
cfg.rife_v4,
|
||||
cfg.model_name
|
||||
);
|
||||
}
|
||||
);
|
||||
}
|
||||
Reference in New Issue
Block a user