feat(libvideo2x): use OpenCV to retrieve total frame count (#1194)

This commit is contained in:
K4YT3X
2024-10-21 16:54:22 -07:00
committed by GitHub
parent e09f348890
commit bc168d11ab
20 changed files with 327 additions and 208 deletions

View File

@@ -22,17 +22,17 @@ static enum AVPixelFormat get_hw_format(AVCodecContext *ctx, const enum AVPixelF
int init_decoder(
AVHWDeviceType hw_type,
AVBufferRef *hw_ctx,
const char *input_filename,
const char *in_fname,
AVFormatContext **fmt_ctx,
AVCodecContext **dec_ctx,
int *video_stream_index
int *vstream_idx
) {
AVFormatContext *ifmt_ctx = NULL;
AVCodecContext *codec_ctx = NULL;
int ret;
if ((ret = avformat_open_input(&ifmt_ctx, input_filename, NULL, NULL)) < 0) {
spdlog::error("Could not open input file '{}'", input_filename);
if ((ret = avformat_open_input(&ifmt_ctx, in_fname, NULL, NULL)) < 0) {
spdlog::error("Could not open input file '{}'", in_fname);
return ret;
}
@@ -109,7 +109,7 @@ int init_decoder(
*fmt_ctx = ifmt_ctx;
*dec_ctx = codec_ctx;
*video_stream_index = stream_index;
*vstream_idx = stream_index;
return 0;
}

View File

@@ -19,21 +19,21 @@ static enum AVPixelFormat get_encoder_default_pix_fmt(const AVCodec *encoder) {
int init_encoder(
AVBufferRef *hw_ctx,
const char *output_filename,
const char *out_fname,
AVFormatContext *ifmt_ctx,
AVFormatContext **ofmt_ctx,
AVCodecContext **enc_ctx,
AVCodecContext *dec_ctx,
EncoderConfig *encoder_config,
int video_stream_index,
int **stream_mapping
int vstream_idx,
int **stream_map
) {
AVFormatContext *fmt_ctx = NULL;
AVCodecContext *codec_ctx = NULL;
int stream_index = 0;
int ret;
avformat_alloc_output_context2(&fmt_ctx, NULL, NULL, output_filename);
avformat_alloc_output_context2(&fmt_ctx, NULL, NULL, out_fname);
if (!fmt_ctx) {
spdlog::error("Could not create output context");
return AVERROR_UNKNOWN;
@@ -67,8 +67,8 @@ int init_encoder(
}
// Set encoding parameters
codec_ctx->height = encoder_config->output_height;
codec_ctx->width = encoder_config->output_width;
codec_ctx->height = encoder_config->out_height;
codec_ctx->width = encoder_config->out_width;
codec_ctx->sample_aspect_ratio = dec_ctx->sample_aspect_ratio;
codec_ctx->bit_rate = encoder_config->bit_rate;
@@ -116,28 +116,28 @@ int init_encoder(
if (encoder_config->copy_streams) {
// Allocate the stream map
*stream_mapping = (int *)av_malloc_array(ifmt_ctx->nb_streams, sizeof(**stream_mapping));
if (!*stream_mapping) {
*stream_map = (int *)av_malloc_array(ifmt_ctx->nb_streams, sizeof(**stream_map));
if (!*stream_map) {
spdlog::error("Could not allocate stream mapping");
return AVERROR(ENOMEM);
}
// Map the video stream
(*stream_mapping)[video_stream_index] = stream_index++;
(*stream_map)[vstream_idx] = stream_index++;
// Loop through each stream in the input file
for (int i = 0; i < ifmt_ctx->nb_streams; i++) {
AVStream *in_stream = ifmt_ctx->streams[i];
AVCodecParameters *in_codecpar = in_stream->codecpar;
if (i == video_stream_index) {
if (i == vstream_idx) {
// Video stream is already handled
continue;
}
if (in_codecpar->codec_type != AVMEDIA_TYPE_AUDIO &&
in_codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) {
(*stream_mapping)[i] = -1;
(*stream_map)[i] = -1;
continue;
}
@@ -158,15 +158,15 @@ int init_encoder(
// Copy time base
out_stream->time_base = in_stream->time_base;
(*stream_mapping)[i] = stream_index++;
(*stream_map)[i] = stream_index++;
}
}
// Open the output file
if (!(fmt_ctx->oformat->flags & AVFMT_NOFILE)) {
ret = avio_open(&fmt_ctx->pb, output_filename, AVIO_FLAG_WRITE);
ret = avio_open(&fmt_ctx->pb, out_fname, AVIO_FLAG_WRITE);
if (ret < 0) {
spdlog::error("Could not open output file '{}'", output_filename);
spdlog::error("Could not open output file '{}'", out_fname);
return ret;
}
}
@@ -181,7 +181,7 @@ int encode_and_write_frame(
AVFrame *frame,
AVCodecContext *enc_ctx,
AVFormatContext *ofmt_ctx,
int video_stream_index
int vstream_idx
) {
int ret;
@@ -223,9 +223,9 @@ int encode_and_write_frame(
// Rescale packet timestamps
av_packet_rescale_ts(
enc_pkt, enc_ctx->time_base, ofmt_ctx->streams[video_stream_index]->time_base
enc_pkt, enc_ctx->time_base, ofmt_ctx->streams[vstream_idx]->time_base
);
enc_pkt->stream_index = video_stream_index;
enc_pkt->stream_index = vstream_idx;
// Write the packet
ret = av_interleaved_write_frame(ofmt_ctx, enc_pkt);

View File

@@ -13,8 +13,8 @@ int init_libplacebo(
AVFilterContext **buffersrc_ctx,
AVFilterContext **buffersink_ctx,
AVCodecContext *dec_ctx,
int output_width,
int output_height,
int out_width,
int out_height,
const std::filesystem::path &shader_path
) {
char args[512];
@@ -77,8 +77,8 @@ int init_libplacebo(
filter_args,
sizeof(filter_args),
"w=%d:h=%d:upscaler=ewa_lanczos:custom_shader_path=%s",
output_width,
output_height,
out_width,
out_height,
shader_path_string.c_str()
);

View File

@@ -7,12 +7,16 @@
#include "fsutils.h"
#include "libplacebo.h"
LibplaceboFilter::LibplaceboFilter(int width, int height, const std::filesystem::path &shader_path)
LibplaceboFilter::LibplaceboFilter(
int out_width,
int out_height,
const std::filesystem::path &shader_path
)
: filter_graph(nullptr),
buffersrc_ctx(nullptr),
buffersink_ctx(nullptr),
output_width(width),
output_height(height),
out_width(out_width),
out_height(out_height),
shader_path(std::move(shader_path)) {}
LibplaceboFilter::~LibplaceboFilter() {
@@ -49,7 +53,8 @@ int LibplaceboFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVB
}
// Save the output time base
output_time_base = enc_ctx->time_base;
in_time_base = dec_ctx->time_base;
out_time_base = enc_ctx->time_base;
return init_libplacebo(
hw_ctx,
@@ -57,44 +62,43 @@ int LibplaceboFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVB
&buffersrc_ctx,
&buffersink_ctx,
dec_ctx,
output_width,
output_height,
out_width,
out_height,
shader_full_path
);
}
int LibplaceboFilter::process_frame(AVFrame *input_frame, AVFrame **output_frame) {
int LibplaceboFilter::process_frame(AVFrame *in_frame, AVFrame **out_frame) {
int ret;
// Get the filtered frame
*output_frame = av_frame_alloc();
if (*output_frame == nullptr) {
*out_frame = av_frame_alloc();
if (*out_frame == nullptr) {
spdlog::error("Failed to allocate output frame");
return -1;
}
// Feed the frame to the filter graph
ret = av_buffersrc_add_frame(buffersrc_ctx, input_frame);
ret = av_buffersrc_add_frame(buffersrc_ctx, in_frame);
if (ret < 0) {
spdlog::error("Error while feeding the filter graph");
return ret;
}
ret = av_buffersink_get_frame(buffersink_ctx, *output_frame);
ret = av_buffersink_get_frame(buffersink_ctx, *out_frame);
if (ret < 0) {
av_frame_free(output_frame);
av_frame_free(out_frame);
return ret;
}
// Rescale PTS to encoder's time base
(*output_frame)->pts =
av_rescale_q((*output_frame)->pts, buffersink_ctx->inputs[0]->time_base, output_time_base);
(*out_frame)->pts = av_rescale_q((*out_frame)->pts, in_time_base, out_time_base);
// Return the processed frame to the caller
return 0;
}
int LibplaceboFilter::flush(std::vector<AVFrame *> &processed_frames) {
int LibplaceboFilter::flush(std::vector<AVFrame *> &flushed_frames) {
int ret = av_buffersrc_add_frame(buffersrc_ctx, nullptr);
if (ret < 0) {
spdlog::error("Error while flushing filter graph");
@@ -119,11 +123,10 @@ int LibplaceboFilter::flush(std::vector<AVFrame *> &processed_frames) {
}
// Rescale PTS to encoder's time base
filt_frame->pts =
av_rescale_q(filt_frame->pts, buffersink_ctx->inputs[0]->time_base, output_time_base);
filt_frame->pts = av_rescale_q(filt_frame->pts, in_time_base, out_time_base);
// Add to processed frames
processed_frames.push_back(filt_frame);
flushed_frames.push_back(filt_frame);
}
return 0;

View File

@@ -7,6 +7,7 @@
#include <thread>
#include <spdlog/spdlog.h>
#include <opencv2/videoio.hpp>
#include "decoder.h"
#include "encoder.h"
@@ -24,8 +25,8 @@
* @param[in] dec_ctx Decoder context
* @param[in] enc_ctx Encoder context
* @param[in] filter Filter instance
* @param[in] video_stream_index Index of the video stream in the input format context
* @param[in] stream_mapping Array mapping input stream indexes to output stream indexes
* @param[in] vstream_idx Index of the video stream in the input format context
* @param[in] stream_map Array mapping input stream indexes to output stream indexes
* @param[in] benchmark Flag to enable benchmarking mode
* @return int 0 on success, negative value on error
*/
@@ -37,8 +38,8 @@ int process_frames(
AVCodecContext *dec_ctx,
AVCodecContext *enc_ctx,
Filter *filter,
int video_stream_index,
int *stream_mapping,
int vstream_idx,
int *stream_map,
bool benchmark = false
) {
int ret;
@@ -47,11 +48,12 @@ int process_frames(
char errbuf[AV_ERROR_MAX_STRING_SIZE];
// Get the total number of frames in the video
AVStream *video_stream = ifmt_ctx->streams[video_stream_index];
AVStream *video_stream = ifmt_ctx->streams[vstream_idx];
proc_ctx->total_frames = video_stream->nb_frames;
// If nb_frames is not set, calculate total frames using duration and frame rate
// If nb_frames is not set, estimate total frames using duration and frame rate
if (proc_ctx->total_frames == 0) {
spdlog::debug("`nb_frames` is not set; estimating total frames with duration*framerate");
int64_t duration = video_stream->duration;
AVRational frame_rate = video_stream->avg_frame_rate;
if (duration != AV_NOPTS_VALUE && frame_rate.num != 0 && frame_rate.den != 0) {
@@ -59,6 +61,25 @@ int process_frames(
}
}
// If total_frames is still 0, read the total number of frames with OpenCV
if (proc_ctx->total_frames == 0) {
spdlog::debug("Unable to estimate total number of frames; reading with OpenCV");
cv::VideoCapture cap(ifmt_ctx->url);
if (!cap.isOpened()) {
spdlog::error("Failed to open video file with OpenCV");
return -1;
}
proc_ctx->total_frames = cap.get(cv::CAP_PROP_FRAME_COUNT);
cap.release();
}
// Check if the total number of frames is still 0
if (proc_ctx->total_frames == 0) {
spdlog::warn("Unable to determine total number of frames");
} else {
spdlog::debug("{} frames to process", proc_ctx->total_frames);
}
// Get start time
proc_ctx->start_time = time(NULL);
if (proc_ctx->start_time == -1) {
@@ -75,10 +96,16 @@ int process_frames(
while (!proc_ctx->abort) {
ret = av_read_frame(ifmt_ctx, &packet);
if (ret < 0) {
break; // End of file or error
if (ret == AVERROR_EOF) {
spdlog::debug("Reached end of file");
break;
}
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::error("Error reading packet: {}", errbuf);
goto end;
}
if (packet.stream_index == video_stream_index) {
if (packet.stream_index == vstream_idx) {
// Send the packet to the decoder
ret = avcodec_send_packet(dec_ctx, &packet);
if (ret < 0) {
@@ -98,6 +125,7 @@ int process_frames(
ret = avcodec_receive_frame(dec_ctx, frame);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
spdlog::debug("Frame not ready");
break;
} else if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
@@ -111,9 +139,8 @@ int process_frames(
if (ret == 0 && processed_frame != nullptr) {
// Encode and write the processed frame
if (!benchmark) {
ret = encode_and_write_frame(
processed_frame, enc_ctx, ofmt_ctx, video_stream_index
);
ret =
encode_and_write_frame(processed_frame, enc_ctx, ofmt_ctx, vstream_idx);
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::error("Error encoding/writing frame: {}", errbuf);
@@ -134,9 +161,9 @@ int process_frames(
"Processed frame {}/{}", proc_ctx->processed_frames, proc_ctx->total_frames
);
}
} else if (encoder_config->copy_streams && stream_mapping[packet.stream_index] >= 0) {
} else if (encoder_config->copy_streams && stream_map[packet.stream_index] >= 0) {
AVStream *in_stream = ifmt_ctx->streams[packet.stream_index];
int out_stream_index = stream_mapping[packet.stream_index];
int out_stream_index = stream_map[packet.stream_index];
AVStream *out_stream = ofmt_ctx->streams[out_stream_index];
// Rescale packet timestamps
@@ -165,7 +192,7 @@ int process_frames(
// Encode and write all flushed frames
for (AVFrame *&flushed_frame : flushed_frames) {
ret = encode_and_write_frame(flushed_frame, enc_ctx, ofmt_ctx, video_stream_index);
ret = encode_and_write_frame(flushed_frame, enc_ctx, ofmt_ctx, vstream_idx);
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::error("Error encoding/writing flushed frame: {}", errbuf);
@@ -203,7 +230,7 @@ void cleanup(
AVCodecContext *dec_ctx,
AVCodecContext *enc_ctx,
AVBufferRef *hw_ctx,
int *stream_mapping,
int *stream_map,
Filter *filter
) {
if (ifmt_ctx) {
@@ -224,8 +251,8 @@ void cleanup(
if (hw_ctx) {
av_buffer_unref(&hw_ctx);
}
if (stream_mapping) {
av_free(stream_mapping);
if (stream_map) {
av_free(stream_map);
}
if (filter) {
delete filter;
@@ -235,8 +262,8 @@ void cleanup(
/**
* @brief Process a video file using the selected filter and encoder settings.
*
* @param[in] input_filename Path to the input video file
* @param[in] output_filename Path to the output video file
* @param[in] in_fname Path to the input video file
* @param[in] out_fname Path to the output video file
* @param[in] log_level Log level
* @param[in] benchmark Flag to enable benchmarking mode
* @param[in] hw_type Hardware device type
@@ -246,8 +273,8 @@ void cleanup(
* @return int 0 on success, non-zero value on error
*/
extern "C" int process_video(
const char *input_filename,
const char *output_filename,
const char *in_fname,
const char *out_fname,
Libvideo2xLogLevel log_level,
bool benchmark,
AVHWDeviceType hw_type,
@@ -260,9 +287,9 @@ extern "C" int process_video(
AVCodecContext *dec_ctx = nullptr;
AVCodecContext *enc_ctx = nullptr;
AVBufferRef *hw_ctx = nullptr;
int *stream_mapping = nullptr;
int *stream_map = nullptr;
Filter *filter = nullptr;
int video_stream_index = -1;
int vstream_idx = -1;
char errbuf[AV_ERROR_MAX_STRING_SIZE];
int ret = 0;
@@ -313,11 +340,11 @@ extern "C" int process_video(
}
// Initialize input
ret = init_decoder(hw_type, hw_ctx, input_filename, &ifmt_ctx, &dec_ctx, &video_stream_index);
ret = init_decoder(hw_type, hw_ctx, in_fname, &ifmt_ctx, &dec_ctx, &vstream_idx);
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::error("Failed to initialize decoder: {}", errbuf);
cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter);
cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_map, filter);
return ret;
}
@@ -325,33 +352,34 @@ extern "C" int process_video(
int output_width = 0, output_height = 0;
switch (filter_config->filter_type) {
case FILTER_LIBPLACEBO:
output_width = filter_config->config.libplacebo.output_width;
output_height = filter_config->config.libplacebo.output_height;
output_width = filter_config->config.libplacebo.out_width;
output_height = filter_config->config.libplacebo.out_height;
break;
case FILTER_REALESRGAN:
// Calculate the output dimensions based on the scaling factor
output_width = dec_ctx->width * filter_config->config.realesrgan.scaling_factor;
output_height = dec_ctx->height * filter_config->config.realesrgan.scaling_factor;
}
spdlog::info("Output video dimensions: {}x{}", output_width, output_height);
// Initialize output encoder
encoder_config->output_width = output_width;
encoder_config->output_height = output_height;
encoder_config->out_width = output_width;
encoder_config->out_height = output_height;
ret = init_encoder(
hw_ctx,
output_filename,
out_fname,
ifmt_ctx,
&ofmt_ctx,
&enc_ctx,
dec_ctx,
encoder_config,
video_stream_index,
&stream_mapping
vstream_idx,
&stream_map
);
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::error("Failed to initialize encoder: {}", errbuf);
cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter);
cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_map, filter);
return ret;
}
@@ -360,7 +388,7 @@ extern "C" int process_video(
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::error("Error occurred when opening output file: {}", errbuf);
cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter);
cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_map, filter);
return ret;
}
@@ -372,19 +400,19 @@ extern "C" int process_video(
// Validate shader path
if (!config.shader_path) {
spdlog::error("Shader path must be provided for the libplacebo filter");
cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter);
cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_map, filter);
return -1;
}
// Validate output dimensions
if (config.output_width <= 0 || config.output_height <= 0) {
if (config.out_width <= 0 || config.out_height <= 0) {
spdlog::error("Output dimensions must be provided for the libplacebo filter");
cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter);
cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_map, filter);
return -1;
}
filter = new LibplaceboFilter{
config.output_width, config.output_height, std::filesystem::path(config.shader_path)
config.out_width, config.out_height, std::filesystem::path(config.shader_path)
};
break;
}
@@ -394,14 +422,14 @@ extern "C" int process_video(
// Validate model name
if (!config.model) {
spdlog::error("Model name must be provided for the RealESRGAN filter");
cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter);
cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_map, filter);
return -1;
}
// Validate scaling factor
if (config.scaling_factor <= 0) {
spdlog::error("Scaling factor must be provided for the RealESRGAN filter");
cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter);
cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_map, filter);
return -1;
}
@@ -412,7 +440,7 @@ extern "C" int process_video(
}
default:
spdlog::error("Unknown filter type");
cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter);
cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_map, filter);
return -1;
}
@@ -421,7 +449,7 @@ extern "C" int process_video(
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::error("Failed to initialize filter: {}", errbuf);
cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter);
cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_map, filter);
return ret;
}
@@ -434,14 +462,14 @@ extern "C" int process_video(
dec_ctx,
enc_ctx,
filter,
video_stream_index,
stream_mapping,
vstream_idx,
stream_map,
benchmark
);
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::error("Error processing frames: {}", errbuf);
cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter);
cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_map, filter);
return ret;
}
@@ -449,7 +477,7 @@ extern "C" int process_video(
av_write_trailer(ofmt_ctx);
// Cleanup before returning
cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_mapping, filter);
cleanup(ifmt_ctx, ofmt_ctx, dec_ctx, enc_ctx, hw_ctx, stream_map, filter);
if (ret < 0 && ret != AVERROR_EOF) {
av_strerror(ret, errbuf, sizeof(errbuf));

View File

@@ -71,9 +71,9 @@ int RealesrganFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVB
realesrgan = new RealESRGAN(gpuid, tta_mode);
// Store the time bases
input_time_base = dec_ctx->time_base;
output_time_base = enc_ctx->time_base;
output_pix_fmt = enc_ctx->pix_fmt;
in_time_base = dec_ctx->time_base;
out_time_base = enc_ctx->time_base;
out_pix_fmt = enc_ctx->pix_fmt;
// Load the model
if (realesrgan->load(model_param_full_path, model_bin_full_path) != 0) {
@@ -100,38 +100,33 @@ int RealesrganFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVB
return 0;
}
int RealesrganFilter::process_frame(AVFrame *input_frame, AVFrame **output_frame) {
int RealesrganFilter::process_frame(AVFrame *in_frame, AVFrame **out_frame) {
int ret;
// Convert the input frame to RGB24
ncnn::Mat input_mat = avframe_to_ncnn_mat(input_frame);
if (input_mat.empty()) {
ncnn::Mat in_mat = avframe_to_ncnn_mat(in_frame);
if (in_mat.empty()) {
spdlog::error("Failed to convert AVFrame to ncnn::Mat");
return -1;
}
// Allocate space for output ncnn::Mat
int output_width = input_mat.w * realesrgan->scale;
int output_height = input_mat.h * realesrgan->scale;
ncnn::Mat output_mat = ncnn::Mat(output_width, output_height, (size_t)3, 3);
int output_width = in_mat.w * realesrgan->scale;
int output_height = in_mat.h * realesrgan->scale;
ncnn::Mat out_mat = ncnn::Mat(output_width, output_height, (size_t)3, 3);
ret = realesrgan->process(input_mat, output_mat);
ret = realesrgan->process(in_mat, out_mat);
if (ret != 0) {
spdlog::error("RealESRGAN processing failed");
return ret;
}
// Convert ncnn::Mat to AVFrame
*output_frame = ncnn_mat_to_avframe(output_mat, output_pix_fmt);
*out_frame = ncnn_mat_to_avframe(out_mat, out_pix_fmt);
// Rescale PTS to encoder's time base
(*output_frame)->pts = av_rescale_q(input_frame->pts, input_time_base, output_time_base);
(*out_frame)->pts = av_rescale_q(in_frame->pts, in_time_base, out_time_base);
// Return the processed frame to the caller
return ret;
}
/**
 * @brief Flush hook of the RealESRGAN filter.
 *
 * No special flushing is needed for RealESRGAN, so the vector passed in is left untouched.
 *
 * @param[in,out] processed_frames Frame list supplied by the caller; not modified here
 * @return int Always 0
 */
int RealesrganFilter::flush(std::vector<AVFrame *> &processed_frames) {
    // The argument is intentionally unused
    (void)processed_frames;
    return 0;
}

View File

@@ -22,23 +22,6 @@
#include "getopt.h"
// Set UNIX terminal input to non-blocking mode
#ifndef _WIN32
/**
 * @brief Toggle unbuffered, non-blocking keyboard input on standard input.
 *
 * Enabling clears ICANON and ECHO on the terminal attached to STDIN_FILENO and adds
 * O_NONBLOCK to the descriptor's file status flags. Disabling restores the terminal
 * attributes and the status flags that were captured by the matching enable call.
 *
 * Redundant calls (enable while already enabled, disable while not enabled) do nothing,
 * so the saved state is never overwritten and never applied before it has been captured.
 *
 * @param[in] enable true to enter non-blocking mode, false to restore the previous mode
 */
void set_nonblocking_input(bool enable) {
    static struct termios saved_termios;
    static bool termios_saved = false;
    static int saved_flags = -1;
    static bool active = false;

    if (enable == active) {
        return;
    }

    if (enable) {
        // tcgetattr fails when stdin is not a terminal; leave the termios settings alone then
        termios_saved = tcgetattr(STDIN_FILENO, &saved_termios) == 0;
        if (termios_saved) {
            struct termios raw = saved_termios;
            raw.c_lflag &= ~(ICANON | ECHO);
            tcsetattr(STDIN_FILENO, TCSANOW, &raw);
        }

        // Add O_NONBLOCK on top of the existing status flags instead of replacing them
        saved_flags = fcntl(STDIN_FILENO, F_GETFL);
        fcntl(STDIN_FILENO, F_SETFL, (saved_flags == -1 ? 0 : saved_flags) | O_NONBLOCK);
    } else {
        if (termios_saved) {
            tcsetattr(STDIN_FILENO, TCSANOW, &saved_termios);
        }

        // Put back the flags seen before enabling; fall back to 0 if they could not be read
        fcntl(STDIN_FILENO, F_SETFL, saved_flags == -1 ? 0 : saved_flags);
    }

    active = enable;
}
#endif
// Define command line options
static struct option long_options[] = {
{"loglevel", required_argument, NULL, 0},
@@ -73,13 +56,23 @@ static struct option long_options[] = {
{0, 0, 0, 0}
};
// List of valid RealESRGAN models
// is_valid_realesrgan_model() compares its argument against each entry with strcmp
const char *valid_realesrgan_models[] = {
"realesrgan-plus",
"realesrgan-plus-anime",
"realesr-animevideov3",
};
// Indicate if a newline needs to be printed before the next output
// Set to true after the progress line has been written and flushed; reset to false once a
// newline has been emitted (by the FFmpeg log callback or by the abort message)
// NOTE(review): the FFmpeg log callback reads and clears this plain bool; if FFmpeg logs from
// the video processing thread, it is accessed from two threads without synchronization --
// confirm and consider an atomic
bool newline_required = false;
// Structure to hold parsed arguments
struct arguments {
// General options
const char *loglevel;
bool noprogress;
const char *input_filename;
const char *output_filename;
const char *in_fname;
const char *out_fname;
const char *filter_type;
const char *hwaccel;
bool nocopystreams;
@@ -94,8 +87,8 @@ struct arguments {
// libplacebo options
const char *shader_path;
int output_width;
int output_height;
int out_width;
int out_height;
// RealESRGAN options
int gpuid;
@@ -111,18 +104,38 @@ struct ProcessVideoThreadArguments {
struct VideoProcessingContext *proc_ctx;
};
// Names of the RealESRGAN models accepted on the command line
// is_valid_realesrgan_model() compares its argument against each entry with strcmp
const char *valid_models[] = {
"realesrgan-plus",
"realesrgan-plus-anime",
"realesr-animevideov3",
};
// Set UNIX terminal input to non-blocking mode
#ifndef _WIN32
/**
 * @brief Toggle unbuffered, non-blocking keyboard input on standard input.
 *
 * Enabling clears ICANON and ECHO on the terminal attached to STDIN_FILENO and adds
 * O_NONBLOCK to the descriptor's file status flags. Disabling restores the terminal
 * attributes and the status flags that were captured by the matching enable call.
 *
 * Redundant calls (enable while already enabled, disable while not enabled) do nothing,
 * so the saved state is never overwritten and never applied before it has been captured.
 *
 * @param[in] enable true to enter non-blocking mode, false to restore the previous mode
 */
void set_nonblocking_input(bool enable) {
    static struct termios saved_termios;
    static bool termios_saved = false;
    static int saved_flags = -1;
    static bool active = false;

    if (enable == active) {
        return;
    }

    if (enable) {
        // tcgetattr fails when stdin is not a terminal; leave the termios settings alone then
        termios_saved = tcgetattr(STDIN_FILENO, &saved_termios) == 0;
        if (termios_saved) {
            struct termios raw = saved_termios;
            raw.c_lflag &= ~(ICANON | ECHO);
            tcsetattr(STDIN_FILENO, TCSANOW, &raw);
        }

        // Add O_NONBLOCK on top of the existing status flags instead of replacing them
        saved_flags = fcntl(STDIN_FILENO, F_GETFL);
        fcntl(STDIN_FILENO, F_SETFL, (saved_flags == -1 ? 0 : saved_flags) | O_NONBLOCK);
    } else {
        if (termios_saved) {
            tcsetattr(STDIN_FILENO, TCSANOW, &saved_termios);
        }

        // Put back the flags seen before enabling; fall back to 0 if they could not be read
        fcntl(STDIN_FILENO, F_SETFL, saved_flags == -1 ? 0 : saved_flags);
    }

    active = enable;
}
#endif
// Newline-safe log callback for FFmpeg
/**
 * @brief FFmpeg log callback that terminates a pending output line before logging.
 *
 * When the message level passes the threshold returned by av_log_get_level() and
 * newline_required is set, a newline is written to stdout and the flag is cleared.
 * The message is then always handed to av_log_default_callback unchanged.
 *
 * @param[in] ptr Passed through to av_log_default_callback
 * @param[in] level Log level of the message
 * @param[in] fmt Format string of the message
 * @param[in] vl Variadic arguments for the format string
 */
void newline_safe_ffmpeg_log_callback(void *ptr, int level, const char *fmt, va_list vl) {
    const int threshold = av_log_get_level();

    if (level <= threshold) {
        if (newline_required) {
            putchar('\n');
            newline_required = false;
        }
    }

    av_log_default_callback(ptr, level, fmt, vl);
}
int is_valid_realesrgan_model(const char *model) {
if (!model) {
return 0;
}
for (int i = 0; i < sizeof(valid_models) / sizeof(valid_models[0]); i++) {
if (strcmp(model, valid_models[i]) == 0) {
for (int i = 0; i < sizeof(valid_realesrgan_models) / sizeof(valid_realesrgan_models[0]); i++) {
if (strcmp(model, valid_realesrgan_models[i]) == 0) {
return 1;
}
}
@@ -175,8 +188,8 @@ void parse_arguments(int argc, char **argv, struct arguments *arguments) {
// Default argument values
arguments->loglevel = "info";
arguments->noprogress = false;
arguments->input_filename = NULL;
arguments->output_filename = NULL;
arguments->in_fname = NULL;
arguments->out_fname = NULL;
arguments->filter_type = NULL;
arguments->hwaccel = "none";
arguments->nocopystreams = false;
@@ -191,8 +204,8 @@ void parse_arguments(int argc, char **argv, struct arguments *arguments) {
// libplacebo options
arguments->shader_path = NULL;
arguments->output_width = 0;
arguments->output_height = 0;
arguments->out_width = 0;
arguments->out_height = 0;
// RealESRGAN options
arguments->gpuid = 0;
@@ -204,10 +217,10 @@ void parse_arguments(int argc, char **argv, struct arguments *arguments) {
)) != -1) {
switch (c) {
case 'i':
arguments->input_filename = optarg;
arguments->in_fname = optarg;
break;
case 'o':
arguments->output_filename = optarg;
arguments->out_fname = optarg;
break;
case 'f':
arguments->filter_type = optarg;
@@ -242,15 +255,15 @@ void parse_arguments(int argc, char **argv, struct arguments *arguments) {
arguments->shader_path = optarg;
break;
case 'w':
arguments->output_width = atoi(optarg);
if (arguments->output_width <= 0) {
arguments->out_width = atoi(optarg);
if (arguments->out_width <= 0) {
fprintf(stderr, "Error: Output width must be greater than 0.\n");
exit(1);
}
break;
case 'h':
arguments->output_height = atoi(optarg);
if (arguments->output_height <= 0) {
arguments->out_height = atoi(optarg);
if (arguments->out_height <= 0) {
fprintf(stderr, "Error: Output height must be greater than 0.\n");
exit(1);
}
@@ -301,12 +314,12 @@ void parse_arguments(int argc, char **argv, struct arguments *arguments) {
}
// Check for required arguments
if (!arguments->input_filename) {
if (!arguments->in_fname) {
fprintf(stderr, "Error: Input file path is required.\n");
exit(1);
}
if (!arguments->output_filename && !arguments->benchmark) {
if (!arguments->out_fname && !arguments->benchmark) {
fprintf(stderr, "Error: Output file path is required.\n");
exit(1);
}
@@ -317,8 +330,7 @@ void parse_arguments(int argc, char **argv, struct arguments *arguments) {
}
if (strcmp(arguments->filter_type, "libplacebo") == 0) {
if (!arguments->shader_path || arguments->output_width == 0 ||
arguments->output_height == 0) {
if (!arguments->shader_path || arguments->out_width == 0 || arguments->out_height == 0) {
fprintf(
stderr,
"Error: For libplacebo, shader name/path (-s), width (-w), "
@@ -371,8 +383,8 @@ int process_video_thread(void *arg) {
// Call the process_video function
int result = process_video(
arguments->input_filename,
arguments->output_filename,
arguments->in_fname,
arguments->out_fname,
log_level,
arguments->benchmark,
hw_device_type,
@@ -400,8 +412,8 @@ int main(int argc, char **argv) {
struct FilterConfig filter_config;
if (strcmp(arguments.filter_type, "libplacebo") == 0) {
filter_config.filter_type = FILTER_LIBPLACEBO;
filter_config.config.libplacebo.output_width = arguments.output_width;
filter_config.config.libplacebo.output_height = arguments.output_height;
filter_config.config.libplacebo.out_width = arguments.out_width;
filter_config.config.libplacebo.out_height = arguments.out_height;
filter_config.config.libplacebo.shader_path = arguments.shader_path;
} else if (strcmp(arguments.filter_type, "realesrgan") == 0) {
filter_config.filter_type = FILTER_REALESRGAN;
@@ -433,8 +445,8 @@ int main(int argc, char **argv) {
// Setup encoder configuration
struct EncoderConfig encoder_config = {
.output_width = 0, // To be filled by libvideo2x
.output_height = 0, // To be filled by libvideo2x
.out_width = 0, // To be filled by libvideo2x
.out_height = 0, // To be filled by libvideo2x
.copy_streams = !arguments.nocopystreams,
.codec = codec->id,
.pix_fmt = pix_fmt,
@@ -472,6 +484,10 @@ int main(int argc, char **argv) {
.proc_ctx = &proc_ctx
};
// Register a newline-safe log callback for FFmpeg
// This will ensure that log messages are printed on a new line after the progress bar
av_log_set_callback(newline_safe_ffmpeg_log_callback);
// Create a thread for video processing
thrd_t processing_thread;
if (thrd_create(&processing_thread, process_video_thread, &thread_args) != thrd_success) {
@@ -509,8 +525,9 @@ int main(int argc, char **argv) {
}
} else if (ch == 'q' || ch == 'Q') {
// Abort processing
printf("Aborting processing...\n");
printf("\nAborting processing...\n");
proc_ctx.abort = true;
newline_required = false;
break;
}
@@ -526,10 +543,11 @@ int main(int argc, char **argv) {
time(NULL) - proc_ctx.start_time
);
fflush(stdout);
newline_required = true;
}
// Sleep for 50ms
thrd_sleep(&(struct timespec){.tv_sec = 0, .tv_nsec = 50000000}, NULL);
thrd_sleep(&(struct timespec){.tv_sec = 0, .tv_nsec = 100000000}, NULL);
}
// Restore terminal to blocking mode
@@ -542,18 +560,19 @@ int main(int argc, char **argv) {
thrd_join(processing_thread, &process_result);
// Print a newline if progress bar was displayed
if (!arguments.noprogress && process_result == 0) {
puts("");
if (newline_required) {
putchar('\n');
}
// Print final message based on processing result
if (proc_ctx.abort) {
fprintf(stderr, "Video processing aborted\n");
return 2;
}
if (process_result != 0) {
} else if (process_result != 0) {
fprintf(stderr, "Video processing failed\n");
return process_result;
} else {
printf("Video processing completed successfully\n");
}
// Calculate statistics
@@ -563,14 +582,14 @@ int main(int argc, char **argv) {
// Print processing summary
printf("====== Video2X %s summary ======\n", arguments.benchmark ? "Benchmark" : "Processing");
printf("Video file processed: %s\n", arguments.input_filename);
printf("Video file processed: %s\n", arguments.in_fname);
printf("Total frames processed: %ld\n", proc_ctx.processed_frames);
printf("Total time taken: %lds\n", time_elapsed);
printf("Average processing speed: %.2f FPS\n", average_speed_fps);
// Print additional information if not in benchmark mode
if (!arguments.benchmark) {
printf("Output written to: %s\n", arguments.output_filename);
printf("Output written to: %s\n", arguments.out_fname);
}
return 0;