mirror of
https://github.com/k4yt3x/video2x.git
synced 2026-02-12 16:14:44 +08:00
feat(*): added support for copying audio/subtitle streams and pause/abort (#1179)
* feat: added Makefile target for debian
* fix: fixed Dockerfile installing the wrong package
* feat: added hwaccel for encoder and decoder
* feat: added benchmark mode
* feat: removed hard-coded keyframe info
* chore: cleaned up headers and organized code
* style: cleaned up headers and includes
* feat: added a progress bar for CLI
* feat: removed atomicity requirements on processed frames
* feat: added pause and abort for CLI
* chore: updated default preset and crf settings
* feat: added support for copying audio and subtitle streams
* fix: fixed syntax issues for MSVC
* fix: fixed audio/subtitle timestamp rescaling

Signed-off-by: k4yt3x <i@k4yt3x.com>
This commit is contained in:
155
src/encoder.cpp
155
src/encoder.cpp
@@ -1,32 +1,34 @@
|
||||
#include "encoder.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
extern "C" {
|
||||
#include <libavcodec/avcodec.h>
|
||||
#include <libavcodec/codec.h>
|
||||
#include <libavcodec/codec_id.h>
|
||||
#include <libavfilter/avfilter.h>
|
||||
#include <libavfilter/buffersink.h>
|
||||
#include <libavfilter/buffersrc.h>
|
||||
#include <libavformat/avformat.h>
|
||||
#include <libavutil/opt.h>
|
||||
#include <libavutil/pixdesc.h>
|
||||
#include <libavutil/rational.h>
|
||||
#include "conversions.h"
|
||||
|
||||
static enum AVPixelFormat get_encoder_default_pix_fmt(const AVCodec *encoder) {
|
||||
const enum AVPixelFormat *p = encoder->pix_fmts;
|
||||
if (!p) {
|
||||
fprintf(stderr, "No pixel formats supported by encoder\n");
|
||||
return AV_PIX_FMT_NONE;
|
||||
}
|
||||
return *p;
|
||||
}
|
||||
|
||||
#include "conversions.h"
|
||||
#include "libvideo2x.h"
|
||||
|
||||
int init_encoder(
|
||||
AVBufferRef *hw_ctx,
|
||||
const char *output_filename,
|
||||
AVFormatContext *ifmt_ctx,
|
||||
AVFormatContext **ofmt_ctx,
|
||||
AVCodecContext **enc_ctx,
|
||||
AVCodecContext *dec_ctx,
|
||||
EncoderConfig *encoder_config
|
||||
EncoderConfig *encoder_config,
|
||||
int video_stream_index,
|
||||
int **stream_mapping
|
||||
) {
|
||||
AVFormatContext *fmt_ctx = NULL;
|
||||
AVCodecContext *codec_ctx = NULL;
|
||||
int stream_index = 0;
|
||||
int ret;
|
||||
|
||||
avformat_alloc_output_context2(&fmt_ctx, NULL, NULL, output_filename);
|
||||
@@ -35,66 +37,130 @@ int init_encoder(
|
||||
return AVERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
// Create a new video stream
|
||||
const AVCodec *enc = avcodec_find_encoder(encoder_config->codec);
|
||||
if (!enc) {
|
||||
fprintf(stderr, "Necessary encoder not found\n");
|
||||
const AVCodec *encoder = avcodec_find_encoder(encoder_config->codec);
|
||||
if (!encoder) {
|
||||
fprintf(
|
||||
stderr,
|
||||
"Required video encoder not found for vcodec %s\n",
|
||||
avcodec_get_name(encoder_config->codec)
|
||||
);
|
||||
return AVERROR_ENCODER_NOT_FOUND;
|
||||
}
|
||||
|
||||
// Create a new video stream in the output file
|
||||
AVStream *out_stream = avformat_new_stream(fmt_ctx, NULL);
|
||||
if (!out_stream) {
|
||||
fprintf(stderr, "Failed allocating output stream\n");
|
||||
fprintf(stderr, "Failed to allocate the output video stream\n");
|
||||
return AVERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
codec_ctx = avcodec_alloc_context3(enc);
|
||||
codec_ctx = avcodec_alloc_context3(encoder);
|
||||
if (!codec_ctx) {
|
||||
fprintf(stderr, "Failed to allocate the encoder context\n");
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
|
||||
// Set hardware device context
|
||||
if (hw_ctx != nullptr) {
|
||||
codec_ctx->hw_device_ctx = av_buffer_ref(hw_ctx);
|
||||
}
|
||||
|
||||
// Set encoding parameters
|
||||
codec_ctx->height = encoder_config->output_height;
|
||||
codec_ctx->width = encoder_config->output_width;
|
||||
codec_ctx->sample_aspect_ratio = dec_ctx->sample_aspect_ratio;
|
||||
codec_ctx->pix_fmt = encoder_config->pix_fmt;
|
||||
codec_ctx->time_base = av_inv_q(dec_ctx->framerate);
|
||||
codec_ctx->bit_rate = encoder_config->bit_rate;
|
||||
|
||||
if (codec_ctx->time_base.num == 0 || codec_ctx->time_base.den == 0) {
|
||||
codec_ctx->time_base = av_inv_q(av_guess_frame_rate(fmt_ctx, out_stream, NULL));
|
||||
// Set the pixel format
|
||||
if (encoder_config->pix_fmt != AV_PIX_FMT_NONE) {
|
||||
// Use the specified pixel format
|
||||
codec_ctx->pix_fmt = encoder_config->pix_fmt;
|
||||
} else {
|
||||
// Fall back to the default pixel format
|
||||
codec_ctx->pix_fmt = get_encoder_default_pix_fmt(encoder);
|
||||
if (codec_ctx->pix_fmt == AV_PIX_FMT_NONE) {
|
||||
fprintf(stderr, "Could not get the default pixel format for the encoder\n");
|
||||
return AVERROR(EINVAL);
|
||||
}
|
||||
}
|
||||
|
||||
// Set the bit rate and other encoder parameters if needed
|
||||
codec_ctx->bit_rate = encoder_config->bit_rate;
|
||||
codec_ctx->gop_size = 60; // Keyframe interval
|
||||
codec_ctx->max_b_frames = 3; // B-frames
|
||||
codec_ctx->keyint_min = 60; // Maximum GOP size
|
||||
// Set the time base
|
||||
codec_ctx->time_base = av_inv_q(dec_ctx->framerate);
|
||||
if (codec_ctx->time_base.num == 0 || codec_ctx->time_base.den == 0) {
|
||||
codec_ctx->time_base = av_inv_q(av_guess_frame_rate(ifmt_ctx, out_stream, NULL));
|
||||
}
|
||||
|
||||
// Set the CRF and preset for any codecs that support it
|
||||
char crf_str[16];
|
||||
snprintf(crf_str, sizeof(crf_str), "%.f", encoder_config->crf);
|
||||
if (encoder_config->codec == AV_CODEC_ID_H264 || encoder_config->codec == AV_CODEC_ID_HEVC) {
|
||||
av_opt_set(codec_ctx->priv_data, "crf", crf_str, 0);
|
||||
av_opt_set(codec_ctx->priv_data, "preset", encoder_config->preset, 0);
|
||||
}
|
||||
av_opt_set(codec_ctx->priv_data, "crf", crf_str, 0);
|
||||
av_opt_set(codec_ctx->priv_data, "preset", encoder_config->preset, 0);
|
||||
|
||||
if (fmt_ctx->oformat->flags & AVFMT_GLOBALHEADER) {
|
||||
codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
|
||||
}
|
||||
|
||||
if ((ret = avcodec_open2(codec_ctx, enc, NULL)) < 0) {
|
||||
if ((ret = avcodec_open2(codec_ctx, encoder, NULL)) < 0) {
|
||||
fprintf(stderr, "Cannot open video encoder\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = avcodec_parameters_from_context(out_stream->codecpar, codec_ctx);
|
||||
if (ret < 0) {
|
||||
fprintf(stderr, "Failed to copy encoder parameters to output stream\n");
|
||||
fprintf(stderr, "Failed to copy encoder parameters to output video stream\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
out_stream->time_base = codec_ctx->time_base;
|
||||
|
||||
if (encoder_config->copy_streams) {
|
||||
// Allocate the stream map
|
||||
*stream_mapping = (int *)av_malloc_array(ifmt_ctx->nb_streams, sizeof(**stream_mapping));
|
||||
if (!*stream_mapping) {
|
||||
fprintf(stderr, "Could not allocate stream mapping\n");
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
|
||||
// Map the video stream
|
||||
(*stream_mapping)[video_stream_index] = stream_index++;
|
||||
|
||||
// Loop through each stream in the input file
|
||||
for (int i = 0; i < ifmt_ctx->nb_streams; i++) {
|
||||
AVStream *in_stream = ifmt_ctx->streams[i];
|
||||
AVCodecParameters *in_codecpar = in_stream->codecpar;
|
||||
|
||||
if (i == video_stream_index) {
|
||||
// Video stream is already handled
|
||||
continue;
|
||||
}
|
||||
|
||||
if (in_codecpar->codec_type != AVMEDIA_TYPE_AUDIO &&
|
||||
in_codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) {
|
||||
(*stream_mapping)[i] = -1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Create corresponding output stream
|
||||
AVStream *out_stream = avformat_new_stream(fmt_ctx, NULL);
|
||||
if (!out_stream) {
|
||||
fprintf(stderr, "Failed allocating output stream\n");
|
||||
return AVERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
ret = avcodec_parameters_copy(out_stream->codecpar, in_codecpar);
|
||||
if (ret < 0) {
|
||||
fprintf(stderr, "Failed to copy codec parameters\n");
|
||||
return ret;
|
||||
}
|
||||
out_stream->codecpar->codec_tag = 0;
|
||||
|
||||
// Copy time base
|
||||
out_stream->time_base = in_stream->time_base;
|
||||
|
||||
(*stream_mapping)[i] = stream_index++;
|
||||
}
|
||||
}
|
||||
|
||||
// Open the output file
|
||||
if (!(fmt_ctx->oformat->flags & AVFMT_NOFILE)) {
|
||||
ret = avio_open(&fmt_ctx->pb, output_filename, AVIO_FLAG_WRITE);
|
||||
@@ -110,7 +176,12 @@ int init_encoder(
|
||||
return 0;
|
||||
}
|
||||
|
||||
int encode_and_write_frame(AVFrame *frame, AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx) {
|
||||
int encode_and_write_frame(
|
||||
AVFrame *frame,
|
||||
AVCodecContext *enc_ctx,
|
||||
AVFormatContext *ofmt_ctx,
|
||||
int video_stream_index
|
||||
) {
|
||||
int ret;
|
||||
|
||||
// Convert the frame to the encoder's pixel format if needed
|
||||
@@ -144,14 +215,16 @@ int encode_and_write_frame(AVFrame *frame, AVCodecContext *enc_ctx, AVFormatCont
|
||||
av_packet_unref(enc_pkt);
|
||||
break;
|
||||
} else if (ret < 0) {
|
||||
fprintf(stderr, "Error during encoding\n");
|
||||
fprintf(stderr, "Error encoding frame\n");
|
||||
av_packet_free(&enc_pkt);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Rescale packet timestamps
|
||||
av_packet_rescale_ts(enc_pkt, enc_ctx->time_base, ofmt_ctx->streams[0]->time_base);
|
||||
enc_pkt->stream_index = ofmt_ctx->streams[0]->index;
|
||||
av_packet_rescale_ts(
|
||||
enc_pkt, enc_ctx->time_base, ofmt_ctx->streams[video_stream_index]->time_base
|
||||
);
|
||||
enc_pkt->stream_index = video_stream_index;
|
||||
|
||||
// Write the packet
|
||||
ret = av_interleaved_write_frame(ofmt_ctx, enc_pkt);
|
||||
@@ -182,7 +255,7 @@ int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx) {
|
||||
av_packet_unref(enc_pkt);
|
||||
break;
|
||||
} else if (ret < 0) {
|
||||
fprintf(stderr, "Error during encoding\n");
|
||||
fprintf(stderr, "Error encoding frame\n");
|
||||
av_packet_free(&enc_pkt);
|
||||
return ret;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user