diff --git a/CHANGELOG.md b/CHANGELOG.md index 52a94a1..01b48b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- Option to recalculate the PTS. - Real-ESRGAN models `realesr-general-x4v3` and `realesr-general-wdn-x4v3` (#1319). +- (Video2X Qt6) Korean translation. + +### Changed + +- Separate audio and subtitle stream copying options. ## [6.4.0] - 2025-01-24 diff --git a/include/libvideo2x/encoder.h b/include/libvideo2x/encoder.h index 33fc086..6d78718 100644 --- a/include/libvideo2x/encoder.h +++ b/include/libvideo2x/encoder.h @@ -17,7 +17,9 @@ namespace encoder { struct EncoderConfig { // Non-AVCodecContext options std::string codec = "libx264"; - bool copy_streams = true; + bool recalculate_pts = true; + bool copy_audio_streams = true; + bool copy_subtitle_streams = true; // Basic video options AVPixelFormat pix_fmt = AV_PIX_FMT_NONE; @@ -72,6 +74,7 @@ class Encoder { int get_output_video_stream_index() const; private: + EncoderConfig enc_cfg_; AVFormatContext* ofmt_ctx_; AVCodecContext* enc_ctx_; int out_vstream_idx_; diff --git a/src/encoder.cpp b/src/encoder.cpp index dfb6e31..cb735d5 100644 --- a/src/encoder.cpp +++ b/src/encoder.cpp @@ -6,10 +6,9 @@ extern "C" { #include } -#include "logger_manager.h" - #include "avutils.h" #include "conversions.h" +#include "logger_manager.h" namespace video2x { namespace encoder { @@ -45,6 +44,9 @@ int Encoder::init( ) { int ret; + // Copy the encoder configuration + enc_cfg_ = enc_cfg; + // Allocate the output format context avformat_alloc_output_context2(&ofmt_ctx_, nullptr, nullptr, out_fpath.u8string().c_str()); if (!ofmt_ctx_) { @@ -188,8 +190,8 @@ int Encoder::init( out_vstream->r_frame_rate = enc_ctx_->framerate; // Copy other streams if necessary - if (enc_cfg.copy_streams) { - // Allocate the stream mape frame o + if (enc_cfg.copy_audio_streams || enc_cfg.copy_subtitle_streams) { + // Allocate the stream map stream_map_ = reinterpret_cast(av_malloc_array(ifmt_ctx->nb_streams, sizeof(*stream_map_))); if (!stream_map_) { @@ -198,22 +200,37 @@ int Encoder::init( } // Map each input stream to an output stream - for (int i = 0; i < static_cast(ifmt_ctx->nb_streams); i++) { - AVStream* in_stream = ifmt_ctx->streams[i]; + for (int stream_index = 0; stream_index < static_cast(ifmt_ctx->nb_streams); + stream_index++) { + AVStream* in_stream = ifmt_ctx->streams[stream_index]; AVCodecParameters* in_codecpar = in_stream->codecpar; // Skip the input video stream as it's already processed - if (i == in_vstream_idx) { - stream_map_[i] = out_vstream_idx_; + if (stream_index == in_vstream_idx) { + stream_map_[stream_index] = out_vstream_idx_; continue; } - // Map only audio and subtitle streams (skip other types) - if (in_codecpar->codec_type != AVMEDIA_TYPE_AUDIO && - in_codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) { - stream_map_[i] = -1; - logger()->warn("Skipping unsupported stream type at index: {}", i); - continue; + // Determine if the current stream should be skipped + switch (in_codecpar->codec_type) { + case AVMEDIA_TYPE_AUDIO: + if (!enc_cfg.copy_audio_streams) { + stream_map_[stream_index] = -1; + continue; + } + logger()->debug("Copying audio stream at index: {}", stream_index); + break; + case AVMEDIA_TYPE_SUBTITLE: + if (!enc_cfg.copy_subtitle_streams) { + stream_map_[stream_index] = -1; + continue; + } + logger()->debug("Copying subtitle stream at index: {}", stream_index); + break; + default: + stream_map_[stream_index] = -1; + logger()->warn("Skipping unsupported stream type at index: {}", stream_index); + continue; } // Create corresponding output stream for audio and subtitle streams @@ -242,8 +259,8 @@ int Encoder::init( out_stream->time_base = in_stream->time_base; // Map input stream index to output stream index - logger()->debug("Stream mapping: {} (in) -> {} (out)", i, out_stream->index); - stream_map_[i] = out_stream->index; + logger()->debug("Stream mapping: {} (in) -> {} (out)", stream_index, out_stream->index); + stream_map_[stream_index] = out_stream->index; } } @@ -275,7 +292,9 @@ int Encoder::write_frame(AVFrame* frame, int64_t frame_idx) { frame->pict_type = AV_PICTURE_TYPE_NONE; // Calculate this frame's presentation timestamp (PTS) - frame->pts = av_rescale_q(frame_idx, av_inv_q(enc_ctx_->framerate), enc_ctx_->time_base); + if (enc_cfg_.recalculate_pts) { + frame->pts = av_rescale_q(frame_idx, av_inv_q(enc_ctx_->framerate), enc_ctx_->time_base); + } // Convert the frame to the encoder's pixel format if needed if (frame->format != enc_ctx_->pix_fmt) { diff --git a/src/filter_libplacebo.cpp b/src/filter_libplacebo.cpp index 7987f61..1f65c33 100644 --- a/src/filter_libplacebo.cpp +++ b/src/filter_libplacebo.cpp @@ -1,7 +1,5 @@ #include "filter_libplacebo.h" -#include - #include #include "fsutils.h" diff --git a/src/libvideo2x.cpp b/src/libvideo2x.cpp index 626e59f..c259ed0 100644 --- a/src/libvideo2x.cpp +++ b/src/libvideo2x.cpp @@ -1,5 +1,4 @@ #include "libvideo2x.h" -#include extern "C" { #include @@ -238,8 +237,10 @@ int VideoProcessor::process_frames( } // Calculate this frame's presentation timestamp (PTS) - frame->pts = - av_rescale_q(frame_idx_, av_inv_q(enc_ctx->framerate), enc_ctx->time_base); + if (enc_cfg_.recalculate_pts) { + frame->pts = + av_rescale_q(frame_idx_, av_inv_q(enc_ctx->framerate), enc_ctx->time_base); + } // Process the frame based on the selected processing mode AVFrame* proc_frame = nullptr; @@ -265,7 +266,8 @@ int VideoProcessor::process_frames( frame_idx_.fetch_add(1); logger()->debug("Processed frame {}/{}", frame_idx_.load(), total_frames_.load()); } - } else if (enc_cfg_.copy_streams && stream_map[packet->stream_index] >= 0) { + } else if ((enc_cfg_.copy_audio_streams || enc_cfg_.copy_subtitle_streams) && + stream_map[packet->stream_index] >= 0) { ret = write_raw_packet(packet.get(), ifmt_ctx, ofmt_ctx, stream_map); if (ret < 0) { return ret; diff --git a/tools/video2x/src/argparse.cpp b/tools/video2x/src/argparse.cpp index ca67995..522a426 100644 --- a/tools/video2x/src/argparse.cpp +++ b/tools/video2x/src/argparse.cpp @@ -95,7 +95,9 @@ int parse_args( encoder_opts.add_options() ("codec,c", PO_STR_VALUE() ->default_value(STR("libx264"), "libx264"), "Output codec") - ("no-copy-streams", "Do not copy audio and subtitle streams") + ("no-recalculate-pts", "Do not recalculate presentation timestamps") + ("no-copy-audio-streams", "Do not copy audio streams") + ("no-copy-subtitle-streams", "Do not copy subtitle streams") ("pix-fmt", PO_STR_VALUE(), "Output pixel format") ("bit-rate", po::value(&enc_cfg.bit_rate)->default_value(0), "Bitrate in bits per second") @@ -134,7 +136,7 @@ int parse_args( ("scaling-factor,s", po::value(&proc_cfg.scaling_factor) ->notifier([](int v) { validate_min(v, "scaling-factor", 2); }), "Scaling factor") ("noise-level,n", po::value(&proc_cfg.noise_level) - ->notifier([](int v) { validate_min(v, "noise-level", 0); }), "Noise level") + ->notifier([](int v) { validate_min(v, "noise-level", -1); }), "Noise level") ; po::options_description interp_opts("Frame interpolation options"); @@ -323,8 +325,10 @@ int parse_args( enc_cfg.codec = codec_str; } - // Parse copy streams flag - enc_cfg.copy_streams = vm.count("no-copy-streams") == 0; + // Parse copy streams options + enc_cfg.recalculate_pts = vm.count("no-recalculate-pts") == 0; + enc_cfg.copy_audio_streams = vm.count("no-copy-audio-streams") == 0; + enc_cfg.copy_subtitle_streams = vm.count("no-copy-subtitle-streams") == 0; // Parse pixel format to AVPixelFormat enc_cfg.pix_fmt = AV_PIX_FMT_NONE;