refactor(*): refactored the encoder and decoder into classes

Signed-off-by: k4yt3x <i@k4yt3x.com>
2026-02-12 16:14:44 +08:00 · 2024-11-17 00:00:00 +00:00
parent b520d51c6c
commit 169509b7d4
6 changed files with 318 additions and 347 deletions
--- a/src/encoder.cpp
+++ b/src/encoder.cpp
@@ -1,138 +1,152 @@
 #include "encoder.h"

-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <cstdint>
+#include <spdlog/spdlog.h>

 extern "C" {
-#include <libavutil/pixdesc.h>
+#include <libavutil/opt.h>
 }

-#include <spdlog/spdlog.h>
-
 #include "avutils.h"
 #include "conversions.h"

-int init_encoder(
+Encoder::Encoder()
+    : ofmt_ctx_(nullptr), enc_ctx_(nullptr), out_vstream_idx_(-1), stream_map_(nullptr) {}
+
+Encoder::~Encoder() {
+    if (enc_ctx_) {
+        avcodec_free_context(&enc_ctx_);
+    }
+    if (ofmt_ctx_) {
+        if (!(ofmt_ctx_->oformat->flags & AVFMT_NOFILE)) {
+            avio_closep(&ofmt_ctx_->pb);
+        }
+        avformat_free_context(ofmt_ctx_);
+    }
+    if (stream_map_) {
+        av_free(stream_map_);
+    }
+}
+
+int Encoder::init(
    AVBufferRef *hw_ctx,
-    std::filesystem::path out_fpath,
+    const std::filesystem::path &out_fpath,
    AVFormatContext *ifmt_ctx,
-    AVFormatContext **ofmt_ctx,
-    AVCodecContext **enc_ctx,
    AVCodecContext *dec_ctx,
    EncoderConfig *encoder_config,
-    int in_vstream_idx,
-    int *out_vstream_idx,
-    int **stream_map
+    int in_vstream_idx
 ) {
-    AVFormatContext *fmt_ctx = NULL;
-    AVCodecContext *codec_ctx = NULL;
    int ret;

-    avformat_alloc_output_context2(&fmt_ctx, NULL, NULL, out_fpath.u8string().c_str());
-    if (!fmt_ctx) {
+    // Allocate the output format context
+    avformat_alloc_output_context2(&ofmt_ctx_, nullptr, nullptr, out_fpath.u8string().c_str());
+    if (!ofmt_ctx_) {
        spdlog::error("Could not create output context");
        return AVERROR_UNKNOWN;
    }

+    // Find the encoder
    const AVCodec *encoder = avcodec_find_encoder(encoder_config->codec);
    if (!encoder) {
        spdlog::error(
-            "Required video encoder not found for vcodec {}",
-            avcodec_get_name(encoder_config->codec)
+            "Required video encoder not found for codec {}", avcodec_get_name(encoder_config->codec)
        );
        return AVERROR_ENCODER_NOT_FOUND;
    }

    // Create a new video stream in the output file
-    AVStream *out_vstream = avformat_new_stream(fmt_ctx, NULL);
+    AVStream *out_vstream = avformat_new_stream(ofmt_ctx_, nullptr);
    if (!out_vstream) {
        spdlog::error("Failed to allocate the output video stream");
        return AVERROR_UNKNOWN;
    }
-    *out_vstream_idx = out_vstream->index;
+    out_vstream_idx_ = out_vstream->index;

-    codec_ctx = avcodec_alloc_context3(encoder);
-    if (!codec_ctx) {
+    // Allocate the encoder context
+    enc_ctx_ = avcodec_alloc_context3(encoder);
+    if (!enc_ctx_) {
        spdlog::error("Failed to allocate the encoder context");
        return AVERROR(ENOMEM);
    }

    // Set hardware device context
    if (hw_ctx != nullptr) {
-        codec_ctx->hw_device_ctx = av_buffer_ref(hw_ctx);
+        enc_ctx_->hw_device_ctx = av_buffer_ref(hw_ctx);
    }

    // Set encoding parameters
-    codec_ctx->height = encoder_config->out_height;
-    codec_ctx->width = encoder_config->out_width;
-    codec_ctx->sample_aspect_ratio = dec_ctx->sample_aspect_ratio;
-    codec_ctx->bit_rate = encoder_config->bit_rate;
+    enc_ctx_->height = encoder_config->out_height;
+    enc_ctx_->width = encoder_config->out_width;
+    enc_ctx_->sample_aspect_ratio = dec_ctx->sample_aspect_ratio;
+    enc_ctx_->bit_rate = encoder_config->bit_rate;

    // Set the color properties
-    codec_ctx->color_range = dec_ctx->color_range;
-    codec_ctx->color_primaries = dec_ctx->color_primaries;
-    codec_ctx->color_trc = dec_ctx->color_trc;
-    codec_ctx->colorspace = dec_ctx->colorspace;
-    codec_ctx->chroma_sample_location = dec_ctx->chroma_sample_location;
+    enc_ctx_->color_range = dec_ctx->color_range;
+    enc_ctx_->color_primaries = dec_ctx->color_primaries;
+    enc_ctx_->color_trc = dec_ctx->color_trc;
+    enc_ctx_->colorspace = dec_ctx->colorspace;
+    enc_ctx_->chroma_sample_location = dec_ctx->chroma_sample_location;

    // Set the pixel format
    if (encoder_config->pix_fmt != AV_PIX_FMT_NONE) {
        // Use the specified pixel format
-        codec_ctx->pix_fmt = encoder_config->pix_fmt;
+        enc_ctx_->pix_fmt = encoder_config->pix_fmt;
    } else {
-        codec_ctx->pix_fmt = get_encoder_default_pix_fmt(encoder, dec_ctx->pix_fmt);
-        if (codec_ctx->pix_fmt == AV_PIX_FMT_NONE) {
+        // Automatically select the pixel format
+        enc_ctx_->pix_fmt = get_encoder_default_pix_fmt(encoder, dec_ctx->pix_fmt);
+        if (enc_ctx_->pix_fmt == AV_PIX_FMT_NONE) {
            spdlog::error("Could not get the default pixel format for the encoder");
            return AVERROR(EINVAL);
        }
-        spdlog::debug("Auto-selected pixel format: {}", av_get_pix_fmt_name(codec_ctx->pix_fmt));
+        spdlog::debug("Auto-selected pixel format: {}", av_get_pix_fmt_name(enc_ctx_->pix_fmt));
    }

    // Set the output video's time base
    if (dec_ctx->time_base.num > 0 && dec_ctx->time_base.den > 0) {
-        codec_ctx->time_base = dec_ctx->time_base;
+        enc_ctx_->time_base = dec_ctx->time_base;
    } else {
-        codec_ctx->time_base = av_inv_q(av_guess_frame_rate(ifmt_ctx, out_vstream, NULL));
+        enc_ctx_->time_base = av_inv_q(av_guess_frame_rate(ifmt_ctx, out_vstream, nullptr));
    }

    // Set the output video's frame rate
    if (dec_ctx->framerate.num > 0 && dec_ctx->framerate.den > 0) {
-        codec_ctx->framerate = dec_ctx->framerate;
+        enc_ctx_->framerate = dec_ctx->framerate;
    } else {
-        codec_ctx->framerate = av_guess_frame_rate(ifmt_ctx, out_vstream, NULL);
+        enc_ctx_->framerate = av_guess_frame_rate(ifmt_ctx, out_vstream, nullptr);
    }

    // Set the CRF and preset for any codecs that support it
    std::string crf_str = std::to_string(encoder_config->crf);
-    av_opt_set(codec_ctx->priv_data, "crf", crf_str.c_str(), 0);
-    av_opt_set(codec_ctx->priv_data, "preset", encoder_config->preset, 0);
+    av_opt_set(enc_ctx_->priv_data, "crf", crf_str.c_str(), 0);
+    av_opt_set(enc_ctx_->priv_data, "preset", encoder_config->preset, 0);

-    if (fmt_ctx->oformat->flags & AVFMT_GLOBALHEADER) {
-        codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
+    // Use global headers if necessary
+    if (ofmt_ctx_->oformat->flags & AVFMT_GLOBALHEADER) {
+        enc_ctx_->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    }

-    if ((ret = avcodec_open2(codec_ctx, encoder, NULL)) < 0) {
+    // Open the encoder
+    if ((ret = avcodec_open2(enc_ctx_, encoder, nullptr)) < 0) {
        spdlog::error("Cannot open video encoder");
        return ret;
    }

-    ret = avcodec_parameters_from_context(out_vstream->codecpar, codec_ctx);
+    // Copy encoder parameters to output video stream
+    ret = avcodec_parameters_from_context(out_vstream->codecpar, enc_ctx_);
    if (ret < 0) {
        spdlog::error("Failed to copy encoder parameters to output video stream");
        return ret;
    }

-    out_vstream->time_base = codec_ctx->time_base;
-    out_vstream->avg_frame_rate = codec_ctx->framerate;
-    out_vstream->r_frame_rate = codec_ctx->framerate;
+    out_vstream->time_base = enc_ctx_->time_base;
+    out_vstream->avg_frame_rate = enc_ctx_->framerate;
+    out_vstream->r_frame_rate = enc_ctx_->framerate;

+    // Copy other streams if necessary
    if (encoder_config->copy_streams) {
-        // Allocate the stream map
-        *stream_map =
-            reinterpret_cast<int *>(av_malloc_array(ifmt_ctx->nb_streams, sizeof(**stream_map)));
-        if (!*stream_map) {
+        // Allocate the stream map
+        stream_map_ =
+            reinterpret_cast<int *>(av_malloc_array(ifmt_ctx->nb_streams, sizeof(*stream_map_)));
+        if (!stream_map_) {
            spdlog::error("Could not allocate stream mapping");
            return AVERROR(ENOMEM);
        }
@@ -144,20 +158,20 @@ int init_encoder(

            // Skip the input video stream as it's already processed
            if (i == in_vstream_idx) {
-                (*stream_map)[i] = *out_vstream_idx;
+                stream_map_[i] = out_vstream_idx_;
                continue;
            }

            // Map only audio and subtitle streams (skip other types)
            if (in_codecpar->codec_type != AVMEDIA_TYPE_AUDIO &&
                in_codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) {
-                (*stream_map)[i] = -1;  // Stream not mapped
+                stream_map_[i] = -1;
                spdlog::warn("Skipping unsupported stream type at index: {}", i);
                continue;
            }

            // Create corresponding output stream for audio and subtitle streams
-            AVStream *out_stream = avformat_new_stream(fmt_ctx, NULL);
+            AVStream *out_stream = avformat_new_stream(ofmt_ctx_, nullptr);
            if (!out_stream) {
                spdlog::error("Failed allocating output stream");
                return AVERROR_UNKNOWN;
@@ -176,32 +190,23 @@ int init_encoder(

            // Map input stream index to output stream index
            spdlog::debug("Stream mapping: {} (in) -> {} (out)", i, out_stream->index);
-            (*stream_map)[i] = out_stream->index;
+            stream_map_[i] = out_stream->index;
        }
    }

    // Open the output file
-    if (!(fmt_ctx->oformat->flags & AVFMT_NOFILE)) {
-        ret = avio_open(&fmt_ctx->pb, out_fpath.u8string().c_str(), AVIO_FLAG_WRITE);
+    if (!(ofmt_ctx_->oformat->flags & AVFMT_NOFILE)) {
+        ret = avio_open(&ofmt_ctx_->pb, out_fpath.u8string().c_str(), AVIO_FLAG_WRITE);
        if (ret < 0) {
-            spdlog::error("Could not open output file '{}'", out_fpath.u8string().c_str());
+            spdlog::error("Could not open output file '{}'", out_fpath.u8string());
            return ret;
        }
    }

-    *ofmt_ctx = fmt_ctx;
-    *enc_ctx = codec_ctx;
-
    return 0;
 }

-int write_frame(
-    AVFrame *frame,
-    AVCodecContext *enc_ctx,
-    AVFormatContext *ofmt_ctx,
-    int out_vstream_idx,
-    int64_t frame_idx
-) {
+int Encoder::write_frame(AVFrame *frame, int64_t frame_idx) {
    AVFrame *converted_frame = nullptr;
    int ret;

@@ -211,13 +216,12 @@ int write_frame(
    }

    // Convert the frame to the encoder's pixel format if needed
-    if (frame->format != enc_ctx->pix_fmt) {
-        converted_frame = convert_avframe_pix_fmt(frame, enc_ctx->pix_fmt);
+    if (frame->format != enc_ctx_->pix_fmt) {
+        converted_frame = convert_avframe_pix_fmt(frame, enc_ctx_->pix_fmt);
        if (!converted_frame) {
            spdlog::error("Error converting frame to encoder's pixel format");
            return AVERROR_EXTERNAL;
        }
-
        converted_frame->pts = frame->pts;
    }

@@ -227,11 +231,12 @@ int write_frame(
        return AVERROR(ENOMEM);
    }

+    // Send the frame to the encoder
    if (converted_frame != nullptr) {
-        ret = avcodec_send_frame(enc_ctx, converted_frame);
+        ret = avcodec_send_frame(enc_ctx_, converted_frame);
        av_frame_free(&converted_frame);
    } else {
-        ret = avcodec_send_frame(enc_ctx, frame);
+        ret = avcodec_send_frame(enc_ctx_, frame);
    }
    if (ret < 0) {
        spdlog::error("Error sending frame to encoder");
@@ -239,8 +244,9 @@ int write_frame(
        return ret;
    }

+    // Receive packets from the encoder
    while (ret >= 0) {
-        ret = avcodec_receive_packet(enc_ctx, enc_pkt);
+        ret = avcodec_receive_packet(enc_ctx_, enc_pkt);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            av_packet_unref(enc_pkt);
            break;
@@ -252,12 +258,12 @@ int write_frame(

        // Rescale packet timestamps
        av_packet_rescale_ts(
-            enc_pkt, enc_ctx->time_base, ofmt_ctx->streams[out_vstream_idx]->time_base
+            enc_pkt, enc_ctx_->time_base, ofmt_ctx_->streams[out_vstream_idx_]->time_base
        );
-        enc_pkt->stream_index = out_vstream_idx;
+        enc_pkt->stream_index = out_vstream_idx_;

        // Write the packet
-        ret = av_interleaved_write_frame(ofmt_ctx, enc_pkt);
+        ret = av_interleaved_write_frame(ofmt_ctx_, enc_pkt);
        av_packet_unref(enc_pkt);
        if (ret < 0) {
            spdlog::error("Error muxing packet");
@@ -270,7 +276,7 @@ int write_frame(
    return 0;
 }

-int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx, int out_vstream_idx) {
+int Encoder::flush() {
    int ret;
    AVPacket *enc_pkt = av_packet_alloc();
    if (!enc_pkt) {
@@ -278,16 +284,17 @@ int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx, int out_vs
        return AVERROR(ENOMEM);
    }

-    ret = avcodec_send_frame(enc_ctx, NULL);
+    // Send a NULL frame to signal the encoder to flush
+    ret = avcodec_send_frame(enc_ctx_, nullptr);
    if (ret < 0) {
        spdlog::error("Error sending NULL frame to encoder during flush");
        av_packet_free(&enc_pkt);
        return ret;
    }

-    // Write the packets to the output file
+    // Receive and write packets until flushing is complete
    while (true) {
-        ret = avcodec_receive_packet(enc_ctx, enc_pkt);
+        ret = avcodec_receive_packet(enc_ctx_, enc_pkt);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            av_packet_unref(enc_pkt);
            break;
@@ -299,12 +306,12 @@ int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx, int out_vs

        // Rescale packet timestamps
        av_packet_rescale_ts(
-            enc_pkt, enc_ctx->time_base, ofmt_ctx->streams[out_vstream_idx]->time_base
+            enc_pkt, enc_ctx_->time_base, ofmt_ctx_->streams[out_vstream_idx_]->time_base
        );
-        enc_pkt->stream_index = out_vstream_idx;
+        enc_pkt->stream_index = out_vstream_idx_;

        // Write the packet
-        ret = av_interleaved_write_frame(ofmt_ctx, enc_pkt);
+        ret = av_interleaved_write_frame(ofmt_ctx_, enc_pkt);
        av_packet_unref(enc_pkt);
        if (ret < 0) {
            spdlog::error("Error muxing packet during flush");
@@ -316,3 +323,19 @@ int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx, int out_vs
    av_packet_free(&enc_pkt);
    return 0;
 }
+
+AVCodecContext *Encoder::get_encoder_context() const {
+    return enc_ctx_;
+}
+
+AVFormatContext *Encoder::get_format_context() const {
+    return ofmt_ctx_;
+}
+
+int Encoder::get_output_video_stream_index() const {
+    return out_vstream_idx_;
+}
+
+int *Encoder::get_stream_map() const {
+    return stream_map_;
+}