refactor(*): refactored the encoder and decoder into classes

Signed-off-by: k4yt3x <i@k4yt3x.com>
2026-02-14 09:14:53 +08:00 · 2024-11-17 00:00:00 +00:00
parent b520d51c6c
commit 169509b7d4
6 changed files with 318 additions and 347 deletions
--- a/src/libvideo2x.cpp
+++ b/src/libvideo2x.cpp
@@ -22,65 +22,51 @@ extern "C" {
 static int process_frames(
    EncoderConfig *encoder_config,
    VideoProcessingContext *proc_ctx,
-    AVFormatContext *ifmt_ctx,
-    AVFormatContext *ofmt_ctx,
-    AVCodecContext *dec_ctx,
-    AVCodecContext *enc_ctx,
+    Decoder &decoder,
+    Encoder &encoder,
    Filter *filter,
-    int in_vstream_idx,
-    int out_vstream_idx,
-    int *stream_map,
    bool benchmark = false
 ) {
-    int ret;
    char errbuf[AV_ERROR_MAX_STRING_SIZE];
-    std::vector<AVFrame *> flushed_frames;
+    int ret = 0;

-    // Get the total number of frames in the video with OpenCV
+    // Get required objects
+    AVFormatContext *ifmt_ctx = decoder.get_format_context();
+    AVCodecContext *dec_ctx = decoder.get_codec_context();
+    int in_vstream_idx = decoder.get_video_stream_index();
+    AVFormatContext *ofmt_ctx = encoder.get_format_context();
+    int *stream_map = encoder.get_stream_map();
+
+    // Get total number of frames
    spdlog::debug("Reading total number of frames");
    proc_ctx->total_frames = get_video_frame_count(ifmt_ctx, in_vstream_idx);

-    // Check if the total number of frames is still 0
    if (proc_ctx->total_frames <= 0) {
        spdlog::warn("Unable to determine the total number of frames");
    } else {
        spdlog::debug("{} frames to process", proc_ctx->total_frames);
    }

-    AVFrame *frame = av_frame_alloc();
-    if (frame == nullptr) {
+    // Allocate frame and packet
+    auto av_frame_deleter = [](AVFrame *frame) { av_frame_free(&frame); };
+    std::unique_ptr<AVFrame, decltype(av_frame_deleter)> frame(av_frame_alloc(), av_frame_deleter);
+    if (!frame) {
        ret = AVERROR(ENOMEM);
        return ret;
    }

-    AVPacket *packet = av_packet_alloc();
-    if (packet == nullptr) {
+    auto av_packet_deleter = [](AVPacket *packet) { av_packet_free(&packet); };
+    std::unique_ptr<AVPacket, decltype(av_packet_deleter)> packet(
+        av_packet_alloc(), av_packet_deleter
+    );
+    if (!packet) {
        spdlog::critical("Could not allocate AVPacket");
-        av_frame_free(&frame);
        return AVERROR(ENOMEM);
    }

-    // Lambda function for cleaning up resources
-    auto cleanup = [&]() {
-        if (frame) {
-            av_frame_free(&frame);
-            frame = nullptr;
-        }
-        if (packet) {
-            av_packet_free(&packet);
-            packet = nullptr;
-        }
-        for (AVFrame *&flushed_frame : flushed_frames) {
-            if (flushed_frame) {
-                av_frame_free(&flushed_frame);
-                flushed_frame = nullptr;
-            }
-        }
-    };
-
    // Read frames from the input file
    while (!proc_ctx->abort) {
-        ret = av_read_frame(ifmt_ctx, packet);
+        ret = av_read_frame(ifmt_ctx, packet.get());
        if (ret < 0) {
            if (ret == AVERROR_EOF) {
                spdlog::debug("Reached end of file");
@@ -88,17 +74,15 @@ static int process_frames(
            }
            av_strerror(ret, errbuf, sizeof(errbuf));
            spdlog::critical("Error reading packet: {}", errbuf);
-            cleanup();
            return ret;
        }

        if (packet->stream_index == in_vstream_idx) {
-            ret = avcodec_send_packet(dec_ctx, packet);
+            ret = avcodec_send_packet(dec_ctx, packet.get());
            if (ret < 0) {
                av_strerror(ret, errbuf, sizeof(errbuf));
                spdlog::critical("Error sending packet to decoder: {}", errbuf);
-                av_packet_unref(packet);
-                cleanup();
+                av_packet_unref(packet.get());
                return ret;
            }

@@ -108,49 +92,43 @@ static int process_frames(
                    continue;
                }

-                ret = avcodec_receive_frame(dec_ctx, frame);
+                ret = avcodec_receive_frame(dec_ctx, frame.get());
                if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
                    spdlog::debug("Frame not ready");
                    break;
                } else if (ret < 0) {
                    av_strerror(ret, errbuf, sizeof(errbuf));
                    spdlog::critical("Error decoding video frame: {}", errbuf);
-                    av_packet_unref(packet);
-                    cleanup();
+                    av_packet_unref(packet.get());
                    return ret;
                }

-                AVFrame *processed_frame = nullptr;
-                ret = filter->process_frame(frame, &processed_frame);
+                AVFrame *raw_processed_frame = nullptr;
+                ret = filter->process_frame(frame.get(), &raw_processed_frame);
+
                if (ret < 0 && ret != AVERROR(EAGAIN)) {
                    av_strerror(ret, errbuf, sizeof(errbuf));
-                    av_frame_free(&processed_frame);
-                    av_packet_unref(packet);
-                    cleanup();
+                    av_packet_unref(packet.get());
                    return ret;
-                } else if (ret == 0 && processed_frame != nullptr) {
+                } else if (ret == 0 && raw_processed_frame != nullptr) {
+                    auto processed_frame = std::unique_ptr<AVFrame, decltype(av_frame_deleter)>(
+                        raw_processed_frame, av_frame_deleter
+                    );
+
                    if (!benchmark) {
-                        ret = write_frame(
-                            processed_frame,
-                            enc_ctx,
-                            ofmt_ctx,
-                            out_vstream_idx,
-                            proc_ctx->processed_frames
-                        );
+                        ret =
+                            encoder.write_frame(processed_frame.get(), proc_ctx->processed_frames);
                        if (ret < 0) {
                            av_strerror(ret, errbuf, sizeof(errbuf));
                            spdlog::critical("Error encoding/writing frame: {}", errbuf);
-                            av_frame_free(&processed_frame);
-                            av_packet_unref(packet);
-                            cleanup();
+                            av_packet_unref(packet.get());
                            return ret;
                        }
                    }
-                    av_frame_free(&processed_frame);
                    proc_ctx->processed_frames++;
                }

-                av_frame_unref(frame);
+                av_frame_unref(frame.get());
                spdlog::debug(
                    "Processed frame {}/{}", proc_ctx->processed_frames, proc_ctx->total_frames
                );
@@ -160,58 +138,54 @@ static int process_frames(
            int out_stream_index = stream_map[packet->stream_index];
            AVStream *out_stream = ofmt_ctx->streams[out_stream_index];

-            av_packet_rescale_ts(packet, in_stream->time_base, out_stream->time_base);
+            av_packet_rescale_ts(packet.get(), in_stream->time_base, out_stream->time_base);
            packet->stream_index = out_stream_index;

-            ret = av_interleaved_write_frame(ofmt_ctx, packet);
+            ret = av_interleaved_write_frame(ofmt_ctx, packet.get());
            if (ret < 0) {
                av_strerror(ret, errbuf, sizeof(errbuf));
                spdlog::critical("Error muxing audio/subtitle packet: {}", errbuf);
-                av_packet_unref(packet);
-                cleanup();
+                av_packet_unref(packet.get());
                return ret;
            }
        }
-        av_packet_unref(packet);
+        av_packet_unref(packet.get());
    }

    // Flush the filter
-    ret = filter->flush(flushed_frames);
+    std::vector<AVFrame *> raw_flushed_frames;
+    ret = filter->flush(raw_flushed_frames);
    if (ret < 0) {
        av_strerror(ret, errbuf, sizeof(errbuf));
        spdlog::critical("Error flushing filter: {}", errbuf);
-        cleanup();
        return ret;
    }

+    // Wrap flushed frames in unique_ptrs
+    std::vector<std::unique_ptr<AVFrame, decltype(av_frame_deleter)>> flushed_frames;
+    for (AVFrame *raw_frame : raw_flushed_frames) {
+        flushed_frames.emplace_back(raw_frame, av_frame_deleter);
+    }
+
    // Encode and write all flushed frames
-    for (AVFrame *&flushed_frame : flushed_frames) {
-        ret = write_frame(
-            flushed_frame, enc_ctx, ofmt_ctx, out_vstream_idx, proc_ctx->processed_frames
-        );
+    for (auto &flushed_frame : flushed_frames) {
+        ret = encoder.write_frame(flushed_frame.get(), proc_ctx->processed_frames);
        if (ret < 0) {
            av_strerror(ret, errbuf, sizeof(errbuf));
            spdlog::critical("Error encoding/writing flushed frame: {}", errbuf);
-            av_frame_free(&flushed_frame);
-            flushed_frame = nullptr;
-            cleanup();
            return ret;
        }
-        av_frame_free(&flushed_frame);
-        flushed_frame = nullptr;
        proc_ctx->processed_frames++;
    }

    // Flush the encoder
-    ret = flush_encoder(enc_ctx, ofmt_ctx, out_vstream_idx);
+    ret = encoder.flush();
    if (ret < 0) {
        av_strerror(ret, errbuf, sizeof(errbuf));
        spdlog::critical("Error flushing encoder: {}", errbuf);
-        cleanup();
        return ret;
    }

-    cleanup();
    return ret;
 }

@@ -226,55 +200,10 @@ extern "C" int process_video(
    EncoderConfig *encoder_config,
    VideoProcessingContext *proc_ctx
 ) {
-    AVFormatContext *ifmt_ctx = nullptr;
-    AVFormatContext *ofmt_ctx = nullptr;
-    AVCodecContext *dec_ctx = nullptr;
-    AVCodecContext *enc_ctx = nullptr;
-    AVBufferRef *hw_ctx = nullptr;
-    int *stream_map = nullptr;
-    Filter *filter = nullptr;
-    int in_vstream_idx = -1;
-    int out_vstream_idx = -1;
    char errbuf[AV_ERROR_MAX_STRING_SIZE];
    int ret = 0;

-    // Lambda function for cleaning up resources
-    auto cleanup = [&]() {
-        if (ifmt_ctx) {
-            avformat_close_input(&ifmt_ctx);
-            ifmt_ctx = nullptr;
-        }
-        if (ofmt_ctx && !(ofmt_ctx->oformat->flags & AVFMT_NOFILE)) {
-            avio_closep(&ofmt_ctx->pb);
-            ofmt_ctx->pb = nullptr;
-        }
-        if (ofmt_ctx) {
-            avformat_free_context(ofmt_ctx);
-            ofmt_ctx = nullptr;
-        }
-        if (dec_ctx) {
-            avcodec_free_context(&dec_ctx);
-            dec_ctx = nullptr;
-        }
-        if (enc_ctx) {
-            avcodec_free_context(&enc_ctx);
-            enc_ctx = nullptr;
-        }
-        if (hw_ctx) {
-            av_buffer_unref(&hw_ctx);
-            hw_ctx = nullptr;
-        }
-        if (stream_map) {
-            av_free(stream_map);
-            stream_map = nullptr;
-        }
-        if (filter) {
-            delete filter;
-            filter = nullptr;
-        }
-    };
-
-    // Set the log level for FFmpeg and spdlog (libvideo2x)
+    // Set the log level for FFmpeg and spdlog
    switch (log_level) {
        case LIBVIDEO2X_LOG_LEVEL_TRACE:
            av_log_set_level(AV_LOG_TRACE);
@@ -314,26 +243,38 @@ extern "C" int process_video(
    std::filesystem::path in_fpath(in_fname);
    std::filesystem::path out_fpath(out_fname);

+    auto hw_ctx_deleter = [](AVBufferRef *ref) {
+        if (ref) {
+            av_buffer_unref(&ref);
+        }
+    };
+    std::unique_ptr<AVBufferRef, decltype(hw_ctx_deleter)> hw_ctx(nullptr, hw_ctx_deleter);
+
    // Initialize hardware device context
    if (hw_type != AV_HWDEVICE_TYPE_NONE) {
-        ret = av_hwdevice_ctx_create(&hw_ctx, hw_type, NULL, NULL, 0);
+        AVBufferRef *tmp_hw_ctx = nullptr;
+        ret = av_hwdevice_ctx_create(&tmp_hw_ctx, hw_type, NULL, NULL, 0);
        if (ret < 0) {
            av_strerror(ret, errbuf, sizeof(errbuf));
            spdlog::critical("Error initializing hardware device context: {}", errbuf);
-            cleanup();
            return ret;
        }
+        hw_ctx.reset(tmp_hw_ctx);
    }

-    // Initialize input
-    ret = init_decoder(hw_type, hw_ctx, in_fpath, &ifmt_ctx, &dec_ctx, &in_vstream_idx);
+    // Initialize input decoder
+    Decoder decoder;
+    ret = decoder.init(hw_type, hw_ctx.get(), in_fpath);
    if (ret < 0) {
        av_strerror(ret, errbuf, sizeof(errbuf));
        spdlog::critical("Failed to initialize decoder: {}", errbuf);
-        cleanup();
        return ret;
    }

+    AVFormatContext *ifmt_ctx = decoder.get_format_context();
+    AVCodecContext *dec_ctx = decoder.get_codec_context();
+    int in_vstream_idx = decoder.get_video_stream_index();
+
    // Initialize output dimensions based on filter configuration
    int output_width = 0, output_height = 0;
    switch (filter_config->filter_type) {
@@ -347,116 +288,85 @@ extern "C" int process_video(
            break;
        default:
            spdlog::critical("Unknown filter type");
-            cleanup();
            return -1;
    }
    spdlog::debug("Output video dimensions: {}x{}", output_width, output_height);

-    // Initialize output encoder
+    // Update encoder configuration with output dimensions
    encoder_config->out_width = output_width;
    encoder_config->out_height = output_height;
-    ret = init_encoder(
-        hw_ctx,
-        out_fpath,
-        ifmt_ctx,
-        &ofmt_ctx,
-        &enc_ctx,
-        dec_ctx,
-        encoder_config,
-        in_vstream_idx,
-        &out_vstream_idx,
-        &stream_map
-    );
+
+    // Initialize the encoder
+    Encoder encoder;
+    ret = encoder.init(hw_ctx.get(), out_fpath, ifmt_ctx, dec_ctx, encoder_config, in_vstream_idx);
    if (ret < 0) {
        av_strerror(ret, errbuf, sizeof(errbuf));
        spdlog::critical("Failed to initialize encoder: {}", errbuf);
-        cleanup();
        return ret;
    }

    // Write the output file header
-    ret = avformat_write_header(ofmt_ctx, NULL);
+    ret = avformat_write_header(encoder.get_format_context(), NULL);
    if (ret < 0) {
        av_strerror(ret, errbuf, sizeof(errbuf));
        spdlog::critical("Error occurred when opening output file: {}", errbuf);
-        cleanup();
        return ret;
    }

    // Create and initialize the appropriate filter
+    std::unique_ptr<Filter> filter;
    if (filter_config->filter_type == FILTER_LIBPLACEBO) {
        const auto &config = filter_config->config.libplacebo;
        if (!config.shader_path) {
            spdlog::critical("Shader path must be provided for the libplacebo filter");
-            cleanup();
            return -1;
        }
-        filter = new LibplaceboFilter{
+        filter = std::make_unique<LibplaceboFilter>(
            vk_device_index,
            std::filesystem::path(config.shader_path),
            config.out_width,
            config.out_height
-        };
+        );
    } else if (filter_config->filter_type == FILTER_REALESRGAN) {
        const auto &config = filter_config->config.realesrgan;
        if (!config.model_name) {
            spdlog::critical("Model name must be provided for the RealESRGAN filter");
-            cleanup();
            return -1;
        }
-        filter = new RealesrganFilter{
+        filter = std::make_unique<RealesrganFilter>(
            static_cast<int>(vk_device_index),
            config.tta_mode,
            config.scaling_factor,
            config.model_name
-        };
+        );
    } else {
        spdlog::critical("Unknown filter type");
-        cleanup();
        return -1;
    }

    // Check if the filter instance was created successfully
    if (filter == nullptr) {
        spdlog::critical("Failed to create filter instance");
-        cleanup();
        return -1;
    }

    // Initialize the filter
-    ret = filter->init(dec_ctx, enc_ctx, hw_ctx);
+    ret = filter->init(dec_ctx, encoder.get_encoder_context(), hw_ctx.get());
    if (ret < 0) {
        spdlog::critical("Failed to initialize filter");
-        cleanup();
        return ret;
    }

-    // Process frames
-    ret = process_frames(
-        encoder_config,
-        proc_ctx,
-        ifmt_ctx,
-        ofmt_ctx,
-        dec_ctx,
-        enc_ctx,
-        filter,
-        in_vstream_idx,
-        out_vstream_idx,
-        stream_map,
-        benchmark
-    );
+    // Process frames using the encoder and decoder
+    ret = process_frames(encoder_config, proc_ctx, decoder, encoder, filter.get(), benchmark);
    if (ret < 0) {
        av_strerror(ret, errbuf, sizeof(errbuf));
        spdlog::critical("Error processing frames: {}", errbuf);
-        cleanup();
        return ret;
    }

    // Write the output file trailer
-    av_write_trailer(ofmt_ctx);
-
-    // Cleanup before returning
-    cleanup();
+    av_write_trailer(encoder.get_format_context());

    if (ret < 0 && ret != AVERROR_EOF) {
        av_strerror(ret, errbuf, sizeof(errbuf));