feat(rife): add support for frame interpolation and RIFE (#1244)

* feat: add RIFE files and processor/interpolator abstractions
* feat: add `rife` as processor option
* feat: add frame interpolation math except first frame
* feat: complete motion interpolation and add scene detection
* feat: improve Vulkan device validation
* fix: fix casting issues and variable names
* refactor: improve error-checking; add abstractions and factories
* refactor: improve readability of the frames processor
* docs: update changelog

Signed-off-by: k4yt3x <i@k4yt3x.com>
This commit is contained in:
K4YT3X
2024-12-01 09:55:56 +00:00
committed by GitHub
parent 2fc89e3883
commit 627f3d84a4
84 changed files with 4914 additions and 615 deletions

View File

@@ -1,5 +1,7 @@
#include "avutils.h"
#include <cstdint>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavutil/pixdesc.h>
@@ -7,6 +9,25 @@ extern "C" {
#include <spdlog/spdlog.h>
#include "conversions.h"
// Determines the frame rate of the given video stream.
//
// Fallback order: the stream's average frame rate, its real base frame
// rate, FFmpeg's av_guess_frame_rate(), and finally the stream time base.
// Logs a warning and returns {0, 0} when no source yields a value.
AVRational get_video_frame_rate(AVFormatContext *ifmt_ctx, int in_vstream_idx) {
    // Preferred source: the demuxer-reported average frame rate
    AVRational frame_rate = ifmt_ctx->streams[in_vstream_idx]->avg_frame_rate;
    if (frame_rate.num == 0 && frame_rate.den == 0) {
        // Fall back to the stream's real base frame rate
        frame_rate = ifmt_ctx->streams[in_vstream_idx]->r_frame_rate;
    }
    if (frame_rate.num == 0 && frame_rate.den == 0) {
        // Let FFmpeg guess from container/codec information
        frame_rate = av_guess_frame_rate(ifmt_ctx, ifmt_ctx->streams[in_vstream_idx], nullptr);
    }
    if (frame_rate.num == 0 && frame_rate.den == 0) {
        // NOTE(review): this assigns the stream time base directly as a frame
        // rate; a time base such as 1/90000 is the duration of one tick, so
        // av_inv_q(time_base) may have been intended — confirm.
        frame_rate = ifmt_ctx->streams[in_vstream_idx]->time_base;
    }
    if (frame_rate.num == 0 && frame_rate.den == 0) {
        spdlog::warn("Unable to determine the video's frame rate");
    }
    return frame_rate;
}
int64_t get_video_frame_count(AVFormatContext *ifmt_ctx, int in_vstream_idx) {
// Use the 'nb_frames' field if it is available
int64_t nb_frames = ifmt_ctx->streams[in_vstream_idx]->nb_frames;
@@ -31,19 +52,7 @@ int64_t get_video_frame_count(AVFormatContext *ifmt_ctx, int in_vstream_idx) {
spdlog::debug("Video duration: {}s", duration_secs);
// Calculate average FPS
double fps = av_q2d(ifmt_ctx->streams[in_vstream_idx]->avg_frame_rate);
if (fps <= 0) {
spdlog::debug("Unable to read the average frame rate from 'avg_frame_rate'");
fps = av_q2d(ifmt_ctx->streams[in_vstream_idx]->r_frame_rate);
}
if (fps <= 0) {
spdlog::debug("Unable to read the average frame rate from 'r_frame_rate'");
fps = av_q2d(av_guess_frame_rate(ifmt_ctx, ifmt_ctx->streams[in_vstream_idx], nullptr));
}
if (fps <= 0) {
spdlog::debug("Unable to estimate the average frame rate with 'av_guess_frame_rate'");
fps = av_q2d(ifmt_ctx->streams[in_vstream_idx]->time_base);
}
double fps = av_q2d(get_video_frame_rate(ifmt_ctx, in_vstream_idx));
if (fps <= 0) {
spdlog::warn("Unable to estimate the video's average frame rate");
return -1;
@@ -122,3 +131,58 @@ get_encoder_default_pix_fmt(const AVCodec *encoder, AVPixelFormat target_pix_fmt
return best_pix_fmt;
}
// Computes the difference between two frames as a percentage in [0, 100].
//
// Both frames are converted to packed RGB24 and compared byte-by-byte; the
// sum of absolute channel differences is normalized by the maximum possible
// difference (255 per channel byte).
//
// Returns the percentage difference, or -1.0f on error (null input, size
// mismatch, or pixel-format conversion failure). Zero-area frames compare
// as 0% different.
float get_frame_diff(AVFrame *frame1, AVFrame *frame2) {
    if (!frame1 || !frame2) {
        spdlog::error("Invalid frame(s) provided for comparison");
        return -1.0f;
    }
    if (frame1->width != frame2->width || frame1->height != frame2->height) {
        spdlog::error("Frame dimensions do not match");
        return -1.0f;
    }
    int width = frame1->width;
    int height = frame1->height;
    // Convert both frames to RGB24 so they can be compared channel-by-channel
    AVPixelFormat target_pix_fmt = AV_PIX_FMT_RGB24;
    AVFrame *rgb_frame1 = convert_avframe_pix_fmt(frame1, target_pix_fmt);
    AVFrame *rgb_frame2 = convert_avframe_pix_fmt(frame2, target_pix_fmt);
    if (!rgb_frame1 || !rgb_frame2) {
        spdlog::error("Failed to convert frames to target pixel format");
        if (rgb_frame1) {
            av_frame_free(&rgb_frame1);
        }
        if (rgb_frame2) {
            av_frame_free(&rgb_frame2);
        }
        return -1.0f;
    }
    uint64_t sum_diff = 0;
    uint64_t max_diff = 0;
    // Calculate difference pixel by pixel, honoring each frame's row stride
    for (int y = 0; y < height; y++) {
        uint8_t *ptr1 = rgb_frame1->data[0] + y * rgb_frame1->linesize[0];
        uint8_t *ptr2 = rgb_frame2->data[0] + y * rgb_frame2->linesize[0];
        for (int x = 0; x < width * 3; x++) {
            sum_diff += static_cast<uint64_t>(
                std::abs(static_cast<int>(ptr1[x]) - static_cast<int>(ptr2[x]))
            );
            max_diff += 255;
        }
    }
    // Clean up
    av_frame_free(&rgb_frame1);
    av_frame_free(&rgb_frame2);
    // Fix: guard against zero-area frames, which would otherwise divide by zero
    if (max_diff == 0) {
        return 0.0f;
    }
    // Calculate percentage difference
    float percent_diff = (static_cast<float>(sum_diff) / static_cast<float>(max_diff)) * 100.0f;
    return percent_diff;
}

View File

@@ -33,6 +33,7 @@ int Encoder::init(
AVFormatContext *ifmt_ctx,
AVCodecContext *dec_ctx,
EncoderConfig *encoder_config,
const ProcessorConfig *processor_config,
int in_vstream_idx
) {
int ret;
@@ -121,18 +122,26 @@ int Encoder::init(
spdlog::debug("Auto-selected pixel format: {}", av_get_pix_fmt_name(enc_ctx_->pix_fmt));
}
// Set the output video's time base
if (dec_ctx->time_base.num > 0 && dec_ctx->time_base.den > 0) {
enc_ctx_->time_base = dec_ctx->time_base;
if (processor_config->frm_rate_mul > 0) {
AVRational in_frame_rate = get_video_frame_rate(ifmt_ctx, in_vstream_idx);
enc_ctx_->framerate = {
in_frame_rate.num * processor_config->frm_rate_mul, in_frame_rate.den
};
enc_ctx_->time_base = av_inv_q(enc_ctx_->framerate);
} else {
enc_ctx_->time_base = av_inv_q(av_guess_frame_rate(ifmt_ctx, out_vstream, nullptr));
}
// Set the output video's time base
if (dec_ctx->time_base.num > 0 && dec_ctx->time_base.den > 0) {
enc_ctx_->time_base = dec_ctx->time_base;
} else {
enc_ctx_->time_base = av_inv_q(av_guess_frame_rate(ifmt_ctx, out_vstream, nullptr));
}
// Set the output video's frame rate
if (dec_ctx->framerate.num > 0 && dec_ctx->framerate.den > 0) {
enc_ctx_->framerate = dec_ctx->framerate;
} else {
enc_ctx_->framerate = av_guess_frame_rate(ifmt_ctx, out_vstream, nullptr);
// Set the output video's frame rate
if (dec_ctx->framerate.num > 0 && dec_ctx->framerate.den > 0) {
enc_ctx_->framerate = dec_ctx->framerate;
} else {
enc_ctx_->framerate = av_guess_frame_rate(ifmt_ctx, out_vstream, nullptr);
}
}
// Set extra AVOptions
@@ -230,6 +239,13 @@ int Encoder::init(
}
}
// Write the output file header
ret = avformat_write_header(ofmt_ctx_, nullptr);
if (ret < 0) {
spdlog::error("Error writing output file header");
return ret;
}
return 0;
}

View File

@@ -1,4 +1,4 @@
#include "libplacebo_filter.h"
#include "filter_libplacebo.h"
#include <cstdio>
@@ -8,81 +8,81 @@
#include "fsutils.h"
#include "libplacebo.h"
LibplaceboFilter::LibplaceboFilter(
FilterLibplacebo::FilterLibplacebo(
uint32_t vk_device_index,
const std::filesystem::path &shader_path,
int out_width,
int out_height
int width,
int height
)
: filter_graph(nullptr),
buffersrc_ctx(nullptr),
buffersink_ctx(nullptr),
vk_device_index(vk_device_index),
shader_path(std::move(shader_path)),
out_width(out_width),
out_height(out_height) {}
: filter_graph_(nullptr),
buffersrc_ctx_(nullptr),
buffersink_ctx_(nullptr),
vk_device_index_(vk_device_index),
shader_path_(std::move(shader_path)),
width_(width),
height_(height) {}
LibplaceboFilter::~LibplaceboFilter() {
if (buffersrc_ctx) {
avfilter_free(buffersrc_ctx);
buffersrc_ctx = nullptr;
FilterLibplacebo::~FilterLibplacebo() {
if (buffersrc_ctx_) {
avfilter_free(buffersrc_ctx_);
buffersrc_ctx_ = nullptr;
}
if (buffersink_ctx) {
avfilter_free(buffersink_ctx);
buffersink_ctx = nullptr;
if (buffersink_ctx_) {
avfilter_free(buffersink_ctx_);
buffersink_ctx_ = nullptr;
}
if (filter_graph) {
avfilter_graph_free(&filter_graph);
filter_graph = nullptr;
if (filter_graph_) {
avfilter_graph_free(&filter_graph_);
filter_graph_ = nullptr;
}
}
int LibplaceboFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *_) {
int FilterLibplacebo::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *) {
// Construct the shader path
std::filesystem::path shader_full_path;
if (filepath_is_readable(shader_path)) {
if (filepath_is_readable(shader_path_)) {
// If the shader path is directly readable, use it
shader_full_path = shader_path;
shader_full_path = shader_path_;
} else {
// Construct the fallback path using std::filesystem
shader_full_path = find_resource_file(
std::filesystem::path(STR("models")) / STR("libplacebo") /
(path_to_string_type(shader_path) + STR(".glsl"))
(path_to_string_type(shader_path_) + STR(".glsl"))
);
}
// Check if the shader file exists
if (!std::filesystem::exists(shader_full_path)) {
spdlog::error("libplacebo shader file not found: '{}'", shader_path.u8string());
spdlog::error("libplacebo shader file not found: '{}'", shader_path_.u8string());
return -1;
}
// Save the output time base
in_time_base = dec_ctx->time_base;
out_time_base = enc_ctx->time_base;
in_time_base_ = dec_ctx->time_base;
out_time_base_ = enc_ctx->time_base;
// Initialize the libplacebo filter
int ret = init_libplacebo(
&filter_graph,
&buffersrc_ctx,
&buffersink_ctx,
&filter_graph_,
&buffersrc_ctx_,
&buffersink_ctx_,
dec_ctx,
out_width,
out_height,
vk_device_index,
width_,
height_,
vk_device_index_,
shader_full_path
);
// Set these resources to nullptr since they are already freed by `avfilter_graph_free`
if (ret < 0) {
buffersrc_ctx = nullptr;
buffersink_ctx = nullptr;
filter_graph = nullptr;
buffersrc_ctx_ = nullptr;
buffersink_ctx_ = nullptr;
filter_graph_ = nullptr;
}
return ret;
}
int LibplaceboFilter::process_frame(AVFrame *in_frame, AVFrame **out_frame) {
int FilterLibplacebo::filter(AVFrame *in_frame, AVFrame **out_frame) {
int ret;
// Get the filtered frame
@@ -93,28 +93,28 @@ int LibplaceboFilter::process_frame(AVFrame *in_frame, AVFrame **out_frame) {
}
// Feed the frame to the filter graph
ret = av_buffersrc_add_frame(buffersrc_ctx, in_frame);
ret = av_buffersrc_add_frame(buffersrc_ctx_, in_frame);
if (ret < 0) {
spdlog::error("Error while feeding the filter graph");
av_frame_free(out_frame);
return ret;
}
ret = av_buffersink_get_frame(buffersink_ctx, *out_frame);
ret = av_buffersink_get_frame(buffersink_ctx_, *out_frame);
if (ret < 0) {
av_frame_free(out_frame);
return ret;
}
// Rescale PTS to encoder's time base
(*out_frame)->pts = av_rescale_q((*out_frame)->pts, in_time_base, out_time_base);
(*out_frame)->pts = av_rescale_q((*out_frame)->pts, in_time_base_, out_time_base_);
// Return the processed frame to the caller
return 0;
}
int LibplaceboFilter::flush(std::vector<AVFrame *> &flushed_frames) {
int ret = av_buffersrc_add_frame(buffersrc_ctx, nullptr);
int FilterLibplacebo::flush(std::vector<AVFrame *> &flushed_frames) {
int ret = av_buffersrc_add_frame(buffersrc_ctx_, nullptr);
if (ret < 0) {
spdlog::error("Error while flushing filter graph");
return ret;
@@ -127,7 +127,7 @@ int LibplaceboFilter::flush(std::vector<AVFrame *> &flushed_frames) {
return AVERROR(ENOMEM);
}
ret = av_buffersink_get_frame(buffersink_ctx, filt_frame);
ret = av_buffersink_get_frame(buffersink_ctx_, filt_frame);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
av_frame_free(&filt_frame);
break;
@@ -138,7 +138,7 @@ int LibplaceboFilter::flush(std::vector<AVFrame *> &flushed_frames) {
}
// Rescale PTS to encoder's time base
filt_frame->pts = av_rescale_q(filt_frame->pts, in_time_base, out_time_base);
filt_frame->pts = av_rescale_q(filt_frame->pts, in_time_base_, out_time_base_);
// Add to processed frames
flushed_frames.push_back(filt_frame);
@@ -146,3 +146,14 @@ int LibplaceboFilter::flush(std::vector<AVFrame *> &flushed_frames) {
return 0;
}
// The libplacebo filter scales to a user-requested size, so the output
// dimensions come straight from the processor configuration rather than
// from the input frame size (the unnamed width/height params are unused).
void FilterLibplacebo::get_output_dimensions(
    const ProcessorConfig *processor_config,
    int,
    int,
    int &out_width,
    int &out_height
) const {
    out_height = processor_config->height;
    out_width = processor_config->width;
}

View File

@@ -1,4 +1,4 @@
#include "realesrgan_filter.h"
#include "filter_realesrgan.h"
#include <cstdint>
#include <cstdio>
@@ -9,34 +9,34 @@
#include "conversions.h"
#include "fsutils.h"
RealesrganFilter::RealesrganFilter(
FilterRealesrgan::FilterRealesrgan(
int gpuid,
bool tta_mode,
int scaling_factor,
const StringType model_name
)
: realesrgan(nullptr),
gpuid(gpuid),
tta_mode(tta_mode),
scaling_factor(scaling_factor),
model_name(std::move(model_name)) {}
: realesrgan_(nullptr),
gpuid_(gpuid),
tta_mode_(tta_mode),
scaling_factor_(scaling_factor),
model_name_(std::move(model_name)) {}
RealesrganFilter::~RealesrganFilter() {
if (realesrgan) {
delete realesrgan;
realesrgan = nullptr;
FilterRealesrgan::~FilterRealesrgan() {
if (realesrgan_) {
delete realesrgan_;
realesrgan_ = nullptr;
}
}
int RealesrganFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *_) {
int FilterRealesrgan::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *_) {
// Construct the model paths using std::filesystem
std::filesystem::path model_param_path;
std::filesystem::path model_bin_path;
StringType param_file_name =
model_name + STR("-x") + to_string_type(scaling_factor) + STR(".param");
model_name_ + STR("-x") + to_string_type(scaling_factor_) + STR(".param");
StringType bin_file_name =
model_name + STR("-x") + to_string_type(scaling_factor) + STR(".bin");
model_name_ + STR("-x") + to_string_type(scaling_factor_) + STR(".bin");
// Find the model paths by model name if provided
model_param_path = std::filesystem::path(STR("models")) / STR("realesrgan") / param_file_name;
@@ -57,39 +57,39 @@ int RealesrganFilter::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVB
}
// Create a new RealESRGAN instance
realesrgan = new RealESRGAN(gpuid, tta_mode);
realesrgan_ = new RealESRGAN(gpuid_, tta_mode_);
// Store the time bases
in_time_base = dec_ctx->time_base;
out_time_base = enc_ctx->time_base;
out_pix_fmt = enc_ctx->pix_fmt;
in_time_base_ = dec_ctx->time_base;
out_time_base_ = enc_ctx->time_base;
out_pix_fmt_ = enc_ctx->pix_fmt;
// Load the model
if (realesrgan->load(model_param_full_path, model_bin_full_path) != 0) {
if (realesrgan_->load(model_param_full_path, model_bin_full_path) != 0) {
spdlog::error("Failed to load RealESRGAN model");
return -1;
}
// Set RealESRGAN parameters
realesrgan->scale = scaling_factor;
realesrgan->prepadding = 10;
realesrgan_->scale = scaling_factor_;
realesrgan_->prepadding = 10;
// Calculate tilesize based on GPU heap budget
uint32_t heap_budget = ncnn::get_gpu_device(gpuid)->get_heap_budget();
uint32_t heap_budget = ncnn::get_gpu_device(gpuid_)->get_heap_budget();
if (heap_budget > 1900) {
realesrgan->tilesize = 200;
realesrgan_->tilesize = 200;
} else if (heap_budget > 550) {
realesrgan->tilesize = 100;
realesrgan_->tilesize = 100;
} else if (heap_budget > 190) {
realesrgan->tilesize = 64;
realesrgan_->tilesize = 64;
} else {
realesrgan->tilesize = 32;
realesrgan_->tilesize = 32;
}
return 0;
}
int RealesrganFilter::process_frame(AVFrame *in_frame, AVFrame **out_frame) {
int FilterRealesrgan::filter(AVFrame *in_frame, AVFrame **out_frame) {
int ret;
// Convert the input frame to RGB24
@@ -99,23 +99,34 @@ int RealesrganFilter::process_frame(AVFrame *in_frame, AVFrame **out_frame) {
return -1;
}
// Allocate space for output ncnn::Mat
int output_width = in_mat.w * realesrgan->scale;
int output_height = in_mat.h * realesrgan->scale;
// Allocate space for output ncnn::Mat
int output_width = in_mat.w * realesrgan_->scale;
int output_height = in_mat.h * realesrgan_->scale;
ncnn::Mat out_mat = ncnn::Mat(output_width, output_height, static_cast<size_t>(3), 3);
ret = realesrgan->process(in_mat, out_mat);
ret = realesrgan_->process(in_mat, out_mat);
if (ret != 0) {
spdlog::error("RealESRGAN processing failed");
return ret;
}
// Convert ncnn::Mat to AVFrame
*out_frame = ncnn_mat_to_avframe(out_mat, out_pix_fmt);
*out_frame = ncnn_mat_to_avframe(out_mat, out_pix_fmt_);
// Rescale PTS to encoder's time base
(*out_frame)->pts = av_rescale_q(in_frame->pts, in_time_base, out_time_base);
(*out_frame)->pts = av_rescale_q(in_frame->pts, in_time_base_, out_time_base_);
// Return the processed frame to the caller
return ret;
}
// Real-ESRGAN upscales uniformly: the output size is the input size
// multiplied by the configured scaling factor (the config param is unused).
void FilterRealesrgan::get_output_dimensions(
    const ProcessorConfig *,
    int in_width,
    int in_height,
    int &out_width,
    int &out_height
) const {
    const int scale = scaling_factor_;
    out_width = in_width * scale;
    out_height = in_height * scale;
}

371
src/frames_processor.cpp Normal file
View File

@@ -0,0 +1,371 @@
#include "frames_processor.h"
extern "C" {
#include <libavutil/avutil.h>
}
#include <spdlog/spdlog.h>
#include "avutils.h"
// Deleter for AVFrame unique_ptr.
// av_frame_free() is documented to be safe on a null frame and to null out
// the pointer it is given, so the explicit null check and the assignment to
// the local copy in the original were redundant (dead code).
auto av_frame_deleter = [](AVFrame *frame) { av_frame_free(&frame); };
// Deleter for AVPacket unique_ptr.
// av_packet_free() already unreferences the packet before freeing it and is
// safe on null, so the separate av_packet_unref() call and the assignment to
// the local copy in the original were redundant (dead code).
auto av_packet_deleter = [](AVPacket *packet) { av_packet_free(&packet); };
// Estimates the total number of frames the pipeline will produce and stores
// it in proc_ctx->total_frames (0 when the count cannot be determined).
// For interpolation the estimate is multiplied by the frame-rate multiplier,
// since each source frame yields multiple output frames.
void set_total_frames(
    const ProcessorConfig *processor_config,
    VideoProcessingContext *proc_ctx,
    AVFormatContext *ifmt_ctx,
    int in_vstream_idx,
    Processor *processor
) {
    spdlog::debug("Estimating the total number of frames to process");
    proc_ctx->total_frames = get_video_frame_count(ifmt_ctx, in_vstream_idx);
    if (proc_ctx->total_frames > 0) {
        spdlog::debug("{} frames to process", proc_ctx->total_frames);
    } else {
        spdlog::warn("Unable to determine the total number of frames");
        proc_ctx->total_frames = 0;
    }
    // Interpolation multiplies the number of output frames
    if (processor->get_processing_mode() == PROCESSING_MODE_INTERPOLATE) {
        proc_ctx->total_frames *= processor_config->frm_rate_mul;
    }
}
// Sends one frame to the encoder and logs a critical error on failure.
// In benchmark mode nothing is written and 0 is returned immediately.
// Returns 0 on success or the encoder's negative libav error code.
int write_frame(
    AVFrame *frame,
    VideoProcessingContext *proc_ctx,
    Encoder &encoder,
    bool benchmark
) {
    // Benchmark mode measures processing speed only; skip encoding entirely
    if (benchmark) {
        return 0;
    }
    // Clear the picture type so the encoder chooses it
    frame->pict_type = AV_PICTURE_TYPE_NONE;
    int ret = encoder.write_frame(frame, proc_ctx->processed_frames);
    if (ret < 0) {
        char errbuf[AV_ERROR_MAX_STRING_SIZE];
        av_strerror(ret, errbuf, sizeof(errbuf));
        spdlog::critical("Error encoding/writing frame: {}", errbuf);
    }
    return ret;
}
// Remuxes one non-video packet (audio/subtitle) into the output container:
// maps its stream index via stream_map, rescales its timestamps from the
// input to the output stream time base, and writes it interleaved.
// Returns 0 on success or a negative libav error code.
int write_raw_packet(
    AVPacket *packet,
    AVFormatContext *ifmt_ctx,
    AVFormatContext *ofmt_ctx,
    int *stream_map
) {
    AVStream *in_stream = ifmt_ctx->streams[packet->stream_index];
    const int out_stream_index = stream_map[packet->stream_index];
    AVStream *out_stream = ofmt_ctx->streams[out_stream_index];
    // Translate timestamps into the output stream's time base
    av_packet_rescale_ts(packet, in_stream->time_base, out_stream->time_base);
    packet->stream_index = out_stream_index;
    const int ret = av_interleaved_write_frame(ofmt_ctx, packet);
    if (ret < 0) {
        char errbuf[AV_ERROR_MAX_STRING_SIZE];
        av_strerror(ret, errbuf, sizeof(errbuf));
        spdlog::critical("Error muxing audio/subtitle packet: {}", errbuf);
    }
    return ret;
}
// Runs one decoded frame through a filter-mode processor and writes the
// filtered result to the encoder. EAGAIN from the filter (no output frame
// available yet) is passed through to the caller without logging.
// Returns 0 on success or a negative libav error code.
int process_filtering(
    Processor *processor,
    VideoProcessingContext *proc_ctx,
    Encoder &encoder,
    bool benchmark,
    AVFrame *frame,
    AVFrame *raw_processed_frame
) {
    // Filter-mode processors implement the Filter interface
    Filter *filter = static_cast<Filter *>(processor);
    int ret = filter->filter(frame, &raw_processed_frame);
    if (ret < 0 && ret != AVERROR(EAGAIN)) {
        char errbuf[AV_ERROR_MAX_STRING_SIZE];
        av_strerror(ret, errbuf, sizeof(errbuf));
        spdlog::critical("Error filtering frame: {}", errbuf);
        return ret;
    }
    if (ret == 0 && raw_processed_frame != nullptr) {
        // Take ownership so the filtered frame is freed on every path
        std::unique_ptr<AVFrame, decltype(av_frame_deleter)> processed_frame(
            raw_processed_frame, av_frame_deleter
        );
        ret = write_frame(processed_frame.get(), proc_ctx, encoder, benchmark);
    }
    return ret;
}
// Interpolates between the previous and the current decoded frame, writing
// `frm_rate_mul - 1` intermediate frames followed by the current frame.
//
// If the two frames differ by more than the scene-detection threshold,
// interpolation is skipped and the previous frame is duplicated instead so
// the interpolator does not blend across a scene cut. For the very first
// frame (prev_frame == nullptr) only the original frame is written.
//
// On return, prev_frame holds a clone of `frame` for the next call.
// Returns 0 on success or a negative libav error code.
int process_interpolation(
    Processor *processor,
    const ProcessorConfig *processor_config,
    VideoProcessingContext *proc_ctx,
    Encoder &encoder,
    bool benchmark,
    std::unique_ptr<AVFrame, decltype(av_frame_deleter)> &prev_frame,
    AVFrame *frame,
    AVFrame *raw_processed_frame
) {
    char errbuf[AV_ERROR_MAX_STRING_SIZE];
    int ret = 0;
    // Cast the processor to an Interpolator
    Interpolator *interpolator = static_cast<Interpolator *>(processor);
    // Calculate the time step for each frame (fraction of one frame interval)
    float time_step = 1.0f / static_cast<float>(processor_config->frm_rate_mul);
    float current_time_step = time_step;
    // Check if a scene change is detected
    bool skip_frame = false;
    if (prev_frame != nullptr) {
        float frame_diff = get_frame_diff(prev_frame.get(), frame);
        if (frame_diff > processor_config->scn_det_thresh) {
            spdlog::debug(
                "Scene change detected ({:.2f}%), skipping frame {}",
                frame_diff,
                proc_ctx->processed_frames
            );
            skip_frame = true;
        }
    }
    // Write the interpolated frames
    for (int i = 0; i < processor_config->frm_rate_mul - 1; i++) {
        // Skip interpolation if this is the first frame
        if (prev_frame == nullptr) {
            break;
        }
        // Get the interpolated frame from the interpolator
        if (!skip_frame) {
            ret = interpolator->interpolate(
                prev_frame.get(), frame, &raw_processed_frame, current_time_step
            );
        } else {
            // Scene cut: duplicate the previous frame rather than blending
            ret = 0;
            raw_processed_frame = av_frame_clone(prev_frame.get());
        }
        // Write the interpolated frame
        if (ret < 0 && ret != AVERROR(EAGAIN)) {
            av_strerror(ret, errbuf, sizeof(errbuf));
            spdlog::critical("Error interpolating frame: {}", errbuf);
            return ret;
        } else if (ret == 0 && raw_processed_frame != nullptr) {
            // unique_ptr takes ownership so the frame is freed on every path
            auto processed_frame = std::unique_ptr<AVFrame, decltype(av_frame_deleter)>(
                raw_processed_frame, av_frame_deleter
            );
            // PTS is the running output frame index
            // NOTE(review): assumes the encoder time base is one tick per
            // output frame — confirm against Encoder::init
            processed_frame->pts = proc_ctx->processed_frames;
            ret = write_frame(processed_frame.get(), proc_ctx, encoder, benchmark);
            if (ret < 0) {
                return ret;
            }
        }
        proc_ctx->processed_frames++;
        current_time_step += time_step;
    }
    // Write the original frame
    frame->pts = proc_ctx->processed_frames;
    ret = write_frame(frame, proc_ctx, encoder, benchmark);
    // Update the previous frame with the current frame
    prev_frame.reset(av_frame_clone(frame));
    return ret;
}
// Process frames using the selected filter.
//
// Drives the full decode -> process -> encode pipeline: reads packets from
// the input, decodes video packets, dispatches each decoded frame to the
// processor (filter or interpolator mode), optionally remuxes non-video
// packets, and finally flushes the processor and the encoder. Honors the
// abort and pause flags in proc_ctx (pause polls every 100 ms).
// Returns 0 on success or a negative libav error code.
int process_frames(
    const EncoderConfig *encoder_config,
    const ProcessorConfig *processor_config,
    VideoProcessingContext *proc_ctx,
    Decoder &decoder,
    Encoder &encoder,
    Processor *processor,
    bool benchmark
) {
    char errbuf[AV_ERROR_MAX_STRING_SIZE];
    int ret = 0;
    // Get required objects
    AVFormatContext *ifmt_ctx = decoder.get_format_context();
    AVCodecContext *dec_ctx = decoder.get_codec_context();
    int in_vstream_idx = decoder.get_video_stream_index();
    AVFormatContext *ofmt_ctx = encoder.get_format_context();
    int *stream_map = encoder.get_stream_map();
    // Reference to the previous frame does not require allocation
    // It will be cloned from the current frame
    std::unique_ptr<AVFrame, decltype(av_frame_deleter)> prev_frame(nullptr, av_frame_deleter);
    // Allocate space for the decoded frames
    std::unique_ptr<AVFrame, decltype(av_frame_deleter)> frame(av_frame_alloc(), av_frame_deleter);
    if (frame == nullptr) {
        spdlog::critical("Error allocating frame");
        return AVERROR(ENOMEM);
    }
    // Allocate space for the decoded packets
    std::unique_ptr<AVPacket, decltype(av_packet_deleter)> packet(
        av_packet_alloc(), av_packet_deleter
    );
    if (packet == nullptr) {
        spdlog::critical("Error allocating packet");
        return AVERROR(ENOMEM);
    }
    // Set the total number of frames in the VideoProcessingContext
    set_total_frames(processor_config, proc_ctx, ifmt_ctx, in_vstream_idx, processor);
    // Read frames from the input file
    while (!proc_ctx->abort) {
        ret = av_read_frame(ifmt_ctx, packet.get());
        if (ret < 0) {
            if (ret == AVERROR_EOF) {
                spdlog::debug("Reached end of file");
                break;
            }
            av_strerror(ret, errbuf, sizeof(errbuf));
            spdlog::critical("Error reading packet: {}", errbuf);
            return ret;
        }
        if (packet->stream_index == in_vstream_idx) {
            // Send the packet to the decoder for decoding
            ret = avcodec_send_packet(dec_ctx, packet.get());
            if (ret < 0) {
                av_strerror(ret, errbuf, sizeof(errbuf));
                spdlog::critical("Error sending packet to decoder: {}", errbuf);
                return ret;
            }
            // Process frames decoded from the packet
            while (!proc_ctx->abort) {
                // Sleep for 100 ms if processing is paused
                if (proc_ctx->pause) {
                    std::this_thread::sleep_for(std::chrono::milliseconds(100));
                    continue;
                }
                // Receive the decoded frame from the decoder
                ret = avcodec_receive_frame(dec_ctx, frame.get());
                if (ret == AVERROR(EAGAIN)) {
                    // No more frames from this packet
                    break;
                } else if (ret < 0) {
                    av_strerror(ret, errbuf, sizeof(errbuf));
                    spdlog::critical("Error decoding video frame: {}", errbuf);
                    return ret;
                }
                AVFrame *raw_processed_frame = nullptr;
                // Process the frame based on the selected processing mode
                switch (processor->get_processing_mode()) {
                    case PROCESSING_MODE_FILTER: {
                        ret = process_filtering(
                            processor,
                            proc_ctx,
                            encoder,
                            benchmark,
                            frame.get(),
                            raw_processed_frame
                        );
                        break;
                    }
                    case PROCESSING_MODE_INTERPOLATE: {
                        // Interpolation also advances processed_frames for the
                        // intermediate frames it writes
                        ret = process_interpolation(
                            processor,
                            processor_config,
                            proc_ctx,
                            encoder,
                            benchmark,
                            prev_frame,
                            frame.get(),
                            raw_processed_frame
                        );
                        break;
                    }
                    default:
                        spdlog::critical("Unknown processing mode");
                        return -1;
                }
                // EAGAIN means the processor has no output yet; not an error
                if (ret < 0 && ret != AVERROR(EAGAIN)) {
                    return ret;
                }
                av_frame_unref(frame.get());
                proc_ctx->processed_frames++;
                spdlog::debug(
                    "Processed frame {}/{}", proc_ctx->processed_frames, proc_ctx->total_frames
                );
            }
        } else if (encoder_config->copy_streams && stream_map[packet->stream_index] >= 0) {
            // Non-video stream with copying enabled: remux the packet as-is
            write_raw_packet(packet.get(), ifmt_ctx, ofmt_ctx, stream_map);
        }
        av_packet_unref(packet.get());
    }
    // Flush the filter
    std::vector<AVFrame *> raw_flushed_frames;
    ret = processor->flush(raw_flushed_frames);
    if (ret < 0) {
        av_strerror(ret, errbuf, sizeof(errbuf));
        spdlog::critical("Error flushing filter: {}", errbuf);
        return ret;
    }
    // Wrap flushed frames in unique_ptrs so they are freed on every path
    std::vector<std::unique_ptr<AVFrame, decltype(av_frame_deleter)>> flushed_frames;
    for (AVFrame *raw_frame : raw_flushed_frames) {
        flushed_frames.emplace_back(raw_frame, av_frame_deleter);
    }
    // Encode and write all flushed frames
    for (auto &flushed_frame : flushed_frames) {
        ret = write_frame(flushed_frame.get(), proc_ctx, encoder, benchmark);
        if (ret < 0) {
            return ret;
        }
        proc_ctx->processed_frames++;
    }
    // Flush the encoder
    ret = encoder.flush();
    if (ret < 0) {
        av_strerror(ret, errbuf, sizeof(errbuf));
        spdlog::critical("Error flushing encoder: {}", errbuf);
        return ret;
    }
    return ret;
}

121
src/interpolator_rife.cpp Normal file
View File

@@ -0,0 +1,121 @@
#include "interpolator_rife.h"
#include <cstdio>
#include <filesystem>
#include <spdlog/spdlog.h>
#include "conversions.h"
#include "fsutils.h"
// Constructs a RIFE interpolator; the ncnn RIFE instance itself is created
// lazily in init(). Parameters are stored for later use:
//   gpuid             - Vulkan GPU index
//   tta_mode          - test-time augmentation
//   tta_temporal_mode - temporal test-time augmentation
//   uhd_mode          - UHD (4K) processing mode
//   num_threads       - processing thread count
//   rife_v2 / rife_v4 - model generation flags
//   model_name        - model directory name under models/rife
//
// Fix: the by-value parameter was declared `const StringType`, which made
// `std::move` silently copy (clang-tidy performance-move-const-arg); the
// top-level const is not part of the signature, so dropping it restores the
// intended move without affecting callers.
InterpolatorRIFE::InterpolatorRIFE(
    int gpuid,
    bool tta_mode,
    bool tta_temporal_mode,
    bool uhd_mode,
    int num_threads,
    bool rife_v2,
    bool rife_v4,
    StringType model_name
)
    : rife_(nullptr),
      gpuid_(gpuid),
      tta_mode_(tta_mode),
      tta_temporal_mode_(tta_temporal_mode),
      uhd_mode_(uhd_mode),
      num_threads_(num_threads),
      rife_v2_(rife_v2),
      rife_v4_(rife_v4),
      model_name_(std::move(model_name)) {}
// Releases the underlying RIFE instance (deleting nullptr is a no-op).
InterpolatorRIFE::~InterpolatorRIFE() {
    delete rife_;
    rife_ = nullptr;
}
// Initializes the RIFE interpolator: resolves the model directory, creates
// the ncnn RIFE instance, captures the codec time bases / pixel format, and
// loads the model weights. The AVBufferRef parameter is unused.
// Returns 0 on success and -1 on failure.
int InterpolatorRIFE::init(AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, AVBufferRef *) {
    // Model files live under models/rife/<model_name>
    const std::filesystem::path model_param_dir =
        std::filesystem::path(STR("models")) / STR("rife") / model_name_;
    // Resolve the directory relative to the application's resource locations
    const std::filesystem::path model_param_full_path = find_resource_file(model_param_dir);
    if (!std::filesystem::exists(model_param_full_path)) {
        spdlog::error("RIFE model param directory not found: {}", model_param_dir.u8string());
        return -1;
    }
    // Create a new RIFE instance
    rife_ = new RIFE(
        gpuid_, tta_mode_, tta_temporal_mode_, uhd_mode_, num_threads_, rife_v2_, rife_v4_
    );
    // Remember the time bases and pixel format used when emitting frames
    in_time_base_ = dec_ctx->time_base;
    out_time_base_ = enc_ctx->time_base;
    out_pix_fmt_ = enc_ctx->pix_fmt;
    // Load the model
    if (rife_->load(model_param_full_path) != 0) {
        spdlog::error("Failed to load RIFE model");
        return -1;
    }
    return 0;
}
// Generates one intermediate frame between prev_frame and in_frame at the
// given fractional position (time_step in (0, 1)).
//
// On success, *out_frame receives a newly-allocated frame whose PTS is
// copied from in_frame and rescaled to the encoder time base; the caller
// owns it. Returns 0 on success and a negative value on failure.
int InterpolatorRIFE::interpolate(
    AVFrame *prev_frame,
    AVFrame *in_frame,
    AVFrame **out_frame,
    float time_step
) {
    int ret;
    ncnn::Mat in_mat1 = avframe_to_ncnn_mat(prev_frame);
    if (in_mat1.empty()) {
        spdlog::error("Failed to convert AVFrame to ncnn::Mat");
        return -1;
    }
    ncnn::Mat in_mat2 = avframe_to_ncnn_mat(in_frame);
    if (in_mat2.empty()) {
        spdlog::error("Failed to convert AVFrame to ncnn::Mat");
        return -1;
    }
    // Allocate space for output ncnn::Mat
    ncnn::Mat out_mat = ncnn::Mat(in_mat2.w, in_mat2.h, static_cast<size_t>(3), 3);
    ret = rife_->process(in_mat1, in_mat2, time_step, out_mat);
    if (ret != 0) {
        spdlog::error("RIFE processing failed");
        return ret;
    }
    // Convert ncnn::Mat to AVFrame
    *out_frame = ncnn_mat_to_avframe(out_mat, out_pix_fmt_);
    // Fix: previously the result was dereferenced for the PTS rescale without
    // checking for a failed conversion, which would crash on nullptr
    if (*out_frame == nullptr) {
        spdlog::error("Failed to convert ncnn::Mat to AVFrame");
        return -1;
    }
    // Rescale PTS to encoder's time base
    (*out_frame)->pts = av_rescale_q(in_frame->pts, in_time_base_, out_time_base_);
    // Return the processed frame to the caller
    return ret;
}
// RIFE generates intermediate frames at the source resolution, so the
// output dimensions always equal the input dimensions (config is unused).
void InterpolatorRIFE::get_output_dimensions(
    const ProcessorConfig *,
    int in_width,
    int in_height,
    int &out_width,
    int &out_height
) const {
    out_height = in_height;
    out_width = in_width;
}

View File

@@ -3,7 +3,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <thread>
extern "C" {
#include <libavutil/avutil.h>
@@ -11,199 +10,13 @@ extern "C" {
#include <spdlog/spdlog.h>
#include "avutils.h"
#include "decoder.h"
#include "encoder.h"
#include "filter.h"
#include "libplacebo_filter.h"
#include "realesrgan_filter.h"
#include "frames_processor.h"
#include "processor.h"
#include "processor_factory.h"
// Process frames using the selected filter.
//
// Legacy single-mode pipeline (pre-Processor abstraction): reads packets,
// decodes video packets, runs each decoded frame through the given Filter,
// encodes/writes the result (unless benchmarking), remuxes non-video packets
// when stream copying is enabled, and finally flushes the filter and the
// encoder. Honors the abort and pause flags in proc_ctx (pause polls every
// 100 ms). Returns 0 on success or a negative libav error code.
static int process_frames(
    EncoderConfig *encoder_config,
    VideoProcessingContext *proc_ctx,
    Decoder &decoder,
    Encoder &encoder,
    Filter *filter,
    bool benchmark = false
) {
    char errbuf[AV_ERROR_MAX_STRING_SIZE];
    int ret = 0;
    // Get required objects
    AVFormatContext *ifmt_ctx = decoder.get_format_context();
    AVCodecContext *dec_ctx = decoder.get_codec_context();
    int in_vstream_idx = decoder.get_video_stream_index();
    AVFormatContext *ofmt_ctx = encoder.get_format_context();
    int *stream_map = encoder.get_stream_map();
    // Get total number of frames (best-effort; 0/negative means unknown)
    spdlog::debug("Reading total number of frames");
    proc_ctx->total_frames = get_video_frame_count(ifmt_ctx, in_vstream_idx);
    if (proc_ctx->total_frames <= 0) {
        spdlog::warn("Unable to determine the total number of frames");
    } else {
        spdlog::debug("{} frames to process", proc_ctx->total_frames);
    }
    // Allocate frame and packet with RAII deleters
    auto av_frame_deleter = [](AVFrame *frame) { av_frame_free(&frame); };
    std::unique_ptr<AVFrame, decltype(av_frame_deleter)> frame(av_frame_alloc(), av_frame_deleter);
    if (!frame) {
        ret = AVERROR(ENOMEM);
        return ret;
    }
    auto av_packet_deleter = [](AVPacket *packet) { av_packet_free(&packet); };
    std::unique_ptr<AVPacket, decltype(av_packet_deleter)> packet(
        av_packet_alloc(), av_packet_deleter
    );
    if (!packet) {
        spdlog::critical("Could not allocate AVPacket");
        return AVERROR(ENOMEM);
    }
    // Read frames from the input file
    while (!proc_ctx->abort) {
        ret = av_read_frame(ifmt_ctx, packet.get());
        if (ret < 0) {
            if (ret == AVERROR_EOF) {
                spdlog::debug("Reached end of file");
                break;
            }
            av_strerror(ret, errbuf, sizeof(errbuf));
            spdlog::critical("Error reading packet: {}", errbuf);
            return ret;
        }
        if (packet->stream_index == in_vstream_idx) {
            // Feed the compressed packet to the decoder
            ret = avcodec_send_packet(dec_ctx, packet.get());
            if (ret < 0) {
                av_strerror(ret, errbuf, sizeof(errbuf));
                spdlog::critical("Error sending packet to decoder: {}", errbuf);
                av_packet_unref(packet.get());
                return ret;
            }
            // Drain all frames produced by this packet
            while (!proc_ctx->abort) {
                // Sleep for 100 ms if processing is paused
                if (proc_ctx->pause) {
                    std::this_thread::sleep_for(std::chrono::milliseconds(100));
                    continue;
                }
                ret = avcodec_receive_frame(dec_ctx, frame.get());
                if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
                    spdlog::debug("Frame not ready");
                    break;
                } else if (ret < 0) {
                    av_strerror(ret, errbuf, sizeof(errbuf));
                    spdlog::critical("Error decoding video frame: {}", errbuf);
                    av_packet_unref(packet.get());
                    return ret;
                }
                AVFrame *raw_processed_frame = nullptr;
                // Run the decoded frame through the filter; EAGAIN means no
                // output is available yet and is not an error
                ret = filter->process_frame(frame.get(), &raw_processed_frame);
                if (ret < 0 && ret != AVERROR(EAGAIN)) {
                    av_strerror(ret, errbuf, sizeof(errbuf));
                    av_packet_unref(packet.get());
                    return ret;
                } else if (ret == 0 && raw_processed_frame != nullptr) {
                    // Take ownership so the frame is freed on every path
                    auto processed_frame = std::unique_ptr<AVFrame, decltype(av_frame_deleter)>(
                        raw_processed_frame, av_frame_deleter
                    );
                    if (!benchmark) {
                        ret =
                            encoder.write_frame(processed_frame.get(), proc_ctx->processed_frames);
                        if (ret < 0) {
                            av_strerror(ret, errbuf, sizeof(errbuf));
                            spdlog::critical("Error encoding/writing frame: {}", errbuf);
                            av_packet_unref(packet.get());
                            return ret;
                        }
                    }
                    proc_ctx->processed_frames++;
                }
                av_frame_unref(frame.get());
                spdlog::debug(
                    "Processed frame {}/{}", proc_ctx->processed_frames, proc_ctx->total_frames
                );
            }
        } else if (encoder_config->copy_streams && stream_map[packet->stream_index] >= 0) {
            // Remux non-video packets with rescaled timestamps
            AVStream *in_stream = ifmt_ctx->streams[packet->stream_index];
            int out_stream_index = stream_map[packet->stream_index];
            AVStream *out_stream = ofmt_ctx->streams[out_stream_index];
            av_packet_rescale_ts(packet.get(), in_stream->time_base, out_stream->time_base);
            packet->stream_index = out_stream_index;
            ret = av_interleaved_write_frame(ofmt_ctx, packet.get());
            if (ret < 0) {
                av_strerror(ret, errbuf, sizeof(errbuf));
                spdlog::critical("Error muxing audio/subtitle packet: {}", errbuf);
                av_packet_unref(packet.get());
                return ret;
            }
        }
        av_packet_unref(packet.get());
    }
    // Flush the filter
    std::vector<AVFrame *> raw_flushed_frames;
    ret = filter->flush(raw_flushed_frames);
    if (ret < 0) {
        av_strerror(ret, errbuf, sizeof(errbuf));
        spdlog::critical("Error flushing filter: {}", errbuf);
        return ret;
    }
    // Wrap flushed frames in unique_ptrs
    std::vector<std::unique_ptr<AVFrame, decltype(av_frame_deleter)>> flushed_frames;
    for (AVFrame *raw_frame : raw_flushed_frames) {
        flushed_frames.emplace_back(raw_frame, av_frame_deleter);
    }
    // Encode and write all flushed frames
    for (auto &flushed_frame : flushed_frames) {
        ret = encoder.write_frame(flushed_frame.get(), proc_ctx->processed_frames);
        if (ret < 0) {
            av_strerror(ret, errbuf, sizeof(errbuf));
            spdlog::critical("Error encoding/writing flushed frame: {}", errbuf);
            return ret;
        }
        proc_ctx->processed_frames++;
    }
    // Flush the encoder
    ret = encoder.flush();
    if (ret < 0) {
        av_strerror(ret, errbuf, sizeof(errbuf));
        spdlog::critical("Error flushing encoder: {}", errbuf);
        return ret;
    }
    return ret;
}
extern "C" int process_video(
const CharType *in_fname,
const CharType *out_fname,
Libvideo2xLogLevel log_level,
bool benchmark,
uint32_t vk_device_index,
AVHWDeviceType hw_type,
const FilterConfig *filter_config,
EncoderConfig *encoder_config,
VideoProcessingContext *proc_ctx
) {
char errbuf[AV_ERROR_MAX_STRING_SIZE];
int ret = 0;
// Set the log level for FFmpeg and spdlog
static void set_log_level(Libvideo2xLogLevel log_level) {
switch (log_level) {
case LIBVIDEO2X_LOG_LEVEL_TRACE:
av_log_set_level(AV_LOG_TRACE);
@@ -238,13 +51,32 @@ extern "C" int process_video(
spdlog::set_level(spdlog::level::info);
break;
}
}
extern "C" int process_video(
const CharType *in_fname,
const CharType *out_fname,
Libvideo2xLogLevel log_level,
bool benchmark,
uint32_t vk_device_index,
AVHWDeviceType hw_type,
const ProcessorConfig *processor_config,
EncoderConfig *encoder_config,
VideoProcessingContext *proc_ctx
) {
char errbuf[AV_ERROR_MAX_STRING_SIZE];
int ret = 0;
// Set the log level for FFmpeg and spdlog
set_log_level(log_level);
// Convert the file names to std::filesystem::path
std::filesystem::path in_fpath(in_fname);
std::filesystem::path out_fpath(out_fname);
// Create a smart pointer to manage the hardware device context
auto hw_ctx_deleter = [](AVBufferRef *ref) {
if (ref) {
if (ref != nullptr) {
av_buffer_unref(&ref);
}
};
@@ -275,22 +107,24 @@ extern "C" int process_video(
AVCodecContext *dec_ctx = decoder.get_codec_context();
int in_vstream_idx = decoder.get_video_stream_index();
// Create and initialize the appropriate filter
std::unique_ptr<Processor> processor(
ProcessorFactory::instance().create_processor(processor_config, vk_device_index)
);
if (processor == nullptr) {
spdlog::critical("Failed to create filter instance");
return -1;
}
// Initialize output dimensions based on filter configuration
int output_width = 0, output_height = 0;
switch (filter_config->filter_type) {
case FILTER_LIBPLACEBO:
output_width = filter_config->config.libplacebo.out_width;
output_height = filter_config->config.libplacebo.out_height;
break;
case FILTER_REALESRGAN:
output_width = dec_ctx->width * filter_config->config.realesrgan.scaling_factor;
output_height = dec_ctx->height * filter_config->config.realesrgan.scaling_factor;
break;
default:
spdlog::critical("Unknown filter type");
return -1;
processor->get_output_dimensions(
processor_config, dec_ctx->width, dec_ctx->height, output_width, output_height
);
if (output_width <= 0 || output_height <= 0) {
spdlog::critical("Failed to determine the output dimensions");
return -1;
}
spdlog::debug("Output video dimensions: {}x{}", output_width, output_height);
// Update encoder configuration with output dimensions
encoder_config->width = output_width;
@@ -298,67 +132,26 @@ extern "C" int process_video(
// Initialize the encoder
Encoder encoder;
ret = encoder.init(hw_ctx.get(), out_fpath, ifmt_ctx, dec_ctx, encoder_config, in_vstream_idx);
ret = encoder.init(
hw_ctx.get(), out_fpath, ifmt_ctx, dec_ctx, encoder_config, processor_config, in_vstream_idx
);
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::critical("Failed to initialize encoder: {}", errbuf);
return ret;
}
// Write the output file header
ret = avformat_write_header(encoder.get_format_context(), NULL);
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::critical("Error occurred when opening output file: {}", errbuf);
return ret;
}
// Create and initialize the appropriate filter
std::unique_ptr<Filter> filter;
if (filter_config->filter_type == FILTER_LIBPLACEBO) {
const auto &config = filter_config->config.libplacebo;
if (!config.shader_path) {
spdlog::critical("Shader path must be provided for the libplacebo filter");
return -1;
}
filter = std::make_unique<LibplaceboFilter>(
vk_device_index,
std::filesystem::path(config.shader_path),
config.out_width,
config.out_height
);
} else if (filter_config->filter_type == FILTER_REALESRGAN) {
const auto &config = filter_config->config.realesrgan;
if (!config.model_name) {
spdlog::critical("Model name must be provided for the RealESRGAN filter");
return -1;
}
filter = std::make_unique<RealesrganFilter>(
static_cast<int>(vk_device_index),
config.tta_mode,
config.scaling_factor,
config.model_name
);
} else {
spdlog::critical("Unknown filter type");
return -1;
}
// Check if the filter instance was created successfully
if (filter == nullptr) {
spdlog::critical("Failed to create filter instance");
return -1;
}
// Initialize the filter
ret = filter->init(dec_ctx, encoder.get_encoder_context(), hw_ctx.get());
ret = processor->init(dec_ctx, encoder.get_encoder_context(), hw_ctx.get());
if (ret < 0) {
spdlog::critical("Failed to initialize filter");
return ret;
}
// Process frames using the encoder and decoder
ret = process_frames(encoder_config, proc_ctx, decoder, encoder, filter.get(), benchmark);
ret = process_frames(
encoder_config, processor_config, proc_ctx, decoder, encoder, processor.get(), benchmark
);
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::critical("Error processing frames: {}", errbuf);

112
src/processor_factory.cpp Normal file
View File

@@ -0,0 +1,112 @@
#include "processor_factory.h"
#include <spdlog/spdlog.h>
#include <utility>
#include "filter_libplacebo.h"
#include "filter_realesrgan.h"
#include "interpolator_rife.h"
// Access the singleton factory instance.
//
// The factory itself is a C++11 "magic static" (thread-safe lazy init).
// The default processors are registered exactly once via a second magic
// static whose initializer runs the registration; unlike the previous
// check-then-set `bool` flag, this is safe when instance() is first
// called from multiple threads concurrently.
ProcessorFactory &ProcessorFactory::instance() {
    static ProcessorFactory factory;
    static const bool initialized = [&] {
        ProcessorFactory::init_default_processors(factory);
        return true;
    }();
    (void)initialized;  // suppress unused-variable warnings
    return factory;
}
// Register (or replace) the creator callback for a processor type.
void ProcessorFactory::register_processor(ProcessorType type, Creator creator) {
    // insert_or_assign overwrites any existing entry, matching map
    // subscript-assignment semantics without default-constructing a value.
    creators.insert_or_assign(type, std::move(creator));
}
// Create a processor instance
std::unique_ptr<Processor> ProcessorFactory::create_processor(
const ProcessorConfig *processor_config,
uint32_t vk_device_index
) const {
auto it = creators.find(processor_config->processor_type);
if (it == creators.end()) {
spdlog::critical(
"Processor type not registered: {}", static_cast<int>(processor_config->processor_type)
);
return nullptr;
}
// Call the corresponding creator function
return it->second(processor_config, vk_device_index);
}
// Register the built-in processors (libplacebo, Real-ESRGAN, and RIFE)
// with the given factory. Each creator validates its own configuration
// and returns nullptr on invalid input.
void ProcessorFactory::init_default_processors(ProcessorFactory &factory) {
    // Creator for the libplacebo shader-based filter.
    const auto create_libplacebo =
        [](const ProcessorConfig *config, uint32_t vk_device_index) -> std::unique_ptr<Processor> {
        const auto &cfg = config->config.libplacebo;
        if (!cfg.shader_path) {
            spdlog::critical("Shader path must be provided for the libplacebo filter");
            return nullptr;
        }
        if (config->width <= 0 || config->height <= 0) {
            spdlog::critical(
                "Output width and height must be provided for the libplacebo filter"
            );
            return nullptr;
        }
        return std::make_unique<FilterLibplacebo>(
            vk_device_index,
            std::filesystem::path(cfg.shader_path),
            config->width,
            config->height
        );
    };

    // Creator for the Real-ESRGAN super-resolution filter.
    const auto create_realesrgan =
        [](const ProcessorConfig *config, uint32_t vk_device_index) -> std::unique_ptr<Processor> {
        const auto &cfg = config->config.realesrgan;
        if (config->scaling_factor <= 0) {
            spdlog::critical("Scaling factor must be provided for the RealESRGAN filter");
            return nullptr;
        }
        if (!cfg.model_name) {
            spdlog::critical("Model name must be provided for the RealESRGAN filter");
            return nullptr;
        }
        return std::make_unique<FilterRealesrgan>(
            static_cast<int>(vk_device_index),
            cfg.tta_mode,
            config->scaling_factor,
            cfg.model_name
        );
    };

    // Creator for the RIFE frame interpolator.
    const auto create_rife =
        [](const ProcessorConfig *config, uint32_t vk_device_index) -> std::unique_ptr<Processor> {
        const auto &cfg = config->config.rife;
        if (!cfg.model_name) {
            spdlog::critical("Model name must be provided for the RIFE filter");
            return nullptr;
        }
        return std::make_unique<InterpolatorRIFE>(
            static_cast<int>(vk_device_index),
            cfg.tta_mode,
            cfg.tta_temporal_mode,
            cfg.uhd_mode,
            cfg.num_threads,
            cfg.rife_v2,
            cfg.rife_v4,
            cfg.model_name
        );
    };

    factory.register_processor(PROCESSOR_LIBPLACEBO, create_libplacebo);
    factory.register_processor(PROCESSOR_REALESRGAN, create_realesrgan);
    factory.register_processor(PROCESSOR_RIFE, create_rife);
}