19 Commits

Author SHA1 Message Date
Salman Chishti
5909f627fb ci(*): upgrade GitHub Actions for Node 24 compatibility (#1454)
Signed-off-by: Salman Muin Kayser Chishti <13schishti@gmail.com>
2026-01-27 08:04:06 +00:00
Salman Chishti
ce07604217 ci(*): upgrade GitHub Actions to latest versions (#1455)
Signed-off-by: Salman Muin Kayser Chishti <13schishti@gmail.com>
2026-01-27 08:01:58 +00:00
k4yt3x
8a9e571114 feat(encoder): add separate audio/subtitle copy and recalculate PTS option
Signed-off-by: k4yt3x <i@k4yt3x.com>
2025-11-23 00:00:00 +00:00
k4yt3x
f3df895890 ci(build): update setup-vulkan-sdk to v1.2.1
Signed-off-by: k4yt3x <i@k4yt3x.com>
2025-10-15 00:00:00 +00:00
k4yt3x
d848db037a deps(rife): update librife-ncnn-vulkan to fix ncnn compatibility issues
Signed-off-by: k4yt3x <i@k4yt3x.com>
2025-10-15 00:00:00 +00:00
Michael
feb84c3cae deps(ncnn): bump ncnn to 20250503 to fix black output frames on RADV (#1410)
Signed-off-by: Michael <mwp.foss@gmail.com>
2025-08-03 08:10:57 +00:00
k4yt3x
6bf0ee527d docs(readme): add hardware requirements
Signed-off-by: k4yt3x <i@k4yt3x.com>
2025-03-26 00:00:00 +00:00
k4yt3x
4668556417 docs(book): correct the renamed CLI arguments
Signed-off-by: k4yt3x <i@k4yt3x.com>
2025-03-23 00:00:00 +00:00
k4yt3x
d6403dc1a4 docs(readme): update file server URLs
Signed-off-by: k4yt3x <i@k4yt3x.com>
2025-03-09 00:00:00 +00:00
k4yt3x
66778b7feb docs(book): add docs for setting the encoder options
Signed-off-by: k4yt3x <i@k4yt3x.com>
2025-02-24 00:00:00 +00:00
k4yt3x
445d13b73b fix(libvideo2x): fix atomic int64_t frame_idx_ self add
Signed-off-by: k4yt3x <i@k4yt3x.com>
2025-02-08 00:00:00 +00:00
k4yt3x
8803cf10a4 fix(video2x): allow Real-ESRGAN noise to be -1
Signed-off-by: k4yt3x <i@k4yt3x.com>
2025-02-08 00:00:00 +00:00
k4yt3x
3e987b9693 docs(book): update command arguments in the container usages (#1323)
Signed-off-by: k4yt3x <i@k4yt3x.com>
2025-02-04 00:00:00 +00:00
k4yt3x
afa37f9e58 chore(models): add the Real-ESRGAN general models (#1319)
Signed-off-by: k4yt3x <i@k4yt3x.com>
2025-02-02 00:00:00 +00:00
k4yt3x
2c5a059d39 build(arch): move PKGBUILD openmp from makedepends to depends
Signed-off-by: k4yt3x <i@k4yt3x.com>
2025-02-02 00:00:00 +00:00
lbrunkho
0585130f09 ci(dockerfile): add the missing openmp dependency (#1317) 2025-02-02 00:30:06 +00:00
k4yt3x
5d043cab3b docs(book): update Linux AppImage and Ubuntu build instructions
Signed-off-by: k4yt3x <i@k4yt3x.com>
2025-01-27 00:00:00 +00:00
Integral
73113feb38 docs: add archlinuxcn/video2x-qt6 to readme & linux installation guide (#1307) 2025-01-25 05:48:06 +00:00
k4yt3x
a0b8611ea2 docs(readme): update readme for 6.4.0
Signed-off-by: k4yt3x <i@k4yt3x.com>
2025-01-24 00:00:00 +00:00
26 changed files with 391 additions and 115 deletions

View File

@@ -26,7 +26,7 @@ jobs:
DEBIAN_FRONTEND: noninteractive
steps:
- name: Checkout repository
uses: actions/checkout@v4
uses: actions/checkout@v6
with:
submodules: recursive
@@ -58,7 +58,7 @@ jobs:
cmake --build /tmp/build --config Debug --target install
- name: Upload artifacts
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v6
with:
name: video2x-nightly-linux-amd64
path: /tmp/install
@@ -67,14 +67,14 @@ jobs:
runs-on: windows-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
uses: actions/checkout@v6
with:
submodules: recursive
- name: Install Vulkan SDK
uses: humbletim/setup-vulkan-sdk@v1.2.0
uses: humbletim/setup-vulkan-sdk@v1.2.1
with:
vulkan-query-version: 1.3.204.0
vulkan-query-version: 1.4.304.1
vulkan-components: Vulkan-Headers, Vulkan-Loader, Glslang, SPIRV-Tools, SPIRV-Headers
vulkan-use-cache: true
@@ -103,7 +103,7 @@ jobs:
cmake --build build --config Debug --parallel --target install
- name: Upload artifacts
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v6
with:
name: video2x-nightly-windows-amd64
path: build/video2x_install
@@ -112,9 +112,9 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
uses: actions/checkout@v6
- uses: mr-smithers-excellent/docker-build-push@v5
- uses: mr-smithers-excellent/docker-build-push@v6
name: Build the Docker image
with:
registry: ghcr.io
@@ -127,7 +127,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
uses: actions/checkout@v6
- name: Install dependencies
run: |
@@ -173,7 +173,7 @@ jobs:
--output appimage
- name: Upload artifacts
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v6
with:
name: Video2X-x86_64.AppImage
path: Video2X-x86_64.AppImage

View File

@@ -25,7 +25,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
uses: actions/checkout@v6
- name: Install mdBook
run: |
@@ -40,7 +40,7 @@ jobs:
uses: actions/configure-pages@v5
- name: Upload artifact
uses: actions/upload-pages-artifact@v3
uses: actions/upload-pages-artifact@v4
with:
path: "build/docs/book"

View File

@@ -31,7 +31,7 @@ jobs:
DEBIAN_FRONTEND: noninteractive
steps:
- name: Checkout repository
uses: actions/checkout@v4
uses: actions/checkout@v6
with:
submodules: recursive
@@ -63,7 +63,7 @@ jobs:
dpkg-deb --build build/video2x-linux-ubuntu-2404-amd64
- name: Upload artifacts
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v6
with:
name: video2x-linux-ubuntu-2404-amd64
path: build/video2x-linux-ubuntu-2404-amd64.deb
@@ -77,14 +77,14 @@ jobs:
runs-on: windows-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
uses: actions/checkout@v6
with:
submodules: recursive
- name: Install Vulkan SDK
uses: humbletim/setup-vulkan-sdk@v1.2.0
uses: humbletim/setup-vulkan-sdk@v1.2.1
with:
vulkan-query-version: 1.3.204.0
vulkan-query-version: 1.4.304.1
vulkan-components: Vulkan-Headers, Vulkan-Loader, Glslang, SPIRV-Tools, SPIRV-Headers
vulkan-use-cache: true
@@ -117,7 +117,7 @@ jobs:
Compress-Archive -Path build/video2x_install/* -DestinationPath build/video2x-windows-amd64.zip
- name: Upload artifacts
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v6
with:
name: video2x-windows-amd64
path: build/video2x-windows-amd64.zip
@@ -129,9 +129,9 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
uses: actions/checkout@v6
- uses: mr-smithers-excellent/docker-build-push@v5
- uses: mr-smithers-excellent/docker-build-push@v6
name: Build & push the Docker image
with:
registry: ghcr.io
@@ -153,7 +153,7 @@ jobs:
# upload_url: ${{ steps.create_release.outputs.upload_url }}
steps:
# - name: Download artifacts
# uses: actions/download-artifact@v4
# uses: actions/download-artifact@v7
- name: Create release
id: create_release

View File

@@ -5,12 +5,24 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
### Added
- Option to recalculate the PTS.
- Real-ESRGAN models `realesr-general-x4v3` and `realesr-general-wdn-x4v3` (#1319).
- (Video2X Qt6) Korean translation.
### Changed
- Separate audio and subtitle stream copying options.
## [6.4.0] - 2025-01-24
### Added
- Multi-versioning for critical functions to enhance performance in generic architecture builds.
- RIFE models v4.25 and v4.26 (#1304).
- RIFE models `v4.25` and `v4.26` (#1304).
- Support for processing videos without PTS information (#1278).
- The feature to copy input streams' metadata to the output streams (#1282).
- (Video2X Qt6) German translation (#1279).

View File

@@ -27,31 +27,46 @@ Version 6.0.0 is a complete rewrite of this project in C/C++. It:
- genuinely works this time, with much less hassle compared to the 5.0.0 beta;
- is blazing fast, thanks to the new optimized pipeline and the efficiency of C/C++;
- is cross-platform, available now for both Windows and Linux;
- offers significantly better output quality with Anime4K v4, RealESRGAN, RealCUGAN, and RIFE;
- offers significantly better output quality with Anime4K v4, Real-ESRGAN, Real-CUGAN, and RIFE;
- supports two modes: filtering (upscaling) and frame interpolation;
- supports Anime4K v4 and all custom MPV-compatible GLSL shaders;
- supports RealESRGAN, RealCUGAN, and RIFE (all models) via ncnn and Vulkan;
- supports Real-ESRGAN, Real-CUGAN, and RIFE (all models) via ncnn and Vulkan;
- requires zero additional disk space during processing, just space for the final output.
</details>
![6.3.0-screenshot](https://github.com/user-attachments/assets/c5442f84-5ffc-4476-915f-a0fc188a2cb3)
![6.4.0-screenshot](https://github.com/user-attachments/assets/9b1cc8a7-2903-4d2c-80a2-8d81f007e45b)
## 🖥️ Hardware Requirements
Your system must meet the minimum hardware requirements below to run Video2X.
- **CPU**
- The precompiled binaries require CPUs with AVX2 support.
- **Intel**: Haswell (Q2 2013) or newer
- **AMD**: Excavator (Q2 2015) or newer
- **GPU**
- The GPU must support Vulkan.
- **NVIDIA**: Kepler (GTX 600 series, Q2 2012) or newer
- **AMD**: GCN 1.0 (Radeon HD 7000 series, Q1 2012) or newer
- **Intel**: HD Graphics 4000 (Q2 2012) or newer
## [🪟 Install on Windows](https://docs.video2x.org/installing/windows-qt6.html)
**[Download the Latest Windows Installer Executable (6.3.1)](https://github.com/k4yt3x/video2x/releases/download/6.3.1/video2x-qt6-windows-amd64-installer.exe)**
**[Download the Latest Windows Installer Executable (6.4.0)](https://github.com/k4yt3x/video2x/releases/download/6.4.0/video2x-qt6-windows-amd64-installer.exe)**
You can download the latest Windows release on the [releases page](https://github.com/k4yt3x/video2x/releases/latest). For basic GUI usage, refer to the [documentation](https://docs.video2x.org/running/desktop.html). If you're unable to download directly from GitHub, try the [mirror site](https://files.k4yt3x.com/Projects/Video2X). The GUI currently supports the following languages:
You can download the latest Windows release on the [releases page](https://github.com/k4yt3x/video2x/releases/latest). For basic GUI usage, refer to the [documentation](https://docs.video2x.org/running/desktop.html). If you're unable to download directly from GitHub, try the [mirror site](https://files.k4yt3x.com). The GUI currently supports the following languages:
- English (United States)
- 简体中文(中国)
- 日本語(日本)
- Português (Portugal)
- Français (France)
- Deutsch (Deutschland)
## [🐧 Install on Linux](https://docs.video2x.org/installing/linux.html)
Video2X packages are available for the Linux distros listed below. If you'd like to build it from source code, refer to the [PKGBUILD](packaging/arch/PKGBUILD) file for a general overview of the required dependencies and commands. If a package is not available for your distro and you prefer not to compile the program from source code, consider using the container image outlined in the next section.
Video2X packages are available for the Linux distros listed below. A universal AppImage is also available for other distros. If you'd like to build it from source code, refer to the [PKGBUILD](packaging/arch/PKGBUILD) file for a general overview of the required dependencies and commands.
- Arch Linux: AUR packages, maintained by [@K4YT3X](https://github.com/k4yt3x).
- [aur/video2x](https://aur.archlinux.org/packages/video2x)
@@ -61,8 +76,9 @@ Video2X packages are available for the Linux distros listed below. If you'd like
- Arch Linux (Chinese Mainland): archlinuxcn packages, maintained by [@Integral-Tech](https://github.com/Integral-Tech).
- [archlinuxcn/video2x](https://github.com/archlinuxcn/repo/tree/master/archlinuxcn/video2x)
- [archlinuxcn/video2x-git](https://github.com/archlinuxcn/repo/tree/master/archlinuxcn/video2x-git)
- [archlinuxcn/video2x-qt6](https://github.com/archlinuxcn/repo/tree/master/archlinuxcn/video2x-qt6)
- [archlinuxcn/video2x-qt6-git](https://github.com/archlinuxcn/repo/tree/master/archlinuxcn/video2x-qt6-git)
- Ubuntu 24.04: `video2x-linux-ubuntu2404-amd64.deb` on the [releases page](https://github.com/k4yt3x/video2x/releases/latest).
- Other distros: `Video2X-x86_64.AppImage` on the [releases page](https://github.com/k4yt3x/video2x/releases/latest).
## [📦 Container Image](https://docs.video2x.org/running/container.html)
@@ -99,11 +115,11 @@ _Upscale demo: Spirited Away's movie trailer_
The following clip can be used to test if your setup works properly. This is also the standard clip used for running performance benchmarks.
- [Standard Test Clip (240P)](https://files.k4yt3x.com/Resources/Videos/standard-test.mp4) 4.54 MiB
- [Real-CUGAN Upscaled Sample (1704P)](https://files.k4yt3x.com/Resources/Videos/standard-realcugan.mp4) 3.5 MiB
- [Real-ESRGAN Upscaled Sample (1704P)](https://files.k4yt3x.com/Resources/Videos/standard-realesrgan.mp4) 3.1 MiB
- [waifu2x Upscaled Sample (1080P)](https://files.k4yt3x.com/Resources/Videos/standard-waifu2x.mp4) 4.54 MiB
- [Ground Truth (1080P)](https://files.k4yt3x.com/Resources/Videos/standard-original.mp4) 22.2 MiB
- [Standard Test Clip (240P)](https://files.k4yt3x.com/resources/videos/standard-test.mp4) 4.54 MiB
- [Real-CUGAN Upscaled Sample (1704P)](https://files.k4yt3x.com/resources/videos/standard-realcugan.mp4) 3.5 MiB
- [Real-ESRGAN Upscaled Sample (1704P)](https://files.k4yt3x.com/resources/videos/standard-realesrgan.mp4) 3.1 MiB
- [waifu2x Upscaled Sample (1080P)](https://files.k4yt3x.com/resources/videos/standard-waifu2x.mp4) 4.54 MiB
- [Ground Truth (1080P)](https://files.k4yt3x.com/resources/videos/standard-original.mp4) 22.2 MiB
The original clip came from the anime "さくら荘のペットな彼女."\
Copyright of this clip belongs to 株式会社アニプレックス.

View File

@@ -40,14 +40,17 @@ The built binaries will be located in the `build` directory.
Ubuntu users can use the `.justfile` to build the project automatically. The `ubuntu2404` and `ubuntu2204` targets are available for Ubuntu 24.04 and 22.04, respectively. `just` will automatically install the required dependencies, build the project, and package it into a `.deb` package file. It is recommended to perform the build in a container to ensure the environment's consistency and to avoid leaving extra build packages on your system.
```bash
# just needs to be installed manually
sudo apt-get update && sudo apt-get install just
# The version of `just` in the Ubuntu repository is outdated
# We need to compile and install `just` manually
sudo apt-get update && sudo apt-get install cargo
cargo install just
# Clone the repository
git clone --recurse-submodules https://github.com/k4yt3x/video2x.git
cd video2x
# Build the project
# Before running the command, ensure ~/.cargo/bin is in your PATH
just ubuntu2404
```

View File

@@ -12,12 +12,9 @@ Video2X packages are available for the Linux distros listed below. If you'd like
- Chinese Mainland: archlinuxcn packages, maintained by [@Integral-Tech](https://github.com/Integral-Tech).
- [archlinuxcn/video2x](https://github.com/archlinuxcn/repo/tree/master/archlinuxcn/video2x)
- [archlinuxcn/video2x-git](https://github.com/archlinuxcn/repo/tree/master/archlinuxcn/video2x-git)
- [archlinuxcn/video2x-qt6](https://github.com/archlinuxcn/repo/tree/master/archlinuxcn/video2x-qt6)
- [archlinuxcn/video2x-qt6-git](https://github.com/archlinuxcn/repo/tree/master/archlinuxcn/video2x-qt6-git)
## Ubuntu
## Other Distros
Ubuntu users can download the `.deb` packages from the [releases page](https://github.com/k4yt3x/video2x/releases/latest). Install the package with the APT package manager:
```bash
sudo apt-get install ./video2x-linux-ubuntu2404-amd64.deb
```
Users of other distros can download and use the AppImage from the [releases page](https://github.com/k4yt3x/video2x/releases/latest).

View File

@@ -9,27 +9,27 @@ This page does not cover all the options available. For help with more options a
Use the following command to upscale a video by 4x with Real-ESRGAN:
```bash
video2x -i input.mp4 -o output.mp4 -f realesrgan -r 4 -m realesr-animevideov3
video2x -i input.mp4 -o output.mp4 -p realesrgan -s 4 --realesrgan-model realesr-animevideov3
```
Use the following command to upscale a video to 4K (3840x2160) with libplacebo + Anime4K v4 Mode A+A:
```bash
video2x -i input.mp4 -o output.mp4 -f libplacebo -s anime4k-v4-a+a -w 3840 -h 2160
video2x -i input.mp4 -o output.mp4 -w 3840 -h 2160 -p libplacebo --libplacebo-shader anime4k-v4-a+a
```
## Advanced
It is possible to specify custom MPV-compatible GLSL shader files with the `--shader, -s` argument:
It is possible to specify custom MPV-compatible GLSL shader files with the `--libplacebo-shader` argument:
```bash
video2x -i input.mp4 -o output.mp4 -f libplacebo -s path/to/custom/shader.glsl -w 3840 -h 2160
video2x -i input.mp4 -o output.mp4 -p libplacebo -w 3840 -h 2160 --libplacebo-shader path/to/custom/shader.glsl
```
List the available GPUs with `--list-gpus, -l`:
```bash
$video2x --list-gpus
$ video2x --list-gpus
0. NVIDIA RTX A6000
Type: Discrete GPU
Vulkan API Version: 1.3.289
@@ -39,11 +39,66 @@ $video2x --list-gpus
Select which GPU to use with the `--gpu, -g` argument:
```bash
video2x -i input.mp4 -o output.mp4 -f realesrgan -r 4 -m realesr-animevideov3 -g 1
video2x -i input.mp4 -o output.mp4 -p realesrgan -s 4 --realesrgan-model realesr-animevideov3 -g 1
```
Specify arbitrary extra FFmepg encoder options with the `--extra-encoder-options, -e` argument:
Specify arbitrary extra FFmpeg encoder options with the `--extra-encoder-options, -e` argument:
```bash
video2x -i input.mkv -o output.mkv -f realesrgan -m realesrgan-plus -r 4 -c libx264rgb -e crf=17 -e preset=veryslow -e tune=film
video2x -i input.mkv -o output.mkv -p realesrgan --realesrgan-model realesrgan-plus -s 4 -c libx264rgb -e crf=17 -e preset=veryslow -e tune=film
```
## Encoder Options
Video2X uses FFmpeg's C libraries to encode videos. Encoder options are specified in two ways:
- **Common options** shared by all encoders are stored in an [`AVCodecContext`](https://ffmpeg.org/doxygen/trunk/structAVCodecContext.html) struct. Below are some options set through `AVCodecContext`:
- Codec
- Pixel format
- Bitrate
- Keyframe interval
- Minimum and maximum quantizer
- GOP size
- **Encoder-specific** options are stored in [`AVOption`](https://ffmpeg.org/doxygen/trunk/structAVOption.html) structs and set with the [`av_opt_set`](https://ffmpeg.org/doxygen/trunk/group__opt__set__funcs.html#ga5fd4b92bdf4f392a2847f711676a7537) function. Below are some encoder-specific options for `libx264`:
- CRF
- Preset
- Tune
- Profile
Common options can only be set through Video2X's command line arguments. Run `video2x --help` and refer to the `Encoder options` section for the list of supported options.
You can specify encoder-specific options in Video2X using the `--extra-encoder-option` (`-e`) argument. To view the available options for a particular codec, run:
```bash
ffmpeg -h encoder=$ENCODER
```
For example, to view the available options for `libx264`, run:
```console
$ ffmpeg -h encoder=libx264
Encoder libx264 [libx264 H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10]:
General capabilities: dr1 delay threads
Threading capabilities: other
Supported pixel formats: yuv420p yuvj420p yuv422p yuvj422p yuv444p yuvj444p nv12 nv16 nv21 yuv420p10le yuv422p10le yuv444p10le nv20le gray gray10le
libx264 AVOptions:
-preset <string> E..V....... Set the encoding preset (cf. x264 --fullhelp) (default "medium")
-tune <string> E..V....... Tune the encoding params (cf. x264 --fullhelp)
-profile <string> E..V....... Set profile restrictions (cf. x264 --fullhelp)
-fastfirstpass <boolean> E..V....... Use fast settings when encoding first pass (default true)
-level <string> E..V....... Specify level (as defined by Annex A)
-passlogfile <string> E..V....... Filename for 2 pass stats
-wpredp <string> E..V....... Weighted prediction for P-frames
-a53cc <boolean> E..V....... Use A53 Closed Captions (if available) (default true)
-x264opts <string> E..V....... x264 options
-crf <float> E..V....... Select the quality for constant quality mode (from -1 to FLT_MAX) (default -1)
-crf_max <float> E..V....... In CRF mode, prevents VBV from lowering quality beyond this point. (from -1 to FLT_MAX) (default -1)
-qp <int> E..V....... Constant quantization parameter rate control method (from -1 to INT_MAX) (default -1)
...
```
You can then set the encoder-specific options with the `-e` argument. The `-e` argument can be used multiple times to set multiple options. For example, the following arguments set the CRF to 17, the preset to `veryslow`, and the tune to `film` for `libx264`:
```console
-e crf=17 -e preset=veryslow -e tune=film
```

View File

@@ -17,7 +17,7 @@ This section documents how to upscale a video. Replace `$TAG` with an appropriat
Make sure your host has the proper GPU and Vulkan libraries and drivers, then use the following command to launch the container:
```shell
docker run --gpus all -it --rm -v $PWD/data:/host ghcr.io/k4yt3x/video2x:$TAG -i standard-test.mp4 -o output.mp4 -f realesrgan -r 4 -m realesr-animevideov3
docker run --gpus all -it --rm -v $PWD/data:/host ghcr.io/k4yt3x/video2x:$TAG -i standard-test.mp4 -o output.mp4 -p realesrgan -s 4 --realesrgan-model realesr-animevideov3
```
### NVIDIA GPUs
@@ -33,19 +33,19 @@ In addition to installing the proper drivers on your host, `nvidia-docker2` (NVI
Once all the prerequisites are installed, you can launch the container:
```shell
docker run --gpus all -it --rm -v $PWD:/host ghcr.io/k4yt3x/video2x:$TAG -i standard-test.mp4 -o output.mp4 -f realesrgan -r 4 -m realesr-animevideov3
docker run --gpus all -it --rm -v $PWD:/host ghcr.io/k4yt3x/video2x:$TAG -i standard-test.mp4 -o output.mp4 -p realesrgan -s 4 --realesrgan-model realesr-animevideov3
```
Depending on the version of your nvidia-docker and some other mysterious factors, you can also try setting `no-cgroups = true` in `/etc/nvidia-container-runtime/config.toml` and adding the NVIDIA devices into the container if the command above doesn't work:
```shell
docker run --gpus all --device=/dev/nvidia0 --device=/dev/nvidiactl --runtime nvidia -it --rm -v $PWD:/host ghcr.io/k4yt3x/video2x:$TAG -i standard-test.mp4 -o output.mp4 -f realesrgan -r 4 -m realesr-animevideov3
docker run --gpus all --device=/dev/nvidia0 --device=/dev/nvidiactl --runtime nvidia -it --rm -v $PWD:/host ghcr.io/k4yt3x/video2x:$TAG -i standard-test.mp4 -o output.mp4 -p realesrgan -s 4 --realesrgan-model realesr-animevideov3
```
If you are still getting a `vkEnumeratePhysicalDevices failed -3` error at this point, try adding the `--privileged` flag to give the container the same level of permissions as the host:
```shell
docker run --gpus all --privileged -it --rm -v $PWD:/host ghcr.io/k4yt3x/video2x:$TAG -i standard-test.mp4 -o output.mp4 -f realesrgan -r 4 -m realesr-animevideov3
docker run --gpus all --privileged -it --rm -v $PWD:/host ghcr.io/k4yt3x/video2x:$TAG -i standard-test.mp4 -o output.mp4 -p realesrgan -s 4 --realesrgan-model realesr-animevideov3
```
### Intel GPUs
@@ -53,5 +53,5 @@ docker run --gpus all --privileged -it --rm -v $PWD:/host ghcr.io/k4yt3x/video2x
Similar to NVIDIA GPUs, you can add `--gpus all` or `--device /dev/dri` to pass the GPU into the container. Adding `--privileged` might help with the performance (thanks @NukeninDark).
```shell
docker run --gpus all --privileged -it --rm -v $PWD:/host ghcr.io/k4yt3x/video2x:$TAG -i standard-test.mp4 -o output.mp4 -f realesrgan -r 4 -m realesr-animevideov3
docker run --gpus all --privileged -it --rm -v $PWD:/host ghcr.io/k4yt3x/video2x:$TAG -i standard-test.mp4 -o output.mp4 -p realesrgan -s 4 --realesrgan-model realesr-animevideov3
```

View File

@@ -17,7 +17,9 @@ namespace encoder {
struct EncoderConfig {
// Non-AVCodecContext options
std::string codec = "libx264";
bool copy_streams = true;
bool recalculate_pts = true;
bool copy_audio_streams = true;
bool copy_subtitle_streams = true;
// Basic video options
AVPixelFormat pix_fmt = AV_PIX_FMT_NONE;
@@ -72,6 +74,7 @@ class Encoder {
int get_output_video_stream_index() const;
private:
EncoderConfig enc_cfg_;
AVFormatContext* ofmt_ctx_;
AVCodecContext* enc_ctx_;
int out_vstream_idx_;

View File

@@ -18,6 +18,7 @@ class FilterRealesrgan : public Filter {
int gpuid = 0,
bool tta_mode = false,
int scaling_factor = 4,
int noise_level = 0,
const fsutils::StringType model_name = STR("realesr-animevideov3")
);
@@ -47,6 +48,7 @@ class FilterRealesrgan : public Filter {
int gpuid_;
bool tta_mode_;
int scaling_factor_;
int noise_level_;
const fsutils::StringType model_name_;
AVRational in_time_base_;
AVRational out_time_base_;

Binary file not shown.

View File

@@ -0,0 +1,75 @@
7767517
73 74
Input data 0 1 data
Split splitncnn_input0 1 2 data data_splitncnn_0 data_splitncnn_1
Convolution /Conv 1 1 data_splitncnn_1 /Conv_output_0 0=64 1=3 4=1 5=1 6=1728
PReLU /PRelu 1 1 /Conv_output_0 /PRelu_output_0 0=64
Convolution /Conv_1 1 1 /PRelu_output_0 /Conv_1_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_1 1 1 /Conv_1_output_0 /PRelu_1_output_0 0=64
Convolution /Conv_2 1 1 /PRelu_1_output_0 /Conv_2_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_2 1 1 /Conv_2_output_0 /PRelu_2_output_0 0=64
Convolution /Conv_3 1 1 /PRelu_2_output_0 /Conv_3_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_3 1 1 /Conv_3_output_0 /PRelu_3_output_0 0=64
Convolution /Conv_4 1 1 /PRelu_3_output_0 /Conv_4_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_4 1 1 /Conv_4_output_0 /PRelu_4_output_0 0=64
Convolution /Conv_5 1 1 /PRelu_4_output_0 /Conv_5_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_5 1 1 /Conv_5_output_0 /PRelu_5_output_0 0=64
Convolution /Conv_6 1 1 /PRelu_5_output_0 /Conv_6_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_6 1 1 /Conv_6_output_0 /PRelu_6_output_0 0=64
Convolution /Conv_7 1 1 /PRelu_6_output_0 /Conv_7_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_7 1 1 /Conv_7_output_0 /PRelu_7_output_0 0=64
Convolution /Conv_8 1 1 /PRelu_7_output_0 /Conv_8_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_8 1 1 /Conv_8_output_0 /PRelu_8_output_0 0=64
Convolution /Conv_9 1 1 /PRelu_8_output_0 /Conv_9_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_9 1 1 /Conv_9_output_0 /PRelu_9_output_0 0=64
Convolution /Conv_10 1 1 /PRelu_9_output_0 /Conv_10_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_10 1 1 /Conv_10_output_0 /PRelu_10_output_0 0=64
Convolution /Conv_11 1 1 /PRelu_10_output_0 /Conv_11_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_11 1 1 /Conv_11_output_0 /PRelu_11_output_0 0=64
Convolution /Conv_12 1 1 /PRelu_11_output_0 /Conv_12_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_12 1 1 /Conv_12_output_0 /PRelu_12_output_0 0=64
Convolution /Conv_13 1 1 /PRelu_12_output_0 /Conv_13_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_13 1 1 /Conv_13_output_0 /PRelu_13_output_0 0=64
Convolution /Conv_14 1 1 /PRelu_13_output_0 /Conv_14_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_14 1 1 /Conv_14_output_0 /PRelu_14_output_0 0=64
Convolution /Conv_15 1 1 /PRelu_14_output_0 /Conv_15_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_15 1 1 /Conv_15_output_0 /PRelu_15_output_0 0=64
Convolution /Conv_16 1 1 /PRelu_15_output_0 /Conv_16_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_16 1 1 /Conv_16_output_0 /PRelu_16_output_0 0=64
Convolution /Conv_17 1 1 /PRelu_16_output_0 /Conv_17_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_17 1 1 /Conv_17_output_0 /PRelu_17_output_0 0=64
Convolution /Conv_18 1 1 /PRelu_17_output_0 /Conv_18_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_18 1 1 /Conv_18_output_0 /PRelu_18_output_0 0=64
Convolution /Conv_19 1 1 /PRelu_18_output_0 /Conv_19_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_19 1 1 /Conv_19_output_0 /PRelu_19_output_0 0=64
Convolution /Conv_20 1 1 /PRelu_19_output_0 /Conv_20_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_20 1 1 /Conv_20_output_0 /PRelu_20_output_0 0=64
Convolution /Conv_21 1 1 /PRelu_20_output_0 /Conv_21_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_21 1 1 /Conv_21_output_0 /PRelu_21_output_0 0=64
Convolution /Conv_22 1 1 /PRelu_21_output_0 /Conv_22_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_22 1 1 /Conv_22_output_0 /PRelu_22_output_0 0=64
Convolution /Conv_23 1 1 /PRelu_22_output_0 /Conv_23_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_23 1 1 /Conv_23_output_0 /PRelu_23_output_0 0=64
Convolution /Conv_24 1 1 /PRelu_23_output_0 /Conv_24_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_24 1 1 /Conv_24_output_0 /PRelu_24_output_0 0=64
Convolution /Conv_25 1 1 /PRelu_24_output_0 /Conv_25_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_25 1 1 /Conv_25_output_0 /PRelu_25_output_0 0=64
Convolution /Conv_26 1 1 /PRelu_25_output_0 /Conv_26_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_26 1 1 /Conv_26_output_0 /PRelu_26_output_0 0=64
Convolution /Conv_27 1 1 /PRelu_26_output_0 /Conv_27_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_27 1 1 /Conv_27_output_0 /PRelu_27_output_0 0=64
Convolution /Conv_28 1 1 /PRelu_27_output_0 /Conv_28_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_28 1 1 /Conv_28_output_0 /PRelu_28_output_0 0=64
Convolution /Conv_29 1 1 /PRelu_28_output_0 /Conv_29_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_29 1 1 /Conv_29_output_0 /PRelu_29_output_0 0=64
Convolution /Conv_30 1 1 /PRelu_29_output_0 /Conv_30_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_30 1 1 /Conv_30_output_0 /PRelu_30_output_0 0=64
Convolution /Conv_31 1 1 /PRelu_30_output_0 /Conv_31_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_31 1 1 /Conv_31_output_0 /PRelu_31_output_0 0=64
Convolution /Conv_32 1 1 /PRelu_31_output_0 /Conv_32_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_32 1 1 /Conv_32_output_0 /PRelu_32_output_0 0=64
Convolution /Conv_33 1 1 /PRelu_32_output_0 /Conv_33_output_0 0=48 1=3 4=1 5=1 6=27648
PixelShuffle /DepthToSpace 1 1 /Conv_33_output_0 /DepthToSpace_output_0 0=4
Interp /Resize 1 1 data_splitncnn_0 /Resize_output_0 0=1 1=4.000000e+00 2=4.000000e+00
BinaryOp /Add 2 1 /DepthToSpace_output_0 /Resize_output_0 /Add_output_0
Clip /Clip 1 1 /Add_output_0 output 0=0.000000e+00 1=1.000000e+00

Binary file not shown.

View File

@@ -0,0 +1,75 @@
7767517
73 74
Input data 0 1 data
Split splitncnn_input0 1 2 data data_splitncnn_0 data_splitncnn_1
Convolution /Conv 1 1 data_splitncnn_1 /Conv_output_0 0=64 1=3 4=1 5=1 6=1728
PReLU /PRelu 1 1 /Conv_output_0 /PRelu_output_0 0=64
Convolution /Conv_1 1 1 /PRelu_output_0 /Conv_1_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_1 1 1 /Conv_1_output_0 /PRelu_1_output_0 0=64
Convolution /Conv_2 1 1 /PRelu_1_output_0 /Conv_2_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_2 1 1 /Conv_2_output_0 /PRelu_2_output_0 0=64
Convolution /Conv_3 1 1 /PRelu_2_output_0 /Conv_3_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_3 1 1 /Conv_3_output_0 /PRelu_3_output_0 0=64
Convolution /Conv_4 1 1 /PRelu_3_output_0 /Conv_4_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_4 1 1 /Conv_4_output_0 /PRelu_4_output_0 0=64
Convolution /Conv_5 1 1 /PRelu_4_output_0 /Conv_5_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_5 1 1 /Conv_5_output_0 /PRelu_5_output_0 0=64
Convolution /Conv_6 1 1 /PRelu_5_output_0 /Conv_6_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_6 1 1 /Conv_6_output_0 /PRelu_6_output_0 0=64
Convolution /Conv_7 1 1 /PRelu_6_output_0 /Conv_7_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_7 1 1 /Conv_7_output_0 /PRelu_7_output_0 0=64
Convolution /Conv_8 1 1 /PRelu_7_output_0 /Conv_8_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_8 1 1 /Conv_8_output_0 /PRelu_8_output_0 0=64
Convolution /Conv_9 1 1 /PRelu_8_output_0 /Conv_9_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_9 1 1 /Conv_9_output_0 /PRelu_9_output_0 0=64
Convolution /Conv_10 1 1 /PRelu_9_output_0 /Conv_10_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_10 1 1 /Conv_10_output_0 /PRelu_10_output_0 0=64
Convolution /Conv_11 1 1 /PRelu_10_output_0 /Conv_11_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_11 1 1 /Conv_11_output_0 /PRelu_11_output_0 0=64
Convolution /Conv_12 1 1 /PRelu_11_output_0 /Conv_12_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_12 1 1 /Conv_12_output_0 /PRelu_12_output_0 0=64
Convolution /Conv_13 1 1 /PRelu_12_output_0 /Conv_13_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_13 1 1 /Conv_13_output_0 /PRelu_13_output_0 0=64
Convolution /Conv_14 1 1 /PRelu_13_output_0 /Conv_14_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_14 1 1 /Conv_14_output_0 /PRelu_14_output_0 0=64
Convolution /Conv_15 1 1 /PRelu_14_output_0 /Conv_15_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_15 1 1 /Conv_15_output_0 /PRelu_15_output_0 0=64
Convolution /Conv_16 1 1 /PRelu_15_output_0 /Conv_16_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_16 1 1 /Conv_16_output_0 /PRelu_16_output_0 0=64
Convolution /Conv_17 1 1 /PRelu_16_output_0 /Conv_17_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_17 1 1 /Conv_17_output_0 /PRelu_17_output_0 0=64
Convolution /Conv_18 1 1 /PRelu_17_output_0 /Conv_18_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_18 1 1 /Conv_18_output_0 /PRelu_18_output_0 0=64
Convolution /Conv_19 1 1 /PRelu_18_output_0 /Conv_19_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_19 1 1 /Conv_19_output_0 /PRelu_19_output_0 0=64
Convolution /Conv_20 1 1 /PRelu_19_output_0 /Conv_20_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_20 1 1 /Conv_20_output_0 /PRelu_20_output_0 0=64
Convolution /Conv_21 1 1 /PRelu_20_output_0 /Conv_21_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_21 1 1 /Conv_21_output_0 /PRelu_21_output_0 0=64
Convolution /Conv_22 1 1 /PRelu_21_output_0 /Conv_22_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_22 1 1 /Conv_22_output_0 /PRelu_22_output_0 0=64
Convolution /Conv_23 1 1 /PRelu_22_output_0 /Conv_23_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_23 1 1 /Conv_23_output_0 /PRelu_23_output_0 0=64
Convolution /Conv_24 1 1 /PRelu_23_output_0 /Conv_24_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_24 1 1 /Conv_24_output_0 /PRelu_24_output_0 0=64
Convolution /Conv_25 1 1 /PRelu_24_output_0 /Conv_25_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_25 1 1 /Conv_25_output_0 /PRelu_25_output_0 0=64
Convolution /Conv_26 1 1 /PRelu_25_output_0 /Conv_26_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_26 1 1 /Conv_26_output_0 /PRelu_26_output_0 0=64
Convolution /Conv_27 1 1 /PRelu_26_output_0 /Conv_27_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_27 1 1 /Conv_27_output_0 /PRelu_27_output_0 0=64
Convolution /Conv_28 1 1 /PRelu_27_output_0 /Conv_28_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_28 1 1 /Conv_28_output_0 /PRelu_28_output_0 0=64
Convolution /Conv_29 1 1 /PRelu_28_output_0 /Conv_29_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_29 1 1 /Conv_29_output_0 /PRelu_29_output_0 0=64
Convolution /Conv_30 1 1 /PRelu_29_output_0 /Conv_30_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_30 1 1 /Conv_30_output_0 /PRelu_30_output_0 0=64
Convolution /Conv_31 1 1 /PRelu_30_output_0 /Conv_31_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_31 1 1 /Conv_31_output_0 /PRelu_31_output_0 0=64
Convolution /Conv_32 1 1 /PRelu_31_output_0 /Conv_32_output_0 0=64 1=3 4=1 5=1 6=36864
PReLU /PRelu_32 1 1 /Conv_32_output_0 /PRelu_32_output_0 0=64
Convolution /Conv_33 1 1 /PRelu_32_output_0 /Conv_33_output_0 0=48 1=3 4=1 5=1 6=27648
PixelShuffle /DepthToSpace 1 1 /Conv_33_output_0 /DepthToSpace_output_0 0=4
Interp /Resize 1 1 data_splitncnn_0 /Resize_output_0 0=1 1=4.000000e+00 2=4.000000e+00
BinaryOp /Add 2 1 /DepthToSpace_output_0 /Resize_output_0 /Add_output_0
Clip /Clip 1 1 /Add_output_0 output 0=0.000000e+00 1=1.000000e+00

View File

@@ -5,8 +5,8 @@ pkgdesc="A machine learning-based video super resolution and frame interpolation
arch=('x86_64')
url="https://github.com/k4yt3x/video2x"
license=('AGPL-3.0-only')
depends=('ffmpeg' 'ncnn' 'vulkan-driver' 'spdlog' 'boost-libs')
makedepends=('git' 'cmake' 'clang' 'vulkan-headers' 'openmp' 'boost')
depends=('ffmpeg' 'ncnn' 'openmp' 'vulkan-driver' 'spdlog' 'boost-libs')
makedepends=('git' 'cmake' 'clang' 'vulkan-headers' 'boost')
pkgver() {
printf "r%s.%s" "$(git rev-list --count HEAD)" "$(git rev-parse --short=7 HEAD)"

View File

@@ -38,7 +38,7 @@ ENV VK_ICD_FILENAMES=/usr/share/vulkan/icd.d/nvidia_icd.json\
COPY --from=builder /tmp/video2x.pkg.tar.zst /video2x.pkg.tar.zst
RUN pacman -Sy --noconfirm nvidia-utils vulkan-radeon vulkan-intel vulkan-swrast \
ffmpeg ncnn spdlog boost-libs \
ffmpeg ncnn openmp spdlog boost-libs \
&& pacman -U --noconfirm /video2x.pkg.tar.zst \
&& rm -rf /video2x.pkg.tar.zst /var/cache/pacman/pkg/*

View File

@@ -6,10 +6,9 @@ extern "C" {
#include <libavutil/opt.h>
}
#include "logger_manager.h"
#include "avutils.h"
#include "conversions.h"
#include "logger_manager.h"
namespace video2x {
namespace encoder {
@@ -45,6 +44,9 @@ int Encoder::init(
) {
int ret;
// Copy the encoder configuration
enc_cfg_ = enc_cfg;
// Allocate the output format context
avformat_alloc_output_context2(&ofmt_ctx_, nullptr, nullptr, out_fpath.u8string().c_str());
if (!ofmt_ctx_) {
@@ -188,8 +190,8 @@ int Encoder::init(
out_vstream->r_frame_rate = enc_ctx_->framerate;
// Copy other streams if necessary
if (enc_cfg.copy_streams) {
// Allocate the stream mape frame o
if (enc_cfg.copy_audio_streams || enc_cfg.copy_subtitle_streams) {
// Allocate the stream map
stream_map_ =
reinterpret_cast<int*>(av_malloc_array(ifmt_ctx->nb_streams, sizeof(*stream_map_)));
if (!stream_map_) {
@@ -198,22 +200,37 @@ int Encoder::init(
}
// Map each input stream to an output stream
for (int i = 0; i < static_cast<int>(ifmt_ctx->nb_streams); i++) {
AVStream* in_stream = ifmt_ctx->streams[i];
for (int stream_index = 0; stream_index < static_cast<int>(ifmt_ctx->nb_streams);
stream_index++) {
AVStream* in_stream = ifmt_ctx->streams[stream_index];
AVCodecParameters* in_codecpar = in_stream->codecpar;
// Skip the input video stream as it's already processed
if (i == in_vstream_idx) {
stream_map_[i] = out_vstream_idx_;
if (stream_index == in_vstream_idx) {
stream_map_[stream_index] = out_vstream_idx_;
continue;
}
// Map only audio and subtitle streams (skip other types)
if (in_codecpar->codec_type != AVMEDIA_TYPE_AUDIO &&
in_codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) {
stream_map_[i] = -1;
logger()->warn("Skipping unsupported stream type at index: {}", i);
continue;
// Determine if the current stream should be skipped
switch (in_codecpar->codec_type) {
case AVMEDIA_TYPE_AUDIO:
if (!enc_cfg.copy_audio_streams) {
stream_map_[stream_index] = -1;
continue;
}
logger()->debug("Copying audio stream at index: {}", stream_index);
break;
case AVMEDIA_TYPE_SUBTITLE:
if (!enc_cfg.copy_subtitle_streams) {
stream_map_[stream_index] = -1;
continue;
}
logger()->debug("Copying subtitle stream at index: {}", stream_index);
break;
default:
stream_map_[stream_index] = -1;
logger()->warn("Skipping unsupported stream type at index: {}", stream_index);
continue;
}
// Create corresponding output stream for audio and subtitle streams
@@ -242,8 +259,8 @@ int Encoder::init(
out_stream->time_base = in_stream->time_base;
// Map input stream index to output stream index
logger()->debug("Stream mapping: {} (in) -> {} (out)", i, out_stream->index);
stream_map_[i] = out_stream->index;
logger()->debug("Stream mapping: {} (in) -> {} (out)", stream_index, out_stream->index);
stream_map_[stream_index] = out_stream->index;
}
}
@@ -275,7 +292,9 @@ int Encoder::write_frame(AVFrame* frame, int64_t frame_idx) {
frame->pict_type = AV_PICTURE_TYPE_NONE;
// Calculate this frame's presentation timestamp (PTS)
frame->pts = av_rescale_q(frame_idx, av_inv_q(enc_ctx_->framerate), enc_ctx_->time_base);
if (enc_cfg_.recalculate_pts) {
frame->pts = av_rescale_q(frame_idx, av_inv_q(enc_ctx_->framerate), enc_ctx_->time_base);
}
// Convert the frame to the encoder's pixel format if needed
if (frame->format != enc_ctx_->pix_fmt) {

View File

@@ -1,7 +1,5 @@
#include "filter_libplacebo.h"
#include <cstdio>
#include <spdlog/spdlog.h>
#include "fsutils.h"

View File

@@ -17,12 +17,14 @@ FilterRealesrgan::FilterRealesrgan(
int gpuid,
bool tta_mode,
int scaling_factor,
int noise_level,
const fsutils::StringType model_name
)
: realesrgan_(nullptr),
gpuid_(gpuid),
tta_mode_(tta_mode),
scaling_factor_(scaling_factor),
noise_level_(noise_level),
model_name_(std::move(model_name)) {}
FilterRealesrgan::~FilterRealesrgan() {
@@ -35,10 +37,16 @@ int FilterRealesrgan::init(AVCodecContext* dec_ctx, AVCodecContext* enc_ctx, AVB
std::filesystem::path model_param_path;
std::filesystem::path model_bin_path;
fsutils::StringType param_file_name =
model_name_ + STR("-x") + fsutils::to_string_type(scaling_factor_) + STR(".param");
fsutils::StringType bin_file_name =
model_name_ + STR("-x") + fsutils::to_string_type(scaling_factor_) + STR(".bin");
fsutils::StringType param_file_name = model_name_;
fsutils::StringType bin_file_name = model_name_;
if (model_name_ == STR("realesr-generalv3") && noise_level_ > 0) {
param_file_name += STR("-wdn");
bin_file_name += STR("-wdn");
}
param_file_name += STR("-x") + fsutils::to_string_type(scaling_factor_) + STR(".param");
bin_file_name += STR("-x") + fsutils::to_string_type(scaling_factor_) + STR(".bin");
// Find the model paths by model name if provided
model_param_path = std::filesystem::path(STR("models")) / STR("realesrgan") / param_file_name;

View File

@@ -1,5 +1,4 @@
#include "libvideo2x.h"
#include <libavcodec/avcodec.h>
extern "C" {
#include <libavutil/avutil.h>
@@ -238,8 +237,10 @@ int VideoProcessor::process_frames(
}
// Calculate this frame's presentation timestamp (PTS)
frame->pts =
av_rescale_q(frame_idx_, av_inv_q(enc_ctx->framerate), enc_ctx->time_base);
if (enc_cfg_.recalculate_pts) {
frame->pts =
av_rescale_q(frame_idx_, av_inv_q(enc_ctx->framerate), enc_ctx->time_base);
}
// Process the frame based on the selected processing mode
AVFrame* proc_frame = nullptr;
@@ -262,10 +263,11 @@ int VideoProcessor::process_frames(
return ret;
}
av_frame_unref(frame.get());
frame_idx_++;
frame_idx_.fetch_add(1);
logger()->debug("Processed frame {}/{}", frame_idx_.load(), total_frames_.load());
}
} else if (enc_cfg_.copy_streams && stream_map[packet->stream_index] >= 0) {
} else if ((enc_cfg_.copy_audio_streams || enc_cfg_.copy_subtitle_streams) &&
stream_map[packet->stream_index] >= 0) {
ret = write_raw_packet(packet.get(), ifmt_ctx, ofmt_ctx, stream_map);
if (ret < 0) {
return ret;
@@ -274,12 +276,12 @@ int VideoProcessor::process_frames(
av_packet_unref(packet.get());
}
// Flush the filter
// Flush the processor
std::vector<AVFrame*> raw_flushed_frames;
ret = processor->flush(raw_flushed_frames);
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
logger()->critical("Error flushing filter: {}", errbuf);
logger()->critical("Error flushing processor: {}", errbuf);
return ret;
}
@@ -295,7 +297,7 @@ int VideoProcessor::process_frames(
if (ret < 0) {
return ret;
}
frame_idx_++;
frame_idx_.fetch_add(1);
}
// Flush the encoder
@@ -314,7 +316,7 @@ int VideoProcessor::write_frame(AVFrame* frame, encoder::Encoder& encoder) {
int ret = 0;
if (!benchmark_) {
ret = encoder.write_frame(frame, frame_idx_);
ret = encoder.write_frame(frame, frame_idx_.load());
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
logger()->critical("Error encoding/writing frame: {}", errbuf);
@@ -437,7 +439,7 @@ int VideoProcessor::process_interpolation(
}
}
frame_idx_++;
frame_idx_.fetch_add(1);
current_time_step += time_step;
}

View File

@@ -91,6 +91,7 @@ void ProcessorFactory::init_default_processors(ProcessorFactory& factory) {
static_cast<int>(vk_device_index),
config.tta_mode,
proc_cfg.scaling_factor,
proc_cfg.noise_level,
config.model_name
);
}

View File

@@ -95,7 +95,9 @@ int parse_args(
encoder_opts.add_options()
("codec,c", PO_STR_VALUE<video2x::fsutils::StringType>()
->default_value(STR("libx264"), "libx264"), "Output codec")
("no-copy-streams", "Do not copy audio and subtitle streams")
("no-recalculate-pts", "Do not recalculate presentation timestamps")
("no-copy-audio-streams", "Do not copy audio streams")
("no-copy-subtitle-streams", "Do not copy subtitle streams")
("pix-fmt", PO_STR_VALUE<video2x::fsutils::StringType>(), "Output pixel format")
("bit-rate", po::value<int64_t>(&enc_cfg.bit_rate)->default_value(0),
"Bitrate in bits per second")
@@ -134,7 +136,7 @@ int parse_args(
("scaling-factor,s", po::value<int>(&proc_cfg.scaling_factor)
->notifier([](int v) { validate_min(v, "scaling-factor", 2); }), "Scaling factor")
("noise-level,n", po::value<int>(&proc_cfg.noise_level)
->notifier([](int v) { validate_min(v, "noise-level", 0); }), "Noise level")
->notifier([](int v) { validate_min(v, "noise-level", -1); }), "Noise level")
;
po::options_description interp_opts("Frame interpolation options");
@@ -156,23 +158,23 @@ int parse_args(
"anime4k-v4-b, anime4k-v4-b+b, anime4k-v4-c, anime4k-v4-c+a, anime4k-v4.1-gan)")
;
po::options_description realesrgan_opts("RealESRGAN options");
po::options_description realesrgan_opts("Real-ESRGAN options");
realesrgan_opts.add_options()
("realesrgan-model", PO_STR_VALUE<video2x::fsutils::StringType>()
->default_value(STR("realesr-animevideov3"), "realesr-animevideov3")
->notifier(validate_realesrgan_model_name),
"Name of the RealESRGAN model to use (realesr-animevideov3, realesrgan-plus-anime, "
"realesrgan-plus)")
"Name of the Real-ESRGAN model to use (realesr-animevideov3, "
"realesrgan-plus-anime, realesrgan-plus, realesr-generalv3)")
;
po::options_description realcugan_opts("RealCUGAN options");
po::options_description realcugan_opts("Real-CUGAN options");
realcugan_opts.add_options()
("realcugan-model", PO_STR_VALUE<video2x::fsutils::StringType>()
->default_value(STR("models-se"), "models-se")
->notifier(validate_realcugan_model_name),
"Name of the RealCUGAN model to use (models-nose, models-pro, models-se)")
"Name of the Real-CUGAN model to use (models-nose, models-pro, models-se)")
("realcugan-threads", po::value<int>()->default_value(1),
"Number of threads to use for RealCUGAN")
"Number of threads to use for Real-CUGAN")
("realcugan-syncgap", po::value<int>()->default_value(3),
"Sync gap mode; 0:no sync, 1: accurate sync: 2 = rough sync, 3: very rough sync")
;
@@ -212,7 +214,7 @@ int parse_args(
<< " video2x -i input.mp4 -o output.mp4 -w 3840 -h 2160 \\" << std::endl
<< " -p libplacebo --libplacebo-shader anime4k-v4-a+a" << std::endl
<< std::endl
<< " Upscale a film by 4x using RealESRGAN with custom encoder options:"
<< " Upscale a film by 4x using Real-ESRGAN with custom encoder options:"
<< std::endl
<< " video2x -i input.mkv -o output.mkv -s 4 \\" << std::endl
<< " -p realesrgan --realesrgan-model realesrgan-plus \\" << std::endl
@@ -323,8 +325,10 @@ int parse_args(
enc_cfg.codec = codec_str;
}
// Parse copy streams flag
enc_cfg.copy_streams = vm.count("no-copy-streams") == 0;
// Parse copy streams options
enc_cfg.recalculate_pts = vm.count("no-recalculate-pts") == 0;
enc_cfg.copy_audio_streams = vm.count("no-copy-audio-streams") == 0;
enc_cfg.copy_subtitle_streams = vm.count("no-copy-subtitle-streams") == 0;
// Parse pixel format to AVPixelFormat
enc_cfg.pix_fmt = AV_PIX_FMT_NONE;
@@ -385,16 +389,19 @@ int parse_args(
}
case video2x::processors::ProcessorType::RealESRGAN: {
if (!vm.count("realesrgan-model")) {
video2x::logger()->critical("RealESRGAN model name must be set for RealESRGAN."
);
video2x::logger()->critical("The model name must be set for Real-ESRGAN.");
return -1;
}
if (proc_cfg.scaling_factor < 2 || proc_cfg.scaling_factor > 4) {
video2x::logger()->critical(
"Scaling factor must be set to 2, 3, or 4 for RealESRGAN."
"Scaling factor must be set to 2, 3, or 4 for Real-ESRGAN."
);
return -1;
}
// Reject noise levels outside the range this check accepts for Real-ESRGAN (-1, 0, 1).
// The message lists every accepted value so it agrees with the bounds tested here.
if (proc_cfg.noise_level < -1 || proc_cfg.noise_level > 1) {
    video2x::logger()->critical(
        "Noise level must be set to -1, 0, or 1 for Real-ESRGAN."
    );
    return -1;
}
proc_cfg.processor_type = video2x::processors::ProcessorType::RealESRGAN;
video2x::processors::RealESRGANConfig realesrgan_config;
@@ -406,31 +413,31 @@ int parse_args(
}
case video2x::processors::ProcessorType::RealCUGAN: {
if (!vm.count("realcugan-model")) {
video2x::logger()->critical("RealCUGAN model name must be set for RealCUGAN.");
video2x::logger()->critical("The model name must be set for Real-CUGAN.");
return -1;
}
if (vm.count("realcugan-threads") && vm["realcugan-threads"].as<int>() < 1) {
video2x::logger()->critical(
"Number of threads must be at least 1 for RealCUGAN."
"Number of threads must be at least 1 for Real-CUGAN."
);
return -1;
}
if (vm.count("realcugan-syncgap") && (vm["realcugan-syncgap"].as<int>() < 0 ||
vm["realcugan-syncgap"].as<int>() > 3)) {
video2x::logger()->critical(
"Sync gap mode must be set to 0, 1, 2, or 3 for RealCUGAN."
"Sync gap mode must be set to 0, 1, 2, or 3 for Real-CUGAN."
);
return -1;
}
if (proc_cfg.scaling_factor < 2 || proc_cfg.scaling_factor > 4) {
video2x::logger()->critical(
"Scaling factor must be set to 2, 3, or 4 for RealCUGAN."
"Scaling factor must be set to 2, 3, or 4 for Real-CUGAN."
);
return -1;
}
if (proc_cfg.noise_level < -1 || proc_cfg.noise_level > 3) {
video2x::logger()->critical(
"Noise level must be set to -1, 0, 1, 2, or 3 for RealCUGAN."
"Noise level must be set to -1, 0, 1, 2, or 3 for Real-CUGAN."
);
return -1;
}
@@ -447,7 +454,7 @@ int parse_args(
}
case video2x::processors::ProcessorType::RIFE: {
if (!vm.count("rife-model")) {
video2x::logger()->critical("RIFE model name must be set for RIFE.");
video2x::logger()->critical("The model name must be set for RIFE.");
return -1;
}
if (proc_cfg.frm_rate_mul < 2) {

View File

@@ -24,7 +24,10 @@ void validate_anime4k_shader_name(const video2x::fsutils::StringType& shader_nam
void validate_realesrgan_model_name(const video2x::fsutils::StringType& model_name) {
static const std::unordered_set<video2x::fsutils::StringType> valid_realesrgan_models = {
STR("realesrgan-plus"), STR("realesrgan-plus-anime"), STR("realesr-animevideov3")
STR("realesrgan-plus"),
STR("realesrgan-plus-anime"),
STR("realesr-animevideov3"),
STR("realesr-generalv3"),
};
if (valid_realesrgan_models.count(model_name) == 0) {
throw po::validation_error(