video-subtitle-remover/backend/tools/hardware_accelerator.py
flavioy e801d58e80
GPU acceleration and batch-processing optimizations; update README
- STTN Auto/Det: wrap inference in a single torch.no_grad context to cut repeated context-switch overhead
- STTN Auto: add FramePrefetcher frame read-ahead and adjust batch size dynamically based on GPU VRAM (a sketch follows this commit message)
- Lama Inpaint: add _inpaint_batch batched inference, merging multiple frames into one GPU pass
- ProPainter: replace copy.deepcopy with a shallow copy, and run gc.collect after each region is processed
- HardwareAccelerator: add a get_available_vram_mb VRAM query method
- README: add the app logo; sync the English README_en.md

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-08 00:17:50 +08:00
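
The STTN batching code is not part of this file. As a rough illustration of the dynamic-batch idea from the commit message, the sketch below shows how get_available_vram_mb (defined in this file) could drive a batch size; the pick_batch_size helper and the per-frame VRAM figure are hypothetical, not taken from the repository:

from backend.tools.hardware_accelerator import HardwareAccelerator

def pick_batch_size(vram_per_frame_mb=350.0, min_batch=1, max_batch=16):
    # Hypothetical helper, not in the repository: size the batch to the VRAM actually free.
    free_mb = HardwareAccelerator.instance().get_available_vram_mb()
    if free_mb <= 0:
        return min_batch  # no usable GPU (or acceleration disabled): stay at the minimum
    usable_mb = free_mb * 0.8  # keep headroom for activations and memory fragmentation
    return max(min_batch, min(max_batch, int(usable_mb // vram_per_frame_mb)))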

155 lines · 5.6 KiB · Python

import traceback
import importlib.util

import torch

from backend.config import tr


class HardwareAccelerator:
    # Class variable holding the singleton instance
    _instance = None

    @classmethod
    def instance(cls):
        """Return the singleton instance, creating and initializing it on first use."""
        if cls._instance is None:
            cls._instance = HardwareAccelerator()
            cls._instance.initialize()
        return cls._instance
    def __init__(self):
        self.__cuda = False
        self.__dml = False
        self.__mps = False
        self.__onnx_providers = []
        self.__enabled = True
        self.__device = None

    def initialize(self):
        self.check_directml_available()
        self.check_cuda_available()
        self.check_mps_available()
        self.load_onnx_providers()
    def check_directml_available(self):
        # find_spec returns a ModuleSpec (or None), so coerce it to a boolean
        self.__dml = importlib.util.find_spec("torch_directml") is not None

    def check_cuda_available(self):
        self.__cuda = torch.cuda.is_available()

    def check_mps_available(self):
        self.__mps = torch.backends.mps.is_available() and torch.backends.mps.is_built()
    def load_onnx_providers(self):
        try:
            import onnxruntime as ort
            available_providers = ort.get_available_providers()
            for provider in available_providers:
                if provider == "CPUExecutionProvider":
                    continue
                if provider not in [
                    "DmlExecutionProvider",       # DirectML, for Windows GPUs
                    "ROCMExecutionProvider",      # AMD ROCm
                    "MIGraphXExecutionProvider",  # AMD MIGraphX
                    "VitisAIExecutionProvider",   # AMD VitisAI, for RyzenAI & Windows; in practice performs about the same as DirectML
                    "OpenVINOExecutionProvider",  # Intel GPU
                    "MetalExecutionProvider",     # Apple macOS
                    "CoreMLExecutionProvider",    # Apple macOS
                    "CUDAExecutionProvider",      # Nvidia GPU
                ]:
                    print(tr['Main']['OnnxExectionProviderNotSupportedSkipped'].format(provider))
                    continue
                print(tr['Main']['OnnxExecutionProviderDetected'].format(provider))
                self.__onnx_providers.append(provider)
        except ModuleNotFoundError:
            print(tr['Main']['OnnxRuntimeNotInstall'])
    def has_accelerator(self):
        if not self.__enabled:
            return False
        return self.__cuda or self.__dml or self.__mps or len(self.__onnx_providers) > 0

    @property
    def accelerator_name(self):
        if not self.__enabled:
            return "CPU"
        if self.__dml:
            return "DirectML"
        if self.__cuda:
            return "GPU"
        if self.__mps:
            return "MPS"
        if len(self.__onnx_providers) > 0:
            return ", ".join(self.__onnx_providers)
        return "CPU"

    @property
    def onnx_providers(self):
        if not self.__enabled:
            return ["CPUExecutionProvider"]
        return self.__onnx_providers

    def has_cuda(self):
        if not self.__enabled:
            return False
        return self.__cuda

    def has_mps(self):
        if not self.__enabled:
            return False
        return self.__mps

    def set_enabled(self, enable):
        self.__enabled = enable
    def get_available_vram_mb(self):
        """Return the available GPU VRAM in MB; returns 0 when no GPU is usable."""
        if not self.__enabled:
            return 0
        if self.__cuda:
            try:
                free_vram = torch.cuda.mem_get_info()[0]  # (free, total)
                return free_vram / (1024 * 1024)
            except Exception:
                return 0
        if self.__mps:
            try:
                # MPS has no direct query API; use total system memory (unified on Apple Silicon) as a reference
                import subprocess
                result = subprocess.run(['sysctl', '-n', 'hw.memsize'], capture_output=True, text=True)
                total_mem = int(result.stdout.strip()) / (1024 * 1024)
                return total_mem * 0.5  # conservatively assume half of it is available
            except Exception:
                return 0
        return 0
    @property
    def device(self):
        """
        onnxruntime-directml 1.21.1-1.22.0 (newer versions untested) and torch-directml cannot both
        be initialized; they interfere with each other, and inference then fails with:
            site-packages/onnxruntime/capi/onnxruntime_inference_collection.py", line 266, in run
                return self._sess.run(output_names, input_feed, run_options)
            UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb2 in position 344: invalid start byte
        onnxruntime-directml 1.21.1 works on its own, but it does not run on Win10 (Win11 is fine).
        To avoid the conflict, and to avoid rewriting a QPT smart-deployment pipeline, torch_directml
        is initialized lazily here while the project stays on onnxruntime-directml 1.20.1.
        Moving SubtitleDetect into a separate process would also work.
        """
        if self.__enabled:
            if self.__dml:
                try:
                    import torch_directml
                    return torch_directml.device(torch_directml.default_device())
                except Exception:
                    traceback.print_exc()
                    self.__dml = False
            if self.__cuda:
                return torch.device("cuda:0")
            if self.__mps:
                return torch.device("mps")
        return torch.device("cpu")