Mirror of https://github.com/YaoFANGUK/video-subtitle-remover.git, synced 2026-04-12 23:27:35 +08:00
- STTN Auto/Det: wrap inference uniformly in torch.no_grad to cut repeated context-switch overhead
- STTN Auto: add FramePrefetcher frame prefetching; adjust batch size dynamically from available GPU VRAM (see the sketch after this list)
- Lama Inpaint: add _inpaint_batch batched inference, merging multiple frames into a single GPU pass
- ProPainter: replace copy.deepcopy with a shallow copy; gc.collect after each region is processed
- HardwareAccelerator: add get_available_vram_mb VRAM query method
- README: add application logo; sync the English README_en.md

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
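The VRAM-based batch sizing mentioned above is not part of the file shown below; this file only adds the get_available_vram_mb query it relies on. As a rough illustration, a sizing helper built on that method could look like the following minimal sketch. estimate_batch_size, vram_per_frame_mb, and the import path are assumptions for illustration, not the repository's actual API.

from backend.tools.hardware_accelerator import HardwareAccelerator  # assumed module path, not confirmed by this excerpt


def estimate_batch_size(vram_per_frame_mb=512.0, min_batch=1, max_batch=16):
    """Derive a batch size from the free VRAM reported by get_available_vram_mb() (illustrative only)."""
    free_mb = HardwareAccelerator.instance().get_available_vram_mb()
    if free_mb <= 0:  # CPU-only, or the VRAM query failed
        return min_batch
    return max(min_batch, min(max_batch, int(free_mb // vram_per_frame_mb)))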
155 lines · 5.6 KiB · Python
import traceback
import importlib.util

import torch

from backend.config import tr


class HardwareAccelerator:

    # Class variable holding the singleton instance
    _instance = None

    @classmethod
    def instance(cls):
        """Return the singleton instance."""
        if cls._instance is None:
            cls._instance = HardwareAccelerator()
            cls._instance.initialize()
        return cls._instance

    def __init__(self):
        self.__cuda = False
        self.__dml = False
        self.__mps = False
        self.__onnx_providers = []
        self.__enabled = True
        self.__device = None

    def initialize(self):
        self.check_directml_available()
        self.check_cuda_available()
        self.check_mps_available()
        self.load_onnx_providers()

    def check_directml_available(self):
        self.__dml = importlib.util.find_spec("torch_directml") is not None

    def check_cuda_available(self):
        self.__cuda = torch.cuda.is_available()

    def check_mps_available(self):
        self.__mps = torch.backends.mps.is_available() and torch.backends.mps.is_built()

    def load_onnx_providers(self):
        try:
            import onnxruntime as ort
            available_providers = ort.get_available_providers()
            for provider in available_providers:
                if provider in [
                    "CPUExecutionProvider"
                ]:
                    continue
                if provider not in [
                    "DmlExecutionProvider",       # DirectML, for Windows GPUs
                    "ROCMExecutionProvider",      # AMD ROCm
                    "MIGraphXExecutionProvider",  # AMD MIGraphX
                    "VitisAIExecutionProvider",   # AMD VitisAI, for RyzenAI & Windows; measured performance is roughly on par with DirectML
                    "OpenVINOExecutionProvider",  # Intel GPU
                    "MetalExecutionProvider",     # Apple macOS
                    "CoreMLExecutionProvider",    # Apple macOS
                    "CUDAExecutionProvider",      # Nvidia GPU
                ]:
                    print(tr['Main']['OnnxExectionProviderNotSupportedSkipped'].format(provider))
                    continue
                print(tr['Main']['OnnxExecutionProviderDetected'].format(provider))
                self.__onnx_providers.append(provider)
        except ModuleNotFoundError:
            print(tr['Main']['OnnxRuntimeNotInstall'])

    def has_accelerator(self):
        if not self.__enabled:
            return False
        return self.__cuda or self.__dml or self.__mps or len(self.__onnx_providers) > 0

    @property
    def accelerator_name(self):
        if not self.__enabled:
            return "CPU"
        if self.__dml:
            return "DirectML"
        if self.__cuda:
            return "GPU"
        if self.__mps:
            return "MPS"
        elif len(self.__onnx_providers) > 0:
            return ", ".join(self.__onnx_providers)
        else:
            return "CPU"

    @property
    def onnx_providers(self):
        if not self.__enabled:
            return ["CPUExecutionProvider"]
        return self.__onnx_providers

    def has_cuda(self):
        if not self.__enabled:
            return False
        return self.__cuda

    def has_mps(self):
        if not self.__enabled:
            return False
        return self.__mps

    def set_enabled(self, enable):
        self.__enabled = enable

    def get_available_vram_mb(self):
        """Return the available GPU memory in MB; returns 0 when no GPU is available."""
        if not self.__enabled:
            return 0
        if self.__cuda:
            try:
                free_vram = torch.cuda.mem_get_info()[0]  # (free, total)
                return free_vram / (1024 * 1024)
            except Exception:
                return 0
        if self.__mps:
            try:
                # MPS has no direct query API; use total system memory as a reference
                import subprocess
                result = subprocess.run(['sysctl', '-n', 'hw.memsize'], capture_output=True, text=True)
                total_mem = int(result.stdout.strip()) / (1024 * 1024)
                return total_mem * 0.5  # conservatively assume half of it is usable
            except Exception:
                return 0
        return 0

    @property
    def device(self):
        """
        onnxruntime-directml 1.21.1-1.22.0 (newer versions untested) and torch-directml cannot both be
        initialized; they interfere with each other and fail with:
            site-packages/onnxruntime/capi/onnxruntime_inference_collection.py", line 266, in run
                return self._sess.run(output_names, input_feed, run_options)
            UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb2 in position 344: invalid start byte
        onnxruntime-directml 1.21.1 itself works, but it does not run on Win10 (Win11 is fine).
        To avoid the conflict, and to avoid rewriting a QPT smart-deployment flow, torch_directml is
        initialized lazily here and onnxruntime-directml stays at 1.20.1.
        Running SubtitleDetect in a separate process would of course also work.
        """
        if self.__enabled:
            if self.__dml:
                try:
                    import torch_directml
                    return torch_directml.device(torch_directml.default_device())
                except Exception:
                    traceback.print_exc()
                    self.__dml = False
            if self.__cuda:
                return torch.device("cuda:0")
            if self.__mps:
                return torch.device("mps")
        return torch.device("cpu")