feat: 新增视频分镜

close #16
This commit is contained in:
TheSmallHanCat
2025-12-03 08:53:00 +08:00
parent 4d5fe919f0
commit 3fdf7a6ac3
3 changed files with 150 additions and 9 deletions

View File

@@ -22,7 +22,6 @@
- [管理后台](#管理后台)
- [API 调用](#api-调用)
- [视频角色功能](#视频角色功能)
- [常见问题](#常见问题)
- [许可证](#许可证)
---
@@ -129,6 +128,7 @@ python main.py
| 创建角色 | `sora-video*` | 使用 `content` 数组 + `video_url` |
| 角色生成视频 | `sora-video*` | 使用 `content` 数组 + `video_url` + 文本 |
| Remix | `sora-video*` | 在 `content` 中包含 Remix ID |
| 视频分镜 | `sora-video*` | 在 `content` 中使用 `[时长s]提示词` 格式触发 |
---
@@ -258,6 +258,8 @@ curl -X POST "http://localhost:8000/v1/chat/completions" \
**视频 Remix（基于已有视频继续创作）**
* 提示词内包含remix分享链接或id即可
```bash
curl -X POST "http://localhost:8000/v1/chat/completions" \
-H "Authorization: Bearer han1234" \
@@ -273,6 +275,32 @@ curl -X POST "http://localhost:8000/v1/chat/completions" \
}'
```
**视频分镜**
* 示例触发提示词:
```[5.0s]猫猫从飞机上跳伞 [5.0s]猫猫降落 [10.0s]猫猫在田野奔跑```
* 或
```text
[5.0s]猫猫从飞机上跳伞
[5.0s]猫猫降落
[10.0s]猫猫在田野奔跑
```
```bash
curl -X POST "http://localhost:8000/v1/chat/completions" \
-H "Authorization: Bearer han1234" \
-H "Content-Type: application/json" \
-d '{
"model": "sora-video-landscape-10s",
"messages": [
{
"role": "user",
"content": "[5.0s]猫猫从飞机上跳伞 [5.0s]猫猫降落 [10.0s]猫猫在田野奔跑"
}
]
}'
```
### 视频角色功能
Sora2API 支持**视频角色生成**功能。

View File

@@ -339,12 +339,31 @@ class GenerationHandler:
# Get n_frames from model configuration
n_frames = model_config.get("n_frames", 300) # Default to 300 frames (10s)
task_id = await self.sora_client.generate_video(
prompt, token_obj.token,
orientation=model_config["orientation"],
media_id=media_id,
n_frames=n_frames
)
# Check if prompt is in storyboard format
if self.sora_client.is_storyboard_prompt(prompt):
# Storyboard mode
if stream:
yield self._format_stream_chunk(
reasoning_content="Detected storyboard format. Converting to storyboard API format...\n"
)
formatted_prompt = self.sora_client.format_storyboard_prompt(prompt)
debug_logger.log_info(f"Storyboard mode detected. Formatted prompt: {formatted_prompt}")
task_id = await self.sora_client.generate_storyboard(
formatted_prompt, token_obj.token,
orientation=model_config["orientation"],
media_id=media_id,
n_frames=n_frames
)
else:
# Normal video generation
task_id = await self.sora_client.generate_video(
prompt, token_obj.token,
orientation=model_config["orientation"],
media_id=media_id,
n_frames=n_frames
)
else:
task_id = await self.sora_client.generate_image(
prompt, token_obj.token,

View File

@@ -4,7 +4,8 @@ import io
import time
import random
import string
from typing import Optional, Dict, Any
import re
from typing import Optional, Dict, Any, Tuple
from curl_cffi.requests import AsyncSession
from curl_cffi import CurlMime
from .proxy_manager import ProxyManager
@@ -29,7 +30,56 @@ class SoraClient:
length = random.randint(10, 20)
random_str = ''.join(random.choices(string.ascii_letters + string.digits, k=length))
return random_str
@staticmethod
def is_storyboard_prompt(prompt: str) -> bool:
"""检测提示词是否为分镜模式格式
格式: [time]prompt 或 [time]prompt\n[time]prompt
例如: [5.0s]猫猫从飞机上跳伞 [5.0s]猫猫降落
Args:
prompt: 用户输入的提示词
Returns:
True if prompt matches storyboard format
"""
if not prompt:
return False
# 匹配格式: [数字s] 或 [数字.数字s]
pattern = r'\[\d+(?:\.\d+)?s\]'
matches = re.findall(pattern, prompt)
# 至少包含一个时间标记才认为是分镜模式
return len(matches) >= 1
@staticmethod
def format_storyboard_prompt(prompt: str) -> str:
"""将分镜格式提示词转换为API所需格式
输入: [5.0s]猫猫从飞机上跳伞 [5.0s]猫猫降落 [1.0s]猫猫屋顶跑酷
输出: Shot 1:\nduration: 5.0sec\nScene: 猫猫从飞机上跳伞\n\nShot 2:\nduration: 5.0sec\nScene: 猫猫降落\n\nShot 3:\nduration: 1.0sec\nScene: 猫猫屋顶跑酷
Args:
prompt: 原始分镜格式提示词
Returns:
格式化后的API提示词
"""
# 匹配 [时间]内容 的模式
pattern = r'\[(\d+(?:\.\d+)?)s\]\s*([^\[]+)'
matches = re.findall(pattern, prompt)
if not matches:
return prompt
formatted_shots = []
for idx, (duration, scene) in enumerate(matches, 1):
scene = scene.strip()
shot = f"Shot {idx}:\nduration: {duration}sec\nScene: {scene}"
formatted_shots.append(shot)
return "\n\n".join(formatted_shots)
async def _make_request(self, method: str, endpoint: str, token: str,
json_data: Optional[Dict] = None,
multipart: Optional[Dict] = None,
@@ -612,3 +662,47 @@ class SoraClient:
result = await self._make_request("POST", "/nf/create", token, json_data=json_data, add_sentinel_token=True)
return result.get("id")
async def generate_storyboard(self, prompt: str, token: str, orientation: str = "landscape",
                              media_id: Optional[str] = None, n_frames: int = 450) -> str:
    """Submit a storyboard video generation task and return its task id.

    Args:
        prompt: Storyboard prompt already formatted as a shot list
            ("Shot 1:" / "duration: 5.0sec" / "Scene: ..." entries)
        token: Access token passed through to the request helper
        orientation: Video orientation (portrait/landscape)
        media_id: Optional uploaded media id; when set it is sent as a
            single "upload" inpaint item
        n_frames: Number of frames

    Returns:
        The "id" value looked up in the response of the create call
    """
    # Without a media id the request carries an empty inpaint list.
    attachments = [{"kind": "upload", "upload_id": media_id}] if media_id else []

    payload = {
        "kind": "video",
        "prompt": prompt,
        "title": "Draft your video",
        "orientation": orientation,
        "size": "small",
        "n_frames": n_frames,
        "storyboard_id": None,
        "inpaint_items": attachments,
        "remix_target_id": None,
        "model": "sy_8",
        "metadata": None,
        "style_id": None,
        "cameo_ids": None,
        "cameo_replacements": None,
        "audio_caption": None,
        "audio_transcript": None,
        "video_caption": None,
    }

    response = await self._make_request(
        "POST", "/nf/create/storyboard", token,
        json_data=payload, add_sentinel_token=True,
    )
    return response.get("id")