diff --git a/README.md b/README.md index ded361f..0bff971 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,6 @@ - [管理后台](#管理后台) - [API 调用](#api-调用) - [视频角色功能](#视频角色功能) -- [常见问题](#常见问题) - [许可证](#许可证) --- @@ -129,6 +128,7 @@ python main.py | 创建角色 | `sora-video*` | 使用 `content` 数组 + `video_url` | | 角色生成视频 | `sora-video*` | 使用 `content` 数组 + `video_url` + 文本 | | Remix | `sora-video*` | 在 `content` 中包含 Remix ID | +| 视频分镜 | `sora-video*` | 在 `content` 中使用```[时长s]提示词```格式触发 | --- @@ -258,6 +258,8 @@ curl -X POST "http://localhost:8000/v1/chat/completions" \ **视频Remix(基于已有视频继续创作)** +* 提示词内包含remix分享链接或id即可 + ```bash curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Authorization: Bearer han1234" \ @@ -273,6 +275,32 @@ curl -X POST "http://localhost:8000/v1/chat/completions" \ }' ``` +**视频分镜** + +* 示例触发提示词: + ```[5.0s]猫猫从飞机上跳伞 [5.0s]猫猫降落 [10.0s]猫猫在田野奔跑``` +* 或 + ```text + [5.0s]猫猫从飞机上跳伞 + [5.0s]猫猫降落 + [10.0s]猫猫在田野奔跑 + ``` + +```bash +curl -X POST "http://localhost:8000/v1/chat/completions" \ + -H "Authorization: Bearer han1234" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "sora-video-landscape-10s", + "messages": [ + { + "role": "user", + "content": "[5.0s]猫猫从飞机上跳伞 [5.0s]猫猫降落 [10.0s]猫猫在田野奔跑" + } + ] + }' +``` + ### 视频角色功能 Sora2API 支持**视频角色生成**功能。 diff --git a/src/services/generation_handler.py b/src/services/generation_handler.py index f5d018e..801dd05 100644 --- a/src/services/generation_handler.py +++ b/src/services/generation_handler.py @@ -339,12 +339,31 @@ class GenerationHandler: # Get n_frames from model configuration n_frames = model_config.get("n_frames", 300) # Default to 300 frames (10s) - task_id = await self.sora_client.generate_video( - prompt, token_obj.token, - orientation=model_config["orientation"], - media_id=media_id, - n_frames=n_frames - ) + # Check if prompt is in storyboard format + if self.sora_client.is_storyboard_prompt(prompt): + # Storyboard mode + if stream: + yield self._format_stream_chunk( + 
class SoraClient:
    # Excerpt: only the storyboard-related members are shown here; the rest of
    # the class (including `_make_request`) lives outside this excerpt.

    # A shot marker is "[<seconds>s]", e.g. "[5s]" or "[5.0s]". The capture
    # group holds the numeric duration so that `split` returns it alongside
    # the text between markers.
    _STORYBOARD_MARKER = re.compile(r'\[(\d+(?:\.\d+)?)s\]')

    @staticmethod
    def _parse_storyboard_shots(prompt: str) -> Tuple[Tuple[str, str], ...]:
        """Split a prompt into ``(duration, scene)`` pairs, one per shot.

        A shot is a ``[<seconds>s]`` marker followed by non-blank text that
        runs up to the next marker (or the end of the prompt). Markers with
        no scene text are ignored, and any text before the first marker is
        discarded.

        Args:
            prompt: Raw user prompt; may be empty or ``None``.

        Returns:
            Tuple of ``(duration, scene)`` string pairs in prompt order;
            empty when the prompt contains no usable shot.
        """
        if not prompt:
            return ()
        # With one capture group, split() yields:
        # [text before first marker, duration 1, scene 1, duration 2, scene 2, ...]
        pieces = SoraClient._STORYBOARD_MARKER.split(prompt)
        durations = pieces[1::2]
        scenes = (piece.strip() for piece in pieces[2::2])
        return tuple(
            (duration, scene)
            for duration, scene in zip(durations, scenes)
            if scene
        )

    @staticmethod
    def is_storyboard_prompt(prompt: str) -> bool:
        """Return whether the prompt is written in storyboard format.

        Format: ``[time]prompt``, repeated on one line or across lines,
        e.g. ``[5.0s]cat jumps from a plane [5.0s]cat lands``.

        The check uses the same parser as ``format_storyboard_prompt``, so a
        prompt is reported as a storyboard only when at least one shot with
        scene text can actually be formatted.

        Args:
            prompt: User-supplied prompt.

        Returns:
            True if the prompt contains at least one usable shot.
        """
        return bool(SoraClient._parse_storyboard_shots(prompt))

    @staticmethod
    def format_storyboard_prompt(prompt: str) -> str:
        """Convert a storyboard-format prompt into the shot list sent to the API.

        Input:  ``[5.0s]cat jumps [5.0s]cat lands``
        Output (blocks separated by a blank line)::

            Shot 1:
            duration: 5.0sec
            Scene: cat jumps

            Shot 2:
            duration: 5.0sec
            Scene: cat lands

        Scene text may itself contain square brackets; only ``[<seconds>s]``
        markers start a new shot.

        Args:
            prompt: Original storyboard-format prompt.

        Returns:
            The formatted shot list, or ``prompt`` unchanged when it contains
            no usable shot.
        """
        shots = SoraClient._parse_storyboard_shots(prompt)
        if not shots:
            return prompt
        return "\n\n".join(
            f"Shot {index}:\nduration: {duration}sec\nScene: {scene}"
            for index, (duration, scene) in enumerate(shots, 1)
        )

    async def generate_storyboard(self, prompt: str, token: str, orientation: str = "landscape",
                                  media_id: Optional[str] = None, n_frames: int = 450) -> str:
        """Generate a video using storyboard mode.

        Builds the request payload and POSTs it to ``/nf/create/storyboard``
        through ``self._make_request`` with the sentinel token enabled.

        Args:
            prompt: Formatted storyboard prompt (the "Shot N: / duration: /
                Scene:" text produced by ``format_storyboard_prompt``).
            token: Access token passed through to ``_make_request``.
            orientation: Video orientation (portrait/landscape).
            media_id: Optional upload id; when given it is sent as a single
                inpaint item of kind "upload".
            n_frames: Number of frames.

        Returns:
            The "id" field of the response (the task id).
        """
        # An empty list is sent when there is no reference upload.
        inpaint_items = [{"kind": "upload", "upload_id": media_id}] if media_id else []

        json_data = {
            "kind": "video",
            "prompt": prompt,
            "title": "Draft your video",
            "orientation": orientation,
            "size": "small",
            "n_frames": n_frames,
            "storyboard_id": None,
            "inpaint_items": inpaint_items,
            "remix_target_id": None,
            "model": "sy_8",
            "metadata": None,
            "style_id": None,
            "cameo_ids": None,
            "cameo_replacements": None,
            "audio_caption": None,
            "audio_transcript": None,
            "video_caption": None
        }

        result = await self._make_request(
            "POST", "/nf/create/storyboard", token,
            json_data=json_data, add_sentinel_token=True
        )
        return result.get("id")