feat: 新增视频分镜

close #16
2026-02-13 09:14:40 +08:00 · 2025-12-03 08:53:00 +08:00
parent 4d5fe919f0
commit 3fdf7a6ac3
3 changed files with 150 additions and 9 deletions
--- a/README.md
+++ b/README.md
@@ -22,7 +22,6 @@
  - [管理后台](#管理后台)
  - [API 调用](#api-调用)
  - [视频角色功能](#视频角色功能)
- [常见问题](#常见问题)
 - [许可证](#许可证)

 ---
@@ -129,6 +128,7 @@ python main.py
 | 创建角色 | `sora-video*` | 使用 `content` 数组 + `video_url` |
 | 角色生成视频 | `sora-video*` | 使用 `content` 数组 + `video_url` + 文本 |
 | Remix | `sora-video*` | 在 `content` 中包含 Remix ID |
+| 视频分镜 | `sora-video*` | 在 `content` 中使用 `[时长s]提示词` 格式触发 |

 ---

@@ -258,6 +258,8 @@ curl -X POST "http://localhost:8000/v1/chat/completions" \

 **视频Remix（基于已有视频继续创作）**

+* 提示词内包含 Remix 分享链接或 Remix ID 即可
+
 ```bash
 curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Authorization: Bearer han1234" \
@@ -273,6 +275,32 @@ curl -X POST "http://localhost:8000/v1/chat/completions" \
  }'
 ```

+**视频分镜**
+
+* 示例触发提示词：
+  `[5.0s]猫猫从飞机上跳伞 [5.0s]猫猫降落 [10.0s]猫猫在田野奔跑`
+* 或
+  ```text
+  [5.0s]猫猫从飞机上跳伞
+  [5.0s]猫猫降落
+  [10.0s]猫猫在田野奔跑
+  ```
+
+```bash
+curl -X POST "http://localhost:8000/v1/chat/completions" \
+  -H "Authorization: Bearer han1234" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "sora-video-landscape-10s",
+    "messages": [
+      {
+        "role": "user",
+        "content": "[5.0s]猫猫从飞机上跳伞 [5.0s]猫猫降落 [10.0s]猫猫在田野奔跑"
+      }
+    ]
+  }'
+```
+
 ### 视频角色功能

 Sora2API 支持**视频角色生成**功能。
--- a/src/services/generation_handler.py
+++ b/src/services/generation_handler.py
@@ -339,12 +339,31 @@ class GenerationHandler:
                # Get n_frames from model configuration
                n_frames = model_config.get("n_frames", 300)  # Default to 300 frames (10s)

-                task_id = await self.sora_client.generate_video(
-                    prompt, token_obj.token,
-                    orientation=model_config["orientation"],
-                    media_id=media_id,
-                    n_frames=n_frames
-                )
+                # Check if prompt is in storyboard format
+                if self.sora_client.is_storyboard_prompt(prompt):
+                    # Storyboard mode
+                    if stream:
+                        yield self._format_stream_chunk(
+                            reasoning_content="Detected storyboard format. Converting to storyboard API format...\n"
+                        )
+
+                    formatted_prompt = self.sora_client.format_storyboard_prompt(prompt)
+                    debug_logger.log_info(f"Storyboard mode detected. Formatted prompt: {formatted_prompt}")
+
+                    task_id = await self.sora_client.generate_storyboard(
+                        formatted_prompt, token_obj.token,
+                        orientation=model_config["orientation"],
+                        media_id=media_id,
+                        n_frames=n_frames
+                    )
+                else:
+                    # Normal video generation
+                    task_id = await self.sora_client.generate_video(
+                        prompt, token_obj.token,
+                        orientation=model_config["orientation"],
+                        media_id=media_id,
+                        n_frames=n_frames
+                    )
            else:
                task_id = await self.sora_client.generate_image(
                    prompt, token_obj.token,
--- a/src/services/sora_client.py
+++ b/src/services/sora_client.py
@@ -4,7 +4,8 @@ import io
 import time
 import random
 import string
-from typing import Optional, Dict, Any
+import re
+from typing import Optional, Dict, Any, Tuple
 from curl_cffi.requests import AsyncSession
 from curl_cffi import CurlMime
 from .proxy_manager import ProxyManager
@@ -29,7 +30,56 @@ class SoraClient:
        length = random.randint(10, 20)
        random_str = ''.join(random.choices(string.ascii_letters + string.digits, k=length))
        return random_str
-    
+
+    @staticmethod
+    def is_storyboard_prompt(prompt: str) -> bool:
+        """Detect whether a prompt uses the storyboard (multi-shot) format.
+
+        Format: [time]prompt or [time]prompt\n[time]prompt
+        Example: [5.0s]cat skydives from a plane [5.0s]cat lands
+
+        Args:
+            prompt: The prompt text entered by the user.
+
+        Returns:
+            True if the prompt contains at least one [<number>s] time marker.
+        """
+        if not prompt:
+            return False
+        # Matches a time marker: [<digits>s] or [<digits>.<digits>s]
+        pattern = r'\[\d+(?:\.\d+)?s\]'
+        matches = re.findall(pattern, prompt)
+        # A single marker anywhere in the text is enough to select storyboard mode
+        return len(matches) >= 1
+
+    @staticmethod
+    def format_storyboard_prompt(prompt: str) -> str:
+        """Convert a storyboard-format prompt into the "Shot N" layout.
+
+        Input:  [5.0s]cat skydives [5.0s]cat lands [1.0s]cat does rooftop parkour
+        Output: Shot 1:\nduration: 5.0sec\nScene: cat skydives\n\nShot 2:\nduration: 5.0sec\nScene: cat lands\n\nShot 3:\nduration: 1.0sec\nScene: cat does rooftop parkour
+
+        Args:
+            prompt: Raw prompt in storyboard format.
+
+        Returns:
+            The formatted prompt, or the input unchanged when nothing matches.
+        """
+        # Captures (duration, scene); a scene ends at the next '[' — NOTE(review): text before the first marker or after a stray '[' is not captured
+        pattern = r'\[(\d+(?:\.\d+)?)s\]\s*([^\[]+)'
+        matches = re.findall(pattern, prompt)
+
+        if not matches:
+            return prompt
+
+        formatted_shots = []
+        for idx, (duration, scene) in enumerate(matches, 1):
+            scene = scene.strip()
+            shot = f"Shot {idx}:\nduration: {duration}sec\nScene: {scene}"
+            formatted_shots.append(shot)
+
+        return "\n\n".join(formatted_shots)
+
    async def _make_request(self, method: str, endpoint: str, token: str,
                           json_data: Optional[Dict] = None,
                           multipart: Optional[Dict] = None,
@@ -612,3 +662,47 @@ class SoraClient:

        result = await self._make_request("POST", "/nf/create", token, json_data=json_data, add_sentinel_token=True)
        return result.get("id")
+
+    async def generate_storyboard(self, prompt: str, token: str, orientation: str = "landscape",
+                                 media_id: Optional[str] = None, n_frames: int = 450) -> str:
+        """Submit a storyboard video job via POST /nf/create/storyboard.
+
+        Args:
+            prompt: Formatted storyboard prompt (Shot 1:\nduration: 5.0sec\nScene: ...)
+            token: Access token passed to _make_request
+            orientation: Video orientation (portrait/landscape)
+            media_id: Optional upload id; when truthy it is sent as one "upload" inpaint item
+            n_frames: Number of frames requested (default 450)
+
+        Returns:
+            The "id" field of the response; None if absent (TODO confirm response shape)
+        """
+        inpaint_items = []
+        if media_id:
+            inpaint_items = [{
+                "kind": "upload",
+                "upload_id": media_id
+            }]
+
+        json_data = {
+            "kind": "video",
+            "prompt": prompt,
+            "title": "Draft your video",
+            "orientation": orientation,
+            "size": "small",
+            "n_frames": n_frames,
+            "storyboard_id": None,
+            "inpaint_items": inpaint_items,
+            "remix_target_id": None,
+            "model": "sy_8",
+            "metadata": None,
+            "style_id": None,
+            "cameo_ids": None,
+            "cameo_replacements": None,
+            "audio_caption": None,
+            "audio_transcript": None,
+            "video_caption": None
+        }
+
+        result = await self._make_request("POST", "/nf/create/storyboard", token, json_data=json_data, add_sentinel_token=True)
+        return result.get("id")