diff --git a/src/api/routes.py b/src/api/routes.py
index c515a1a..2cd4675 100644
--- a/src/api/routes.py
+++ b/src/api/routes.py
@@ -151,7 +151,6 @@ async def create_chat_completion(
             result = chunk

         if result:
-            import json
             return JSONResponse(content=json.loads(result))
         else:
             return JSONResponse(
@@ -169,7 +168,6 @@ async def create_chat_completion(
     # Handle streaming
     if request.stream:
         async def generate():
-            import json as json_module  # Import inside function to avoid scope issues
             try:
                 async for chunk in generation_handler.handle_generation(
                     model=request.model,
@@ -184,7 +182,7 @@ async def create_chat_completion(
                 # Try to parse structured error (JSON format)
                 error_data = None
                 try:
-                    error_data = json_module.loads(str(e))
+                    error_data = json.loads(str(e))
                 except:
                     pass

@@ -202,7 +200,7 @@ async def create_chat_completion(
                         "code": None
                     }
                 }
-                error_chunk = f'data: {json_module.dumps(error_response)}\n\n'
+                error_chunk = f'data: {json.dumps(error_response)}\n\n'
                 yield error_chunk
                 yield 'data: [DONE]\n\n'

@@ -229,7 +227,6 @@ async def create_chat_completion(
             result = chunk

         if result:
-            import json
             return JSONResponse(content=json.loads(result))
         else:
             # Return OpenAI-compatible error format
diff --git a/src/services/generation_handler.py b/src/services/generation_handler.py
index e8fb79e..81c94d8 100644
--- a/src/services/generation_handler.py
+++ b/src/services/generation_handler.py
@@ -600,7 +600,6 @@ class GenerationHandler:
         # Parse error message to check if it's a structured error (JSON)
         error_response = None
         try:
-            import json
             error_response = json.loads(str(e))
         except:
             pass
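
Note: these removals assume `json` is already imported once at module level in both
`src/api/routes.py` and `src/services/generation_handler.py`; that top-of-file import
sits outside the hunk context, so it is not visible in the diff. A minimal sketch of
the assumed layout follows (the `build_error_chunk` helper is illustrative only, not
part of the patch):

    # src/api/routes.py -- assumed module-level import replacing the removed
    # per-function "import json" / "import json as json_module" statements
    import json

    def build_error_chunk(error_response: dict) -> str:
        # Serialize an OpenAI-style error payload as a single SSE data frame,
        # mirroring the streaming error path in create_chat_completion.
        return f"data: {json.dumps(error_response)}\n\n"

With the name bound at import time, nested functions such as generate() resolve `json`
through the module scope, so the per-function imports (and the json_module alias that
worked around the old shadowing) are no longer needed.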