support qwen3 /think & /no_think & enable_thinking parameter by BUJIDAOVS · Pull Request #3564 · InternLM/lmdeploy · GitHub

support qwen3 /think & /no_think & enable_thinking parameter #3564


Merged
merged 4 commits on May 20, 2025
78 changes: 78 additions & 0 deletions lmdeploy/model.py
@@ -1135,6 +1135,84 @@ def match(cls, model_path: str) -> Optional[str]:
            return 'qwq'


@MODELS.register_module(name='qwen3')
class Qwen3(Qwen2d5Chat):
    """Chat template of the Qwen3 model."""

    def __init__(self, meta_instruction='', **kwargs):
        super().__init__(meta_instruction=meta_instruction, **kwargs)

    def messages2prompt(self, messages, sequence_start=True, tools=None, enable_thinking=None, **kwargs):
        if isinstance(messages, str):
            return self.get_prompt(messages, sequence_start)
        prompt = super().messages2prompt(messages, sequence_start, tools, **kwargs)

        # the enable_thinking argument sets the default; explicit /think or
        # /no_think markers in the messages take precedence below
        if enable_thinking is not None:
            no_think = not enable_thinking
        else:
            no_think = False

        # collect the system message content
        system_content = ''
        for msg in messages:
            if msg.get('role') == 'system':
                content = msg.get('content', '')
                if isinstance(content, list):
                    system_content = ' '.join(content)
                elif isinstance(content, str):
                    system_content = content

        # collect the latest user message content
        user_content = ''
        for msg in reversed(messages):
            if msg.get('role') == 'user':
                content = msg.get('content', '')
                if isinstance(content, list):
                    user_content = ' '.join(content)
                elif isinstance(content, str):
                    user_content = content
                break

        # check for /no_think or /think in the latest user message
        user_has_no_think = '/no_think' in user_content.lower()
        user_has_think = '/think' in user_content.lower()

        # check for /no_think or /think in the system message
        system_has_no_think = '/no_think' in system_content.lower()
        system_has_think = '/think' in system_content.lower()

        # markers in the latest user message win over the system message;
        # when both /think and /no_think appear, thinking stays enabled
        if user_has_no_think and not user_has_think:
            no_think = True
        elif not user_has_no_think and user_has_think:
            no_think = False
        elif user_has_no_think and user_has_think:
            no_think = False
        elif not user_has_no_think and not user_has_think:
            if system_has_no_think and not system_has_think:
                no_think = True
            elif not system_has_no_think and system_has_think:
                no_think = False
            elif system_has_no_think and system_has_think:
                no_think = False
            elif not system_has_no_think and not system_has_think:
                # no marker anywhere: honor enable_thinking if it was given,
                # otherwise disable thinking by default
                if enable_thinking is None:
                    no_think = True

        # an empty think block tells Qwen3 to skip its reasoning phase
        if no_think:
            prompt += '<think>\n\n</think>\n\n'

        return prompt

    @classmethod
    def match(cls, model_path: str) -> Optional[str]:
        """Return the model_name that was registered to MODELS.

        Args:
            model_path (str): the model path used for matching.
        """
        lower_path = model_path.lower()
        if 'qwen3' in lower_path:
            return 'qwen3'


@MODELS.register_module(name='codellama')
class CodeLlama(Llama2):

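To illustrate how the new template resolves the three controls, here is a minimal sketch; the example messages and the direct MODELS.module_dict lookup are illustrative only and not part of this diff:

from lmdeploy.model import MODELS

# instantiate the chat template registered above
template = MODELS.module_dict['qwen3']()

# a /no_think marker in the latest user message disables thinking: the
# rendered prompt ends with an empty <think></think> block
messages = [
    {'role': 'system', 'content': 'You are a helpful assistant.'},
    {'role': 'user', 'content': 'What is 2 + 2? /no_think'},
]
prompt = template.messages2prompt(messages)
assert prompt.endswith('<think>\n\n</think>\n\n')

# with no marker in any message and enable_thinking unset, thinking is
# disabled by default
prompt = template.messages2prompt([{'role': 'user', 'content': 'Hi'}])
assert prompt.endswith('<think>\n\n</think>\n\n')

# enable_thinking=True keeps the reasoning phase when no marker is present
prompt = template.messages2prompt([{'role': 'user', 'content': 'Hi'}],
                                  enable_thinking=True)
assert not prompt.endswith('<think>\n\n</think>\n\n')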
7 changes: 5 additions & 2 deletions lmdeploy/serve/async_engine.py
@@ -558,6 +558,7 @@ async def _get_prompt_input(self,
                                sequence_start: bool,
                                adapter_name: str,
                                tools: Optional[List[object]] = None,
                                enable_thinking: Optional[bool] = None,
                                **kwargs):
        if do_preprocess:
            # use adapter's chat template if possible
@@ -566,7 +567,7 @@ async def _get_prompt_input(self,
                chat_template = MODELS.module_dict[adapter_name]()
            else:
                chat_template = BaseChatTemplate()
-           prompt = chat_template.messages2prompt(prompt, sequence_start, tools=tools)
+           prompt = chat_template.messages2prompt(prompt, sequence_start, tools=tools, enable_thinking=enable_thinking)
            if prompt is None:
                raise ValueError(
                    f'You are using base template to handle chat task. Please specify a `--chat-template` name chosen from `lmdeploy list` if you want to use OpenAI messages input.'  # noqa
@@ -616,6 +617,7 @@ async def generate(
            skip_stop_tokens: bool = True,
            rewind_stop_tokens: bool = False,
            input_ids: Optional[List] = None,
            enable_thinking: Optional[bool] = None,
            **kwargs):
        """Generate responses.

@@ -670,7 +672,8 @@
                                                     do_preprocess,
                                                     sequence_start,
                                                     adapter_name,
-                                                    tools=tools)
+                                                    tools=tools,
+                                                    enable_thinking=enable_thinking)
        prompt = prompt_input['prompt']
        input_ids = prompt_input['input_ids']
        self.request_logger.log_inputs(session_id=session_id,
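For the Python API, a rough sketch of driving the updated generate() entry point with the new flag; the engine construction, model path, and session id below are assumptions for illustration rather than part of this change:

import asyncio

from lmdeploy.serve.async_engine import AsyncEngine


async def main():
    # hypothetical engine over a Qwen3 checkpoint; the path only needs to
    # contain 'qwen3' so the chat template above is matched
    engine = AsyncEngine(model_path='Qwen/Qwen3-8B')
    messages = [{'role': 'user', 'content': 'Why is the sky blue?'}]
    # enable_thinking is forwarded through _get_prompt_input to
    # Qwen3.messages2prompt, which appends the empty <think> block
    async for out in engine.generate(messages, session_id=1, enable_thinking=False):
        print(out.response, end='', flush=True)


asyncio.run(main())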
1 change: 1 addition & 0 deletions lmdeploy/serve/openai/api_server.py
@@ -427,6 +427,7 @@ async def chat_completions_v1(raw_request: Request = None):
        sequence_end=True,
        do_preprocess=not isinstance(request.messages, str),  # text completion for string input
        adapter_name=adapter_name,
        enable_thinking=request.enable_thinking,
    )

    def create_stream_response_json(index: int,
1 change: 1 addition & 0 deletions lmdeploy/serve/openai/protocol.py
@@ -136,6 +136,7 @@ class ChatCompletionRequest(BaseModel):
    seed: Optional[int] = None
    min_new_tokens: Optional[int] = Field(default=None, examples=[None])
    min_p: float = 0.0
    enable_thinking: Optional[bool] = None


class FunctionCall(BaseModel):
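On the serving side, the new ChatCompletionRequest field lets an OpenAI-compatible client toggle thinking per request. A sketch assuming api_server is already running and reachable at the address below (the URL, api_key, and served model name are placeholders):

from openai import OpenAI

client = OpenAI(base_url='http://0.0.0.0:23333/v1', api_key='dummy')

response = client.chat.completions.create(
    model='qwen3',  # placeholder for the served model name
    messages=[{'role': 'user', 'content': 'What is 2 + 2?'}],
    # extra_body carries fields outside the standard OpenAI schema;
    # enable_thinking maps onto the new ChatCompletionRequest field
    extra_body={'enable_thinking': False},
)
print(response.choices[0].message.content)

Appending /think or /no_think to the user message achieves the same effect without the extra field, and a marker in the latest user message takes precedence over both the system message and enable_thinking.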