# SPDX-License-Identifier: Apache-2.0
from typing import List

import pytest
from transformers import AutoTokenizer

from tests.entrypoints.openai.reasoning_parsers.utils import (
    run_reasoning_extraction)
from vllm.entrypoints.openai.reasoning_parsers import (ReasoningParser,
                                                       ReasoningParserManager)

# Key used to look the parser class up in ReasoningParserManager.
parser_name = "deepseek_r1"
# Delimiters that open and close the reasoning section of the model output.
# They are also registered with the test tokenizer, so they must be non-empty.
start_token = "<think>"
end_token = "</think>"

# Each fixture below has three keys:
#   "output"            - raw model text that is tokenized and fed to the parser
#   "reasoning_content" - expected extracted reasoning (None if none expected)
#   "content"           - expected remaining answer text (None if none expected)
#
# The first group omits the opening delimiter and only carries the closing
# one; the *_WITH_THINK group further down carries both.

# Reasoning followed by an answer.
SIMPLE_REASONING = {
    "output": "This is a reasoning section</think>This is the rest",
    "reasoning_content": "This is a reasoning section",
    "content": "This is the rest",
}
# Reasoning that is closed but followed by no answer text.
COMPLETE_REASONING = {
    "output": "This is a reasoning section</think>",
    "reasoning_content": "This is a reasoning section",
    "content": None,
}
# No delimiter at all, non-streaming expectation: everything is content.
NO_REASONING = {
    "output": "This is content",
    "reasoning_content": None,
    "content": "This is content",
}
# No delimiter at all, streaming expectation: everything is reasoning.
NO_REASONING_STREAMING = {
    "output": "This is a reasoning section",
    "reasoning_content": "This is a reasoning section",
    "content": None,
}
# Newlines inside both the reasoning and the answer.
MULTIPLE_LINES = {
    "output": "This\nThat</think>This is the rest\nThat",
    "reasoning_content": "This\nThat",
    "content": "This is the rest\nThat",
}
# Empty reasoning section, non-streaming expectation: reasoning is "".
SHORTEST_REASONING_NO_STREAMING = {
    "output": "</think>This is the rest",
    "reasoning_content": "",
    "content": "This is the rest",
}
# Empty reasoning section, streaming expectation: reasoning is None.
SHORTEST_REASONING = {
    "output": "</think>This is the rest",
    "reasoning_content": None,
    "content": "This is the rest",
}

# Same scenarios, with the opening delimiter present in the output.
REASONING_WITH_THINK = {
    "output": "<think>This is a reasoning section</think>This is the rest",
    "reasoning_content": "This is a reasoning section",
    "content": "This is the rest",
}
COMPLETE_REASONING_WITH_THINK = {
    "output": "<think>This is a reasoning section</think>",
    "reasoning_content": "This is a reasoning section",
    "content": None,
}
MULTIPLE_LINES_WITH_THINK = {
    "output": "<think>This\nThat</think>This is the rest\nThat",
    "reasoning_content": "This\nThat",
    "content": "This is the rest\nThat",
}
SHORTEST_REASONING_NO_STREAMING_WITH_THINK = {
    "output": "<think></think>This is the rest",
    "reasoning_content": "",
    "content": "This is the rest",
}
SHORTEST_REASONING_WITH_THINK = {
    "output": "<think></think>This is the rest",
    "reasoning_content": None,
    "content": "This is the rest",
}
# (streaming, fixture) pairs. Ids follow the convention "<name>" for the
# non-streaming run and "<name>_streaming" for the streaming run.
TEST_CASES = [
    pytest.param(False, SIMPLE_REASONING, id="simple_reasoning"),
    pytest.param(True, SIMPLE_REASONING, id="simple_reasoning_streaming"),
    pytest.param(False, COMPLETE_REASONING, id="complete_reasoning"),
    pytest.param(True, COMPLETE_REASONING, id="complete_reasoning_streaming"),
    pytest.param(False, NO_REASONING, id="no_reasoning_token"),
    pytest.param(True,
                 NO_REASONING_STREAMING,
                 id="no_reasoning_token_streaming"),
    pytest.param(False, MULTIPLE_LINES, id="multiple_lines"),
    pytest.param(True, MULTIPLE_LINES, id="multiple_lines_streaming"),
    # The streaming and non-streaming runs expect different reasoning values
    # for an empty reasoning section (None vs ""), hence two fixtures.
    pytest.param(False, SHORTEST_REASONING_NO_STREAMING, id="shortest"),
    pytest.param(True, SHORTEST_REASONING, id="shortest_streaming"),
    pytest.param(False, REASONING_WITH_THINK, id="reasoning_with_think"),
    pytest.param(True,
                 REASONING_WITH_THINK,
                 id="reasoning_with_think_streaming"),
    pytest.param(False,
                 COMPLETE_REASONING_WITH_THINK,
                 id="complete_reasoning_with_think"),
    pytest.param(True,
                 COMPLETE_REASONING_WITH_THINK,
                 id="complete_reasoning_with_think_streaming"),
    pytest.param(False,
                 MULTIPLE_LINES_WITH_THINK,
                 id="multiple_lines_with_think"),
    pytest.param(True,
                 MULTIPLE_LINES_WITH_THINK,
                 id="multiple_lines_with_think_streaming"),
    pytest.param(False,
                 SHORTEST_REASONING_NO_STREAMING_WITH_THINK,
                 id="shortest_with_think"),
    pytest.param(True,
                 SHORTEST_REASONING_WITH_THINK,
                 id="shortest_with_think_streaming"),
]

# Global tokenizer initialization to avoid repeated loading
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")
# Register the reasoning delimiters in the tokenizer's vocabulary.
tokenizer.add_tokens([start_token, end_token])


@pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
def test_reasoning(
    streaming: bool,
    param_dict: dict,
):
    """Check the parser's reasoning/content split for one fixture.

    Args:
        streaming: Forwarded to ``run_reasoning_extraction`` to select the
            streaming or non-streaming extraction path.
        param_dict: Fixture with the raw ``"output"`` text and the expected
            ``"reasoning_content"`` and ``"content"`` values.
    """
    output = tokenizer.tokenize(param_dict["output"])
    # Convert every token back to its own text piece, so the extraction
    # helper receives the output as a list of per-token strings.
    output_tokens: List[str] = [
        tokenizer.convert_tokens_to_string([token]) for token in output
    ]
    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(
        parser_name)(tokenizer)

    reasoning, content = run_reasoning_extraction(parser,
                                                  output_tokens,
                                                  streaming=streaming)

    assert reasoning == param_dict["reasoning_content"]
    assert content == param_dict["content"]