diff --git a/examples/offline_inference/profiling.py b/examples/offline_inference/profiling.py
index ffa76b4e..6e1d4722 100644
--- a/examples/offline_inference/profiling.py
+++ b/examples/offline_inference/profiling.py
@@ -234,9 +234,8 @@ def run_profile(context: ProfileContext, csv_output: Optional[str],
         sampling_params.max_tokens = next(output_len_generator)
         assert isinstance(sampling_params.max_tokens, int)
 
-        prompt_token_ids = torch.randint(
-            llm.llm_engine.model_config.get_vocab_size(),
-            size=(prompt_len, )).tolist()
+        prompt_token_ids = torch.randint(llm.get_tokenizer().vocab_size,
+                                         size=(prompt_len, )).tolist()
 
         llm.llm_engine.add_request(
             request_id=f"seq{i}",