[Minor] Remove TGI launching script (#15646)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
2025-03-28 09:30:08 -07:00 · 2025-03-28 09:30:08 -07:00 · 70e132244a
commit 70e132244a
parent 47e9038d23
3 changed files with 0 additions and 22 deletions
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@ -7,9 +7,6 @@ On the server side, run one of the following commands:
        --swap-space 16 \
        --disable-log-requests

-    (TGI backend)
-    ./launch_tgi_server.sh <your_model> <max_batch_total_tokens>
-
 On the client side, run:
    python benchmarks/benchmark_serving.py \
        --backend <backend> \
--- a/benchmarks/benchmark_serving_structured_output.py
+++ b/benchmarks/benchmark_serving_structured_output.py
@ -5,9 +5,6 @@ On the server side, run one of the following commands:
    (vLLM OpenAI API server)
    vllm serve <your_model> --disable-log-requests

-    (TGI backend)
-    ./launch_tgi_server.sh <your_model> <max_batch_total_tokens>
-
 On the client side, run:
    python benchmarks/benchmark_serving_structured_output.py \
        --backend <backend> \
--- a/benchmarks/launch_tgi_server.sh
+++ b/benchmarks/launch_tgi_server.sh
@ -1,16 +0,0 @@
-#!/bin/bash
-
-PORT=8000
-MODEL=$1
-TOKENS=$2
-
-docker run -e "HF_TOKEN=$HF_TOKEN" --gpus all --shm-size 1g -p $PORT:80 \
-           -v "$PWD/data:/data" \
-           ghcr.io/huggingface/text-generation-inference:2.2.0 \
-           --model-id "$MODEL" \
-           --sharded false  \
-           --max-input-length 1024 \
-           --max-total-tokens 2048 \
-           --max-best-of 5 \
-           --max-concurrent-requests 5000 \
-           --max-batch-total-tokens "$TOKENS"