diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py
index 82c6b426..e2f712df 100644
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -7,9 +7,6 @@ On the server side, run one of the following commands:
         --swap-space 16 \
         --disable-log-requests
 
-    (TGI backend)
-    ./launch_tgi_server.sh <your_model> <max_batch_total_tokens>
-
 On the client side, run:
     python benchmarks/benchmark_serving.py \
         --backend <backend> \
diff --git a/benchmarks/benchmark_serving_structured_output.py b/benchmarks/benchmark_serving_structured_output.py
index c79a93fa..71cb420a 100644
--- a/benchmarks/benchmark_serving_structured_output.py
+++ b/benchmarks/benchmark_serving_structured_output.py
@@ -5,9 +5,6 @@ On the server side, run one of the following commands:
     (vLLM OpenAI API server)
     vllm serve <your_model> --disable-log-requests
 
-    (TGI backend)
-    ./launch_tgi_server.sh <your_model> <max_batch_total_tokens>
-
 On the client side, run:
     python benchmarks/benchmark_serving_structured_output.py \
         --backend <backend> \
diff --git a/benchmarks/launch_tgi_server.sh b/benchmarks/launch_tgi_server.sh
deleted file mode 100755
index ba7383d8..00000000
--- a/benchmarks/launch_tgi_server.sh
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/bin/bash
-
-PORT=8000
-MODEL=$1
-TOKENS=$2
-
-docker run -e "HF_TOKEN=$HF_TOKEN" --gpus all --shm-size 1g -p $PORT:80 \
-           -v "$PWD/data:/data" \
-           ghcr.io/huggingface/text-generation-inference:2.2.0 \
-           --model-id "$MODEL" \
-           --sharded false \
-           --max-input-length 1024 \
-           --max-total-tokens 2048 \
-           --max-best-of 5 \
-           --max-concurrent-requests 5000 \
-           --max-batch-total-tokens "$TOKENS"