#!/bin/bash
# This script is run by buildkite to run the benchmarks and upload the results
# to buildkite.
#
# Requires: python3 with vllm installed, network access (HuggingFace, apt),
# and optionally /usr/bin/buildkite-agent for result upload.
set -ex
set -o pipefail

# cd into parent directory of this file
cd "$(dirname "${BASH_SOURCE[0]}")/.."

# Ensure the download/polling tools used below are available.
(command -v wget && command -v curl) || (apt-get update && apt-get install -y wget curl)

# Run python-based benchmarks and upload the result to buildkite.
# NOTE: under `set -e` a bare `cmd; code=$?` never captures a failure (the
# script would abort first), so record the exit code with `|| code=$?` and
# decide whether to fail at the very end, after results are uploaded.
# `set -o pipefail` makes `$?` reflect the python process, not `tee`.
bench_latency_exit_code=0
python3 benchmarks/benchmark_latency.py --output-json latency_results.json 2>&1 | tee benchmark_latency.txt || bench_latency_exit_code=$?

bench_throughput_exit_code=0
python3 benchmarks/benchmark_throughput.py --input-len 256 --output-len 256 --output-json throughput_results.json 2>&1 | tee benchmark_throughput.txt || bench_throughput_exit_code=$?

# Run server-based benchmarks and upload the result to buildkite.
python3 -m vllm.entrypoints.openai.api_server --model meta-llama/Llama-2-7b-chat-hf &
server_pid=$!
wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json

# Wait for the server to start, timeout after 600 seconds.
timeout 600 bash -c 'until curl localhost:8000/v1/models; do sleep 1; done' || exit 1

bench_serving_exit_code=0
python3 benchmarks/benchmark_serving.py \
  --backend vllm \
  --dataset-name sharegpt \
  --dataset-path ./ShareGPT_V3_unfiltered_cleaned_split.json \
  --model meta-llama/Llama-2-7b-chat-hf \
  --num-prompts 20 \
  --endpoint /v1/completions \
  --tokenizer meta-llama/Llama-2-7b-chat-hf \
  --save-result \
  2>&1 | tee benchmark_serving.txt || bench_serving_exit_code=$?
# The server may already be gone if benchmarking failed; don't let a failed
# kill abort the script before the results are written and uploaded.
kill "$server_pid" || true

# Write the results into a markdown file.
{
  echo "### Latency Benchmarks"
  sed -n '1p' benchmark_latency.txt      # first line
  echo ""
  sed -n '$p' benchmark_latency.txt      # last line
  echo "### Throughput Benchmarks"
  sed -n '1p' benchmark_throughput.txt   # first line
  echo ""
  sed -n '$p' benchmark_throughput.txt   # last line
  echo "### Serving Benchmarks"
  sed -n '1p' benchmark_serving.txt      # first line
  echo ""
  echo '```'
  tail -n 24 benchmark_serving.txt       # last 24 lines
  echo '```'
} >> benchmark_results.md

# If the agent binary is not found, skip uploading the results, exit 0.
if [ ! -f /usr/bin/buildkite-agent ]; then
  exit 0
fi

# Upload the results to buildkite.
buildkite-agent annotate --style "info" --context "benchmark-results" < benchmark_results.md

# Exit with the exit code of the benchmarks.
if [ "$bench_latency_exit_code" -ne 0 ]; then
  exit "$bench_latency_exit_code"
fi
if [ "$bench_throughput_exit_code" -ne 0 ]; then
  exit "$bench_throughput_exit_code"
fi
if [ "$bench_serving_exit_code" -ne 0 ]; then
  exit "$bench_serving_exit_code"
fi

# Clean up the dataset before uploading artifacts so it is not included.
rm ShareGPT_V3_unfiltered_cleaned_split.json
buildkite-agent artifact upload "*.json"