2024-11-07 13:17:29 -05:00
|
|
|
#!/bin/bash
|
|
|
|
|
2024-01-14 12:37:58 -08:00
|
|
|
# Run by Buildkite: runs the latency, throughput, and serving benchmarks, then uploads the results to Buildkite.
|
|
|
|
|
|
|
|
# abort on the first unhandled failure and trace every command as it runs
set -o errexit -o xtrace
|
2024-01-19 20:20:19 -08:00
|
|
|
# make a pipeline fail when any stage fails, not only the last one
# (the benchmark commands below are piped into tee)
set -o pipefail
|
2024-01-14 12:37:58 -08:00
|
|
|
|
|
|
|
# cd into the parent of the directory that contains this script
|
|
|
|
# resolve the target relative to the script location, not the caller's cwd
script_dir="$(dirname "${BASH_SOURCE[0]}")"
cd "${script_dir}/.."
|
|
|
|
|
2024-02-12 22:53:00 -08:00
|
|
|
# Install wget and curl only when at least one of them is missing.
# `command -v` is a shell builtin, so the check does not depend on `which`
# being installed and does not print the resolved paths.
if ! command -v wget >/dev/null || ! command -v curl >/dev/null; then
  apt-get update
  apt-get install -y wget curl
fi
|
2024-01-19 20:20:19 -08:00
|
|
|
|
2024-02-12 22:53:00 -08:00
|
|
|
# Run the benchmarks, write a markdown summary, and upload it to buildkite.
#
# Each benchmark pipeline records its status with `|| code=$?`. With `set -e`
# and `pipefail` active, a plain `cmd | tee file` followed by `code=$?` would
# abort the script on the first failing benchmark, so the summary would never
# be written and the captured code could only ever be 0.

# run python-based benchmarks and upload the result to buildkite
bench_latency_exit_code=0
python3 benchmarks/benchmark_latency.py --output-json latency_results.json 2>&1 \
  | tee benchmark_latency.txt || bench_latency_exit_code=$?

bench_throughput_exit_code=0
python3 benchmarks/benchmark_throughput.py --input-len 256 --output-len 256 \
  --output-json throughput_results.json 2>&1 \
  | tee benchmark_throughput.txt || bench_throughput_exit_code=$?

# run server-based benchmarks and upload the result to buildkite
python3 -m vllm.entrypoints.openai.api_server --model meta-llama/Llama-2-7b-chat-hf &
server_pid=$!
# make sure the background server is stopped on every exit path
# (e.g. dataset download failure or startup timeout below)
trap 'kill "$server_pid" 2>/dev/null || true' EXIT

wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json

# wait for server to start, timeout after 600 seconds
timeout 600 bash -c 'until curl localhost:8000/v1/models; do sleep 1; done' || exit 1

bench_serving_exit_code=0
python3 benchmarks/benchmark_serving.py \
  --backend vllm \
  --dataset-name sharegpt \
  --dataset-path ./ShareGPT_V3_unfiltered_cleaned_split.json \
  --model meta-llama/Llama-2-7b-chat-hf \
  --num-prompts 20 \
  --endpoint /v1/completions \
  --tokenizer meta-llama/Llama-2-7b-chat-hf \
  --save-result \
  2>&1 | tee benchmark_serving.txt || bench_serving_exit_code=$?

# The server may already be gone if it crashed during the benchmark; do not
# let that abort the script before the results are reported.
kill "$server_pid" 2>/dev/null || true
trap - EXIT

# write the results into a markdown file
{
  echo "### Latency Benchmarks"
  sed -n '1p' benchmark_latency.txt # first line
  echo ""
  sed -n '$p' benchmark_latency.txt # last line

  echo "### Throughput Benchmarks"
  sed -n '1p' benchmark_throughput.txt # first line
  echo ""
  sed -n '$p' benchmark_throughput.txt # last line

  echo "### Serving Benchmarks"
  sed -n '1p' benchmark_serving.txt # first line
  echo ""
  echo '```'
  tail -n 24 benchmark_serving.txt # last 24 lines
  echo '```'
} >> benchmark_results.md

# if the agent binary is not found, skip uploading the results
agent_available=true
if [ ! -f /usr/bin/buildkite-agent ]; then
  agent_available=false
fi

if [ "$agent_available" = true ]; then
  # upload the results to buildkite
  buildkite-agent annotate --style "info" --context "benchmark-results" < benchmark_results.md
fi

# exit with the exit code of the first benchmark that failed
for exit_code in \
  "$bench_latency_exit_code" \
  "$bench_throughput_exit_code" \
  "$bench_serving_exit_code"; do
  if [ "$exit_code" -ne 0 ]; then
    exit "$exit_code"
  fi
done

# without the agent there is nothing left to upload
if [ "$agent_available" != true ]; then
  exit 0
fi

# remove the downloaded dataset so that "*.json" only matches benchmark results
rm ShareGPT_V3_unfiltered_cleaned_split.json
buildkite-agent artifact upload "*.json"
|