-
Notifications
You must be signed in to change notification settings - Fork 110
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add flux example #1126
base: main
Are you sure you want to change the base?
add flux example #1126
Changes from 7 commits
c85f228
084f13e
e52a7da
3e9336b
11a183b
6da355b
bb3ba7a
569a6c5
898ea57
d2932fc
b13521c
9319cca
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
#!/bin/bash | ||
set -e | ||
|
||
# indicate which model to run | ||
# e.g. ./run_benchmark.sh sd15,sd21,sdxl or ./run_benchmark.sh all | ||
run_model=$1 | ||
|
||
export NEXFORT_GRAPH_CACHE=1 | ||
export NEXFORT_FX_FORCE_TRITON_SDPA=1 | ||
|
||
|
||
# Model locations.
# Defaults are the previously hard-coded paths; set HF_MODEL_DIR to relocate the
# shared model directory, and SD3_MODEL_PATH to relocate the SD3 checkpoint
# (which lives outside the shared directory by default).
model_dir="${HF_MODEL_DIR:-/data1/hf_model}"
sd15_path="${model_dir}/stable-diffusion-v1-5"
sd21_path="${model_dir}/stable-diffusion-2-1"
sdxl_path="${model_dir}/stable-diffusion-xl-base-1.0"
sd3_path="${SD3_MODEL_PATH:-/data1/home/zhangxu/stable-diffusion-3-medium-diffusers}"
flux_dev_path="${model_dir}/FLUX.1-dev/snapshots/0ef5fff789c832c5c7f4e127f94c8b54bbcced44"
flux_schell_path="${model_dir}/FLUX.1-schnell"
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Avoid hardcoding paths and add validation. The script uses hardcoded paths, which makes it less portable and could fail silently if models aren't present. Consider:
-model_dir="/data1/hf_model"
+MODEL_DIR="${HF_MODEL_DIR:-/data1/hf_model}"
+
+validate_model_path() {
+ if [ ! -d "$1" ]; then
+ echo "Error: Model path not found: $1"
+ exit 1
+ fi
+}
+
+sd15_path="${MODEL_DIR}/stable-diffusion-v1-5"
+validate_model_path "${sd15_path}"
|
||
# Date stamp (yyyy-mm-dd) written into every result row.
current_time=$(date +"%Y-%m-%d")
echo "Current time: ${current_time}"

# Name of the first NVIDIA GPU, with the "NVIDIA " prefix removed and spaces
# replaced by underscores so it can be used in table cells and file names.
gpu_name=$(nvidia-smi --query-gpu=gpu_name --format=csv,noheader,nounits \
  | head -n 1 \
  | sed 's/NVIDIA //; s/ /_/g')

# Without pipefail a missing or failing nvidia-smi leaves gpu_name empty, which
# would produce "benchmark_result_.md" and blank GPU cells; use a placeholder.
if [[ -z "${gpu_name}" ]]; then
  echo "Warning: could not determine GPU name via nvidia-smi; using 'unknown_GPU'" >&2
  gpu_name="unknown_GPU"
fi
|
||
XuZhang99 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# table header | ||
BENCHMARK_RESULT_TEXT="| Data update date (yyyy-mm-dd) | GPU | Model | HxW | Compiler | Quantization | Iteration speed (it/s) | E2E Time (s) | Max used CUDA memory (GiB) | Warmup time (s) |\n| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |\n" | ||
|
||
prompt="beautiful scenery nature glass bottle landscape, purple galaxy bottle" | ||
quantize_config='{"quant_type": "fp8_e4m3_e4m3_dynamic_per_tensor"}' | ||
|
||
# oneflow has no compiler_config | ||
#sd15_nexfort_compiler_config="" | ||
#sd21_nexfort_compiler_config="" | ||
#sdxl_nexfort_compiler_config="" | ||
sd3_nexfort_compiler_config='{"mode": "max-optimize:max-autotune:low-precision:cache-all", "memory_format": "channels_last"}' | ||
flux_nexfort_compiler_config='{"mode": "max-optimize:max-autotune:low-precision", "memory_format": "channels_last"}' | ||
|
||
#######################################
# Benchmark one model at one resolution and append the result as a Markdown
# table row to BENCHMARK_RESULT_TEXT.
# Globals:
#   prompt, quantize_config, current_time, gpu_name (read)
#   BENCHMARK_RESULT_TEXT (appended to)
# Arguments:
#   $1 - model name; a name matching "sd3" or "flux" selects the corresponding
#        example script, anything else uses benchmarks/text_to_image.py
#   $2 - model path passed to --model
#   $3 - number of steps passed to --steps
#   $4 - value passed to --compiler
#   $5 - value passed to --compiler-config
#   $6 - image height
#   $7 - image width
#   $8 - "True" adds --quantize/--quantize-config; any other value omits them
# Outputs:
#   A progress line on stdout; the script's live output on the terminal when
#   one is available.
# Returns:
#   0 on success, 1 if the benchmark command exits non-zero.
#######################################
benchmark_model_with_one_resolution() {
  local model_name=$1
  local model_path=$2
  local steps=$3
  local compiler=$4
  local compiler_config=$5
  local height=$6
  local width=$7
  local quantize=$8

  echo "Running ${model_path} ${height}x${width}..."

  local script_path
  if [[ "${model_name}" =~ sd3 ]]; then
    script_path="onediff_diffusers_extensions/examples/sd3/text_to_image_sd3.py"
  elif [[ "${model_name}" =~ flux ]]; then
    script_path="onediff_diffusers_extensions/examples/flux/text_to_image_flux.py"
  else
    script_path="benchmarks/text_to_image.py"
  fi

  # Build the command as an array so paths and JSON configs containing spaces
  # are passed as single arguments.
  local -a cmd=(
    python3 "${script_path}"
    --model "${model_path}" --variant fp16 --steps "${steps}"
    --height "${height}" --width "${width}" --seed 1
    --compiler "${compiler}" --compiler-config "${compiler_config}"
  )
  if [[ "${quantize}" == True ]]; then
    cmd+=(--quantize --quantize-config "${quantize_config}")
  fi
  cmd+=(--prompt "${prompt}" --print-output)

  # Mirror live output to the terminal only when /dev/tty can actually be
  # opened; in non-interactive runs (cron, CI) opening it fails.
  local live_sink=/dev/null
  if ( : >/dev/tty ) 2>/dev/null; then
    live_sink=/dev/tty
  fi

  # pipefail is enabled only inside the substitution so that a failing
  # benchmark is not hidden behind tee's exit status.
  local script_output
  if ! script_output=$(set -o pipefail; "${cmd[@]}" | tee "${live_sink}"); then
    echo "Error: benchmark failed for ${model_name} (${compiler}, ${height}x${width})" >&2
    return 1
  fi

  # grep exits non-zero when a metric is absent; "|| true" keeps that from
  # aborting the whole script under "set -e".
  local inference_time iterations_per_second max_used_cuda_memory warmup_time
  inference_time=$(grep -oP '(?<=Inference time: )\d+\.\d+' <<<"${script_output}" || true)
  iterations_per_second=$(grep -oP '(?<=Iterations per second: )\d+\.\d+' <<<"${script_output}" || true)
  max_used_cuda_memory=$(grep -oP '(?<=Max used CUDA memory : )\d+\.\d+' <<<"${script_output}" || true)
  warmup_time=$(grep -oP '(?<=Warmup time: )\d+\.\d+' <<<"${script_output}" || true)

  # The trailing "\n" is stored literally and expanded when the table is printed.
  BENCHMARK_RESULT_TEXT+="| ${current_time} | ${gpu_name} | ${model_name} | ${height}x${width} | ${compiler} | ${quantize} | ${iterations_per_second:-N/A} | ${inference_time:-N/A} | ${max_used_cuda_memory:-N/A} | ${warmup_time:-N/A} |\n"
}
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Enhance the benchmark function's robustness and output handling. The function has several areas for improvement:
Suggested improvements: benchmark_model_with_one_resolution() {
+ # Validate input parameters
+ [[ -z "$1" ]] && { echo "Error: model_name is required"; return 1; }
+ # ... validate other parameters ...
+
model_name=$1
model_path=$2
steps=$3
compiler=$4
compiler_config=$5
height=$6
width=$7
quantize=$8
+ # Capture command output and exit status
+ set +e
if [[ ${quantize} == True ]]; then
script_output=$(python3 ${script_path} \
--model ${model_path} --variant fp16 --steps ${steps} \
--height ${height} --width ${width} --seed 1 \
--compiler ${compiler} --compiler-config "${compiler_config}" \
--quantize --quantize-config "${quantize_config}" \
--prompt "${prompt}" --print-output | tee /dev/tty)
+ exit_status=$?
else
# ... similar for non-quantize case ...
fi
+ set -e
+
+ # Check for execution errors
+ if [ $exit_status -ne 0 ]; then
+ echo "Error: Benchmark failed for ${model_name}"
+ return 1
+ fi
# Extract metrics with error checking
- inference_time=$(echo "${script_output}" | grep -oP '(?<=Inference time: )\d+\.\d+')
+ inference_time=$(echo "${script_output}" | grep -oP '(?<=Inference time: )\d+\.\d+' || echo "N/A")
# ... similar for other metrics ...
# Fix quote handling in result string
- BENCHMARK_RESULT_TEXT="${BENCHMARK_RESULT_TEXT}| "${current_time}" | "${gpu_name}" | "${model_name}" | ${height}x${width} | ${compiler} | ${quantize} | ${iterations_per_second} | ${inference_time} | ${max_used_cuda_memory} | ${warmup_time} |\n"
+ BENCHMARK_RESULT_TEXT="${BENCHMARK_RESULT_TEXT}| ${current_time} | ${gpu_name} | ${model_name} | ${height}x${width} | ${compiler} | ${quantize} | ${iterations_per_second} | ${inference_time} | ${max_used_cuda_memory} | ${warmup_time} |\n"
}
🧰 Tools🪛 Shellcheck
|
||
|
||
# conda init | ||
source ~/miniconda3/etc/profile.d/conda.sh | ||
|
||
XuZhang99 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
#########################################
# Model selection: run_model is matched as a regular expression, so a
# comma-separated list such as "sd15,sdxl" or the word "all" selects groups.
# Model paths are quoted so directories containing spaces stay one argument.

# oneflow environment: baseline, compiled, and compiled + quantized runs.
if [[ "${run_model}" =~ sd15|all ]]; then
  conda activate oneflow
  benchmark_model_with_one_resolution sd15 "${sd15_path}" 30 none none 512 512 False
  benchmark_model_with_one_resolution sd15 "${sd15_path}" 30 oneflow none 512 512 False
  benchmark_model_with_one_resolution sd15 "${sd15_path}" 30 oneflow none 512 512 True
fi

if [[ "${run_model}" =~ sd21|all ]]; then
  conda activate oneflow
  benchmark_model_with_one_resolution sd21 "${sd21_path}" 20 none none 768 768 False
  benchmark_model_with_one_resolution sd21 "${sd21_path}" 20 oneflow none 768 768 False
  benchmark_model_with_one_resolution sd21 "${sd21_path}" 20 oneflow none 768 768 True
fi

if [[ "${run_model}" =~ sdxl|all ]]; then
  conda activate oneflow
  benchmark_model_with_one_resolution sdxl "${sdxl_path}" 30 none none 1024 1024 False
  benchmark_model_with_one_resolution sdxl "${sdxl_path}" 30 oneflow none 1024 1024 False
  benchmark_model_with_one_resolution sdxl "${sdxl_path}" 30 oneflow none 1024 1024 True
fi
#########################################

#########################################
# nexfort environment: baseline, compiled, and compiled + quantized runs.
if [[ "${run_model}" =~ sd3|all ]]; then
  conda activate nexfort
  benchmark_model_with_one_resolution sd3 "${sd3_path}" 28 none none 1024 1024 False
  benchmark_model_with_one_resolution sd3 "${sd3_path}" 28 nexfort "${sd3_nexfort_compiler_config}" 1024 1024 False
  benchmark_model_with_one_resolution sd3 "${sd3_path}" 28 nexfort "${sd3_nexfort_compiler_config}" 1024 1024 True
fi

# FLUX dev and schnell each get an additional run with the "transform" compiler.
if [[ "${run_model}" =~ flux|all ]]; then
  conda activate nexfort
  benchmark_model_with_one_resolution flux_dev "${flux_dev_path}" 20 none none 1024 1024 False
  benchmark_model_with_one_resolution flux_dev "${flux_dev_path}" 20 nexfort "${flux_nexfort_compiler_config}" 1024 1024 False
  benchmark_model_with_one_resolution flux_dev "${flux_dev_path}" 20 nexfort "${flux_nexfort_compiler_config}" 1024 1024 True
  benchmark_model_with_one_resolution flux_dev "${flux_dev_path}" 20 transform none 1024 1024 False

  benchmark_model_with_one_resolution flux_schell "${flux_schell_path}" 4 none none 1024 1024 False
  benchmark_model_with_one_resolution flux_schell "${flux_schell_path}" 4 nexfort "${flux_nexfort_compiler_config}" 1024 1024 False
  benchmark_model_with_one_resolution flux_schell "${flux_schell_path}" 4 nexfort "${flux_nexfort_compiler_config}" 1024 1024 True
  benchmark_model_with_one_resolution flux_schell "${flux_schell_path}" 4 transform none 1024 1024 False
fi
#########################################
######################################### | ||
|
||
|
||
# Print the accumulated Markdown table and append it to a per-GPU results file.
# BENCHMARK_RESULT_TEXT stores literal "\n" sequences; printf '%b' expands them
# while the quoting keeps the text safe from word splitting and globbing.
echo -e "\nBenchmark Results:"
printf '%b\n' "${BENCHMARK_RESULT_TEXT}" | tee -a "benchmark_result_${gpu_name}.md"
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🛠️ Refactor suggestion: Consider adding parallel execution support for faster benchmarking. The script runs benchmarks sequentially, which could be time-consuming when benchmarking multiple models. Consider adding support for parallel execution using GNU Parallel:
+# Function to check if GNU Parallel is installed
+check_parallel() {
+ if ! command -v parallel &> /dev/null; then
+ echo "GNU Parallel not found. Running benchmarks sequentially."
+ return 1
+ fi
+ return 0
+}
+
+# Function to generate benchmark commands
+generate_benchmark_commands() {
+ local model=$1
+ case $model in
+ sd15)
+ echo "benchmark_model_with_one_resolution sd15 ${sd15_path} 30 none none 512 512 False"
+ echo "benchmark_model_with_one_resolution sd15 ${sd15_path} 30 oneflow none 512 512 False"
+ # ... other commands ...
+ ;;
+ # ... other models ...
+ esac
+}
+
+# Main execution
+if check_parallel; then
+ # Generate all commands and run in parallel
+ for model in ${run_model//,/ }; do
+ generate_benchmark_commands "$model"
+ done | parallel --will-cite
+else
+ # Existing sequential execution
+ if [[ "${run_model}" =~ sd15|all ]]; then
+ # ... existing code ...
+ fi
+ # ... other models ...
+fi
🧰 Tools🪛 Shellcheck
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add error handling for required command-line argument.
The script should validate that the model argument is provided and show usage information if missing.
Add this at the beginning of the script:
📝 Committable suggestion