| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389 |
- 2026-05-15 08:41:06 - evalscope - INFO: Starting benchmark with args:
- 2026-05-15 08:41:06 - evalscope - INFO: {
- "model": "Qwen3.6-27B-W8A8",
- "model_id": "Qwen3.6-27B-W8A8",
- "attn_implementation": null,
- "api": "openai",
- "tokenizer_path": "/opt/lq/models/Qwen3.6-27B-W8A8",
- "port": 8877,
- "url": "http://127.0.0.1:8004/v1/chat/completions",
- "headers": {
- "Authorization": "Bearer sk-123456"
- },
- "connect_timeout": null,
- "read_timeout": null,
- "total_timeout": 21600,
- "api_key": "sk-123456",
- "no_test_connection": false,
- "number": 1,
- "parallel": 1,
- "rate": -1,
- "sleep_interval": 5,
- "sla_auto_tune": false,
- "sla_variable": "parallel",
- "sla_params": null,
- "sla_num_runs": 3,
- "sla_upper_bound": 65536,
- "sla_lower_bound": 1,
- "db_commit_interval": 1000,
- "queue_size_multiplier": 5,
- "in_flight_task_multiplier": 2,
- "log_every_n_query": 10,
- "debug": false,
- "visualizer": null,
- "wandb_api_key": null,
- "swanlab_api_key": null,
- "name": null,
- "outputs_dir": "outputs/20260515_084106/Qwen3.6-27B-W8A8",
- "no_timestamp": false,
- "max_prompt_length": 2048,
- "min_prompt_length": 2048,
- "prefix_length": 0,
- "prompt": null,
- "query_template": null,
- "apply_chat_template": true,
- "image_width": 224,
- "image_height": 224,
- "image_format": "RGB",
- "image_num": 1,
- "image_patch_size": 28,
- "dataset": "random",
- "dataset_path": null,
- "frequency_penalty": null,
- "repetition_penalty": null,
- "logprobs": null,
- "max_tokens": 128,
- "min_tokens": 128,
- "n_choices": null,
- "seed": null,
- "stop": null,
- "stop_token_ids": null,
- "stream": true,
- "temperature": 0.0,
- "top_p": null,
- "top_k": null,
- "extra_args": {}
- }
- 2026-05-15 08:41:22 - evalscope - INFO: Test connection successful.
- 2026-05-15 08:41:25 - evalscope - INFO: Using 248044 allowed tokens out of 248044 total tokens
- 2026-05-15 08:41:25 - evalscope - INFO: Sampling input lengths from [2046, 2047)
- 2026-05-15 08:41:26 - evalscope - INFO: Save the data base to: outputs/20260515_084106/Qwen3.6-27B-W8A8/parallel_1_number_1/benchmark_data.db
- 2026-05-15 08:41:33 - evalscope - INFO: Processing 100%| 1/1 [Elapsed: 00:07 < Remaining: 00:00, 7.37s/it]
- 2026-05-15 08:41:33 - evalscope - INFO:
- Benchmarking summary:
- +-----------------------------------+-----------+
- | Key | Value |
- +===================================+===========+
- | Time taken for tests (s) | 7.3689 |
- +-----------------------------------+-----------+
- | Number of concurrency | 1 |
- +-----------------------------------+-----------+
- | Request rate (req/s) | -1 |
- +-----------------------------------+-----------+
- | Total requests | 1 |
- +-----------------------------------+-----------+
- | Succeed requests | 1 |
- +-----------------------------------+-----------+
- | Failed requests | 0 |
- +-----------------------------------+-----------+
- | Output token throughput (tok/s) | 17.3704 |
- +-----------------------------------+-----------+
- | Total token throughput (tok/s) | 296.383 |
- +-----------------------------------+-----------+
- | Request throughput (req/s) | 0.1357 |
- +-----------------------------------+-----------+
- | Average latency (s) | 7.3689 |
- +-----------------------------------+-----------+
- | Average time to first token (s) | 0.8157 |
- +-----------------------------------+-----------+
- | Average time per output token (s) | 0.0516 |
- +-----------------------------------+-----------+
- | Average inter-token latency (s) | 0.0512 |
- +-----------------------------------+-----------+
- | Average input tokens per request | 2056 |
- +-----------------------------------+-----------+
- | Average output tokens per request | 128 |
- +-----------------------------------+-----------+
- 2026-05-15 08:41:33 - evalscope - INFO:
- Percentile results:
- +-------------+----------+---------+----------+-------------+--------------+---------------+----------------+---------------+
- | Percentiles | TTFT (s) | ITL (s) | TPOT (s) | Latency (s) | Input tokens | Output tokens | Output (tok/s) | Total (tok/s) |
- +-------------+----------+---------+----------+-------------+--------------+---------------+----------------+---------------+
- | 10% | 0.8157 | 0.0514 | 0.0516 | 7.3689 | 2056 | 128 | 17.3704 | 296.3827 |
- | 25% | 0.8157 | 0.0515 | 0.0516 | 7.3689 | 2056 | 128 | 17.3704 | 296.3827 |
- | 50% | 0.8157 | 0.0516 | 0.0516 | 7.3689 | 2056 | 128 | 17.3704 | 296.3827 |
- | 66% | 0.8157 | 0.0517 | 0.0516 | 7.3689 | 2056 | 128 | 17.3704 | 296.3827 |
- | 75% | 0.8157 | 0.0518 | 0.0516 | 7.3689 | 2056 | 128 | 17.3704 | 296.3827 |
- | 80% | 0.8157 | 0.0518 | 0.0516 | 7.3689 | 2056 | 128 | 17.3704 | 296.3827 |
- | 90% | 0.8157 | 0.0519 | 0.0516 | 7.3689 | 2056 | 128 | 17.3704 | 296.3827 |
- | 95% | 0.8157 | 0.052 | 0.0516 | 7.3689 | 2056 | 128 | 17.3704 | 296.3827 |
- | 98% | 0.8157 | 0.0523 | 0.0516 | 7.3689 | 2056 | 128 | 17.3704 | 296.3827 |
- | 99% | 0.8157 | 0.0523 | 0.0516 | 7.3689 | 2056 | 128 | 17.3704 | 296.3827 |
- +-------------+----------+---------+----------+-------------+--------------+---------------+----------------+---------------+
- 2026-05-15 08:41:33 - evalscope - INFO: Save the summary to: outputs/20260515_084106/Qwen3.6-27B-W8A8/parallel_1_number_1
- 2026-05-15 08:41:33 - evalscope - INFO: Sleeping for 5 seconds before the next run...
- 2026-05-15 08:41:38 - evalscope - INFO: Starting benchmark with args:
- 2026-05-15 08:41:38 - evalscope - INFO: {
- "model": "Qwen3.6-27B-W8A8",
- "model_id": "Qwen3.6-27B-W8A8",
- "attn_implementation": null,
- "api": "openai",
- "tokenizer_path": "/opt/lq/models/Qwen3.6-27B-W8A8",
- "port": 8877,
- "url": "http://127.0.0.1:8004/v1/chat/completions",
- "headers": {
- "Authorization": "Bearer sk-123456"
- },
- "connect_timeout": null,
- "read_timeout": null,
- "total_timeout": 21600,
- "api_key": "sk-123456",
- "no_test_connection": false,
- "number": 5,
- "parallel": 5,
- "rate": -1,
- "sleep_interval": 5,
- "sla_auto_tune": false,
- "sla_variable": "parallel",
- "sla_params": null,
- "sla_num_runs": 3,
- "sla_upper_bound": 65536,
- "sla_lower_bound": 1,
- "db_commit_interval": 1000,
- "queue_size_multiplier": 5,
- "in_flight_task_multiplier": 2,
- "log_every_n_query": 10,
- "debug": false,
- "visualizer": null,
- "wandb_api_key": null,
- "swanlab_api_key": null,
- "name": null,
- "outputs_dir": "outputs/20260515_084106/Qwen3.6-27B-W8A8",
- "no_timestamp": false,
- "max_prompt_length": 2048,
- "min_prompt_length": 2048,
- "prefix_length": 0,
- "prompt": null,
- "query_template": null,
- "apply_chat_template": true,
- "image_width": 224,
- "image_height": 224,
- "image_format": "RGB",
- "image_num": 1,
- "image_patch_size": 28,
- "dataset": "random",
- "dataset_path": null,
- "frequency_penalty": null,
- "repetition_penalty": null,
- "logprobs": null,
- "max_tokens": 128,
- "min_tokens": 128,
- "n_choices": null,
- "seed": null,
- "stop": null,
- "stop_token_ids": null,
- "stream": true,
- "temperature": 0.0,
- "top_p": null,
- "top_k": null,
- "extra_args": {}
- }
- 2026-05-15 08:41:48 - evalscope - INFO: Test connection successful.
- 2026-05-15 08:41:51 - evalscope - INFO: Using 248044 allowed tokens out of 248044 total tokens
- 2026-05-15 08:41:51 - evalscope - INFO: Sampling input lengths from [2046, 2047)
- 2026-05-15 08:41:51 - evalscope - INFO: Save the data base to: outputs/20260515_084106/Qwen3.6-27B-W8A8/parallel_5_number_5/benchmark_data.db
- 2026-05-15 08:41:58 - evalscope - INFO: Processing 100%| 5/5 [Elapsed: 00:06 < Remaining: 00:00, 1.01it/s]
- 2026-05-15 08:41:58 - evalscope - INFO:
- Benchmarking summary:
- +-----------------------------------+-----------+
- | Key | Value |
- +===================================+===========+
- | Time taken for tests (s) | 6.6303 |
- +-----------------------------------+-----------+
- | Number of concurrency | 5 |
- +-----------------------------------+-----------+
- | Request rate (req/s) | -1 |
- +-----------------------------------+-----------+
- | Total requests | 5 |
- +-----------------------------------+-----------+
- | Succeed requests | 5 |
- +-----------------------------------+-----------+
- | Failed requests | 0 |
- +-----------------------------------+-----------+
- | Output token throughput (tok/s) | 96.5268 |
- +-----------------------------------+-----------+
- | Total token throughput (tok/s) | 1647.29 |
- +-----------------------------------+-----------+
- | Request throughput (req/s) | 0.7541 |
- +-----------------------------------+-----------+
- | Average latency (s) | 6.5697 |
- +-----------------------------------+-----------+
- | Average time to first token (s) | 2.1216 |
- +-----------------------------------+-----------+
- | Average time per output token (s) | 0.035 |
- +-----------------------------------+-----------+
- | Average inter-token latency (s) | 0.0348 |
- +-----------------------------------+-----------+
- | Average input tokens per request | 2056.4 |
- +-----------------------------------+-----------+
- | Average output tokens per request | 128 |
- +-----------------------------------+-----------+
- 2026-05-15 08:41:58 - evalscope - INFO:
- Percentile results:
- +-------------+----------+---------+----------+-------------+--------------+---------------+----------------+---------------+
- | Percentiles | TTFT (s) | ITL (s) | TPOT (s) | Latency (s) | Input tokens | Output tokens | Output (tok/s) | Total (tok/s) |
- +-------------+----------+---------+----------+-------------+--------------+---------------+----------------+---------------+
- | 10% | 0.6655 | 0.0295 | 0.0299 | 6.5201 | 2056 | 128 | 19.3128 | 329.6753 |
- | 25% | 1.6872 | 0.0296 | 0.0304 | 6.5483 | 2056 | 128 | 19.4617 | 332.0644 |
- | 50% | 2.7127 | 0.0297 | 0.0304 | 6.5751 | 2056 | 128 | 19.4674 | 332.1626 |
- | 66% | 2.7127 | 0.0298 | 0.0383 | 6.577 | 2057 | 128 | 19.5469 | 333.6721 |
- | 75% | 2.7127 | 0.0299 | 0.0383 | 6.577 | 2057 | 128 | 19.5469 | 333.6721 |
- | 80% | 2.8297 | 0.0299 | 0.0461 | 6.6277 | 2057 | 128 | 19.6315 | 334.9623 |
- | 90% | 2.8297 | 0.0301 | 0.0461 | 6.6277 | 2057 | 128 | 19.6315 | 334.9623 |
- | 95% | 2.8297 | 0.0304 | 0.0461 | 6.6277 | 2057 | 128 | 19.6315 | 334.9623 |
- | 98% | 2.8297 | 0.031 | 0.0461 | 6.6277 | 2057 | 128 | 19.6315 | 334.9623 |
- | 99% | 2.8297 | 0.1171 | 0.0461 | 6.6277 | 2057 | 128 | 19.6315 | 334.9623 |
- +-------------+----------+---------+----------+-------------+--------------+---------------+----------------+---------------+
- 2026-05-15 08:41:58 - evalscope - INFO: Save the summary to: outputs/20260515_084106/Qwen3.6-27B-W8A8/parallel_5_number_5
- 2026-05-15 08:41:58 - evalscope - INFO: Sleeping for 5 seconds before the next run...
- 2026-05-15 08:42:03 - evalscope - INFO: Starting benchmark with args:
- 2026-05-15 08:42:03 - evalscope - INFO: {
- "model": "Qwen3.6-27B-W8A8",
- "model_id": "Qwen3.6-27B-W8A8",
- "attn_implementation": null,
- "api": "openai",
- "tokenizer_path": "/opt/lq/models/Qwen3.6-27B-W8A8",
- "port": 8877,
- "url": "http://127.0.0.1:8004/v1/chat/completions",
- "headers": {
- "Authorization": "Bearer sk-123456"
- },
- "connect_timeout": null,
- "read_timeout": null,
- "total_timeout": 21600,
- "api_key": "sk-123456",
- "no_test_connection": false,
- "number": 10,
- "parallel": 10,
- "rate": -1,
- "sleep_interval": 5,
- "sla_auto_tune": false,
- "sla_variable": "parallel",
- "sla_params": null,
- "sla_num_runs": 3,
- "sla_upper_bound": 65536,
- "sla_lower_bound": 1,
- "db_commit_interval": 1000,
- "queue_size_multiplier": 5,
- "in_flight_task_multiplier": 2,
- "log_every_n_query": 10,
- "debug": false,
- "visualizer": null,
- "wandb_api_key": null,
- "swanlab_api_key": null,
- "name": null,
- "outputs_dir": "outputs/20260515_084106/Qwen3.6-27B-W8A8",
- "no_timestamp": false,
- "max_prompt_length": 2048,
- "min_prompt_length": 2048,
- "prefix_length": 0,
- "prompt": null,
- "query_template": null,
- "apply_chat_template": true,
- "image_width": 224,
- "image_height": 224,
- "image_format": "RGB",
- "image_num": 1,
- "image_patch_size": 28,
- "dataset": "random",
- "dataset_path": null,
- "frequency_penalty": null,
- "repetition_penalty": null,
- "logprobs": null,
- "max_tokens": 128,
- "min_tokens": 128,
- "n_choices": null,
- "seed": null,
- "stop": null,
- "stop_token_ids": null,
- "stream": true,
- "temperature": 0.0,
- "top_p": null,
- "top_k": null,
- "extra_args": {}
- }
- 2026-05-15 08:42:12 - evalscope - INFO: Test connection successful.
- 2026-05-15 08:42:16 - evalscope - INFO: Using 248044 allowed tokens out of 248044 total tokens
- 2026-05-15 08:42:16 - evalscope - INFO: Sampling input lengths from [2046, 2047)
- 2026-05-15 08:42:16 - evalscope - INFO: Save the data base to: outputs/20260515_084106/Qwen3.6-27B-W8A8/parallel_10_number_10/benchmark_data.db
- 2026-05-15 08:42:26 - evalscope - INFO: {
- "Time taken for tests (s)": 9.6293,
- "Number of concurrency": 10,
- "Request rate (req/s)": -1,
- "Total requests": 10,
- "Succeed requests": 10,
- "Failed requests": 0,
- "Output token throughput (tok/s)": 132.928,
- "Total token throughput (tok/s)": 2268.4997,
- "Request throughput (req/s)": 1.0385,
- "Average latency (s)": 9.567,
- "Average time to first token (s)": 3.6071,
- "Average time per output token (s)": 0.0469,
- "Average inter-token latency (s)": 0.0466,
- "Average input tokens per request": 2056.4,
- "Average output tokens per request": 128.0
- }
- 2026-05-15 08:42:26 - evalscope - INFO: Processing 100%| 10/10 [Elapsed: 00:09 < Remaining: 00:00, 1.01s/it]
- 2026-05-15 08:42:26 - evalscope - INFO:
- Benchmarking summary:
- +-----------------------------------+-----------+
- | Key | Value |
- +===================================+===========+
- | Time taken for tests (s) | 9.6293 |
- +-----------------------------------+-----------+
- | Number of concurrency | 10 |
- +-----------------------------------+-----------+
- | Request rate (req/s) | -1 |
- +-----------------------------------+-----------+
- | Total requests | 10 |
- +-----------------------------------+-----------+
- | Succeed requests | 10 |
- +-----------------------------------+-----------+
- | Failed requests | 0 |
- +-----------------------------------+-----------+
- | Output token throughput (tok/s) | 132.928 |
- +-----------------------------------+-----------+
- | Total token throughput (tok/s) | 2268.5 |
- +-----------------------------------+-----------+
- | Request throughput (req/s) | 1.0385 |
- +-----------------------------------+-----------+
- | Average latency (s) | 9.567 |
- +-----------------------------------+-----------+
- | Average time to first token (s) | 3.6071 |
- +-----------------------------------+-----------+
- | Average time per output token (s) | 0.0469 |
- +-----------------------------------+-----------+
- | Average inter-token latency (s) | 0.0466 |
- +-----------------------------------+-----------+
- | Average input tokens per request | 2056.4 |
- +-----------------------------------+-----------+
- | Average output tokens per request | 128 |
- +-----------------------------------+-----------+
- 2026-05-15 08:42:26 - evalscope - INFO:
- Percentile results:
- +-------------+----------+---------+----------+-------------+--------------+---------------+----------------+---------------+
- | Percentiles | TTFT (s) | ITL (s) | TPOT (s) | Latency (s) | Input tokens | Output tokens | Output (tok/s) | Total (tok/s) |
- +-------------+----------+---------+----------+-------------+--------------+---------------+----------------+---------------+
- | 10% | 1.7449 | 0.0332 | 0.0332 | 9.5131 | 2056 | 128 | 13.2998 | 226.9284 |
- | 25% | 2.7691 | 0.0333 | 0.0376 | 9.5425 | 2056 | 128 | 13.3343 | 227.5162 |
- | 50% | 3.7961 | 0.0333 | 0.0455 | 9.5729 | 2056 | 128 | 13.3741 | 228.2483 |
- | 66% | 4.8242 | 0.0334 | 0.0533 | 9.5984 | 2056 | 128 | 13.412 | 228.8415 |
- | 75% | 4.825 | 0.0334 | 0.0533 | 9.5993 | 2057 | 128 | 13.4137 | 229.0807 |
- | 80% | 5.4098 | 0.0335 | 0.0612 | 9.6242 | 2057 | 128 | 13.4552 | 229.5791 |
- | 90% | 5.41 | 0.0336 | 0.0689 | 9.6253 | 2058 | 128 | 13.5021 | 230.4848 |
- | 95% | 5.41 | 0.0337 | 0.0689 | 9.6253 | 2058 | 128 | 13.5021 | 230.4848 |
- | 98% | 5.41 | 0.0367 | 0.0689 | 9.6253 | 2058 | 128 | 13.5021 | 230.4848 |
- | 99% | 5.41 | 1.0169 | 0.0689 | 9.6253 | 2058 | 128 | 13.5021 | 230.4848 |
- +-------------+----------+---------+----------+-------------+--------------+---------------+----------------+---------------+
- 2026-05-15 08:42:26 - evalscope - INFO: Save the summary to: outputs/20260515_084106/Qwen3.6-27B-W8A8/parallel_10_number_10
- 2026-05-15 08:42:26 - evalscope - INFO: Performance summary saved to: outputs/20260515_084106/Qwen3.6-27B-W8A8/performance_summary.txt
|