|
|
@@ -0,0 +1,389 @@
|
|
|
+2026-05-15 08:41:06 - evalscope - INFO: Starting benchmark with args:
|
|
|
+2026-05-15 08:41:06 - evalscope - INFO: {
|
|
|
+ "model": "Qwen3.6-27B-W8A8",
|
|
|
+ "model_id": "Qwen3.6-27B-W8A8",
|
|
|
+ "attn_implementation": null,
|
|
|
+ "api": "openai",
|
|
|
+ "tokenizer_path": "/opt/lq/models/Qwen3.6-27B-W8A8",
|
|
|
+ "port": 8877,
|
|
|
+ "url": "http://127.0.0.1:8004/v1/chat/completions",
|
|
|
+ "headers": {
|
|
|
+ "Authorization": "Bearer sk-123456"
|
|
|
+ },
|
|
|
+ "connect_timeout": null,
|
|
|
+ "read_timeout": null,
|
|
|
+ "total_timeout": 21600,
|
|
|
+ "api_key": "sk-123456",
|
|
|
+ "no_test_connection": false,
|
|
|
+ "number": 1,
|
|
|
+ "parallel": 1,
|
|
|
+ "rate": -1,
|
|
|
+ "sleep_interval": 5,
|
|
|
+ "sla_auto_tune": false,
|
|
|
+ "sla_variable": "parallel",
|
|
|
+ "sla_params": null,
|
|
|
+ "sla_num_runs": 3,
|
|
|
+ "sla_upper_bound": 65536,
|
|
|
+ "sla_lower_bound": 1,
|
|
|
+ "db_commit_interval": 1000,
|
|
|
+ "queue_size_multiplier": 5,
|
|
|
+ "in_flight_task_multiplier": 2,
|
|
|
+ "log_every_n_query": 10,
|
|
|
+ "debug": false,
|
|
|
+ "visualizer": null,
|
|
|
+ "wandb_api_key": null,
|
|
|
+ "swanlab_api_key": null,
|
|
|
+ "name": null,
|
|
|
+ "outputs_dir": "outputs/20260515_084106/Qwen3.6-27B-W8A8",
|
|
|
+ "no_timestamp": false,
|
|
|
+ "max_prompt_length": 2048,
|
|
|
+ "min_prompt_length": 2048,
|
|
|
+ "prefix_length": 0,
|
|
|
+ "prompt": null,
|
|
|
+ "query_template": null,
|
|
|
+ "apply_chat_template": true,
|
|
|
+ "image_width": 224,
|
|
|
+ "image_height": 224,
|
|
|
+ "image_format": "RGB",
|
|
|
+ "image_num": 1,
|
|
|
+ "image_patch_size": 28,
|
|
|
+ "dataset": "random",
|
|
|
+ "dataset_path": null,
|
|
|
+ "frequency_penalty": null,
|
|
|
+ "repetition_penalty": null,
|
|
|
+ "logprobs": null,
|
|
|
+ "max_tokens": 128,
|
|
|
+ "min_tokens": 128,
|
|
|
+ "n_choices": null,
|
|
|
+ "seed": null,
|
|
|
+ "stop": null,
|
|
|
+ "stop_token_ids": null,
|
|
|
+ "stream": true,
|
|
|
+ "temperature": 0.0,
|
|
|
+ "top_p": null,
|
|
|
+ "top_k": null,
|
|
|
+ "extra_args": {}
|
|
|
+}
|
|
|
+2026-05-15 08:41:22 - evalscope - INFO: Test connection successful.
|
|
|
+2026-05-15 08:41:25 - evalscope - INFO: Using 248044 allowed tokens out of 248044 total tokens
|
|
|
+2026-05-15 08:41:25 - evalscope - INFO: Sampling input lengths from [2046, 2047)
|
|
|
+2026-05-15 08:41:26 - evalscope - INFO: Save the data base to: outputs/20260515_084106/Qwen3.6-27B-W8A8/parallel_1_number_1/benchmark_data.db
|
|
|
+2026-05-15 08:41:33 - evalscope - INFO: Processing 100%| 1/1 [Elapsed: 00:07 < Remaining: 00:00, 7.37s/it]
|
|
|
+2026-05-15 08:41:33 - evalscope - INFO:
|
|
|
+Benchmarking summary:
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Key | Value |
|
|
|
++===================================+===========+
|
|
|
+| Time taken for tests (s) | 7.3689 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Number of concurrency | 1 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Request rate (req/s) | -1 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Total requests | 1 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Succeed requests | 1 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Failed requests | 0 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Output token throughput (tok/s) | 17.3704 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Total token throughput (tok/s) | 296.383 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Request throughput (req/s) | 0.1357 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Average latency (s) | 7.3689 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Average time to first token (s) | 0.8157 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Average time per output token (s) | 0.0516 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Average inter-token latency (s) | 0.0512 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Average input tokens per request | 2056 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Average output tokens per request | 128 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+2026-05-15 08:41:33 - evalscope - INFO:
|
|
|
+Percentile results:
|
|
|
++-------------+----------+---------+----------+-------------+--------------+---------------+----------------+---------------+
|
|
|
+| Percentiles | TTFT (s) | ITL (s) | TPOT (s) | Latency (s) | Input tokens | Output tokens | Output (tok/s) | Total (tok/s) |
|
|
|
++-------------+----------+---------+----------+-------------+--------------+---------------+----------------+---------------+
|
|
|
+| 10% | 0.8157 | 0.0514 | 0.0516 | 7.3689 | 2056 | 128 | 17.3704 | 296.3827 |
|
|
|
+| 25% | 0.8157 | 0.0515 | 0.0516 | 7.3689 | 2056 | 128 | 17.3704 | 296.3827 |
|
|
|
+| 50% | 0.8157 | 0.0516 | 0.0516 | 7.3689 | 2056 | 128 | 17.3704 | 296.3827 |
|
|
|
+| 66% | 0.8157 | 0.0517 | 0.0516 | 7.3689 | 2056 | 128 | 17.3704 | 296.3827 |
|
|
|
+| 75% | 0.8157 | 0.0518 | 0.0516 | 7.3689 | 2056 | 128 | 17.3704 | 296.3827 |
|
|
|
+| 80% | 0.8157 | 0.0518 | 0.0516 | 7.3689 | 2056 | 128 | 17.3704 | 296.3827 |
|
|
|
+| 90% | 0.8157 | 0.0519 | 0.0516 | 7.3689 | 2056 | 128 | 17.3704 | 296.3827 |
|
|
|
+| 95% | 0.8157 | 0.052 | 0.0516 | 7.3689 | 2056 | 128 | 17.3704 | 296.3827 |
|
|
|
+| 98% | 0.8157 | 0.0523 | 0.0516 | 7.3689 | 2056 | 128 | 17.3704 | 296.3827 |
|
|
|
+| 99% | 0.8157 | 0.0523 | 0.0516 | 7.3689 | 2056 | 128 | 17.3704 | 296.3827 |
|
|
|
++-------------+----------+---------+----------+-------------+--------------+---------------+----------------+---------------+
|
|
|
+2026-05-15 08:41:33 - evalscope - INFO: Save the summary to: outputs/20260515_084106/Qwen3.6-27B-W8A8/parallel_1_number_1
|
|
|
+2026-05-15 08:41:33 - evalscope - INFO: Sleeping for 5 seconds before the next run...
|
|
|
+2026-05-15 08:41:38 - evalscope - INFO: Starting benchmark with args:
|
|
|
+2026-05-15 08:41:38 - evalscope - INFO: {
|
|
|
+ "model": "Qwen3.6-27B-W8A8",
|
|
|
+ "model_id": "Qwen3.6-27B-W8A8",
|
|
|
+ "attn_implementation": null,
|
|
|
+ "api": "openai",
|
|
|
+ "tokenizer_path": "/opt/lq/models/Qwen3.6-27B-W8A8",
|
|
|
+ "port": 8877,
|
|
|
+ "url": "http://127.0.0.1:8004/v1/chat/completions",
|
|
|
+ "headers": {
|
|
|
+ "Authorization": "Bearer sk-123456"
|
|
|
+ },
|
|
|
+ "connect_timeout": null,
|
|
|
+ "read_timeout": null,
|
|
|
+ "total_timeout": 21600,
|
|
|
+ "api_key": "sk-123456",
|
|
|
+ "no_test_connection": false,
|
|
|
+ "number": 5,
|
|
|
+ "parallel": 5,
|
|
|
+ "rate": -1,
|
|
|
+ "sleep_interval": 5,
|
|
|
+ "sla_auto_tune": false,
|
|
|
+ "sla_variable": "parallel",
|
|
|
+ "sla_params": null,
|
|
|
+ "sla_num_runs": 3,
|
|
|
+ "sla_upper_bound": 65536,
|
|
|
+ "sla_lower_bound": 1,
|
|
|
+ "db_commit_interval": 1000,
|
|
|
+ "queue_size_multiplier": 5,
|
|
|
+ "in_flight_task_multiplier": 2,
|
|
|
+ "log_every_n_query": 10,
|
|
|
+ "debug": false,
|
|
|
+ "visualizer": null,
|
|
|
+ "wandb_api_key": null,
|
|
|
+ "swanlab_api_key": null,
|
|
|
+ "name": null,
|
|
|
+ "outputs_dir": "outputs/20260515_084106/Qwen3.6-27B-W8A8",
|
|
|
+ "no_timestamp": false,
|
|
|
+ "max_prompt_length": 2048,
|
|
|
+ "min_prompt_length": 2048,
|
|
|
+ "prefix_length": 0,
|
|
|
+ "prompt": null,
|
|
|
+ "query_template": null,
|
|
|
+ "apply_chat_template": true,
|
|
|
+ "image_width": 224,
|
|
|
+ "image_height": 224,
|
|
|
+ "image_format": "RGB",
|
|
|
+ "image_num": 1,
|
|
|
+ "image_patch_size": 28,
|
|
|
+ "dataset": "random",
|
|
|
+ "dataset_path": null,
|
|
|
+ "frequency_penalty": null,
|
|
|
+ "repetition_penalty": null,
|
|
|
+ "logprobs": null,
|
|
|
+ "max_tokens": 128,
|
|
|
+ "min_tokens": 128,
|
|
|
+ "n_choices": null,
|
|
|
+ "seed": null,
|
|
|
+ "stop": null,
|
|
|
+ "stop_token_ids": null,
|
|
|
+ "stream": true,
|
|
|
+ "temperature": 0.0,
|
|
|
+ "top_p": null,
|
|
|
+ "top_k": null,
|
|
|
+ "extra_args": {}
|
|
|
+}
|
|
|
+2026-05-15 08:41:48 - evalscope - INFO: Test connection successful.
|
|
|
+2026-05-15 08:41:51 - evalscope - INFO: Using 248044 allowed tokens out of 248044 total tokens
|
|
|
+2026-05-15 08:41:51 - evalscope - INFO: Sampling input lengths from [2046, 2047)
|
|
|
+2026-05-15 08:41:51 - evalscope - INFO: Save the data base to: outputs/20260515_084106/Qwen3.6-27B-W8A8/parallel_5_number_5/benchmark_data.db
|
|
|
+2026-05-15 08:41:58 - evalscope - INFO: Processing 100%| 5/5 [Elapsed: 00:06 < Remaining: 00:00, 1.01it/s]
|
|
|
+2026-05-15 08:41:58 - evalscope - INFO:
|
|
|
+Benchmarking summary:
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Key | Value |
|
|
|
++===================================+===========+
|
|
|
+| Time taken for tests (s) | 6.6303 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Number of concurrency | 5 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Request rate (req/s) | -1 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Total requests | 5 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Succeed requests | 5 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Failed requests | 0 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Output token throughput (tok/s) | 96.5268 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Total token throughput (tok/s) | 1647.29 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Request throughput (req/s) | 0.7541 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Average latency (s) | 6.5697 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Average time to first token (s) | 2.1216 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Average time per output token (s) | 0.035 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Average inter-token latency (s) | 0.0348 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Average input tokens per request | 2056.4 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Average output tokens per request | 128 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+2026-05-15 08:41:58 - evalscope - INFO:
|
|
|
+Percentile results:
|
|
|
++-------------+----------+---------+----------+-------------+--------------+---------------+----------------+---------------+
|
|
|
+| Percentiles | TTFT (s) | ITL (s) | TPOT (s) | Latency (s) | Input tokens | Output tokens | Output (tok/s) | Total (tok/s) |
|
|
|
++-------------+----------+---------+----------+-------------+--------------+---------------+----------------+---------------+
|
|
|
+| 10% | 0.6655 | 0.0295 | 0.0299 | 6.5201 | 2056 | 128 | 19.3128 | 329.6753 |
|
|
|
+| 25% | 1.6872 | 0.0296 | 0.0304 | 6.5483 | 2056 | 128 | 19.4617 | 332.0644 |
|
|
|
+| 50% | 2.7127 | 0.0297 | 0.0304 | 6.5751 | 2056 | 128 | 19.4674 | 332.1626 |
|
|
|
+| 66% | 2.7127 | 0.0298 | 0.0383 | 6.577 | 2057 | 128 | 19.5469 | 333.6721 |
|
|
|
+| 75% | 2.7127 | 0.0299 | 0.0383 | 6.577 | 2057 | 128 | 19.5469 | 333.6721 |
|
|
|
+| 80% | 2.8297 | 0.0299 | 0.0461 | 6.6277 | 2057 | 128 | 19.6315 | 334.9623 |
|
|
|
+| 90% | 2.8297 | 0.0301 | 0.0461 | 6.6277 | 2057 | 128 | 19.6315 | 334.9623 |
|
|
|
+| 95% | 2.8297 | 0.0304 | 0.0461 | 6.6277 | 2057 | 128 | 19.6315 | 334.9623 |
|
|
|
+| 98% | 2.8297 | 0.031 | 0.0461 | 6.6277 | 2057 | 128 | 19.6315 | 334.9623 |
|
|
|
+| 99% | 2.8297 | 0.1171 | 0.0461 | 6.6277 | 2057 | 128 | 19.6315 | 334.9623 |
|
|
|
++-------------+----------+---------+----------+-------------+--------------+---------------+----------------+---------------+
|
|
|
+2026-05-15 08:41:58 - evalscope - INFO: Save the summary to: outputs/20260515_084106/Qwen3.6-27B-W8A8/parallel_5_number_5
|
|
|
+2026-05-15 08:41:58 - evalscope - INFO: Sleeping for 5 seconds before the next run...
|
|
|
+2026-05-15 08:42:03 - evalscope - INFO: Starting benchmark with args:
|
|
|
+2026-05-15 08:42:03 - evalscope - INFO: {
|
|
|
+ "model": "Qwen3.6-27B-W8A8",
|
|
|
+ "model_id": "Qwen3.6-27B-W8A8",
|
|
|
+ "attn_implementation": null,
|
|
|
+ "api": "openai",
|
|
|
+ "tokenizer_path": "/opt/lq/models/Qwen3.6-27B-W8A8",
|
|
|
+ "port": 8877,
|
|
|
+ "url": "http://127.0.0.1:8004/v1/chat/completions",
|
|
|
+ "headers": {
|
|
|
+ "Authorization": "Bearer sk-123456"
|
|
|
+ },
|
|
|
+ "connect_timeout": null,
|
|
|
+ "read_timeout": null,
|
|
|
+ "total_timeout": 21600,
|
|
|
+ "api_key": "sk-123456",
|
|
|
+ "no_test_connection": false,
|
|
|
+ "number": 10,
|
|
|
+ "parallel": 10,
|
|
|
+ "rate": -1,
|
|
|
+ "sleep_interval": 5,
|
|
|
+ "sla_auto_tune": false,
|
|
|
+ "sla_variable": "parallel",
|
|
|
+ "sla_params": null,
|
|
|
+ "sla_num_runs": 3,
|
|
|
+ "sla_upper_bound": 65536,
|
|
|
+ "sla_lower_bound": 1,
|
|
|
+ "db_commit_interval": 1000,
|
|
|
+ "queue_size_multiplier": 5,
|
|
|
+ "in_flight_task_multiplier": 2,
|
|
|
+ "log_every_n_query": 10,
|
|
|
+ "debug": false,
|
|
|
+ "visualizer": null,
|
|
|
+ "wandb_api_key": null,
|
|
|
+ "swanlab_api_key": null,
|
|
|
+ "name": null,
|
|
|
+ "outputs_dir": "outputs/20260515_084106/Qwen3.6-27B-W8A8",
|
|
|
+ "no_timestamp": false,
|
|
|
+ "max_prompt_length": 2048,
|
|
|
+ "min_prompt_length": 2048,
|
|
|
+ "prefix_length": 0,
|
|
|
+ "prompt": null,
|
|
|
+ "query_template": null,
|
|
|
+ "apply_chat_template": true,
|
|
|
+ "image_width": 224,
|
|
|
+ "image_height": 224,
|
|
|
+ "image_format": "RGB",
|
|
|
+ "image_num": 1,
|
|
|
+ "image_patch_size": 28,
|
|
|
+ "dataset": "random",
|
|
|
+ "dataset_path": null,
|
|
|
+ "frequency_penalty": null,
|
|
|
+ "repetition_penalty": null,
|
|
|
+ "logprobs": null,
|
|
|
+ "max_tokens": 128,
|
|
|
+ "min_tokens": 128,
|
|
|
+ "n_choices": null,
|
|
|
+ "seed": null,
|
|
|
+ "stop": null,
|
|
|
+ "stop_token_ids": null,
|
|
|
+ "stream": true,
|
|
|
+ "temperature": 0.0,
|
|
|
+ "top_p": null,
|
|
|
+ "top_k": null,
|
|
|
+ "extra_args": {}
|
|
|
+}
|
|
|
+2026-05-15 08:42:12 - evalscope - INFO: Test connection successful.
|
|
|
+2026-05-15 08:42:16 - evalscope - INFO: Using 248044 allowed tokens out of 248044 total tokens
|
|
|
+2026-05-15 08:42:16 - evalscope - INFO: Sampling input lengths from [2046, 2047)
|
|
|
+2026-05-15 08:42:16 - evalscope - INFO: Save the data base to: outputs/20260515_084106/Qwen3.6-27B-W8A8/parallel_10_number_10/benchmark_data.db
|
|
|
+2026-05-15 08:42:26 - evalscope - INFO: {
|
|
|
+ "Time taken for tests (s)": 9.6293,
|
|
|
+ "Number of concurrency": 10,
|
|
|
+ "Request rate (req/s)": -1,
|
|
|
+ "Total requests": 10,
|
|
|
+ "Succeed requests": 10,
|
|
|
+ "Failed requests": 0,
|
|
|
+ "Output token throughput (tok/s)": 132.928,
|
|
|
+ "Total token throughput (tok/s)": 2268.4997,
|
|
|
+ "Request throughput (req/s)": 1.0385,
|
|
|
+ "Average latency (s)": 9.567,
|
|
|
+ "Average time to first token (s)": 3.6071,
|
|
|
+ "Average time per output token (s)": 0.0469,
|
|
|
+ "Average inter-token latency (s)": 0.0466,
|
|
|
+ "Average input tokens per request": 2056.4,
|
|
|
+ "Average output tokens per request": 128.0
|
|
|
+}
|
|
|
+2026-05-15 08:42:26 - evalscope - INFO: Processing 100%| 10/10 [Elapsed: 00:09 < Remaining: 00:00, 1.01s/it]
|
|
|
+2026-05-15 08:42:26 - evalscope - INFO:
|
|
|
+Benchmarking summary:
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Key | Value |
|
|
|
++===================================+===========+
|
|
|
+| Time taken for tests (s) | 9.6293 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Number of concurrency | 10 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Request rate (req/s) | -1 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Total requests | 10 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Succeed requests | 10 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Failed requests | 0 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Output token throughput (tok/s) | 132.928 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Total token throughput (tok/s) | 2268.5 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Request throughput (req/s) | 1.0385 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Average latency (s) | 9.567 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Average time to first token (s) | 3.6071 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Average time per output token (s) | 0.0469 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Average inter-token latency (s) | 0.0466 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Average input tokens per request | 2056.4 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+| Average output tokens per request | 128 |
|
|
|
++-----------------------------------+-----------+
|
|
|
+2026-05-15 08:42:26 - evalscope - INFO:
|
|
|
+Percentile results:
|
|
|
++-------------+----------+---------+----------+-------------+--------------+---------------+----------------+---------------+
|
|
|
+| Percentiles | TTFT (s) | ITL (s) | TPOT (s) | Latency (s) | Input tokens | Output tokens | Output (tok/s) | Total (tok/s) |
|
|
|
++-------------+----------+---------+----------+-------------+--------------+---------------+----------------+---------------+
|
|
|
+| 10% | 1.7449 | 0.0332 | 0.0332 | 9.5131 | 2056 | 128 | 13.2998 | 226.9284 |
|
|
|
+| 25% | 2.7691 | 0.0333 | 0.0376 | 9.5425 | 2056 | 128 | 13.3343 | 227.5162 |
|
|
|
+| 50% | 3.7961 | 0.0333 | 0.0455 | 9.5729 | 2056 | 128 | 13.3741 | 228.2483 |
|
|
|
+| 66% | 4.8242 | 0.0334 | 0.0533 | 9.5984 | 2056 | 128 | 13.412 | 228.8415 |
|
|
|
+| 75% | 4.825 | 0.0334 | 0.0533 | 9.5993 | 2057 | 128 | 13.4137 | 229.0807 |
|
|
|
+| 80% | 5.4098 | 0.0335 | 0.0612 | 9.6242 | 2057 | 128 | 13.4552 | 229.5791 |
|
|
|
+| 90% | 5.41 | 0.0336 | 0.0689 | 9.6253 | 2058 | 128 | 13.5021 | 230.4848 |
|
|
|
+| 95% | 5.41 | 0.0337 | 0.0689 | 9.6253 | 2058 | 128 | 13.5021 | 230.4848 |
|
|
|
+| 98% | 5.41 | 0.0367 | 0.0689 | 9.6253 | 2058 | 128 | 13.5021 | 230.4848 |
|
|
|
+| 99% | 5.41 | 1.0169 | 0.0689 | 9.6253 | 2058 | 128 | 13.5021 | 230.4848 |
|
|
|
++-------------+----------+---------+----------+-------------+--------------+---------------+----------------+---------------+
|
|
|
+2026-05-15 08:42:26 - evalscope - INFO: Save the summary to: outputs/20260515_084106/Qwen3.6-27B-W8A8/parallel_10_number_10
|
|
|
+2026-05-15 08:42:26 - evalscope - INFO: Performance summary saved to: outputs/20260515_084106/Qwen3.6-27B-W8A8/performance_summary.txt
|