# Maas2-group / maas-base

"""
Plot throughput comparison between baseline and optimized versions.
Adjust the example data as needed.
"""

import matplotlib.pyplot as plt
import numpy as np


def _percent_improvement(baseline, optimized):
    """Return the percent change from baseline to optimized, or None if baseline is 0."""
    if baseline == 0:
        # A relative change against a zero baseline is undefined.
        return None
    return (optimized - baseline) / baseline * 100


def plot_throughput_comparison(
    model_name,
    gpu_type,
    case_names,
    baseline_tps,
    optimized_tps,
    optimized_gpu_ratio=1,
    save_path=None,
):
    """
    Plot throughput comparison between baseline and optimized versions.

    Draws a grouped bar chart with one pair of bars per test case. Each bar is
    annotated with its throughput value, and each optimized bar additionally
    shows the percentage change relative to the baseline (green when positive,
    red otherwise, gray "(n/a)" when the baseline is 0). The figure is always
    displayed with ``plt.show()``.

    Args:
        model_name (str): Name of the model (used in the title).
        gpu_type (str): Type of GPU (used in the title).
        case_names (list of str): List of test case names.
        baseline_tps (list of float): Throughput of baseline.
        optimized_tps (list of float): Throughput of optimized version.
        optimized_gpu_ratio (float): Factor every optimized throughput value
            is multiplied by before plotting. Default is 1.
        save_path (str, optional): If given, save plot to this file path.

    Raises:
        ValueError: If no test cases are given, or if ``case_names``,
            ``baseline_tps`` and ``optimized_tps`` differ in length.
    """
    # Materialize the inputs so any iterable (list, tuple, array) is accepted
    # and lengths can be compared reliably.
    case_names = list(case_names)
    baseline_tps = list(baseline_tps)
    # Normalize optimized TPS if needed
    optimized_tps = [x * optimized_gpu_ratio for x in optimized_tps]

    # Validate up front: mismatched lengths would otherwise be silently
    # truncated by zip() or fail deep inside matplotlib.
    if not case_names:
        raise ValueError("case_names must contain at least one test case")
    if not (len(case_names) == len(baseline_tps) == len(optimized_tps)):
        raise ValueError(
            "case_names, baseline_tps and optimized_tps must have the same "
            f"length (got {len(case_names)}, {len(baseline_tps)}, "
            f"{len(optimized_tps)})"
        )

    # Label for y-axis
    ylabel = "Throughput (TPS)"
    if optimized_gpu_ratio != 1:
        ylabel = "Throughput (TPS, normalized by GPU count)"

    # Compute improvements (None where the baseline is 0)
    improvement = [
        _percent_improvement(b, o) for b, o in zip(baseline_tps, optimized_tps)
    ]
    max_tps = max(max(baseline_tps), max(optimized_tps))
    # Vertical gaps for the annotations, scaled to the tallest bar so the
    # spacing looks the same regardless of the magnitude of the data.
    value_offset = max_tps * 0.015
    improvement_offset = max_tps * 0.08

    x = np.arange(len(case_names))
    width = 0.35

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(x - width / 2, baseline_tps, width, label='vLLM Baseline')
    ax.bar(x + width / 2, optimized_tps, width, label='GPUStack-Optimized')

    # Add text annotations
    text_style = {'ha': 'center', 'va': 'bottom', 'fontsize': 9}
    for pos, base, opt, gain in zip(x, baseline_tps, optimized_tps, improvement):
        # TPS values
        ax.text(pos - width / 2, base + value_offset, f'{base:.0f}', **text_style)
        ax.text(pos + width / 2, opt + value_offset, f'{opt:.0f}', **text_style)

        # Improvement percentage
        if gain is None:
            label, color = '(n/a)', 'gray'
        elif gain > 0:
            label, color = f'(+{gain:.1f}%)', 'green'
        else:
            label, color = f'({gain:.1f}%)', 'red'
        ax.text(
            pos + width / 2,
            opt + improvement_offset,
            label,
            color=color,
            **text_style,
        )

    ax.set_ylabel(ylabel)
    ax.set_title(
        f'{model_name} Throughput on {gpu_type} GPUs: vLLM Baseline vs. GPUStack-Optimized'
    )
    ax.set_xticks(x)
    ax.set_xticklabels(case_names, rotation=30, ha='right')

    ax.legend()
    # Leave headroom above the tallest bar for the stacked annotations.
    ax.set_ylim(0, max_tps * 1.18)
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    fig.tight_layout()

    # Save to file if path is given
    if save_path:
        fig.savefig(save_path, dpi=300)
        print(f"Plot saved to {save_path}")

    plt.show()


# Example usage: sample benchmark numbers for one model on one GPU type.
model_name, gpu_type = "Qwen3.5-350B-A3B", "H200"

# One label per benchmark case; both throughput lists below follow this order.
case_names = [
    "ShareGPT",
    "Input 1024 / Output 128 (Profile: Throughput)",
    "Input 32000 / Output 100 (Profile: Long Context)",
    "Input 1000 / Output 2000 (Profile: Generation Heavy)",
]
# Throughput per case for the baseline and for the optimized run.
baseline_tps = [9632.01, 37934.72, 44993.20, 10455.38]
optimized_tps = [10570.88, 50464.84, 56424.42, 12258.79]

# Draw the chart and also write it to a PNG at the given relative path.
plot_throughput_comparison(
    model_name=model_name,
    gpu_type=gpu_type,
    case_names=case_names,
    baseline_tps=baseline_tps,
    optimized_tps=optimized_tps,
    save_path="throughput_comparison.png",
)