# plot_tps_comparison.py
"""
Plot a throughput comparison between baseline and optimized versions.

Adjust the example data at the bottom of this file as needed.
"""
  5. import matplotlib.pyplot as plt
  6. import numpy as np
  7. def plot_throughput_comparison(
  8. model_name,
  9. gpu_type,
  10. case_names,
  11. baseline_tps,
  12. optimized_tps,
  13. optimized_gpu_ratio=1,
  14. save_path=None,
  15. ):
  16. """
  17. Plot throughput comparison between baseline and optimized versions.
  18. Args:
  19. model_name (str): Name of the model.
  20. gpu_type (str): Type of GPU.
  21. case_names (list of str): List of test case names.
  22. baseline_tps (list of float): Throughput of baseline.
  23. optimized_tps (list of float): Throughput of optimized version.
  24. optimized_gpu_ratio (float): Normalization factor for GPUs. Default is 1.
  25. save_path (str, optional): If given, save plot to this file path.
  26. """
  27. # Normalize optimized TPS if needed
  28. optimized_tps = [x * optimized_gpu_ratio for x in optimized_tps]
  29. # Label for y-axis
  30. ylabel = "Throughput (TPS)"
  31. if optimized_gpu_ratio != 1:
  32. ylabel = "Throughput (TPS, normalized by GPU count)"
  33. # Compute improvements
  34. improvement = [(o - b) / b * 100 for b, o in zip(baseline_tps, optimized_tps)]
  35. max_tps = max(max(baseline_tps), max(optimized_tps))
  36. value_offset = max_tps * 0.015
  37. improvement_offset = max_tps * 0.08
  38. x = np.arange(len(case_names))
  39. width = 0.35
  40. _, ax = plt.subplots(figsize=(10, 6))
  41. ax.bar(x - width / 2, baseline_tps, width, label='vLLM Baseline')
  42. ax.bar(x + width / 2, optimized_tps, width, label='GPUStack-Optimized')
  43. # Add text annotations
  44. for i in range(len(case_names)):
  45. # TPS values
  46. ax.text(
  47. x[i] - width / 2,
  48. baseline_tps[i] + value_offset,
  49. f'{baseline_tps[i]:.0f}',
  50. ha='center',
  51. va='bottom',
  52. fontsize=9,
  53. )
  54. ax.text(
  55. x[i] + width / 2,
  56. optimized_tps[i] + value_offset,
  57. f'{optimized_tps[i]:.0f}',
  58. ha='center',
  59. va='bottom',
  60. fontsize=9,
  61. )
  62. # Improvement percentage
  63. if improvement[i] > 0:
  64. ax.text(
  65. x[i] + width / 2,
  66. optimized_tps[i] + improvement_offset,
  67. f'(+{improvement[i]:.1f}%)',
  68. ha='center',
  69. va='bottom',
  70. fontsize=9,
  71. color='green',
  72. )
  73. else:
  74. ax.text(
  75. x[i] + width / 2,
  76. optimized_tps[i] + improvement_offset,
  77. f'({improvement[i]:.1f}%)',
  78. ha='center',
  79. va='bottom',
  80. fontsize=9,
  81. color='red',
  82. )
  83. ax.set_ylabel(ylabel)
  84. ax.set_title(
  85. f'{model_name} Throughput on {gpu_type} GPUs: vLLM Baseline vs. GPUStack-Optimized'
  86. )
  87. ax.set_xticks(x)
  88. ax.set_xticklabels(case_names)
  89. plt.xticks(rotation=30, ha='right')
  90. ax.legend()
  91. plt.ylim(0, max_tps * 1.18)
  92. plt.grid(axis='y', linestyle='--', alpha=0.7)
  93. plt.tight_layout()
  94. # Save to file if path is given
  95. if save_path:
  96. plt.savefig(save_path, dpi=300)
  97. print(f"Plot saved to {save_path}")
  98. plt.show()
  99. # Example usage
  100. model_name = "Qwen3.5-350B-A3B"
  101. gpu_type = "H200"
  102. case_names = [
  103. "ShareGPT",
  104. "Input 1024 / Output 128 (Profile: Throughput)",
  105. "Input 32000 / Output 100 (Profile: Long Context)",
  106. "Input 1000 / Output 2000 (Profile: Generation Heavy)",
  107. ]
  108. baseline_tps = [9632.01, 37934.72, 44993.20, 10455.38]
  109. optimized_tps = [10570.88, 50464.84, 56424.42, 12258.79]
  110. plot_throughput_comparison(
  111. model_name,
  112. gpu_type,
  113. case_names,
  114. baseline_tps,
  115. optimized_tps,
  116. save_path="throughput_comparison.png",
  117. )