121 lines
		
	
	
		
			3.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			121 lines
		
	
	
		
			3.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """
 | ||
Backbone A/B benchmark script.

Purpose: using the same input and the same number of repetitions, compare
different backbones (vgg16/resnet34/efficientnet_b0) on single-scale and FPN
forward inference, reporting per-forward latency (milliseconds) and GPU memory
usage (MB).

Examples:
  uv run python tests/benchmark_backbones.py --device cpu --image-size 512 --runs 5
  uv run python tests/benchmark_backbones.py --device cuda --runs 20 --backbones vgg16 resnet34 efficientnet_b0
 | ||
| """
 | ||
| from __future__ import annotations
 | ||
| 
 | ||
| import argparse
 | ||
| import time
 | ||
| from typing import Dict, List, Tuple
 | ||
| 
 | ||
| import numpy as np
 | ||
| import psutil
 | ||
| import torch
 | ||
| 
 | ||
| from models.rord import RoRD
 | ||
| 
 | ||
| 
 | ||
def get_mem_mb() -> float:
    """Return the resident set size (RSS) of the current process in MiB.

    The figure is read through ``psutil`` for the process running this script.
    """
    rss_bytes = psutil.Process().memory_info().rss
    # Two successive divisions by 1024 convert bytes -> KiB -> MiB.
    return rss_bytes / 1024 / 1024
 | ||
| 
 | ||
| 
 | ||
def get_gpu_mem_mb() -> float:
    """Return the GPU memory reported by ``torch.cuda.memory_allocated()`` in MiB.

    Returns:
        The allocated amount converted from bytes to MiB, or ``0.0`` when CUDA
        is not available.
    """
    if not torch.cuda.is_available():
        return 0.0
    allocated_bytes = torch.cuda.memory_allocated()
    # bytes -> KiB -> MiB
    return allocated_bytes / 1024 / 1024
 | ||
| 
 | ||
| 
 | ||
def warmup(model: torch.nn.Module, x: torch.Tensor, steps: int = 3, fpn: bool = False) -> None:
    """Run a few untimed forward passes before measuring.

    Args:
        model: Module called as ``model(x, return_pyramid=fpn)``.
        x: Input tensor handed to the model on every pass.
        steps: Number of forward passes to execute.
        fpn: Value forwarded as the ``return_pyramid`` keyword argument.
    """
    with torch.inference_mode():
        for _step in range(steps):
            # The output is intentionally discarded; only the execution matters.
            model(x, return_pyramid=fpn)
 | ||
| 
 | ||
| 
 | ||
def bench_once(model: torch.nn.Module, x: torch.Tensor, fpn: bool = False) -> float:
    """Time one forward pass of ``model`` and return the elapsed milliseconds.

    Args:
        model: Module called as ``model(x, return_pyramid=fpn)``.
        x: Input tensor; when it lives on a CUDA device the device is
            synchronized before and after the forward pass.
        fpn: Value forwarded as the ``return_pyramid`` keyword argument.

    Returns:
        Elapsed time of the single forward pass, in milliseconds.
    """
    # A CUDA tensor can only exist when CUDA is available, so ``x.is_cuda``
    # alone decides whether synchronization is needed.
    needs_sync = x.is_cuda

    # Drain any pending GPU work so that it is not billed to this measurement.
    if needs_sync:
        torch.cuda.synchronize(x.device)

    # perf_counter() is monotonic and high resolution; time.time() can jump
    # when the system clock is adjusted and may be too coarse for short runs.
    start = time.perf_counter()
    with torch.inference_mode():
        model(x, return_pyramid=fpn)

    # Wait for the forward pass to actually finish on the GPU before stopping
    # the clock.
    if needs_sync:
        torch.cuda.synchronize(x.device)

    return (time.perf_counter() - start) * 1000.0
 | ||
| 
 | ||
| 
 | ||
def run_benchmark(backbone: str, device: torch.device, image_size: int, runs: int) -> Dict[str, float]:
    """Benchmark one backbone in single-scale and FPN forward modes.

    A ``RoRD`` model is built for ``backbone`` (not pretrained, attention
    disabled), moved to ``device`` and put in eval mode. A random
    ``1 x 3 x image_size x image_size`` input is then pushed through it: five
    warmup passes per mode, followed by ``runs`` timed passes per mode.

    Args:
        backbone: Backbone name stored in the model config.
        device: Device on which the model and input are placed.
        image_size: Side length of the square random input.
        runs: Number of timed forward passes per mode; must be at least 1.

    Returns:
        A dict with the keys ``backbone``, ``single_ms_mean``,
        ``single_ms_std``, ``fpn_ms_mean``, ``fpn_ms_std``, ``gpu_mem_mb``,
        ``cpu_mem_mb`` and ``runs``. On a CUDA device ``gpu_mem_mb`` is the
        peak allocated memory observed during the timed runs.

    Raises:
        ValueError: If ``runs`` is smaller than 1.
    """
    # With zero timed runs np.mean/np.std would operate on empty lists and
    # return NaN (plus a RuntimeWarning); reject that up front.
    if runs < 1:
        raise ValueError(f"runs must be >= 1, got {runs}")

    # Lightweight attribute-style stand-in for the project config object,
    # exposing cfg.model.backbone.* and cfg.model.attention.*.
    cfg = type("cfg", (), {
        "model": type("m", (), {
            "backbone": type("b", (), {"name": backbone, "pretrained": False})(),
            "attention": type("a", (), {"enabled": False, "type": "none", "places": []})(),
        })()
    })()

    model = RoRD(cfg=cfg).to(device)
    model.eval()

    x = torch.randn(1, 3, image_size, image_size, device=device)

    # Warm up both code paths so one-time setup costs are not timed.
    warmup(model, x, steps=5, fpn=False)
    warmup(model, x, steps=5, fpn=True)

    # Track peak GPU memory across the timed runs. Sampling the current
    # allocation only after inference has finished would miss the
    # intermediate activations that are freed once each forward returns.
    on_cuda = torch.device(device).type == "cuda"
    if on_cuda:
        torch.cuda.reset_peak_memory_stats(device)

    # Single-scale timings, then FPN timings.
    t_list_single: List[float] = [bench_once(model, x, fpn=False) for _ in range(runs)]
    t_list_fpn: List[float] = [bench_once(model, x, fpn=True) for _ in range(runs)]

    if on_cuda:
        gpu_mem_mb = torch.cuda.max_memory_allocated(device) / 1024 / 1024
    else:
        gpu_mem_mb = get_gpu_mem_mb()

    return {
        "backbone": backbone,
        "single_ms_mean": float(np.mean(t_list_single)),
        "single_ms_std": float(np.std(t_list_single)),
        "fpn_ms_mean": float(np.mean(t_list_fpn)),
        "fpn_ms_std": float(np.std(t_list_fpn)),
        "gpu_mem_mb": float(gpu_mem_mb),
        "cpu_mem_mb": float(get_mem_mb()),
        "runs": int(runs),
    }
 | ||
| 
 | ||
| 
 | ||
def main():
    """Command-line entry point: benchmark each requested backbone and print results.

    Parses ``--backbones``, ``--image-size``, ``--runs`` and ``--device``,
    calls ``run_benchmark`` once per backbone, prints a per-backbone line and
    finally a short summary table.
    """
    parser = argparse.ArgumentParser(description="RoRD 骨干 A/B 基准测试")
    parser.add_argument(
        "--backbones",
        nargs="*",
        default=["vgg16", "resnet34", "efficientnet_b0"],
        help="要测试的骨干列表",
    )
    parser.add_argument("--image-size", type=int, default=512, help="输入图像尺寸(正方形)")
    parser.add_argument("--runs", type=int, default=10, help="每个设置的重复次数")
    parser.add_argument("--device", type=str, default="cuda", help="cuda 或 cpu")

    args = parser.parse_args()

    # Honour the requested device only if CUDA is available or CPU was asked
    # for explicitly; otherwise fall back to CPU.
    cuda_ok = torch.cuda.is_available()
    requested = args.device
    if cuda_ok or requested == "cpu":
        device = torch.device(requested)
    else:
        device = torch.device("cpu")
    print(f"使用设备: {device}")

    collected: List[Dict[str, float]] = []
    for name in args.backbones:
        print(f"\n=== Benchmark: {name} ===")
        stats = run_benchmark(name, device, args.image_size, args.runs)
        fields = [
            f"single: {stats['single_ms_mean']:.2f}±{stats['single_ms_std']:.2f} ms",
            f"fpn: {stats['fpn_ms_mean']:.2f}±{stats['fpn_ms_std']:.2f} ms",
            f"gpu_mem: {stats['gpu_mem_mb']:.1f} MB",
        ]
        print("  |  ".join(fields))
        collected.append(stats)

    # Brief side-by-side summary of all backbones.
    print("\n===== 汇总 =====")
    for row in collected:
        print(f"{row['backbone']:<16} single {row['single_ms_mean']:.2f} ms | fpn {row['fpn_ms_mean']:.2f} ms")


# Run the benchmark only when executed as a script, not on import.
if __name__ == "__main__":
    main()
 | 
