# Minimal profiler run: record a single forward pass and export it as a
# Chrome trace file.
# NOTE: `model` and `inputs` are not defined in this snippet; they must
# already exist in the surrounding scope.
import torch
from torch.profiler import profile, record_function, ProfilerActivity

# Request CUDA activity only when a GPU is available, so the snippet also
# runs cleanly on CPU-only machines.
activities = [ProfilerActivity.CPU]
if torch.cuda.is_available():
    activities.append(ProfilerActivity.CUDA)

with profile(activities=activities) as prof:
    model(inputs)

# Export after the profiling context has exited so the trace covers the
# whole recorded region.
prof.export_chrome_trace("trace.json")
# Profile one inference call of a small linear layer, recording input
# shapes and memory usage, then print the ten most expensive operators
# ranked by total CPU time.
import torch
from torch.profiler import profile, record_function, ProfilerActivity

model = torch.nn.Linear(100, 100)
inputs = torch.randn(100, 100)

# Request CUDA activity only when a GPU is available, so the snippet also
# runs cleanly on CPU-only machines.
activities = [ProfilerActivity.CPU]
if torch.cuda.is_available():
    activities.append(ProfilerActivity.CUDA)

with profile(
    activities=activities,
    record_shapes=True,
    profile_memory=True,
) as prof:
    # The label groups everything inside this scope under a single
    # "model_inference" row in the report.
    with record_function("model_inference"):
        model(inputs)

print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=10))
# Profile the forward pass of a trivial module, on the GPU when one is
# available and on the CPU otherwise, then print the per-operator summary.
import torch
import torch.nn as nn
from torch.profiler import ProfilerActivity, profile, record_function


class SimpleModel(nn.Module):
    """Toy module whose forward pass doubles its input."""

    def forward(self, x):
        """Return ``x * 2``."""
        return x * 2


# Pick the device at runtime instead of calling .cuda() unconditionally,
# which raises on machines without a GPU.
use_gpu = torch.cuda.is_available()
device = torch.device("cuda" if use_gpu else "cpu")

model = SimpleModel().to(device)
input_data = torch.randn(1000, 1000, device=device)

# Uses the torch.profiler API with an explicit activities list in place of
# the legacy torch.autograd.profiler `use_cuda=True` flag.
activities = [ProfilerActivity.CPU]
if use_gpu:
    activities.append(ProfilerActivity.CUDA)

with profile(activities=activities) as prof:
    with record_function("model_forward"):
        output = model(input_data)

# Rank by GPU time when it was recorded; otherwise fall back to CPU time so
# the table is still meaningfully ordered.
sort_key = "cuda_time_total" if use_gpu else "cpu_time_total"
print(prof.key_averages().table(sort_by=sort_key))
# Scheduled profiling of a data-loading loop, with traces written to
# ./log for viewing in TensorBoard.
# NOTE: `dataloader` is not defined in this snippet; it must already exist
# in the surrounding scope.
import torch.profiler

# Request CUDA activity only when a GPU is available, so the snippet also
# runs cleanly on CPU-only machines.
activities = [torch.profiler.ProfilerActivity.CPU]
if torch.cuda.is_available():
    activities.append(torch.profiler.ProfilerActivity.CUDA)

with torch.profiler.profile(
    activities=activities,
    profile_memory=True,
    # One cycle: skip 1 step, warm up for 1 step, then record 3 steps.
    schedule=torch.profiler.schedule(wait=1, warmup=1, active=3, repeat=1),
    on_trace_ready=torch.profiler.tensorboard_trace_handler('./log'),
) as prof:
    # Your code here
    for data in dataloader:
        # process data...
        # Tell the profiler that one iteration has finished so the
        # schedule can advance to its next phase.
        prof.step()

# After profiling, view the TensorBoard logs for visualization.
torchlens · johnmarktaylor91 • Updated 2025 Feb 14 15:53
torchlens
johnmarktaylor91 • Updated 2025 Feb 14 15:53
How to
Document