The trend in chip-multi-processors for the next several years is for on-chip FLOPS to grow much faster than bandwidth to off-chip DRAM. This trend is likely to emerge as a performance bottleneck for future real-time ray tracing systems. In this paper, we assess the impact of this bottleneck by measuring the DRAM bandwidth requirements for several different ray tracing algorithms, each running on simulated architectures with a variety of cache sizes. We conclude that for current packet-tracing algorithms, bandwidth will not be a bottleneck for primary rays, but that it will be a bottleneck for soft shadow rays. This bottleneck is caused primarily by dramatically lower cache hit rates, rather than by an increase in total working set, which suggests that substantial reductions in memory bandwidth requirements would be possible by designing algorithms that do a better job of scheduling ray traversals in a coherent fashion for divergent secondary rays.
% UT Austin technical report TR-06-40. The month uses the standard `nov` macro
% (so styles can abbreviate/localize it) with the day of month concatenated.
@TechReport{ Navratil:2006:AAR,
  author      = {Navr{\'a}til, Paul Arthur and Mark, William R.},
  title       = {An Analysis of Ray Tracing Bandwidth Consumption},
  institution = {The University of Texas at Austin},
  year        = {2006},
  number      = {TR-06-40},
  month       = nov # "~21",
}