%%% Goto and van de Geijn, "Anatomy of a High-Performance Matrix
%%% Multiplication", ACM TOMS 34(3), May 2008.
%%% The pages field holds the article number (12), not a page range;
%%% the page count is recorded in the note field.
%%% The doi field is the bare DOI taken from the resolver URL, which is
%%% kept unchanged for styles that only know the URL field.
@Article{Goto:2008:AHP,
  author   = {Goto, Kazushige and van de Geijn, Robert A.},
  title    = {Anatomy of a High-Performance Matrix Multiplication},
  journal  = {{ACM} Transactions on Mathematical Software},
  volume   = {34},
  number   = {3},
  pages    = {12},
  month    = may,
  year     = {2008},
  doi      = {10.1145/1356052.1356053},
  URL      = {http://doi.acm.org/10.1145/1356052.1356053},
  note     = {Article 12, 25 pages},
  abstract = {We present the basic principles which underlie the
              high-performance implementation of the matrix-matrix
              multiplication that is part of the widely used GotoBLAS
              library. Design decisions are justified by successively
              refining a model of architectures with multilevel
              memories. A simple but effective algorithm for executing
              this operation results. Implementations on a broad
              selection of architectures are shown to achieve near-peak
              performance.},
}