BibTeX Entries for FLAME-Related Publications

@book{Eijkhout:IHPSClulu,
  author    = {Victor Eijkhout},
  title     = {Introduction to High Performance Scientific Computing},
  publisher = {lulu.com},
  year      = {2011},
  isbn      = {978-1-257-99254-6},
  url       = {http://www.lulu.com/shop/victor-eijkhout/introduction-to-high-performance-scientific-computing/paperback/product-18783375.html},
  note      = {Also available for download from \url{http://www.tacc.utexas.edu/~eijkhout/istc/istc.html}}
}
@book{libflame_ref,
  author    = {Field G. {V}an~{Z}ee},
  title     = {{\texttt{libflame}}: {T}he {C}omplete {R}eference},
  publisher = {lulu.com},
  year      = {2009}
}
@book{TSoPMC,
  author    = {Robert A. {v}an~{d}e~{G}eijn and Enrique S. Quintana-Ort\'{\i}},
  title     = {The Science of Programming Matrix Computations},
  publisher = {lulu.com},
  year      = {2008}
}
@book{PLAPACKBook,
  title     = {Using {PLAPACK}: {P}arallel {L}inear {A}lgebra {P}ackage},
  author    = {Robert A. {v}an~{d}e~{G}eijn},
  year      = {1997},
  publisher = {The {MIT} {P}ress}
}
@article{CPE:CPE2842,
  author   = {Igual, Francisco D. and Quintana-Ort\'{\i}, Gregorio and van de Geijn, Robert},
  title    = {Scheduling algorithms-by-blocks on small clusters},
  journal  = {Concurrency and Computation: Practice and Experience},
  volume   = {25},
  number   = {3},
  pages    = {367--384},
  year     = {2013},
  issn     = {1532-0634},
  doi      = {10.1002/cpe.2842},
  url      = {http://dx.doi.org/10.1002/cpe.2842},
  keywords = {matrix computations, novel parallel architectures, automatic parallelization}
}
@article{DxTJournal1,
  author  = {Bryan Marker and Don Batory and Robert {v}an~{d}e~{G}eijn},
  title   = {A Case Study in Mechanically Deriving Dense Linear Algebra Code},
  journal = {International Journal of High Performance Computing Applications},
  volume  = {27},
  number  = {4},
  pages   = {439--452},
  month   = nov,
  year    = {2013}
}
@article{VanZee:2012:FAR:2382585.2382587,
  author     = {{V}an~{Z}ee, Field G. and {v}an~{d}e~{G}eijn, Robert A. and Quintana-Ort\'{\i}, Gregorio and Elizondo, G. Joseph},
  title      = {Families of Algorithms for Reducing a Matrix to Condensed Form},
  journal    = {ACM Trans. Math. Softw.},
  volume     = {39},
  number     = {1},
  pages      = {2:1--2:32},
  articleno  = {2},
  numpages   = {32},
  month      = nov,
  year       = {2012},
  issue_date = {November 2012},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  issn       = {0098-3500},
  doi        = {10.1145/2382585.2382587},
  url        = {http://doi.acm.org/10.1145/2382585.2382587},
  acmid      = {2382587},
  keywords   = {Hessenberg, Linear algebra, bidiagonal, high performance, libraries, reduction, tridiagonal}
}
@article{Quintana-Orti:2012:RSP:2331130.2331133,
  author     = {Quintana-Ort\'{\i}, Gregorio and Igual, Francisco D. and Marqu{\'e}s, Mercedes and Quintana-Ort\'{\i}, Enrique S. and van de Geijn, Robert A.},
  title      = {A Runtime System for Programming Out-of-Core Matrix Algorithms-by-Tiles on Multithreaded Architectures},
  journal    = {ACM Trans. Math. Softw.},
  volume     = {38},
  number     = {4},
  pages      = {25:1--25:25},
  articleno  = {25},
  numpages   = {25},
  month      = aug,
  year       = {2012},
  issue_date = {August 2012},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  issn       = {0098-3500},
  doi        = {10.1145/2331130.2331133},
  url        = {http://doi.acm.org/10.1145/2331130.2331133},
  acmid      = {2331133},
  keywords   = {High-performance, libraries, linear algebra, multithreaded architectures, out-of-core algorithms}
}
@article{6212466,
  author  = {Pedram, Ardavan and van de Geijn, Robert A. and Gerstlauer, Andreas},
  title   = {Codesign Tradeoffs for High-Performance, Low-Power Linear Algebra Architectures},
  journal = {IEEE Transactions on Computers},
  volume  = {61},
  number  = {12},
  pages   = {1724--1736},
  month   = dec,
  year    = {2012},
  issn    = {0018-9340},
  doi     = {10.1109/TC.2012.132}
}
@article{SCC-2012,
  author  = {Marker, Bryan and Chan, Ernie and Poulson, Jack and
             {v}an~{d}e~{G}eijn, Robert and {V}an~{d}er~{W}ijngaart, Rob F. and
             Mattson, Timothy G. and Kubaska, Theodore E.},
  title   = {Programming many-core architectures - a case study: dense
             matrix computations on the {I}ntel single-chip cloud computer
             processor},
  journal = {Concurrency and Computation: Practice and Experience},
  volume  = {24},
  pages   = {1317--1333},
  year    = {2012},
  doi     = {10.1002/cpe.1832}
}
@article{Igual20121134,
  author   = {Francisco D. Igual and Ernie Chan and Enrique S. Quintana-Ort\'{\i} and Gregorio Quintana-Ort\'{\i} and Robert A. van de Geijn and Field G. {V}an~{Z}ee},
  title    = {The {FLAME} approach: From dense linear algebra algorithms to high-performance multi-accelerator implementations},
  journal  = {Journal of Parallel and Distributed Computing},
  volume   = {72},
  number   = {9},
  pages    = {1134--1143},
  year     = {2012},
  note     = {Accelerators for High-Performance Computing},
  issn     = {0743-7315},
  doi      = {10.1016/j.jpdc.2011.10.014},
  url      = {http://www.sciencedirect.com/science/article/pii/S0743731511002139},
  keywords = {Dense linear algebra libraries, Graphics processors, Runtime systems, High performance computing}
}
@article{VanDeGeijn:2011:HUV:2049662.2049666,
  author     = {{v}an~{d}e~Geijn, Robert A. and {V}an~{Z}ee, Field G.},
  title      = {High-performance up-and-downdating via {H}ouseholder-like transformations},
  journal    = {ACM Trans. Math. Softw.},
  volume     = {38},
  number     = {1},
  pages      = {4:1--4:17},
  articleno  = {4},
  numpages   = {17},
  month      = dec,
  year       = {2011},
  issue_date = {November 2011},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  issn       = {0098-3500},
  doi        = {10.1145/2049662.2049666},
  url        = {http://doi.acm.org/10.1145/2049662.2049666},
  acmid      = {2049666},
  keywords   = {Linear algebra, high performance, libraries}
}
@article{DBLP:journals/tjs/MarquesQQG11,
  author    = {Mercedes Marqu{\'e}s and
               Gregorio Quintana-Ort\'{\i} and
               Enrique S. Quintana-Ort\'{\i} and
               Robert A. {v}an~{d}e~{G}eijn},
  title     = {Using desktop computers to solve large-scale dense linear
               algebra problems},
  journal   = {The Journal of Supercomputing},
  volume    = {58},
  number    = {2},
  pages     = {145--150},
  year      = {2011},
  doi       = {10.1007/s11227-010-0394-2},
  ee        = {http://dx.doi.org/10.1007/s11227-010-0394-2},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

@article{Bientinesi:2011:GMS:2078718.2078728,
  author     = {Bientinesi, Paolo and {v}an~{d}e~Geijn, Robert A.},
  title      = {Goal-Oriented and Modular Stability Analysis},
  journal    = {SIAM J. Matrix Anal. Appl.},
  volume     = {32},
  number     = {1},
  pages      = {286--308},
  numpages   = {23},
  month      = mar,
  year       = {2011},
  issue_date = {February 2011},
  publisher  = {Society for Industrial and Applied Mathematics},
  address    = {Philadelphia, PA, USA},
  issn       = {0895-4798},
  doi        = {10.1137/080741057},
  url        = {http://dx.doi.org/10.1137/080741057},
  acmid      = {2078728},
  keywords   = {LU factorization, automatic stability analysis, modular, stability analysis}
}
@article{Bientinesi2010430,
  author   = {Paolo Bientinesi and Victor Eijkhout and Kyungjoo Kim and Jason Kurtz and Robert {v}an~{d}e~{G}eijn},
  title    = {Sparse direct factorizations through unassembled hyper-matrices},
  journal  = {Computer Methods in Applied Mechanics and Engineering},
  volume   = {199},
  number   = {9--12},
  pages    = {430--438},
  year     = {2010},
  issn     = {0045-7825},
  doi      = {10.1016/j.cma.2009.07.012},
  url      = {http://www.sciencedirect.com/science/article/pii/S0045782509002333},
  keywords = {Factorizations, Gaussian elimination, Sparse matrices, hp-Adaptive finite elements}
}
@article{Eijkhout20101805,
  author  = {Victor Eijkhout and Paolo Bientinesi and Robert {v}an~{d}e~{G}eijn},
  title   = {Towards mechanical derivation of {Krylov} solver libraries},
  journal = {Procedia Computer Science},
  volume  = {1},
  number  = {1},
  pages   = {1805--1813},
  year    = {2010},
  note    = {ICCS 2010},
  issn    = {1877-0509},
  doi     = {10.1016/j.procs.2010.04.202},
  url     = {http://www.sciencedirect.com/science/article/pii/S1877050910002036}
}
@article{Quintana-Orti:2009:PMA:1527286.1527288,
  author     = {Quintana-Ort\'{\i}, Gregorio and Quintana-Ort\'{\i}, Enrique S. and {v}an~{d}e~{G}eijn, Robert A. and {V}an~{Z}ee, Field G. and Chan, Ernie},
  title      = {Programming matrix algorithms-by-blocks for thread-level parallelism},
  journal    = {ACM Trans. Math. Softw.},
  volume     = {36},
  number     = {3},
  pages      = {14:1--14:26},
  articleno  = {14},
  numpages   = {26},
  month      = jul,
  year       = {2009},
  issue_date = {July 2009},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  issn       = {0098-3500},
  doi        = {10.1145/1527286.1527288},
  url        = {http://doi.acm.org/10.1145/1527286.1527288},
  acmid      = {1527288},
  keywords   = {Linear algebra, high-performance, libraries, multithreaded architectures}
}
@article{Zee:2009:LLD:1674531.1674626,
  author     = {{V}an~{Z}ee, Field G. and Chan, Ernie and {v}an~{d}e~{G}eijn, Robert A. and Quintana-Ort\'{\i}, Enrique S. and Quintana-Ort\'{\i}, Gregorio},
  title      = {The libflame Library for Dense Matrix Computations},
  journal    = {Computing in Science \& Engineering},
  volume     = {11},
  number     = {6},
  pages      = {56--63},
  numpages   = {8},
  month      = nov,
  year       = {2009},
  issue_date = {November 2009},
  publisher  = {IEEE Computer Society Press},
  address    = {Los Alamitos, CA, USA},
  issn       = {1521-9615},
  doi        = {10.1109/MCSE.2009.207},
  url        = {http://dx.doi.org/10.1109/MCSE.2009.207},
  acmid      = {1674626},
  keywords   = {Dense linear algebra, performance, portability, programmability}
}
@article{Quintana-Orti:2008:ULF:1377612.1377615,
  author     = {Quintana-Ort\'{\i}, Enrique S. and {v}an~{d}e~{G}eijn, Robert A.},
  title      = {Updating an {LU} Factorization with Pivoting},
  journal    = {ACM Trans. Math. Softw.},
  volume     = {35},
  number     = {2},
  pages      = {11:1--11:16},
  articleno  = {11},
  numpages   = {16},
  month      = jul,
  year       = {2008},
  issue_date = {July 2008},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  issn       = {0098-3500},
  doi        = {10.1145/1377612.1377615},
  url        = {http://doi.acm.org/10.1145/1377612.1377615},
  acmid      = {1377615},
  keywords   = {LU factorization, linear systems, pivoting, updating}
}
@article{Goto:2008:HIL:1377603.1377607,
  author     = {Goto, Kazushige and {v}an~{d}e~{G}eijn, Robert},
  title      = {High-performance implementation of the level-3 {BLAS}},
  journal    = {ACM Trans. Math. Softw.},
  volume     = {35},
  number     = {1},
  pages      = {4:1--4:14},
  articleno  = {4},
  numpages   = {14},
  month      = jul,
  year       = {2008},
  issue_date = {July 2008},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  issn       = {0098-3500},
  doi        = {10.1145/1377603.1377607},
  url        = {http://doi.acm.org/10.1145/1377603.1377607},
  acmid      = {1377607},
  keywords   = {Linear algebra, basic linear algebra subprograms, libraries, matrix-matrix operations}
}
@article{Bientinesi:2008:FAR:1377603.1377606,
  author     = {Bientinesi, Paolo and Gunter, Brian and {v}an~{d}e~{G}eijn, Robert A.},
  title      = {Families of algorithms related to the inversion of a Symmetric Positive Definite matrix},
  journal    = {ACM Trans. Math. Softw.},
  volume     = {35},
  number     = {1},
  pages      = {3:1--3:22},
  articleno  = {3},
  numpages   = {22},
  month      = jul,
  year       = {2008},
  issue_date = {July 2008},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  issn       = {0098-3500},
  doi        = {10.1145/1377603.1377606},
  url        = {http://doi.acm.org/10.1145/1377603.1377606},
  acmid      = {1377606},
  keywords   = {Linear algebra, inversion, libraries, symmetric positive definite}
}
@article{Goto:2008:AHM:1356052.1356053,
  author     = {Goto, Kazushige and {v}an~{d}e~{G}eijn, Robert A.},
  title      = {Anatomy of high-performance matrix multiplication},
  journal    = {ACM Trans. Math. Softw.},
  volume     = {34},
  number     = {3},
  pages      = {12:1--12:25},
  articleno  = {12},
  numpages   = {25},
  month      = may,
  year       = {2008},
  issue_date = {May 2008},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  issn       = {0098-3500},
  doi        = {10.1145/1356052.1356053},
  url        = {http://doi.acm.org/10.1145/1356052.1356053},
  acmid      = {1356053},
  keywords   = {Linear algebra, basic linear algebra subprograms, matrix multiplication}
}
@article{Zee:2008:SPF:1326548.1326552,
  author     = {{V}an~{Z}ee, Field G. and Bientinesi, Paolo and Low, Tze Meng and {v}an~{d}e~{G}eijn, Robert A.},
  title      = {Scalable parallelization of {FLAME} code via the workqueuing model},
  journal    = {ACM Trans. Math. Softw.},
  volume     = {34},
  number     = {2},
  pages      = {10:1--10:29},
  articleno  = {10},
  numpages   = {29},
  month      = mar,
  year       = {2008},
  issue_date = {March 2008},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  issn       = {0098-3500},
  doi        = {10.1145/1326548.1326552},
  url        = {http://doi.acm.org/10.1145/1326548.1326552},
  acmid      = {1326552},
  keywords   = {FLAME, OpenMP, SMP, parallel, scalability, workqueuing}
}
@article{Chan:2007:CCT:1285358.1285359,
  author     = {Chan, Ernie and Heimlich, Marcel and Purkayastha, Avi and {v}an~{d}e~{G}eijn, Robert},
  title      = {Collective communication: theory, practice, and experience},
  journal    = {Concurrency and Computation: Practice and Experience},
  volume     = {19},
  number     = {13},
  pages      = {1749--1783},
  numpages   = {35},
  month      = sep,
  year       = {2007},
  issue_date = {September 2007},
  publisher  = {John Wiley and Sons Ltd.},
  address    = {Chichester, UK},
  issn       = {1532-0626},
  doi        = {10.1002/cpe.1206},
  url        = {http://dx.doi.org/10.1002/cpe.1206},
  acmid      = {1285359},
  keywords   = {clusters, collective communication, distributed-memory architecture}
}
@article{Quintana-Orti:2006:IPR:1141885.1141887,
  author     = {Quintana-Ort\'{\i}, Gregorio and {v}an~{d}e~{G}eijn, Robert},
  title      = {Improving the performance of reduction to {H}essenberg form},
  journal    = {ACM Trans. Math. Softw.},
  volume     = {32},
  number     = {2},
  pages      = {180--194},
  numpages   = {15},
  month      = jun,
  year       = {2006},
  issue_date = {June 2006},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  issn       = {0098-3500},
  doi        = {10.1145/1141885.1141887},
  url        = {http://doi.acm.org/10.1145/1141885.1141887},
  acmid      = {1141887},
  keywords   = {Linear algebra, eigenvalue problems, reduction to condensed form}
}
@article{Joffrain:2006:AHT:1141885.1141886,
  author     = {Joffrain, Thierry and Low, Tze Meng and Quintana-Ort\'{\i}, Enrique S. and {v}an~{d}e~{G}eijn, Robert and {V}an~{Z}ee, Field G.},
  title      = {Accumulating {H}ouseholder transformations, revisited},
  journal    = {ACM Trans. Math. Softw.},
  volume     = {32},
  number     = {2},
  pages      = {169--179},
  numpages   = {11},
  month      = jun,
  year       = {2006},
  issue_date = {June 2006},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  issn       = {0098-3500},
  doi        = {10.1145/1141885.1141886},
  url        = {http://doi.acm.org/10.1145/1141885.1141886},
  acmid      = {1141886},
  keywords   = {Householder transformation, Linear algebra, QR factorization, compact WY transform}
}
@article{Bientinesi:2005:PED:1081198.1081222,
  author     = {Bientinesi, Paolo and Dhillon, Inderjit S. and {v}an~{d}e~{G}eijn, Robert A.},
  title      = {A Parallel Eigensolver for Dense Symmetric Matrices Based on Multiple Relatively Robust Representations},
  journal    = {SIAM J. Sci. Comput.},
  volume     = {27},
  number     = {1},
  pages      = {43--66},
  numpages   = {24},
  month      = jul,
  year       = {2005},
  issue_date = {2005},
  publisher  = {Society for Industrial and Applied Mathematics},
  address    = {Philadelphia, PA, USA},
  issn       = {1064-8275},
  doi        = {10.1137/030601107},
  url        = {http://dx.doi.org/10.1137/030601107},
  acmid      = {1081222},
  keywords   = {eigenvalues, eigenvectors, parallel computing, relatively robust representations, symmetric matrix}
}
@article{Bientinesi:2005:RLA:1055531.1055533,
  author     = {Bientinesi, Paolo and Quintana-Ort\'{\i}, Enrique S. and {v}an~{d}e~{G}eijn, Robert A.},
  title      = {Representing linear algebra algorithms in code: the {FLAME} application program interfaces},
  journal    = {ACM Trans. Math. Softw.},
  volume     = {31},
  number     = {1},
  pages      = {27--59},
  numpages   = {33},
  month      = mar,
  year       = {2005},
  issue_date = {March 2005},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  issn       = {0098-3500},
  doi        = {10.1145/1055531.1055533},
  url        = {http://doi.acm.org/10.1145/1055531.1055533},
  acmid      = {1055533},
  keywords   = {Application program interfaces, formal derivation, high-performance libraries, linear algebra}
}
@article{Gunter:2005:POC:1055531.1055534,
  author     = {Gunter, Brian C. and {v}an~{d}e~{G}eijn, Robert A.},
  title      = {Parallel out-of-core computation and updating of the {QR} factorization},
  journal    = {ACM Trans. Math. Softw.},
  volume     = {31},
  number     = {1},
  pages      = {60--78},
  numpages   = {19},
  month      = mar,
  year       = {2005},
  issue_date = {March 2005},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  issn       = {0098-3500},
  doi        = {10.1145/1055531.1055534},
  url        = {http://doi.acm.org/10.1145/1055531.1055534},
  acmid      = {1055534},
  keywords   = {Linear algebra, dense systems, linear least squares}
}
@article{Bientinesi:2005:SDD:1055531.1055532,
  author     = {Bientinesi, Paolo and Gunnels, John A. and Myers, Margaret E. and Quintana-Ort\'{\i}, Enrique S. and {v}an~{d}e~{G}eijn, Robert A.},
  title      = {The science of deriving dense linear algebra algorithms},
  journal    = {ACM Trans. Math. Softw.},
  volume     = {31},
  number     = {1},
  pages      = {1--26},
  numpages   = {26},
  month      = mar,
  year       = {2005},
  issue_date = {March 2005},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  issn       = {0098-3500},
  doi        = {10.1145/1055531.1055532},
  url        = {http://doi.acm.org/10.1145/1055531.1055532},
  acmid      = {1055532},
  keywords   = {Formal derivation, high-performance computing, libraries, linear algebra}
}
@article{Quintana-Orti:2003:FDA:779359.779365,
  author     = {Quintana-Ort\'{\i}, Enrique S. and {v}an~{d}e~{G}eijn, Robert A.},
  title      = {Formal derivation of algorithms: The triangular {Sylvester} equation},
  journal    = {ACM Trans. Math. Softw.},
  volume     = {29},
  number     = {2},
  pages      = {218--243},
  numpages   = {26},
  month      = jun,
  year       = {2003},
  issue_date = {June 2003},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  issn       = {0098-3500},
  doi        = {10.1145/779359.779365},
  url        = {http://doi.acm.org/10.1145/779359.779365},
  acmid      = {779365},
  keywords   = {Formal derivation, Sylvester equations, control theory, libraries, linear algebra}
}
@article{Gunnels:2001:FFL:504210.504213,
  author     = {Gunnels, John A. and Gustavson, Fred G. and Henry, Greg M. and {v}an~{d}e~{G}eijn, Robert A.},
  title      = {{FLAME}: {Formal} {Linear} {Algebra} {Methods} {Environment}},
  journal    = {ACM Trans. Math. Softw.},
  volume     = {27},
  number     = {4},
  pages      = {422--455},
  numpages   = {34},
  month      = dec,
  year       = {2001},
  issue_date = {December 2001},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  issn       = {0098-3500},
  doi        = {10.1145/504210.504213},
  url        = {http://doi.acm.org/10.1145/504210.504213},
  acmid      = {504213},
  keywords   = {Formal derivation, libraries, linear algebra, performance}
}
% NOTE(review): the crossref target DBLP:conf/asap/2011 is not defined in the visible part of this file -- confirm it exists.
@inproceedings{DBLP:conf/asap/PedramGG11,
  author    = {Ardavan Pedram and
               Andreas Gerstlauer and
               Robert A. {v}an~{d}e~{G}eijn},
  title     = {A high-performance, low-power linear algebra core},
  booktitle = {22nd IEEE International Conference on Application-specific Systems, Architectures and Processors (ASAP 2011)},
  year      = {2011},
  pages     = {35--42},
  doi       = {10.1109/ASAP.2011.6043234},
  ee        = {http://dx.doi.org/10.1109/ASAP.2011.6043234},
  crossref  = {DBLP:conf/asap/2011}
}
% NOTE(review): the crossref target conf/ieeehpcs/2010 is not defined in the visible part of this file -- confirm it exists.
@inproceedings{conf/ieeehpcs/FoguaIQG10,
  author    = {Fogua, Manuel and Igual, Francisco D. and Quintana-Ort\'{\i}, Enrique S. and {v}an~{d}e~{G}eijn, Robert A.},
  title     = {Retargeting {PLAPACK} to clusters with hardware accelerators},
  booktitle = {2010 International Conference on High Performance Computing and Simulation (HPCS 2010)},
  editor    = {Smari, Waleed W. and McIntire, John P.},
  pages     = {444--451},
  publisher = {IEEE},
  year      = 2010,
  isbn      = {978-1-4244-6828-7},
  doi       = {10.1109/HPCS.2010.5547094},
  ee        = {http://dx.doi.org/10.1109/HPCS.2010.5547094},
  url       = {http://dblp.uni-trier.de/db/conf/ieeehpcs/ieeehpcs2010.html#FoguaIQG10},
  crossref  = {conf/ieeehpcs/2010},
  keywords  = {dblp},
  biburl    = {http://www.bibsonomy.org/bibtex/250cc3c38168576fc9b4a537489e1df9f/dblp},
  interhash = {81a44e48d989952cd44db8c15b09a624},
  intrahash = {50cc3c38168576fc9b4a537489e1df9f},
  added-at  = {2010-08-23T00:00:00.000+0200},
  timestamp = {2010-08-23T00:00:00.000+0200}
}
@inproceedings{Chan:2010:MCL:1810479.1810520,
  author    = {Chan, Ernie and {v}an~{d}e~{G}eijn, Robert and Chapman, Andrew},
  title     = {Managing the complexity of lookahead for {LU} factorization with pivoting},
  booktitle = {Proceedings of the 22nd {ACM} Symposium on Parallelism in Algorithms and Architectures},
  series    = {SPAA '10},
  pages     = {200--208},
  numpages  = {9},
  year      = {2010},
  location  = {Thira, Santorini, Greece},
  publisher = {ACM},
  address   = {New York, NY, USA},
  isbn      = {978-1-4503-0079-7},
  doi       = {10.1145/1810479.1810520},
  url       = {http://doi.acm.org/10.1145/1810479.1810520},
  acmid     = {1810520},
  keywords  = {LU factorization with partial pivoting, algorithm-by-blocks, directed acyclic graph, lookahead}
}
@inproceedings{DBLP:conf/ipps/ChanGZN10,
  title     = {Transforming linear algebra libraries: From abstraction to parallelism},
  author    = {Ernie Chan and Robert A. {v}an~{d}e~{G}eijn and Field G. {V}an~{Z}ee and Jim Nagle},
  year      = {2010},
  booktitle = {HIPS'10: Proceedings of Fifteenth International Workshop on High-Level Parallel Programming Models and Supportive Environments (IPDPS Workshop)}
}
@inproceedings{Marques:2009:OCQ:1616772.1616858,
  author    = {Marqu{\'e}s, Mercedes and Quintana-Ort\'{\i}, Gregorio and Quintana-Ort\'{\i}, Enrique S. and {v}an~{d}e~{G}eijn, Robert},
  title     = {Out-of-Core Computation of the {QR} Factorization on Multi-core Processors},
  booktitle = {Proceedings of the 15th International Euro-Par Conference on Parallel Processing},
  series    = {Euro-Par '09},
  pages     = {809--820},
  numpages  = {12},
  year      = {2009},
  location  = {Delft, The Netherlands},
  publisher = {Springer-Verlag},
  address   = {Berlin, Heidelberg},
  isbn      = {978-3-642-03868-6},
  doi       = {10.1007/978-3-642-03869-3_75},
  url       = {http://dx.doi.org/10.1007/978-3-642-03869-3_75},
  acmid     = {1616858},
  keywords  = {Dense linear algebra, QR factorization, high performance, multi-core processors, out-of-core computation}
}
@inproceedings{Marques:2009:SLD:1586640.1587581,
  author    = {Marqu{\'e}s, Mercedes and Quintana-Ort\'{\i}, Gregorio and Quintana-Ort\'{\i}, Enrique S. and {v}an~{d}e~{G}eijn, Robert A.},
  title     = {Solving ``large'' dense matrix problems on multi-core processors},
  booktitle = {Proceedings of the 2009 IEEE International Symposium on Parallel \& Distributed Processing},
  series    = {IPDPS '09},
  pages     = {1--8},
  numpages  = {8},
  year      = {2009},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  isbn      = {978-1-4244-3751-1},
  doi       = {10.1109/IPDPS.2009.5161162},
  url       = {http://dx.doi.org/10.1109/IPDPS.2009.5161162},
  acmid     = {1587581}
}
@inproceedings{Marques:2009:UGP:1636704.1637056,
  author    = {Marqu{\'e}s, Mercedes and Quintana-Ort\'{\i}, Gregorio and Quintana-Ort\'{\i}, Enrique S. and {v}an~{d}e~{G}eijn, Robert},
  title     = {Using Graphics Processors to Accelerate the Solution of Out-of-Core Linear Systems},
  booktitle = {Proceedings of the 2009 Eighth International Symposium on Parallel and Distributed Computing},
  series    = {ISPDC '09},
  pages     = {169--176},
  numpages  = {8},
  year      = {2009},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  isbn      = {978-0-7695-3680-4},
  doi       = {10.1109/ISPDC.2009.7},
  url       = {http://dx.doi.org/10.1109/ISPDC.2009.7},
  acmid     = {1637056},
  keywords  = {Linear Algebra, Linear Systems, Out-of-Core Algorithms, Graphics Processors}
}
@inproceedings{Zafont:2009:FDD:1586640.1587393,
  author    = {Zafont, M. Jesus and Martin, Alberto and Igual, Francisco and Quintana-Ort\'{\i}, Enrique S.},
  title     = {Fast development of dense linear algebra codes on graphics processors},
  booktitle = {Proceedings of the 2009 IEEE International Symposium on Parallel \& Distributed Processing},
  series    = {IPDPS '09},
  pages     = {1--8},
  numpages  = {8},
  year      = {2009},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  isbn      = {978-1-4244-3751-1},
  doi       = {10.1109/IPDPS.2009.5160940},
  url       = {http://dx.doi.org/10.1109/IPDPS.2009.5160940},
  acmid     = {1587393}
}
@inproceedings{Quintana-Orti:2009:SDL:1504176.1504196,
  author    = {Quintana-Ort\'{\i}, Gregorio and Igual, Francisco D. and Quintana-Ort\'{\i}, Enrique S. and {v}an~{d}e~{G}eijn, Robert A.},
  title     = {Solving dense linear systems on platforms with multiple hardware accelerators},
  booktitle = {Proceedings of the 14th ACM SIGPLAN symposium on Principles and practice of parallel programming},
  series    = {PPoPP '09},
  pages     = {121--130},
  numpages  = {10},
  year      = {2009},
  location  = {Raleigh, NC, USA},
  publisher = {ACM},
  address   = {New York, NY, USA},
  isbn      = {978-1-60558-397-6},
  doi       = {10.1145/1504176.1504196},
  url       = {http://doi.acm.org/10.1145/1504176.1504196},
  acmid     = {1504196},
  keywords  = {algorithms-by-blocks, dependency analysis, dynamic scheduling, gpus, out-of-order execution}
}
@inproceedings{Diamond:2008:HPD:1345206.1345218,
  author    = {Diamond, Jeffrey R. and Robatmili, Behnam and Keckler, Stephen W. and {v}an~{d}e~{G}eijn, Robert and Goto, Kazushige and Burger, Doug},
  title     = {High performance dense linear algebra on a spatially distributed processor},
  booktitle = {Proceedings of the 13th ACM SIGPLAN Symposium on Principles and practice of parallel programming},
  series    = {PPoPP '08},
  pages     = {63--72},
  numpages  = {10},
  year      = {2008},
  location  = {Salt Lake City, UT, USA},
  publisher = {ACM},
  address   = {New York, NY, USA},
  isbn      = {978-1-59593-795-7},
  doi       = {10.1145/1345206.1345218},
  url       = {http://doi.acm.org/10.1145/1345206.1345218},
  acmid     = {1345218},
  keywords  = {dense linear algebra, gotoblas, grid processors, hybrid dataflow, instruction level parallelism, matrix multiply, on-chip networks, tile based architecture}
}
% For @incollection, title is the contributed chapter and booktitle is the containing volume.
@incollection{Quintana-Orti:2008:ASB:1505300.1505325,
  author    = {Quintana-Ort\'{\i}, Gregorio and Quintana-Ort\'{\i}, Enrique S. and Rem\'{o}n, Alfredo and {v}an~{d}e~{G}eijn, Robert A.},
  title     = {An Algorithm-by-Blocks for {SuperMatrix} Band {Cholesky} Factorization},
  booktitle = {High Performance Computing for Computational Science - VECPAR 2008},
  editor    = {Palma, Jos{\'e} M. and Amestoy, Patrick R. and Dayd{\'e}, Michel and Mattoso, Marta and Lopes, Jo\~{a}o Correia},
  pages     = {228--239},
  numpages  = {12},
  year      = {2008},
  publisher = {Springer-Verlag},
  address   = {Berlin, Heidelberg},
  isbn      = {978-3-540-92858-4},
  doi       = {10.1007/978-3-540-92859-1_21},
  url       = {http://dx.doi.org/10.1007/978-3-540-92859-1_21},
  acmid     = {1505325},
  keywords  = {Cholesky factorization, band matrices, dynamic scheduling, high-performance, linear algebra libraries, out-of-order execution}
}
@inproceedings{DBLP:conf/ipps/Quintana-OrtiQCGZ08,
  author    = {Gregorio Quintana-Ort\'{\i} and
               Enrique S. Quintana-Ort\'{\i} and
               Ernie Chan and
               Robert A. {v}an~{d}e~{G}eijn and
               Field G. {V}an~{Z}ee},
  title     = {Design of scalable dense linear algebra libraries for multithreaded
               architectures: the {LU} factorization},
  booktitle = {22nd IEEE International Symposium on Parallel and Distributed
               Processing, IPDPS 2008},
  year      = {2008},
  doi       = {10.1109/IPDPS.2008.4536353},
  ee        = {http://dx.doi.org/10.1109/IPDPS.2008.4536353}
}
@inproceedings{Chan:2008:SMR:1345206.1345227,
  author    = {Chan, Ernie and {V}an~{Z}ee, Field G. and Bientinesi, Paolo and Quintana-Ort\'{\i}, Enrique S. and Quintana-Ort\'{\i}, Gregorio and {v}an~{d}e~{G}eijn, Robert},
  title     = {{SuperMatrix}: a multithreaded runtime scheduling system for algorithms-by-blocks},
  booktitle = {Proceedings of the 13th ACM SIGPLAN Symposium on Principles and practice of parallel programming},
  series    = {PPoPP '08},
  pages     = {123--132},
  numpages  = {10},
  year      = {2008},
  location  = {Salt Lake City, UT, USA},
  publisher = {ACM},
  address   = {New York, NY, USA},
  isbn      = {978-1-59593-795-7},
  doi       = {10.1145/1345206.1345227},
  url       = {http://doi.acm.org/10.1145/1345206.1345227},
  acmid     = {1345227},
  keywords  = {algorithms-by-blocks, dependency analysis, dynamic scheduling, out-of-order execution}
}
@inproceedings{Quintana-Orti:2008:SQF:1343596.1343904,
  author    = {Quintana-Ort\'{\i}, Gregorio and Quintana-Ort\'{\i}, Enrique S. and Chan, Ernie and {v}an~{d}e~{G}eijn, Robert A. and {V}an~{Z}ee, Field G.},
  title     = {Scheduling of {QR} Factorization Algorithms on {SMP} and Multi-Core Architectures},
  booktitle = {Proceedings of the 16th Euromicro Conference on Parallel, Distributed and Network-Based Processing (PDP 2008)},
  series    = {PDP '08},
  pages     = {301--310},
  numpages  = {10},
  year      = {2008},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  isbn      = {978-0-7695-3089-5},
  doi       = {10.1109/PDP.2008.37},
  url       = {http://dx.doi.org/10.1109/PDP.2008.37},
  acmid     = {1343904},
  keywords  = {QR factorization, high-performance, dynamic scheduling, out-of-order execution, linear algebra libraries}
}
@inproceedings{Chan:2007:SYD:1545007.1545136,
  author    = {Chan, Ernie and {V}an~{Z}ee, Field G. and Quintana-Ort\'{\i}, Enrique S. and Quintana-Ort\'{\i}, Gregorio and {v}an~{d}e~{G}eijn, Robert},
  title     = {Satisfying your dependencies with {SuperMatrix}},
  booktitle = {Proceedings of the 2007 IEEE International Conference on Cluster Computing},
  series    = {CLUSTER '07},
  pages     = {91--99},
  numpages  = {9},
  year      = {2007},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  isbn      = {978-1-4244-1387-4},
  doi       = {10.1109/CLUSTR.2007.4629221},
  url       = {http://dx.doi.org/10.1109/CLUSTR.2007.4629221},
  acmid     = {1545136}
}

@inproceedings{Chan:2007:SOS:1248377.1248397,
  author    = {Chan, Ernie and Quintana-Ort\'{\i}, Enrique S. and Quintana-Ort\'{\i}, Gregorio and {v}an~{d}e~{G}eijn, Robert},
  title     = {Supermatrix out-of-order scheduling of matrix operations for {SMP} and multi-core architectures},
  booktitle = {Proceedings of the nineteenth annual ACM symposium on Parallel algorithms and architectures},
  series    = {SPAA '07},
  pages     = {116--125},
  numpages  = {10},
  year      = {2007},
  location  = {San Diego, California, USA},
  publisher = {ACM},
  address   = {New York, NY, USA},
  isbn      = {978-1-59593-667-7},
  doi       = {10.1145/1248377.1248397},
  url       = {http://doi.acm.org/10.1145/1248377.1248397},
  acmid     = {1248397},
  keywords  = {data affinity, data-flow parallelism, dense linear algebra libraries, dynamic scheduling, out-of-order execution}
}

@inproceedings{Chan:2006:CCA:1122971.1122975,
  author    = {Chan, Ernie and {v}an~{d}e~{G}eijn, Robert and Gropp, William and Thakur, Rajeev},
  title     = {Collective communication on architectures that support simultaneous communication over multiple links},
  booktitle = {Proceedings of the eleventh ACM SIGPLAN symposium on Principles and practice of parallel programming},
  series    = {PPoPP '06},
  pages     = {2--11},
  numpages  = {10},
  year      = {2006},
  location  = {New York, New York, USA},
  publisher = {ACM},
  address   = {New York, NY, USA},
  isbn      = {1-59593-189-9},
  doi       = {10.1145/1122971.1122975},
  url       = {http://doi.acm.org/10.1145/1122971.1122975},
  acmid     = {1122975}
}
@inproceedings{Low:2005:ESP:1065944.1065965,
  author    = {Low, Tze Meng and {v}an~{d}e~{G}eijn, Robert A. and {V}an~{Z}ee, Field G.},
  title     = {Extracting {SMP} parallelism for dense linear algebra algorithms from high-level specifications},
  booktitle = {Proceedings of the tenth ACM SIGPLAN symposium on Principles and practice of parallel programming},
  series    = {PPoPP '05},
  pages     = {153--163},
  numpages  = {11},
  year      = {2005},
  location  = {Chicago, IL, USA},
  publisher = {ACM},
  address   = {New York, NY, USA},
  isbn      = {1-59593-080-9},
  doi       = {10.1145/1065944.1065965},
  url       = {http://doi.acm.org/10.1145/1065944.1065965},
  acmid     = {1065965},
  keywords  = {SMP Parallelism, code generation, formal derivation, linear algebra}
}




@inproceedings{Gunnels:2001:FHM:645455.653765,
  author    = {Gunnels, John A. and Henry, Greg M. and {v}an~{d}e~{G}eijn, Robert A.},
  title     = {A Family of High-Performance Matrix Multiplication Algorithms},
  booktitle = {Proceedings of the International Conference on Computational Sciences-Part I},
  series    = {ICCS '01},
  publisher = {Springer-Verlag},
  address   = {London, UK},
  year      = {2001},
  pages     = {51--60},
  numpages  = {10},
  isbn      = {3-540-42232-3},
  url       = {http://dl.acm.org/citation.cfm?id=645455.653765},
  acmid     = {653765}
}
@InProceedings{Gunnels:2001:FHM:647882.738103,
  author    = {Gunnels, John A. and {v}an~{d}e~{G}eijn, Robert A. and Katz, Daniel S. and Quintana-Ort\'{\i}, Enrique S.},
  title     = {Fault-Tolerant High-Performance Matrix Multiplication: Theory and Practice},
  booktitle = {Proceedings of the 2001 International Conference on Dependable Systems and Networks (formerly: FTCS)},
  series    = {DSN '01},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  year      = 2001,
  pages     = {47--56},
  numpages  = {10},
  isbn      = {0-7695-1101-5},
  url       = {http://dl.acm.org/citation.cfm?id=647882.738103},
  acmid     = {738103}
}
@inproceedings{Gunnels:2000:FMH:647102.717559,
  author    = {Gunnels, John A. and {v}an~{d}e~{G}eijn, Robert A.},
  title     = {Formal Methods for High-Performance Linear Algebra Libraries},
  booktitle = {The Architecture of Scientific Software: Proceedings of the IFIP TC2/WG2.5 Working Conference on the Architecture of Scientific Software},
  publisher = {Kluwer, B.V.},
  address   = {Deventer, The Netherlands},
  year      = {2001},
  pages     = {193--210},
  numpages  = {18},
  isbn      = {0-7923-7339-1},
  url       = {http://dl.acm.org/citation.cfm?id=647102.717559},
  acmid     = {717559}
}

@techreport{FLAWN71,
  author      = {Tyler M. Smith and Robert {v}an~{d}e~{G}eijn and Mikhail Smelyanskiy and Jeff R. Hammond and Field G. {V}an~{Z}ee},
  title       = {Opportunities for Parallelism in Matrix Multiplication. {FLAME} {W}orking {N}ote \#71},
  institution = {Department of Computer Science, The University of Texas at Austin},
  number      = {TR-13-20},
  year        = {2013},
  note        = {submitted to IPDPS2014}
}
@techreport{FLAWN70,
  author      = {James Levitt},
  title       = {Adding Aggressive Early Deflation to the Restructured Symmetric {QR} Algorithm. {FLAME} {W}orking {N}ote \#70},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Honors Thesis Report},
  number      = {HR-13-07},
  month       = may,
  year        = {2013}
}
@techreport{FLAWN69,
  author      = {Field G. {V}an~{Z}ee and
                 Tyler Smith and
                 Francisco D. Igual and
                 Mikhail Smelyanskiy and
                 Xianyi Zhang and
                 Michael Kistler and
                 Vernon Austel and
                 John Gunnels and
                 Tze Meng Low and
                 Bryan Marker and
                 Lee Killough and
                 Robert A. {v}an~{d}e~{G}eijn},
  title       = {Implementing Level-3 {BLAS} with {BLIS}: Early Experience. {FLAME} {W}orking {N}ote \#69},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-13-03},
  month       = apr,
  year        = {2013}
}
@techreport{FLAWN68,
  author      = {Martin D. Schatz and Tze Meng Low and Robert A. {v}an~{d}e~{G}eijn and Tamara G. Kolda},
  title       = {Exploiting Symmetry in Tensors for High Performance: an Initial Study. {FLAME} {W}orking {N}ote \#68},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-12-33},
  month       = dec,
  year        = {2012}
}
@techreport{FLAWN67,
  author      = {Bryan Marker and Don Batory and Robert {v}an~{d}e~{G}eijn},
  title       = {Code Generation of Optimized Distributed-Memory Dense Linear Algebra Kernels. {FLAME} {W}orking {N}ote \#67},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-12-31},
  month       = nov,
  year        = {2012}
}
@techreport{FLAWN66,
  author      = {Field G. {V}an~{Z}ee and Robert A. {v}an~{d}e~{G}eijn},
  title       = {{BLIS}: A Framework for Generating {BLAS}-like Libraries. {FLAME} {W}orking {N}ote \#66},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-12-30},
  month       = nov,
  year        = {2012}
}
@techreport{FLAWN65,
  author      = {Kyungjoo Kim and Victor Eijkhout},
  title       = {A Parallel Sparse Direct Solver via Hierarchical {DAG} Scheduling},
  institution = {Texas Advanced Computing Center, The University of Texas at Austin},
  number      = {TR-12-05},
  year        = {2012}
}
@techreport{FLAWN64,
  author      = {Tze~Meng Low and Bryan Marker and Robert {v}an~{d}e~{G}eijn},
  title       = {Theory and Practice of Fusing Loops when Optimizing Parallel Dense Linear Algebra Operations. {FLAME} {W}orking {N}ote \#64},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-12-18},
  month       = aug,
  year        = {2012}
}
@techreport{FLAWN63,
  author      = {Kyungjoo Kim and Victor Eijkhout and Robert A. van de Geijn},
  title       = {Dense Matrix Computation on a Heterogenous Architecture: A Block Synchronous Approach. {FLAME} {W}orking {N}ote \#63},
  institution = {Texas Advanced Computing Center, The University of Texas at Austin},
  number      = {TR-12-04},
  year        = {2012},
  owner       = {eijkhout},
  timestamp   = {2012.08.05}
}
@techreport{FLAWN62,
  author      = {Martin Schatz and Jack Poulson and Robert {v}an~{d}e~{G}eijn},
  title       = {Parallel Matrix Multiplication: {2D} and {3D}. {FLAME} {W}orking {N}ote \#62},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-12-13},
  month       = jun,
  year        = {2012}
}
@techreport{FLAWN61,
  author      = {Francisco D. Igual and Murtaza Ali and Arnon Friedmann and Eric Stotzer and Timothy Wentz and Robert van de Geijn},
  title       = {Unleashing {DSPs} for General-Purpose {HPC}. {FLAME} {W}orking {N}ote \#61},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-12-02},
  month       = feb,
  year        = {2012}
}
@techreport{FLAWN60,
  author      = {Field G. {V}an~{Z}ee and Robert van de Geijn and Gregorio Quintana-Ort\'{\i}},
  title       = {Restructuring the {QR} Algorithm for High-Performance Application of {G}ivens Rotations. {FLAME} {W}orking {N}ote \#60},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-11-36},
  month       = oct,
  year        = {2011}
}
@techreport{FLAWN59,
  author      = {Ardavan Pedram and Andreas Gerstlauer and Robert A. van de Geijn},
  title       = {Co-Design Tradeoffs for High-Performance, Low-Power Linear Algebra Architectures. {FLAME} {W}orking {N}ote \#59},
  institution = {The University of Texas at Austin, Computer Engineering Research Center},
  type        = {Technical Report},
  number      = {UT-CERC-12-02},
  month       = oct,
  year        = {2011}
}
@techreport{FLAWN58,
  author      = {Bryan Marker and Andy Terrel and Jack Poulson and Don Batory and Robert van de Geijn},
  title       = {Mechanizing the Expert Dense Linear Algebra Developer. {FLAME} {W}orking {N}ote \#58},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-11-09},
  month       = mar,
  year        = {2011}
}
@techreport{FLAWN57,
  author      = {Robert van de Geijn and Tyler Rhodes and Maggie Myers and Field {V}an~{Z}ee},
  title       = {Deriving Linear Algebra Libraries. {FLAME} {W}orking {N}ote \#57},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-11-09},
  month       = mar,
  year        = {2011}
}
@techreport{FLAWN56,
  author      = {Jack Poulson and Robert van de Geijn and Jeffrey Bennighof},
  title       = {Parallel Algorithms for Reducing the Generalized {H}ermitian-Definite Eigenvalue Problem. {FLAME} {W}orking {N}ote \#56},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-11-05},
  month       = feb,
  year        = {2011}
}
@techreport{FLAWN55,
  author      = {Bryan Marker and Ernie Chan and Jack Poulson and Robert van de Geijn and Rob F. {V}an~{d}er~{W}ijngaart and Timothy G. Mattson and Theodore E. Kubaska},
  title       = {Programming Many-Core Architectures - A Case Study: Dense Matrix Computations on the {I}ntel {SCC} Processor. {FLAME} {W}orking {N}ote \#55},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-11-03},
  month       = jan,
  year        = {2011}
}
@techreport{FLAWN54,
  author      = {Taylor L. Riche and Don Batory and Rui Goncalves and Bryan Marker},
  title       = {Architecture Design by Transformation. {FLAME} {W}orking {N}ote \#54},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-10-39},
  month       = dec,
  year        = {2010}
}
@techreport{FLAWN53,
  author      = {Field G. {V}an~{Z}ee and Robert van de Geijn and Gregorio Quintana-Ort\'{\i} and G. Joseph Elizondo},
  title       = {Algorithms for Reducing a Matrix to Condensed Form. {FLAME} {W}orking {N}ote \#53},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-10-37},
  month       = oct,
  year        = {2010}
}
@techreport{FLAWN52,
  author      = {Matthias Petschow and Paolo Bientinesi},
  title       = {{MR3-SMP}: A Symmetric Tridiagonal Eigensolver for Multi-Core Architectures. {FLAME} {W}orking {N}ote \#52},
  institution = {Aachen Institute for Computational Engineering Science, RWTH Aachen},
  type        = {Technical Report},
  number      = {AICES-2010/10-1},
  month       = oct,
  year        = {2010}
}
@techreport{FLAWN51,
  author      = {Diego Fabregat and Paolo Bientinesi},
  title       = {Automatic Generation of Partitioned Matrix Expressions for Matrix Operations. {FLAME} {W}orking {N}ote \#51},
  institution = {Aachen Institute for Computational Engineering Science, RWTH Aachen},
  type        = {Technical Report},
  number      = {AICES-2010/10-1},
  month       = oct,
  year        = {2010}
}
@techreport{FLAWN50,
  author      = {Ernie Chan and Francisco D. Igual},
  title       = {Runtime Data Flow Graph Scheduling of Matrix Computations with Multiple Hardware Accelerators. {FLAME} {W}orking {N}ote \#50},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-10-36},
  month       = oct,
  year        = {2010}
}
@techreport{FLAWN49,
  author      = {Ardavan Pedram and Andreas Gerstlauer and Robert van de Geijn},
  title       = {Towards a High Performance, Low Power Linear Algebra Processor. {FLAME} {W}orking {N}ote \#49},
  institution = {The University of Texas at Austin, Computer Engineering Research Center},
  type        = {Technical Report},
  number      = {UT-CERC-10-03},
  month       = sep,
  year        = {2010}
}
@techreport{FLAWN48,
  author      = {Francisco D. Igual and Gregorio Quintana-Ort\'{\i}},
  title       = {Solving Linear Algebra Problems on Distributed-Memory Computers using Serial Codes. {FLAME} {W}orking {N}ote \#48},
  institution = {Universidad Jaume I, Depto. de Ingenieria y Ciencia de Computadores},
  type        = {Technical Report},
  number      = {DICC 2010-07-01},
  month       = jul,
  year        = {2010}
}
@techreport{FLAWN47,
  author      = {Victor Eijkhout and Paolo Bientinesi and Robert van de Geijn},
  title       = {Proof-driven Derivation of {K}rylov Solver Libraries. {FLAME} {W}orking {N}ote \#47},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-10-02},
  year        = {2010}
}
@techreport{FLAWN46,
  author      = {Victor Eijkhout and Paolo Bientinesi and Robert van de Geijn},
  title       = {Toward Mechanical Derivation of {K}rylov Solver Libraries. {FLAME} {W}orking {N}ote \#46},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-10-01},
  year        = {2010}
}
@techreport{FLAWN45,
  author      = {Paolo Bientinesi and Margaret Myers and Robert van de Geijn},
  title       = {Formal correctness proof of mechanically derived {CG} methods. {FLAME} {W}orking {N}ote \#45},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-09-06},
  year        = {2009}
}
@techreport{FLAWN44,
  author      = {Jack Poulson and Bryan Marker and Robert van de Geijn},
  title       = {Elemental: A New Framework for Distributed Memory Dense Matrix Computations. {FLAME} {W}orking {N}ote \#44},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-10-20},
  month       = jun,
  year        = {2010}
}
@techreport{FLAWN43,
  author      = {Gregorio Quintana-Ort\'{\i} and Francisco D. Igual and Mercedes Marqu\'es and Enrique Quintana-Ort\'{\i} and Robert van de Geijn},
  title       = {A Run-Time System for Programming Out-of-Core Matrix Algorithms-by-Tiles on Multithreaded Architectures. {FLAME} {W}orking {N}ote \#43},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-10-10},
  month       = mar,
  year        = {2010}
}
@techreport{FLAWN42,
  author      = {Manuel Fogue and Francisco D. Igual and Enrique Quintana-Ort\'{\i} and Robert van de Geijn},
  title       = {Retargeting {PLAPACK} to Clusters with Hardware Accelerators. {FLAME} {W}orking {N}ote \#42},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-10-06},
  month       = feb,
  year        = {2010}
}
@techreport{FLAWN41,
  author      = {Robert A. van de Geijn and Field G. {V}an~{Z}ee},
  title       = {High-Performance Up-and-Downdating via {H}ouseholder-like Transformations. {FLAME} {W}orking {N}ote \#41},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-10-04},
  month       = jan,
  year        = {2010}
}
@techreport{FLAWN40,
  author      = {Victor Eijkhout and Paolo Bientinesi and Robert van de Geijn},
  title       = {Toward Mechanical Derivation of {K}rylov Solver Libraries. {FLAME} {W}orking {N}ote \#40},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-10-01},
  year        = {2010}
}
@techreport{FLAWN39,
  author      = {Ernie Chan},
  title       = {Runtime Data Flow Scheduling of Matrix Computations. {FLAME} {W}orking {N}ote \#39},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-09-22},
  month       = aug,
  year        = {2009}
}
@techreport{FLAWN38,
  author      = {Ernie Chan and Jim Nagle and Robert {v}an~{d}e~Geijn and Field G. {V}an~{Z}ee},
  title       = {Transforming Linear Algebra Libraries: From Abstraction to Parallelism. {FLAME} {W}orking {N}ote \#38},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-09-17},
  month       = may,
  year        = {2009}
}

@techreport{FLAWN37,
  author      = {Francisco D. Igual and Gregorio Quintana-Ort\'{\i} and Robert {v}an~{d}e~Geijn},
  title       = {Level-3 {BLAS} on a {GPU}: Picking the Low Hanging Fruit. {FLAME} {W}orking {N}ote \#37},
  institution = {Universidad Jaume I, Depto. de Ingenieria y Ciencia de Computadores},
  type        = {Technical Report},
  number      = {DICC 2009-04-01},
  month       = apr,
  year        = {2009},
  note        = {Updated May 21, 2009.}
}
@techreport{FLAWN36,
  author      = {Mercedes Marqu\'es and Gregorio Quintana-Ort\'{\i} and Enrique S. Quintana-Ort\'{\i} and Robert {v}an~{d}e~Geijn},
  title       = {Solving Large Dense Matrix Problems on Multi-Core Processors and {GPU}s. {FLAME} {W}orking {N}ote \#36},
  institution = {Universidad Jaume I, Depto. de Ingenieria y Ciencia de Computadores},
  type        = {Technical Report},
  number      = {ICC 01-01-2009},
  month       = jan,
  year        = {2009}
}
@techreport{FLAWN35,
  author      = {Richard Veras and Jonathan Monette and Enrique Quintana-Ort\'{\i} and Robert {v}an~{d}e~Geijn},
  title       = {{FLAMES2S}: From Abstraction to High Performance. {FLAME} {W}orking {N}ote \#35},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-2008-49},
  month       = dec,
  year        = {2008}
}

@techreport{FLAWN34,
  author      = {Robert {v}an~{d}e~Geijn},
  title       = {Beautiful Parallel Code: Evolution vs. Intelligent Design. Presented at {S}upercomputing 2008 Workshop on Node Level Parallelism for Large Scale Supercomputers, {A}ustin, {T}exas, {N}ovember 2008. {FLAME} {W}orking {N}ote \#34},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-2008-46},
  month       = nov,
  year        = {2008}
}

@techreport{FLAWN33,
  author      = {Paolo Bientinesi and Robert A. {v}an~{d}e~Geijn},
  title       = {The Science of Deriving Stability Analyses. {FLAME} {W}orking {N}ote \#33},
  institution = {Aachen Institute for Computational Engineering Sciences, RWTH Aachen},
  type        = {Technical Report},
  number      = {AICES-2008-2},
  month       = nov,
  year        = {2008}
}

@techreport{FLAWN32,
  author      = {Gregorio Quintana-Ort\'{\i} and Francisco D. Igual and Enrique S. Quintana-Ort\'{\i} and Robert {v}an~{d}e~Geijn},
  title       = {Solving Dense Linear Algebra Problems on Platforms with Multiple Hardware Accelerators. {FLAME} {W}orking {N}ote \#32},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-2008-22},
  month       = may,
  year        = {2008}
}

@techreport{FLAWN31,
  author      = {Maribel Castillo and Ernie Chan and Francisco D. Igual and Rafael Mayo and Enrique S. Quintana-Ort\'{\i} and Gregorio Quintana-Ort\'{\i} and Robert {v}an~{d}e~Geijn and Field G. {V}an~{Z}ee},
  title       = {Making Parallel Programming Synonymous with Programming for Linear Algebra Libraries. {FLAME} {W}orking {N}ote \#31},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-2008-20},
  month       = apr,
  year        = {2008}
}
@techreport{FLAWN30,
  author      = {Sergio Barrachina and Maribel Castillo and Francisco D. Igual and Rafael Mayo and Enrique S. Quintana-Ort\'{\i}},
  title       = {{FLAG@lab}: An {M}-script {API} for Linear Algebra Operations on Graphics Processors. {FLAME} {W}orking {N}ote \#30},
  institution = {Universidad Jaume I, Depto. de Ingenieria y Ciencia de Computadores},
  type        = {Technical Report},
  number      = {ICC 01-02-2008},
  month       = feb,
  year        = {2008}
}
@techreport{FLAWN29,
  author      = {Gregorio Quintana-Ort\'{\i} and Enrique S. Quintana-Ort\'{\i} and Ernie Chan and Field G. {V}an~{Z}ee and Robert {v}an~{d}e~Geijn},
  title       = {Programming Algorithms-by-Blocks for Matrix Computations on Multithreaded Architectures. {FLAME} {W}orking {N}ote \#29},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-2008-04},
  month       = jan,
  year        = {2008}
}

@techreport{FLAWN28,
  author      = {Bryan Marker},
  title       = {On Composing Matrix Multiplication from Kernels. {FLAME} {W}orking {N}ote \#28},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Honors Thesis Report},
  number      = {HR-2007-32},
  month       = sep,
  year        = {2007}
}
@techreport{FLAWN27,
  author      = {Gregorio Quintana-Ort\'{\i} and Enrique S. Quintana-Ort\'{\i} and Alfredo Remon and Robert {v}an~{d}e~Geijn},
  title       = {{S}uper{M}atrix for the Factorization of Band Matrices. {FLAME} {W}orking {N}ote \#27},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-07-51},
  month       = sep,
  year        = {2007}
}

@techreport{FLAWN26,
  author      = {Gregorio Quintana-Ort\'{\i} and Enrique S. Quintana-Ort\'{\i} and Ernie Chan and Robert {v}an~{d}e~Geijn and Field G. {V}an~{Z}ee},
  title       = {Design and Scheduling of an Algorithm-by-Blocks for {LU} Factorization on Multithreaded Architectures. {FLAME} {W}orking {N}ote \#26},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-2007-50},
  month       = sep,
  year        = {2007}
}
@techreport{FLAWN25,
  author      = {Ernie Chan and Field G. {V}an~{Z}ee and Paolo Bientinesi and Enrique S. Quintana-Ort\'{\i} and Gregorio Quintana-Ort\'{\i} and Robert {v}an~{d}e~Geijn},
  title       = {{S}uper{M}atrix: A Multithreaded Runtime Scheduling System for Algorithms-by-Blocks. {FLAME} {W}orking {N}ote \#25},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-2007-41},
  month       = aug,
  year        = {2007}
}

@techreport{FLAWN24,
  author      = {Gregorio Quintana-Ort\'{\i} and Enrique S. Quintana-Ort\'{\i} and Ernie Chan and Field G. {V}an~{Z}ee and Robert {v}an~{d}e~Geijn},
  title       = {Scheduling of {QR} factorization algorithms on {SMP} and multi-core architectures. {FLAME} {W}orking {N}ote \#24},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-2007-37},
  month       = jul,
  year        = {2007}
}
@techreport{FLAWN23,
  author      = {Ernie Chan and Enrique S. Quintana-Ort\'{\i} and Gregorio Quintana-Ort\'{\i} and Robert {v}an~{d}e~Geijn},
  title       = {{S}uper{M}atrix Out-of-Order Scheduling of Matrix Operations for {SMP} and Multi-Core Architectures. {FLAME} {W}orking {N}ote \#23},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-2006-67},
  month       = dec,
  year        = {2006}
}
@techreport{FLAWN22,
  author      = {Ernie Chan and Marcel Heimlich and Avijit Purkayastha and Robert {v}an~{d}e~Geijn},
  title       = {Collective Communication: Theory, Practice, and Experience. {FLAME} {W}orking {N}ote \#22},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-2006-44},
  month       = sep,
  year        = {2006}
}
@techreport{FLAWN21,
  author      = {Enrique S. Quintana-Ort\'{\i} and Robert {v}an~{d}e~Geijn},
  title       = {Updating an {LU} Factorization with Pivoting. {FLAME} {W}orking {N}ote \#21},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-2006-42},
  month       = sep,
  year        = {2006}
}


@techreport{FLAWN20,
  author      = {Kazushige Goto and Robert {v}an~{d}e~Geijn},
  title       = {High-Performance Implementation of the Level-3 {BLAS}. {FLAME} {W}orking {N}ote \#20},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-2006-23},
  month       = may,
  year        = {2006}
}
@techreport{FLAWN19,
  author      = {Paolo Bientinesi and Brian Gunter and Robert {v}an~{d}e~Geijn},
  title       = {Families of Algorithms Related to the Inversion of a Symmetric Positive Definite Matrix. {FLAME} {W}orking {N}ote \#19},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-2006-20},
  month       = apr,
  year        = {2006}
}

@techreport{FLAWN18,
  author      = {H. Carter Edwards and Robert A. {v}an~{d}e~Geijn},
  title       = {Application Interface to Parallel Dense Matrix Libraries: Just let me solve my problem! {FLAME} {W}orking {N}ote \#18},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-2006-15},
  month       = feb,
  year        = {2006}
}
@techreport{FLAWN17,
  author      = {Paolo Bientinesi and Robert {v}an~{d}e~Geijn},
  title       = {Representing Dense Linear Algebra Algorithms: A Farewell to Indices. {FLAME} {W}orking {N}ote \#17},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-2006-10},
  month       = feb,
  year        = {2006}
}

@techreport{FLAWN16,
  author      = {Paolo Bientinesi and Kazushige Goto and Tze~Meng Low and Enrique S. Quintana-Ort\'{\i} and Robert {v}an~{d}e~Geijn and Field {V}an~{Z}ee},
  title       = {{FLAME} 2005 Prospectus: Towards the Final Generation of Dense Linear Algebra Libraries. {FLAME} {W}orking {N}ote \#16},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-2005-15},
  month       = apr,
  year        = {2005}
}

@techreport{FLAWN15,
  author      = {Tze~Meng Low and Kent Milfeld and Robert {v}an~{d}e~Geijn and Field {V}an~{Z}ee},
  title       = {Parallelizing {FLAME} Code with {O}pen{MP} Task Queues. {FLAME} {W}orking {N}ote \#15},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-2004-50},
  month       = may,
  year        = {2004}
}

@techreport{FLAWN14,
  author      = {Gregorio Quintana-Ort\'{\i} and Robert {v}an~{d}e~Geijn},
  title       = {Improving the Performance of Reduction to {H}essenberg Form. {FLAME} {W}orking {N}ote \#14},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-2004-44},
  month       = oct,
  year        = {2004}
}
@techreport{FLAWN13,
  author      = {Thierry Joffrain and Tze~Meng Low and Enrique S. Quintana-Ort\'{\i} and Robert {v}an~{d}e~Geijn and Field {V}an~{Z}ee},
  title       = {On Accumulating {H}ouseholder Transformations. {FLAME} {W}orking {N}ote \#13},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-2004-43},
  month       = oct,
  year        = {2004}
}
@techreport{FLAWN12,
  author      = {Tze Meng Low and Robert {v}an~{d}e~Geijn},
  title       = {An {API} for Manipulating Matrices Stored by Blocks. {FLAME} {W}orking {N}ote \#12},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-2004-15},
  month       = may,
  year        = {2004}
}
@techreport{FLAWN11,
  author      = {Paolo Bientinesi and Enrique S. Quintana-Ort\'{\i} and Robert {v}an~{d}e~Geijn},
  title       = {{FLAME@lab}: A Farewell to Indices. {FLAME} {W}orking {N}ote \#11},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-2003-11},
  month       = apr,
  year        = {2003}
}
@techreport{FLAWN10,
  author      = {Robert A. {v}an~{d}e~Geijn},
  title       = {Representing Linear Algebra Algorithms in Code: The {FLAME} {API}. {FLAME} {W}orking {N}ote \#10},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-2003-01},
  month       = jan,
  year        = {2003}
}
@techreport{FLAWN9,
  author      = {Kazushige Goto and Robert {v}an~{d}e~Geijn},
  title       = {On Reducing {TLB} Misses in Matrix Multiplication. {FLAME} {W}orking {N}ote \#9},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-2002-55},
  month       = nov,
  year        = {2002}
}
@techreport{FLAWN8,
  author      = {Paolo Bientinesi and John A. Gunnels and Margaret E. Myers and Enrique S. Quintana-Ort\'{\i} and Robert {v}an~{d}e~Geijn},
  title       = {The Science of Deriving Dense Linear Algebra Algorithms. {FLAME} {W}orking {N}ote \#8},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-2002-53},
  month       = sep,
  year        = {2002}
}
@techreport{FLAWN7,
  author      = {Greg M. Henry},
  title       = {Flexible High-Performance Matrix Multiply via a Self-Modifying Runtime Code. {FLAME} {W}orking {N}ote \#7},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-2001-46},
  month       = dec,
  year        = {2001}
}
@techreport{FLAWN6,
  author      = {John A. Gunnels},
  title       = {A Systematic Approach to the Design and Analysis of Linear Algebra Algorithms. {P}h.{D}. Dissertation. {FLAME} {W}orking {N}ote \#6},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-2001-44},
  month       = nov,
  year        = {2001}
}
@techreport{FLAWN5,
  author      = {Enrique S. Quintana-Ort\'{\i} and Robert {v}an~{d}e~Geijn},
  title       = {Formal Derivation of Algorithms: The Triangular {S}ylvester Equation. {FLAME} {W}orking {N}ote \#5},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-2001-35},
  month       = sep,
  year        = {2001}
}
@techreport{FLAWN4,
  author      = {John Gunnels and Greg Henry and Robert {v}an~{d}e~Geijn},
  title       = {High-Performance Matrix Multiplication Algorithms for Architectures with Hierarchical Memories. {FLAME} {W}orking {N}ote \#4},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-2001-22},
  month       = jun,
  year        = {2001}
}
@techreport{FLAWN3,
  author      = {John Gunnels and Robert {v}an~{d}e~Geijn},
  title       = {Developing Linear Algebra Algorithms: A Collection of Class Projects. {FLAME} {W}orking {N}ote \#3},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-2001-19},
  month       = may,
  year        = {2001}
}

@techreport{FLAWN2,
  author      = {John A. Gunnels and Daniel S. Katz and Enrique S. Quintana-Ort\'{\i} and Robert {v}an~{d}e~Geijn},
  title       = {Fault-Tolerant High-Performance Matrix-Matrix Multiplication. {FLAME} {W}orking {N}ote \#2},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-2000-34},
  month       = dec,
  year        = {2000}
}

@techreport{FLAWN1,
  author      = {John Gunnels and Greg Henry and Robert~{v}an~{d}e~Geijn},
  title       = {{F}ormal {L}inear {A}lgebra {M}ethods {E}nvironment ({FLAME}): Overview. {FLAME} {W}orking {N}ote \#1},
  institution = {The University of Texas at Austin, Department of Computer Sciences},
  type        = {Technical Report},
  number      = {TR-2000-28},
  month       = nov,
  year        = 2000
}

rvdg (at) cs.utexas.edu
Last modified: Thu Oct 24 19:32:33 CDT 2013