% latest update:  mbilenko, 11/1/2005

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Papers from the Statistics community
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@article{newcombe:science59,
  author  = {H. B. Newcombe and J. M. Kennedy and S. J. Axford and
             A. P. James},
  title   = {Automatic Linkage of Vital Records},
  journal = {Science},
  volume  = {130},
  pages   = {954--959},
  year    = {1959},
}

@article{tepping:jasa68,
  author  = {B. J. Tepping},
  title   = {A model for optimum linkage of records},
  journal = {Journal of the American Statistical Association},
  volume  = {63},
  pages   = {1321--1332},
  year    = {1968},
}

@article{fellegi:jasa69,
  author  = {I. P. Fellegi and A. B. Sunter},
  title   = {A Theory for Record Linkage},
  journal = {Journal of the American Statistical Association},
  volume  = {64},
  pages   = {1183--1210},
  year    = {1969},
}

@inproceedings{kelley:rl85,
  author    = {R. P. Kelley},
  title     = {Advances in record linkage methodology: a method for
               determining the best blocking strategy},
  booktitle = {Record Linkage Techniques - 1985: Proceedings of the
               Workshop on Exact Matching Methodologies},
  pages     = {199--203},
  address   = {Arlington, VA},
  year      = {1985},
  url       = {http://www.fcsm.gov/working-papers/1367_3.pdf},
}

@inproceedings{winkler:asa88,
  author    = {William E. Winkler},
  title     = {Using the {EM} Algorithm for Weight Computation in the
               {F}ellegi-{S}unter Model of Record Linkage},
  booktitle = {Proceedings of the Section on Survey Research Methods,
               American Statistical Association},
  pages     = {667--671},
  year      = {1988},
}

@book{newcombe:book88,
  author    = {H. B. Newcombe},
  title     = {Handbook of record linkage: methods for health and
               statistical studies, administration, and business},
  publisher = {Oxford University Press},
  year      = {1988},
}

@article{jaro:jasa89,
  author  = {M. A. Jaro},
  title   = {Advances in record-linkage methodology as applied to
             matching the 1985 {C}ensus of {T}ampa, {F}lorida},
  journal = {Journal of the American Statistical Association},
  volume  = {84},
  number  = {406},
  pages   = {414--420},
  year    = {1989},
}

@article{copas:jrssa90,
  author  = {J. Copas and F. Hilton},
  title   = {Record linkage: statistical models for matching computer
             records},
  journal = {Journal of the Royal Statistical Society: Series A},
  volume  = {153},
  number  = {3},
  pages   = {287--320},
  year    = {1990},
}

@inproceedings{winkler:asa90,
  author    = {William E. Winkler},
  title     = {String Comparator Metrics and Enhanced Decision Rules in
               the {F}ellegi-{S}unter Model of Record Linkage},
  booktitle = {Proceedings of the Section on Survey Research Methods,
               American Statistical Association},
  pages     = {354--359},
  year      = {1990},
}

@techreport{winkler:tr93,
  author      = {William E. Winkler},
  title       = {Improved Decision Rules in the {F}ellegi-{S}unter Model
                 of Record Linkage},
  institution = {Statistical Research Division, U.S. Census Bureau},
  address     = {Washington, DC},
  year        = {1993},
}

@techreport{winkler:tr94,
  author      = {William E. Winkler},
  title       = {Advanced Methods for Record Linkage},
  institution = {Statistical Research Division, U.S. Census Bureau},
  address     = {Washington, DC},
  year        = {1994},
}

@article{belin:jasa95,
  author  = {Thomas R. Belin and Donald B. Rubin},
  title   = {A Method for Calibrating False-Match Rates in Record
             Linkage},
  journal = {Journal of the American Statistical Association},
  volume  = {90},
  number  = {430},
  pages   = {694--707},
  year    = {1995},
}

@article{jaro:statmed95,
  author  = {M. A. Jaro},
  title   = {Probabilistic linkage of large public health data files},
  journal = {Statistics in Medicine},
  volume  = {14},
  number  = {5--7},
  pages   = {491--498},
  year    = {1995},
}

@techreport{winkler:tr99,
  author      = {William E. Winkler},
  title       = {The State of Record Linkage and Current Research
                 Problems},
  institution = {Statistical Research Division, U.S. Census Bureau},
  address     = {Washington, DC},
  year        = {1999},
}

@techreport{winkler:tr02,
  author      = {William E. Winkler},
  title       = {Methods for Record Linkage and {B}ayesian Networks},
  institution = {Statistical Research Division, U.S. Census Bureau},
  address     = {Washington, DC},
  year        = {2002},
}

@techreport{yancey:tr02,
  author      = {William E. Yancey},
  title       = {Improving {EM} Algorithm Estimates for Record Linkage
                 Parameters},
  institution = {Statistical Research Division, U.S. Census Bureau},
  address     = {Washington, DC},
  year        = {2002},
}

@techreport{yancey:tr04,
  author      = {William E. Yancey},
  title       = {An Adaptive String Comparator for Record Linkage},
  institution = {Statistical Research Division, U.S. Census Bureau},
  address     = {Washington, DC},
  year        = {2004},
}


@inproceedings{noren:kdd05,
  author    = {G. Niklas Nor{\'{e}}n and Roland Orre and Andrew Bate},
  title     = {A Hit-Miss Model for Duplicate Detection in the {WHO}
               {D}rug {S}afety {D}atabase},
  booktitle = {Proceedings of the 11th International Conference on
               Knowledge Discovery and Data Mining (KDD-05)},
  pages     = {459--468},
  address   = {Chicago, IL},
  year      = {2005},
}

@techreport{winkler:tr05,
  author      = {William E. Winkler},
  title       = {Approximate String Comparator Search Strategies for Very
                 Large Administrative Lists},
  institution = {Statistical Research Division, U.S. Census Bureau},
  address     = {Washington, DC},
  year        = {2005},
}

@techreport{winkler:tr06,
  author      = {William E. Winkler},
  title       = {Overview of Record Linkage and Current Research
                 Directions},
  institution = {Statistical Research Division, U.S. Census Bureau},
  address     = {Washington, DC},
  year        = {2006},
}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Papers from Computer Science venues
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@inproceedings{hernandez:sigmod95,
  author    = {Mauricio A. Hern{\'a}ndez and Salvatore J. Stolfo},
  title     = {The Merge/Purge Problem for Large Databases},
  booktitle = {Proceedings of the 1995 ACM SIGMOD International
               Conference on Management of Data (SIGMOD-95)},
  pages     = {127--138},
  address   = {San Jose, CA},
  month     = may,
  year      = {1995},
}

@inproceedings{monge:kdd96,
  author    = {Alvaro E. Monge and Charles P. Elkan},
  title     = {The field matching problem: Algorithms and applications},
  booktitle = {Proceedings of the Second International Conference on
               Knowledge Discovery and Data Mining (KDD-96)},
  pages     = {267--270},
  address   = {Portland, OR},
  month     = aug,
  year      = {1996},
}

@mastersthesis{hylton:mthesis96,
  author = {Jeremy A. Hylton},
  title  = {Identifying and merging related bibliographic records},
  school = {Department of Electrical Engineering and Computer Science,
            MIT},
  year   = {1996},
}

@inproceedings{monge:dmkd97,
  author    = {Alvaro E. Monge and Charles P. Elkan},
  title     = {An efficient domain-independent algorithm for detecting
               approximately duplicate database records},
  booktitle = {Proceedings of the 1997 ACM SIGMOD Workshop on Research
               Issues on Data Mining and Knowledge Discovery},
  pages     = {23--29},
  address   = {Tucson, AZ},
  month     = may,
  year      = {1997},
}

@inproceedings{cohen:sigmod98,
  author    = {William W. Cohen},
  title     = {Integration of heterogeneous databases without common
               domains using queries based on textual similarity},
  booktitle = {Proceedings of the 1998 ACM SIGMOD International
               Conference on Management of Data (SIGMOD-98)},
  pages     = {201--212},
  year      = {1998},
}

@inproceedings{giles:dl98,
  author    = {C. Lee Giles and Kurt Bollacker and Steve Lawrence},
  title     = {{CiteSeer}: An Automatic Citation Indexing System},
  booktitle = {Proceedings of the 3rd ACM Conference on Digital
               Libraries},
  pages     = {89--98},
  address   = {Pittsburgh, PA},
  year      = {1998},
}

@inproceedings{lee:dexa99,
  author    = {Mong Li Lee and Hongjun Lu and Tok Wang Ling and Yee Teng
               Ko},
  title     = {Cleansing Data for Mining and Warehousing},
  booktitle = {Proceedings of the 10th International Conference on
               Database and Expert Systems Applications (DEXA-99)},
  address   = {Florence, Italy},
  month     = aug,
  year      = {1999},
}

@inproceedings{lawrence:agents99,
  author    = {Steve Lawrence and Kurt Bollacker and C. Lee Giles},
  title     = {Autonomous Citation Matching},
  booktitle = {Proceedings of the 3rd International Conference on
               Autonomous Agents},
  pages     = {392--393},
  publisher = {ACM Press},
  address   = {New York, NY},
  month     = may,
  year      = {1999},
}

@inproceedings{zhu:kdd00-wkshp,
  author    = {J. J. Zhu and L. H. Ungar},
  title     = {String Edit Analysis for Merging Databases},
  booktitle = {Proceedings of the KDD-2000 Workshop on Text Mining},
  year      = {2000},
}

@unpublished{monge:submitted00,
  author = {Alvaro E. Monge},
  title  = {An adaptive and efficient algorithm for detecting
            approximately duplicate database records},
  note   = {Submitted paper},
  year   = {2000},
}

@inproceedings{cohen:kdd00,
  author    = {William W. Cohen and Henry Kautz and David McAllester},
  title     = {Hardening Soft Information Sources},
  booktitle = {Proceedings of the 6th International Conference on
               Knowledge Discovery and Data Mining (KDD-2000)},
  pages     = {255--259},
  address   = {Boston, MA},
  month     = aug,
  year      = {2000},
}

@inproceedings{lee:kdd00,
  author    = {Mong-Li Lee and Tok Wang Ling and Wai Lup Low},
  title     = {{IntelliClean}: a knowledge-based intelligent data
               cleaner},
  booktitle = {Proceedings of the 6th International Conference On
               Knowledge Discovery and Data Mining (KDD-2000)},
  pages     = {290--294},
  address   = {Boston, MA},
  year      = {2000},
}

@inproceedings{mccallum:kdd00,
  author    = {Andrew K. McCallum and Kamal Nigam and Lyle Ungar},
  title     = {Efficient Clustering of High-Dimensional Data Sets with
               Application to Reference Matching},
  booktitle = {Proceedings of the 6th International Conference On
               Knowledge Discovery and Data Mining (KDD-2000)},
  pages     = {169--178},
  address   = {Boston, MA},
  month     = aug,
  year      = {2000},
}

@inproceedings{cohen:sigir01-wkshp,
  author    = {William Cohen and Jacob Richman},
  title     = {Learning to Match and Cluster Entity Names},
  booktitle = {Proceedings of the 2001 ACM SIGIR Workshop on
               Mathematical/Formal Methods in Information Retrieval},
  address   = {New Orleans, LA},
  month     = sep,
  year      = {2001},
}

@inproceedings{galhardas:vldb01,
  author    = {Helena Galhardas and Daniela Florescu and Dennis Shasha
               and Eric Simon and Cristian Saita},
  title     = {Declarative data cleaning: Language, model, and
               algorithms},
  booktitle = {Proceedings of the 27th International Conference on Very
               Large Data Bases (VLDB-2001)},
  pages     = {371--380},
  address   = {Rome, Italy},
  year      = {2001},
}

@article{tejada:isj01,
  author  = {Sheila Tejada and Craig A. Knoblock and Steven Minton},
  title   = {Learning Object Identification Rules for Information
             Integration},
  journal = {Information Systems Journal},
  volume  = {26},
  number  = {8},
  pages   = {635--656},
  year    = {2001},
}

@inproceedings{christen:adm02,
  author    = {P. Christen and T. Churches and J. Zhu},
  title     = {Probabilistic Name and Address Cleaning and
               Standardisation},
  booktitle = {Proceedings of the Australasian Data Mining Workshop},
  year      = {2002},
}

@inproceedings{cohen:kdd02,
  author    = {William W. Cohen and Jacob Richman},
  title     = {Learning to Match and Cluster Large High-Dimensional Data
               Sets for Data Integration},
  booktitle = {Proceedings of the 8th ACM SIGKDD International Conference
               on Knowledge Discovery and Data Mining (KDD-2002)},
  pages     = {475--480},
  address   = {Edmonton, Alberta},
  year      = {2002},
}

@inproceedings{sarawagi:kdd02,
  author    = {Sunita Sarawagi and Anuradha Bhamidipaty},
  title     = {Interactive Deduplication using Active Learning},
  booktitle = {Proceedings of the 8th ACM SIGKDD International Conference
               on Knowledge Discovery and Data Mining (KDD-2002)},
  pages     = {269--278},
  address   = {Edmonton, Alberta},
  year      = {2002},
}

@inproceedings{tejada:kdd02,
  author    = {Sheila Tejada and Craig A. Knoblock and Steven Minton},
  title     = {Learning Domain-Independent String Transformation Weights
               for High Accuracy Object Identification},
  booktitle = {Proceedings of the 8th ACM SIGKDD International Conference
               on Knowledge Discovery and Data Mining (KDD-2002)},
  pages     = {350--359},
  address   = {Edmonton, Alberta},
  year      = {2002},
}

@inproceedings{elfeky:icde02,
  author    = {Mohamed G. Elfeky and Ahmed K. Elmagarmid and Vassilios
               S. Verykios},
  title     = {{TAILOR}: A Record Linkage Tool Box},
  booktitle = {Proceedings of the 18th International Conference on Data
               Engineering (ICDE-2002)},
  pages     = {17--28},
  year      = {2002},
}


@inproceedings{ananthakrishna:vldb02,
  author    = {Rohit Ananthakrishna and Surajit Chaudhuri and Venkatesh
               Ganti},
  title     = {Eliminating Fuzzy Duplicates in Data Warehouses},
  booktitle = {Proceedings of the 28th International Conference on Very
               Large Data Bases (VLDB-2002)},
  address   = {Hong Kong, China},
  year      = {2002},
}

@techreport{jin:tr02,
  author      = {Liang Jin and Chen Li and Sharad Mehrotra},
  title       = {Efficient Similarity String Joins in Large Data Sets},
  institution = {UCI ICS},
  number      = {TR-DB-02-04},
  year        = {2002},
}


@inproceedings{jin:dasfaa03,
  author    = {Liang Jin and Chen Li and Sharad Mehrotra},
  title     = {Efficient Record Linkage in Large Data Sets},
  booktitle = {Proceedings of the 8th International Conference on
               Database Systems for Advanced Applications (DASFAA-03)},
  pages     = {137--148},
  address   = {Kyoto, Japan},
  year      = {2003},
}


@techreport{bilenko:tr02,
  author      = {Mikhail Bilenko and Raymond J. Mooney},
  title       = {Learning to Combine Trained Distance Metrics for
                 Duplicate Detection in Databases},
  institution = {Artificial Intelligence Laboratory, University of Texas
                 at Austin},
  number      = {AI 02-296},
  address     = {Austin, TX},
  month       = feb,
  year        = {2002},
}

@inproceedings{pasula:nips03,
  author    = {Hanna Pasula and Bhaskara Marthi and Brian Milch and
               Stuart Russell and Ilya Shpitser},
  title     = {Identity Uncertainty and Citation Matching},
  booktitle = {Advances in Neural Information Processing Systems 15},
  pages     = {1401--1408},
  publisher = {MIT Press},
  year      = {2003},
}

@article{verykios:vldbj03,
  author  = {Vassilios S. Verykios and George V. Moustakides and Mohamed
             G. Elfeky},
  title   = {A {B}ayesian Decision Model for Cost Optimal Record
             Matching},
  journal = {The VLDB Journal},
  volume  = {12},
  number  = {1},
  pages   = {28--40},
  year    = {2003},
}

@inproceedings{bilenko:kdd03,
  author    = {Mikhail Bilenko and Raymond J. Mooney},
  title     = {Adaptive Duplicate Detection Using Learnable String
               Similarity Measures},
  booktitle = {Proceedings of the 9th ACM SIGKDD International Conference
               on Knowledge Discovery and Data Mining (KDD-2003)},
  address   = {Washington, DC},
  year      = {2003},
}

@inproceedings{cohen:ijcai03-wkshp,
  author    = {William W. Cohen and Pradeep Ravikumar and Stephen
               E. Fienberg},
  title     = {A Comparison of String Distance Metrics for Name-Matching
               Tasks},
  booktitle = {Proceedings of the IJCAI-2003 Workshop on Information
               Integration on the Web},
  pages     = {73--78},
  address   = {Acapulco, Mexico},
  month     = aug,
  year      = {2003},
}

@inproceedings{doan:ijcai03-wkshp,
  author    = {AnHai Doan and Ying Lu and Yoonkyong Lee and Jiawei Han},
  title     = {Object Matching for Information Integration: A
               Profiler-Based Approach},
  booktitle = {Proceedings of the IJCAI-2003 Workshop on Information
               Integration on the Web},
  pages     = {53--58},
  address   = {Acapulco, Mexico},
  month     = aug,
  year      = {2003},
}

@inproceedings{mccallum:ijcai03-wkshp,
  author    = {Andrew McCallum and Ben Wellner},
  title     = {Toward Conditional Models of Identity Uncertainty with
               Application to Proper Noun Coreference},
  booktitle = {Proceedings of the IJCAI-2003 Workshop on Information
               Integration on the Web},
  pages     = {79--86},
  address   = {Acapulco, Mexico},
  month     = aug,
  year      = {2003},
}

@inproceedings{bilenko:ijcai03-wkshp,
  author    = {Mikhail Bilenko and Raymond J. Mooney},
  title     = {Employing Trainable String Similarity Metrics for
               Information Integration},
  booktitle = {Proceedings of the IJCAI-2003 Workshop on Information
               Integration on the Web},
  pages     = {67--72},
  address   = {Acapulco, Mexico},
  month     = aug,
  year      = {2003},
}

@inproceedings{hill:ijcai03-wkshop,
  author    = {Shawndra Hill},
  title     = {Social Network Relational Vectors for Anonymous Identity
               Matching},
  booktitle = {Proceedings of the IJCAI-2003 Workshop on Learning
               Statistical Models from Relational Data},
  pages     = {48--52},
  address   = {Acapulco, Mexico},
  month     = aug,
  year      = {2003},
}

@inproceedings{mccallum:ijcai03-wkshop-2,
  author    = {Andrew McCallum and David Jensen},
  title     = {A Note on the Unification of Information Extraction and
               Data Mining using Conditional-Probability, Relational
               Models},
  booktitle = {Proceedings of the IJCAI-2003 Workshop on Learning
               Statistical Models from Relational Data},
  pages     = {79--86},
  address   = {Acapulco, Mexico},
  month     = aug,
  year      = {2003},
}


@misc{christen:febrl,
  author       = {P. Christen and T. Churches},
  title        = {Febrl -- Freely extensible biomedical record linkage},
  howpublished = {http://datamining.anu.edu.au/linkage.html},
}

@inproceedings{milch:ijcai03-wkshop,
  author    = {Bhaskara Marthi and Brian Milch and Stuart Russell},
  title     = {First-order probabilistic models for information
               extraction},
  booktitle = {Proceedings of the IJCAI-2003 Workshop on Learning
               Statistical Models from Relational Data},
  pages     = {71--78},
  address   = {Acapulco, Mexico},
  month     = aug,
  year      = {2003},
}

@inproceedings{chaudhuri:sigmod03,
  author    = {Surajit Chaudhuri and Kris Ganjam and Venkatesh Ganti and
               Rajeev Motwani},
  title     = {Robust and efficient fuzzy match for online data
               cleaning},
  booktitle = {Proceedings of the 2003 ACM SIGMOD International
               Conference on Management of Data (SIGMOD-03)},
  pages     = {313--324},
  publisher = {ACM Press},
  address   = {San Diego, California},
  year      = {2003},
  isbn      = {1-58113-634-X},
  doi       = {10.1145/872757.872796},
}

@inproceedings{li:naacl04,
  author    = {Xin Li and Paul Morie and Dan Roth},
  title     = {Robust Reading: Identification and Tracing of Ambiguous
               Names},
  booktitle = {Proceedings of the 2004 Annual Meeting of the North
               American Association of Computational Linguistics
               (NAACL-04)},
  pages     = {17--24},
  address   = {Boston, MA},
  year      = {2004},
}

@inproceedings{li:aaai04,
  author    = {Xin Li and Paul Morie and Dan Roth},
  title     = {Identification and Tracing of Ambiguous Names:
               Discriminative and Generative Approaches},
  booktitle = {Proceedings of the 19th National Conference on Artificial
               Intelligence (AAAI-2004)},
  address   = {San Jose, CA},
  year      = {2004},
}

@inproceedings{agichtein:kdd04,
  author    = {Eugene Agichtein and Venkatesh Ganti},
  title     = {Mining reference tables for automatic text segmentation},
  booktitle = {Proceedings of the 10th ACM SIGKDD International
               Conference on Knowledge Discovery and Data Mining
               (KDD-2004)},
  address   = {Seattle, WA},
  year      = {2004},
}

@inproceedings{wellner:uai04,
  author    = {Ben Wellner and Andrew McCallum and Fuchun Peng and
               Michael Hay},
  title     = {An Integrated, Conditional Model of Information Extraction
               and Coreference with Application to Citation Matching},
  booktitle = {Proceedings of the 20th Conference on Uncertainty in
               Artificial Intelligence (UAI-2004)},
  address   = {Banff, Canada},
  month     = jul,
  year      = {2004},
}

@inproceedings{ravikumar:uai04,
  author    = {Pradeep Ravikumar and William W. Cohen},
  title     = {A Hierarchical Graphical Model for Record Linkage},
  booktitle = {Proceedings of the 20th Conference on Uncertainty in
               Artificial Intelligence (UAI-2004)},
  address   = {Banff, Canada},
  month     = jul,
  year      = {2004},
}

@inproceedings{bhattacharya:dmkd04,
  author    = {Indrajit Bhattacharya and Lise Getoor},
  title     = {Iterative Record Linkage For Cleaning And Integration},
  booktitle = {Proceedings of the 2004 ACM SIGMOD Workshop on Research
               Issues on Data Mining and Knowledge Discovery
               (DMKD-2004)},
  pages     = {11--18},
  month     = jun,
  year      = {2004},
}

@inproceedings{bhattacharya:linkkdd04,
  author    = {Indrajit Bhattacharya and Lise Getoor},
  title     = {Deduplication and Group Detection Using Links},
  booktitle = {Proceedings of the 2004 ACM SIGKDD Workshop on Link
               Analysis and Group Detection},
  address   = {Seattle, WA},
  month     = aug,
  year      = {2004},
}

@inproceedings{parag:mrdm04,
  author    = {Parag and Pedro Domingos},
  title     = {Multi-Relational Record Linkage},
  booktitle = {Proceedings of the 2004 ACM SIGKDD Workshop on
               Multi-Relational Data Mining},
  pages     = {31--48},
  address   = {Seattle, WA},
  month     = aug,
  year      = {2004},
}

@inproceedings{gu:sdm04,
  author    = {Lifang Gu and Rohan Baxter},
  title     = {Adaptive Filtering for Efficient Record Linkage},
  booktitle = {Proceedings of the Fourth SIAM International Conference on
               Data Mining (SDM-04)},
  year      = {2004},
}

@inproceedings{mccallum:nips05,
  author    = {Andrew McCallum and Ben Wellner},
  title     = {Conditional Models of Identity Uncertainty with
               Application to Noun Coreference},
  booktitle = {Advances in Neural Information Processing Systems 17},
  pages     = {905--912},
  publisher = {MIT Press},
  year      = {2005},
}


@inproceedings{camacho:micai05,
  author    = {Horacio Camacho and Abdel Salhi},
  title     = {A Graph Theoretic Approach to Key Equivalence},
  booktitle = {MICAI 2005: Advances in Artificial Intelligence,
               proceedings of the 4th Mexican International Conference on
               Artificial Intelligence, LNAI 3789},
  pages     = {524--533},
  address   = {Monterrey, Mexico},
  year      = {2005},
}


@inproceedings{kalashnikov:sdm05,
  author    = {Dmitri V. Kalashnikov and Sharad Mehrotra and Zhaoqi
               Chen},
  title     = {Exploiting relationships for domain-independent data
               cleaning},
  booktitle = {Proceedings of the 5th SIAM International Conference on
               Data Mining (SDM-2005)},
  address   = {Newport Beach, CA},
  year      = {2005},
}

@inproceedings{dong:sigmod05,
  author    = {Xin Dong and Alon Halevy and Jayant Madhavan},
  title     = {Reference reconciliation in complex information spaces},
  booktitle = {Proceedings of the 2005 ACM SIGMOD international
               conference on Management of data (SIGMOD-2005)},
  pages     = {85--96},
  address   = {Baltimore, MD},
  year      = {2005},
}

@inproceedings{parag:aaai05,
  author    = {Parag Singla and Pedro Domingos},
  title     = {Discriminative Training of {M}arkov {L}ogic {N}etworks},
  booktitle = {Proceedings of the 20th National Conference on Artificial
               Intelligence (AAAI-2005)},
  pages     = {868--873},
  address   = {Pittsburgh, PA},
  year      = {2005},
}

@inproceedings{shen:aaai05,
  author    = {Warren Shen and Xin Li and An{H}ai Doan},
  title     = {Constraint-based entity matching},
  booktitle = {Proceedings of the 20th National Conference on Artificial
               Intelligence (AAAI-2005)},
  pages     = {862--867},
  address   = {Pittsburgh, PA},
  year      = {2005},
}

@inproceedings{mccallum:uai05,
  author    = {Andrew McCallum and Kedar Bellare and Fernando Pereira},
  title     = {A Conditional Random Field for Discriminatively-trained
               Finite-state String Edit Distance},
  booktitle = {Proceedings of the 21st Conference on Uncertainty in
               Artificial Intelligence (UAI-2005)},
  year      = {2005},
}


@inproceedings{parag:pkdd05,
  author    = {Parag Singla and Pedro Domingos},
  title     = {Object Identification with Attribute-Mediated
               Dependences},
  booktitle = {Proceedings of the 9th European Conference on Principles
               and Practice of Knowledge Discovery in Databases
               (PKDD-2005)},
  address   = {Porto, Portugal},
  year      = {2005},
}

@inproceedings{bilenko:icdm05,
  author    = {Mikhail Bilenko and Sugato Basu and Mehran Sahami},
  title     = {Adaptive Product Normalization: Using Online Learning for
               Record Linkage in Comparison Shopping},
  booktitle = {Proceedings of the 5th IEEE International Conference on
               Data Mining (ICDM-2005)},
  pages     = {58--65},
  year      = {2005},
}


@inproceedings{minton:icdm05,
  author    = {Steven N. Minton and Claude Nanjo and Craig A. Knoblock
               and Martin Michalowski and Matthew Michelson},
  title     = {A Heterogeneous Field Matching Method for Record Linkage},
  booktitle = {Proceedings of the 5th IEEE International Conference on
               Data Mining (ICDM-2005)},
  pages     = {314--321},
  year      = {2005},
}


@inproceedings{chaudhuri:icde05,
  author    = {Surajit Chaudhuri and Venkatesh Ganti and Rajeev Motwani},
  title     = {Robust identification of fuzzy duplicates},
  booktitle = {Proceedings of the 21st International Conference on Data
               Engineering (ICDE-2005)},
  address   = {Tokyo, Japan},
  year      = {2005},
}

@inproceedings{jin:vldb05,
  author    = {Liang Jin and Chen Li},
  title     = {Selectivity Estimation for Fuzzy String Predicates in
               Large Data Sets},
  booktitle = {Proceedings of the 31st International Conference on Very
               Large Data Bases (VLDB-2005)},
  year      = {2005},
}


@article{camacho:rcs06,
  author  = {Abdel Salhi and Horacio Camacho},
  title   = {A String Metric Based on a One-to-one Greedy Matching
             Algorithm},
  journal = {Research in Computer Science},
  volume  = {19},
  pages   = {171--182},
  year    = {2006},
}


@inproceedings{bhattacharya:sdm06,
  author    = {Indrajit Bhattacharya and Lise Getoor},
  title     = {A Latent {D}irichlet Model for Unsupervised Entity
               Resolution},
  booktitle = {Proceedings of the 6th SIAM International Conference on
               Data Mining (SDM-2006)},
  address   = {Bethesda, MD},
  year      = {2006},
}

@incollection{bhattacharya:bkchapter06,
  author    = {Indrajit Bhattacharya and Lise Getoor},
  title     = {Entity Resolution in Graphs},
  booktitle = {Mining Graph Data},
  editor    = {Lawrence B. Holder and Diane J. Cook},
  publisher = {Wiley},
  year      = {2006},
}




%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Proceedings of the KDD-2003 Workshop on Data Cleaning, Record Linkage, and Object Consolidation
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@inproceedings{winkler:kdd03-wkshp,
  author    = {William E. Winkler},
  title     = {Data Cleaning Methods},
  booktitle = {Proceedings of the 2003 ACM SIGKDD Workshop on Data
               Cleaning, Record Linkage, and Object Consolidation},
  pages     = {1--6},
  address   = {Washington, DC},
  year      = {2003},
}

@inproceedings{bilenko:kdd03-wkshp,
  author    = {Mikhail Bilenko and Raymond J. Mooney},
  title     = {On Evaluation and Training-Set Construction for Duplicate
               Detection},
  booktitle = {Proceedings of the 2003 ACM SIGKDD Workshop on Data
               Cleaning, Record Linkage, and Object Consolidation},
  pages     = {7--12},
  address   = {Washington, DC},
  year      = {2003},
}

@inproceedings{cohen:kdd03-wkshp,
  author    = {William W. Cohen and Pradeep Ravikumar and Stephen
               E. Fienberg},
  title     = {A Comparison of String Metrics for Matching Names and
               Records},
  booktitle = {Proceedings of the 2003 ACM SIGKDD Workshop on Data
               Cleaning, Record Linkage, and Object Consolidation},
  pages     = {13--18},
  address   = {Washington, DC},
  year      = {2003},
}

@inproceedings{mccallum:kdd03-wkshp,
  author    = {Andrew McCallum and Ben Wellner},
  title     = {Object Consolidation by Graph Partitioning with a
               Conditionally Trained Distance Metric},
  booktitle = {Proceedings of the 2003 ACM SIGKDD Workshop on Data
               Cleaning, Record Linkage, and Object Consolidation},
  pages     = {19--24},
  address   = {Washington, DC},
  year      = {2003},
}

@inproceedings{baxter:kdd03-wkshp,
  author    = {Rohan Baxter and Peter Christen and Tim Churches},
  title     = {A Comparison of Fast Blocking Methods for Record Linkage},
  booktitle = {Proceedings of the 2003 ACM SIGKDD Workshop on Data
               Cleaning, Record Linkage, and Object Consolidation},
  pages     = {25--27},
  address   = {Washington, DC},
  year      = {2003},
}

@inproceedings{borthwick:kdd03-wkshp,
  author    = {A. Borthwick and M. Buechi and A. Goldberg},
  title     = {Key Concepts in the {ChoiceMaker} 2 Record Matching
               System},
  booktitle = {Proceedings of the 2003 ACM SIGKDD Workshop on Data
               Cleaning, Record Linkage, and Object Consolidation},
  pages     = {28--30},
  address   = {Washington, DC},
  year      = {2003},
}

@inproceedings{verykios:kdd03-wkshp,
  author    = {Mohamed G. Elfeky and Vassilios S. Verykios},
  title     = {On Search Enhancement of the Record Linkage Process},
  booktitle = {Proceedings of the 2003 ACM SIGKDD Workshop on Data
               Cleaning, Record Linkage, and Object Consolidation},
  pages     = {31--33},
  address   = {Washington, DC},
  year      = {2003},
}

@inproceedings{michalowski:kdd03-wkshp,
  author    = {M. Michalowski and S. Thakkar and C. Knoblock},
  title     = {Exploiting Secondary Sources for Automatic Object
               Consolidation},
  booktitle = {Proceedings of the 2003 ACM SIGKDD Workshop on Data
               Cleaning, Record Linkage, and Object Consolidation},
  pages     = {34--36},
  address   = {Washington, DC},
  year      = {2003},
}

@inproceedings{neiling:kdd03-wkshp,
  author    = {M. Neiling and S. Jurk},
  title     = {The Object Identification Framework},
  booktitle = {Proceedings of the 2003 ACM SIGKDD Workshop on Data
               Cleaning, Record Linkage, and Object Consolidation},
  pages     = {37--39},
  address   = {Washington, DC},
  year      = {2003},
}

@inproceedings{quass:kdd03-wkshp,
  author    = {D. Quass and P. Starkey},
  title     = {Record Linkage for Genealogical Databases},
  booktitle = {Proceedings of the 2003 ACM SIGKDD Workshop on Data
               Cleaning, Record Linkage, and Object Consolidation},
  pages     = {40--42},
  address   = {Washington, DC},
  year      = {2003},
}

