% latest update: mbilenko, 11/1/2005
%
% Conventions used in this file (classic BibTeX):
%   * one field per line, brace-delimited values, lowercase entry types;
%   * "address" of a conference paper holds the conference location;
%   * months use the standard three-letter macros (jan ... dec), unquoted;
%   * page ranges use a double hyphen;
%   * proper nouns and acronyms in titles are brace-protected as whole words;
%   * venues or institutions that recur are defined once as string macros below.

@string{jasa = {Journal of the American Statistical Association}}
@string{censussrd = {Statistical Research Division, U.S. Census Bureau}}
@string{kdd2000 = {Proceedings of the 6th International Conference on Knowledge Discovery and Data Mining (KDD-2000)}}
@string{kdd2002 = {Proceedings of the 8th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD-2002)}}
@string{ijcai03iiweb = {Proceedings of the IJCAI-2003 Workshop on Information Integration on the Web}}
@string{ijcai03srl = {Proceedings of the IJCAI-2003 Workshop on Learning Statistical Models from Relational Data}}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Papers from the Statistics community
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@article{newcombe:science59,
  author = {H. B. Newcombe and J. M. Kennedy and S. J. Axford and A. P. James},
  title = {Automatic Linkage of Vital Records},
  journal = {Science},
  volume = {130},
  pages = {954--959},
  year = {1959},
}

@article{tepping:jasa68,
  author = {B. J. Tepping},
  title = {A model for optimum linkage of records},
  journal = jasa,
  volume = {63},
  pages = {1321--1332},
  year = {1968},
}

@article{fellegi:jasa69,
  author = {I. P. Fellegi and A. B. Sunter},
  title = {A Theory for Record Linkage},
  journal = jasa,
  volume = {64},
  pages = {1183--1210},
  year = {1969},
}

@inproceedings{kelley:rl85,
  author = {R. P. Kelley},
  title = {Advances in record linkage methodology: a method for determining the best blocking strategy},
  booktitle = {Record Linkage Techniques - 1985: Proceedings of the Workshop on Exact Matching Methodologies},
  pages = {199--203},
  address = {Arlington, VA},
  year = {1985},
  url = {http://www.fcsm.gov/working-papers/1367_3.pdf},
}

% Proceedings paper: typed and titled the same way as winkler:asa90 (it was
% previously an article entry with the proceedings name in the journal field).
@inproceedings{winkler:asa88,
  author = {William E. Winkler},
  title = {Using the {EM} Algorithm for Weight Computation in the {Fellegi-Sunter} Model of Record Linkage},
  booktitle = {Proceedings of the Section on Survey Research Methods, American Statistical Association},
  pages = {667--671},
  year = {1988},
}

@book{newcombe:book88,
  author = {H. B. Newcombe},
  title = {Handbook of record linkage: methods for health and statistical studies, administration, and business},
  publisher = {Oxford University Press},
  year = {1988},
}

@article{jaro:jasa89,
  author = {M. A. Jaro},
  title = {Advances in record-linkage methodology as applied to matching the 1985 {Census} of {Tampa}, {Florida}},
  journal = jasa,
  volume = {84},
  number = {406},
  pages = {414--420},
  year = {1989},
}

@article{copas:jrssa90,
  author = {J. Copas and F. Hilton},
  title = {Record linkage: statistical models for matching computer records},
  journal = {Journal of the Royal Statistical Society: Series A},
  volume = {153},
  number = {3},
  pages = {287--320},
  year = {1990},
}

@inproceedings{winkler:asa90,
  author = {William E. Winkler},
  title = {String Comparator Metrics and Enhanced Decision Rules in the {Fellegi-Sunter} Model of Record Linkage},
  booktitle = {Proceedings of the Section on Survey Research Methods, American Statistical Association},
  pages = {354--359},
  year = {1990},
}

@techreport{winkler:tr93,
  author = {William E. Winkler},
  title = {Improved Decision Rules in the {Fellegi-Sunter} Model of Record Linkage},
  institution = censussrd,
  address = {Washington, DC},
  year = {1993},
}

@techreport{winkler:tr94,
  author = {William E. Winkler},
  title = {Advanced Methods for Record Linkage},
  institution = censussrd,
  address = {Washington, DC},
  year = {1994},
}

@article{belin:jasa95,
  author = {Thomas R. Belin and Donald B. Rubin},
  title = {A Method for Calibrating False-Match Rates in Record Linkage},
  journal = jasa,
  volume = {90},
  number = {430},
  pages = {694--707},
  year = {1995},
}

@article{jaro:statmed95,
  author = {M. A. Jaro},
  title = {Probabilistic linkage of large public health data files},
  journal = {Statistics in Medicine},
  volume = {14},
  number = {5--7},
  pages = {491--498},
  year = {1995},
}

@techreport{winkler:tr99,
  author = {William E. Winkler},
  title = {The State of Record Linkage and Current Research Problems},
  institution = censussrd,
  address = {Washington, DC},
  year = {1999},
}

@techreport{winkler:tr02,
  author = {William E. Winkler},
  title = {Methods for Record Linkage and {Bayesian} Networks},
  institution = censussrd,
  address = {Washington, DC},
  year = {2002},
}

@techreport{yancey:tr02,
  author = {William E. Yancey},
  title = {Improving {EM} Algorithm Estimates for Record Linkage Parameters},
  institution = censussrd,
  address = {Washington, DC},
  year = {2002},
}

@techreport{yancey:tr04,
  author = {William E. Yancey},
  title = {An Adaptive String Comparator for Record Linkage},
  institution = censussrd,
  address = {Washington, DC},
  year = {2004},
}

@inproceedings{noren:kdd05,
  author = {G. Niklas Nor{\'{e}}n and Roland Orre and Andrew Bate},
  title = {A Hit-Miss Model for Duplicate Detection in the {WHO} {Drug} {Safety} {Database}},
  booktitle = {Proceedings of the 11th International Conference on Knowledge Discovery and Data Mining (KDD-05)},
  pages = {459--468},
  address = {Chicago, IL},
  year = {2005},
}

@techreport{winkler:tr05,
  author = {William E. Winkler},
  title = {Approximate String Comparator Search Strategies for Very Large Administrative Lists},
  institution = censussrd,
  address = {Washington, DC},
  year = {2005},
}

@techreport{winkler:tr06,
  author = {William E. Winkler},
  title = {Overview of Record Linkage and Current Research Directions},
  institution = censussrd,
  address = {Washington, DC},
  year = {2006},
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Papers from Computer Science venues
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@inproceedings{hernandez:sigmod95,
  author = {Mauricio A. Hern{\'a}ndez and Salvatore J. Stolfo},
  title = {The Merge/Purge Problem for Large Databases},
  booktitle = {Proceedings of the 1995 ACM SIGMOD International Conference on Management of Data (SIGMOD-95)},
  pages = {127--138},
  address = {San Jose, CA},
  month = may,
  year = {1995},
}

@inproceedings{monge:kdd96,
  author = {Alvaro E. Monge and Charles P. Elkan},
  title = {The field matching problem: Algorithms and applications},
  booktitle = {Proceedings of the Second International Conference on Knowledge Discovery and Data Mining (KDD-96)},
  pages = {267--270},
  address = {Portland, OR},
  month = aug,
  year = {1996},
}

@mastersthesis{hylton:mthesis96,
  author = {Jeremy A. Hylton},
  title = {Identifying and merging related bibliographic records},
  school = {Department of Electrical Engineering and Computer Science, MIT},
  year = {1996},
}

@inproceedings{monge:dmkd97,
  author = {Alvaro E. Monge and Charles P. Elkan},
  title = {An efficient domain-independent algorithm for detecting approximately duplicate database records},
  booktitle = {Proceedings of the 1997 ACM SIGMOD Workshop on Research Issues on Data Mining and Knowledge Discovery},
  pages = {23--29},
  address = {Tucson, AZ},
  month = may,
  year = {1997},
}

@inproceedings{cohen:sigmod98,
  author = {William W. Cohen},
  title = {Integration of heterogeneous databases without common domains using queries based on textual similarity},
  booktitle = {Proceedings of the 1998 ACM SIGMOD International Conference on Management of Data (SIGMOD-98)},
  pages = {201--212},
  year = {1998},
}

@inproceedings{giles:dl98,
  author = {C. Lee Giles and Kurt Bollacker and Steve Lawrence},
  title = {{CiteSeer}: An Automatic Citation Indexing System},
  booktitle = {Proceedings of the 3rd ACM Conference on Digital Libraries},
  pages = {89--98},
  address = {Pittsburgh, PA},
  year = {1998},
}

@inproceedings{lee:dexa99,
  author = {Mong Li Lee and Hongjun Lu and Tok Wang Ling and Yee Teng Ko},
  title = {Cleansing Data for Mining and Warehousing},
  booktitle = {Proceedings of the 10th International Conference on Database and Expert Systems Applications (DEXA-99)},
  address = {Florence, Italy},
  month = aug,
  year = {1999},
}

@inproceedings{lawrence:agents99,
  author = {Steve Lawrence and Kurt Bollacker and C. Lee Giles},
  title = {Autonomous Citation Matching},
  booktitle = {Proceedings of the 3rd International Conference on Autonomous Agents},
  publisher = {ACM Press},
  pages = {392--393},
  address = {New York, NY},
  month = may,
  year = {1999},
}

@inproceedings{zhu:kdd00-wkshp,
  author = {J. J. Zhu and L. H. Ungar},
  title = {String Edit Analysis for Merging Databases},
  booktitle = {Proceedings of the KDD-2000 Workshop on Text Mining},
  year = {2000},
}

@unpublished{monge:submitted00,
  author = {Alvaro E. Monge},
  title = {An adaptive and efficient algorithm for detecting approximately duplicate database records},
  note = {Submitted paper},
  year = {2000},
}

@inproceedings{cohen:kdd00,
  author = {William W. Cohen and Henry Kautz and David McAllester},
  title = {Hardening Soft Information Sources},
  booktitle = kdd2000,
  pages = {255--259},
  address = {Boston, MA},
  month = aug,
  year = {2000},
}

@inproceedings{lee:kdd00,
  author = {Mong-Li Lee and Tok Wang Ling and Wai Lup Low},
  title = {{IntelliClean}: a knowledge-based intelligent data cleaner},
  booktitle = kdd2000,
  pages = {290--294},
  address = {Boston, MA},
  year = {2000},
}

@inproceedings{mccallum:kdd00,
  author = {Andrew K. McCallum and Kamal Nigam and Lyle Ungar},
  title = {Efficient Clustering of High-Dimensional Data Sets with Application to Reference Matching},
  booktitle = kdd2000,
  pages = {169--178},
  address = {Boston, MA},
  month = aug,
  year = {2000},
}

@inproceedings{cohen:sigir01-wkshp,
  author = {William Cohen and Jacob Richman},
  title = {Learning to Match and Cluster Entity Names},
  booktitle = {Proceedings of the 2001 ACM SIGIR Workshop on Mathematical/Formal Methods in Information Retrieval},
  address = {New Orleans, LA},
  month = sep,
  year = {2001},
}

@inproceedings{galhardas:vldb01,
  author = {Helena Galhardas and Daniela Florescu and Dennis Shasha and Eric Simon and Cristian Saita},
  title = {Declarative data cleaning: Language, model, and algorithms},
  booktitle = {Proceedings of the 27th International Conference on Very Large Data Bases (VLDB-2001)},
  pages = {371--380},
  address = {Rome, Italy},
  year = {2001},
}

@article{tejada:isj01,
  author = {Sheila Tejada and Craig A. Knoblock and Steven Minton},
  title = {Learning Object Identification Rules for Information Integration},
  journal = {Information Systems Journal},
  volume = {26},
  number = {8},
  pages = {635--656},
  year = {2001},
}

@inproceedings{christen:adm02,
  author = {P. Christen and T. Churches and J. Zhu},
  title = {Probabilistic Name and Address Cleaning and Standardisation},
  booktitle = {Proceedings of the Australasian Data Mining Workshop},
  year = {2002},
}

@inproceedings{cohen:kdd02,
  author = {William W. Cohen and Jacob Richman},
  title = {Learning to Match and Cluster Large High-Dimensional Data Sets for Data Integration},
  booktitle = kdd2002,
  pages = {475--480},
  address = {Edmonton, Alberta},
  year = {2002},
}

@inproceedings{sarawagi:kdd02,
  author = {Sunita Sarawagi and Anuradha Bhamidipaty},
  title = {Interactive Deduplication using Active Learning},
  booktitle = kdd2002,
  pages = {269--278},
  address = {Edmonton, Alberta},
  year = {2002},
}

@inproceedings{tejada:kdd02,
  author = {Sheila Tejada and Craig A. Knoblock and Steven Minton},
  title = {Learning Domain-Independent String Transformation Weights for High Accuracy Object Identification},
  booktitle = kdd2002,
  pages = {350--359},
  address = {Edmonton, Alberta},
  year = {2002},
}

@inproceedings{elfeky:icde02,
  author = {Mohamed G. Elfeky and Ahmed K. Elmagarmid and Vassilios S. Verykios},
  title = {{TAILOR}: A Record Linkage Tool Box},
  booktitle = {Proceedings of the 18th International Conference on Data Engineering (ICDE-2002)},
  pages = {17--28},
  year = {2002},
}

@inproceedings{ananthakrishna:vldb02,
  author = {Rohit Ananthakrishna and Surajit Chaudhuri and Venkatesh Ganti},
  title = {Eliminating Fuzzy Duplicates in Data Warehouses},
  booktitle = {Proceedings of the 28th International Conference on Very Large Data Bases (VLDB-2002)},
  address = {Hong Kong, China},
  year = {2002},
}

@techreport{jin:tr02,
  author = {Liang Jin and Chen Li and Sharad Mehrotra},
  title = {Efficient Similarity String Joins in Large Data Sets},
  institution = {UCI ICS},
  number = {TR-DB-02-04},
  year = {2002},
}

@inproceedings{jin:dasfaa03,
  author = {Liang Jin and Chen Li and Sharad Mehrotra},
  title = {Efficient Record Linkage in Large Data Sets},
  booktitle = {Proceedings of the 8th International Conference on Database Systems for Advanced Applications (DASFAA-03)},
  pages = {137--148},
  address = {Kyoto, Japan},
  year = {2003},
}

@techreport{bilenko:tr02,
  author = {Mikhail Bilenko and Raymond J. Mooney},
  title = {Learning to Combine Trained Distance Metrics for Duplicate Detection in Databases},
  institution = {Artificial Intelligence Laboratory, University of Texas at Austin},
  number = {AI 02-296},
  address = {Austin, TX},
  month = feb,
  year = {2002},
}

@inproceedings{pasula:nips03,
  author = {Hanna Pasula and Bhaskara Marthi and Brian Milch and Stuart Russell and Ilya Shpitser},
  title = {Identity Uncertainty and Citation Matching},
  booktitle = {Advances in Neural Information Processing Systems 15},
  publisher = {MIT Press},
  pages = {1401--1408},
  year = {2003},
}

@article{verykios:vldbj03,
  author = {Vassilios S. Verykios and George V. Moustakides and Mohamed G. Elfeky},
  title = {A {Bayesian} Decision Model for Cost Optimal Record Matching},
  journal = {The VLDB Journal},
  volume = {12},
  number = {1},
  pages = {28--40},
  year = {2003},
}

@inproceedings{bilenko:kdd03,
  author = {Mikhail Bilenko and Raymond J. Mooney},
  title = {Adaptive Duplicate Detection Using Learnable String Similarity Measures},
  booktitle = {Proceedings of the 9th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD-2003)},
  address = {Washington, DC},
  year = {2003},
}

@inproceedings{cohen:ijcai03-wkshp,
  author = {William W. Cohen and Pradeep Ravikumar and Stephen E. Fienberg},
  title = {A Comparison of String Distance Metrics for Name-Matching Tasks},
  booktitle = ijcai03iiweb,
  pages = {73--78},
  address = {Acapulco, Mexico},
  month = aug,
  year = {2003},
}

@inproceedings{doan:ijcai03-wkshp,
  author = {AnHai Doan and Ying Lu and Yoonkyong Lee and Jiawei Han},
  title = {Object Matching for Information Integration: A Profiler-Based Approach},
  booktitle = ijcai03iiweb,
  pages = {53--58},
  address = {Acapulco, Mexico},
  month = aug,
  year = {2003},
}

@inproceedings{mccallum:ijcai03-wkshp,
  author = {Andrew McCallum and Ben Wellner},
  title = {Toward Conditional Models of Identity Uncertainty with Application to Proper Noun Coreference},
  booktitle = ijcai03iiweb,
  pages = {79--86},
  address = {Acapulco, Mexico},
  month = aug,
  year = {2003},
}

@inproceedings{bilenko:ijcai03-wkshp,
  author = {Mikhail Bilenko and Raymond J. Mooney},
  title = {Employing Trainable String Similarity Metrics for Information Integration},
  booktitle = ijcai03iiweb,
  pages = {67--72},
  address = {Acapulco, Mexico},
  month = aug,
  year = {2003},
}

@inproceedings{hill:ijcai03-wkshop,
  author = {Shawndra Hill},
  title = {Social Network Relational Vectors for Anonymous Identity Matching},
  booktitle = ijcai03srl,
  pages = {48--52},
  address = {Acapulco, Mexico},
  month = aug,
  year = {2003},
}

@inproceedings{mccallum:ijcai03-wkshop-2,
  author = {Andrew McCallum and David Jensen},
  title = {A Note on the Unification of Information Extraction and Data Mining using Conditional-Probability, Relational Models},
  booktitle = ijcai03srl,
  pages = {79--86},
  address = {Acapulco, Mexico},
  month = aug,
  year = {2003},
}

@misc{christen:febrl,
  author = {P. Christen and T. Churches},
  title = {{Febrl} -- Freely extensible biomedical record linkage},
  howpublished = {http://datamining.anu.edu.au/linkage.html},
}

% Names are separated by " and " only; a comma inside a name switches BibTeX
% to "Last, First" parsing, so none may appear before " and ".
@inproceedings{milch:ijcai03-wkshop,
  author = {Bhaskara Marthi and Brian Milch and Stuart Russell},
  title = {First-order probabilistic models for information extraction},
  booktitle = ijcai03srl,
  pages = {71--78},
  address = {Acapulco, Mexico},
  month = aug,
  year = {2003},
}

@inproceedings{chaudhuri:sigmod03,
  author = {Surajit Chaudhuri and Kris Ganjam and Venkatesh Ganti and Rajeev Motwani},
  title = {Robust and efficient fuzzy match for online data cleaning},
  booktitle = {Proceedings of the 2003 ACM SIGMOD International Conference on Management of Data (SIGMOD-03)},
  publisher = {ACM Press},
  pages = {313--324},
  address = {San Diego, CA},
  year = {2003},
  isbn = {1-58113-634-X},
  doi = {10.1145/872757.872796},
}

@inproceedings{li:naacl04,
  author = {Xin Li and Paul Morie and Dan Roth},
  title = {Robust Reading: Identification and Tracing of Ambiguous Names},
  booktitle = {Proceedings of the 2004 Annual Meeting of the North American Association of Computational Linguistics (NAACL-04)},
  pages = {17--24},
  address = {Boston, MA},
  year = {2004},
}

@inproceedings{li:aaai04,
  author = {Xin Li and Paul Morie and Dan Roth},
  title = {Identification and Tracing of Ambiguous Names: Discriminative and Generative Approaches},
  booktitle = {Proceedings of the 19th National Conference on Artificial Intelligence (AAAI-2004)},
  address = {San Jose, CA},
  year = {2004},
}

@inproceedings{agichtein:kdd04,
  author = {Eugene Agichtein and Venkatesh Ganti},
  title = {Mining reference tables for automatic text segmentation},
  booktitle = {Proceedings of the 10th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD-2004)},
  address = {Seattle, WA},
  year = {2004},
}

@inproceedings{wellner:uai04,
  author = {Ben Wellner and Andrew McCallum and Fuchun Peng and Michael Hay},
  title = {An Integrated, Conditional Model of Information Extraction and Coreference with Application to Citation Matching},
  booktitle = {Proceedings of the 20th Conference on Uncertainty in Artificial Intelligence (UAI-2004)},
  address = {Banff, Canada},
  month = jul,
  year = {2004},
}

@inproceedings{ravikumar:uai04,
  author = {Pradeep Ravikumar and William W. Cohen},
  title = {A Hierarchical Graphical Model for Record Linkage},
  booktitle = {Proceedings of the 20th Conference on Uncertainty in Artificial Intelligence (UAI-2004)},
  address = {Banff, Canada},
  month = jul,
  year = {2004},
}

@inproceedings{bhattacharya:dmkd04,
  author = {Indrajit Bhattacharya and Lise Getoor},
  title = {Iterative Record Linkage For Cleaning And Integration},
  booktitle = {Proceedings of the 2004 ACM SIGMOD Workshop on Research Issues on Data Mining and Knowledge Discovery (DMKD-2004)},
  pages = {11--18},
  month = jun,
  year = {2004},
}

@inproceedings{bhattacharya:linkkdd04,
  author = {Indrajit Bhattacharya and Lise Getoor},
  title = {Deduplication and Group Detection Using Links},
  booktitle = {Proceedings of the 2004 ACM SIGKDD Workshop on Link Analysis and Group Detection},
  address = {Seattle, WA},
  month = aug,
  year = {2004},
}

@inproceedings{parag:mrdm04,
  author = {Parag and Pedro Domingos},
  title = {Multi-Relational Record Linkage},
  booktitle = {Proceedings of the 2004 ACM SIGKDD Workshop on Multi-Relational Data Mining},
  pages = {31--48},
  address = {Seattle, WA},
  month = aug,
  year = {2004},
}

@inproceedings{gu:sdm04,
  author = {Lifang Gu and Rohan Baxter},
  title = {Adaptive Filtering for Efficient Record Linkage},
  booktitle = {Proceedings of the Fourth SIAM International Conference on Data Mining (SDM-04)},
  year = {2004},
}

@inproceedings{mccallum:nips05,
  author = {Andrew McCallum and Ben Wellner},
  title = {Conditional Models of Identity Uncertainty with Application to Noun Coreference},
  booktitle = {Advances in Neural Information Processing Systems 17},
  publisher = {MIT Press},
  pages = {905--912},
  year = {2005},
}

@inproceedings{camacho:micai05,
  author = {Horacio Camacho and Abdel Salhi},
  title = {A Graph Theoretic Approach to Key Equivalence},
  booktitle = {MICAI 2005: Advances in Artificial Intelligence, Proceedings of the 4th Mexican International Conference on Artificial Intelligence},
  series = {Lecture Notes in Artificial Intelligence},
  volume = {3789},
  pages = {524--533},
  address = {Monterrey, Mexico},
  year = {2005},
}

@inproceedings{kalashnikov:sdm05,
  author = {Dmitri V. Kalashnikov and Sharad Mehrotra and Zhaoqi Chen},
  title = {Exploiting relationships for domain-independent data cleaning},
  booktitle = {Proceedings of the 5th SIAM International Conference on Data Mining (SDM-2005)},
  address = {Newport Beach, CA},
  year = {2005},
}

@inproceedings{dong:sigmod05,
  author = {Xin Dong and Alon Halevy and Jayant Madhavan},
  title = {Reference reconciliation in complex information spaces},
  booktitle = {Proceedings of the 2005 ACM SIGMOD International Conference on Management of Data (SIGMOD-2005)},
  pages = {85--96},
  address = {Baltimore, MD},
  year = {2005},
}

@inproceedings{parag:aaai05,
  author = {Parag Singla and Pedro Domingos},
  title = {Discriminative Training of {Markov} {Logic} {Networks}},
  booktitle = {Proceedings of the 20th National Conference on Artificial Intelligence (AAAI-2005)},
  pages = {868--873},
  address = {Pittsburgh, PA},
  year = {2005},
}

@inproceedings{shen:aaai05,
  author = {Warren Shen and Xin Li and AnHai Doan},
  title = {Constraint-based entity matching},
  booktitle = {Proceedings of the 20th National Conference on Artificial Intelligence (AAAI-2005)},
  pages = {862--867},
  address = {Pittsburgh, PA},
  year = {2005},
}

@inproceedings{mccallum:uai05,
  author = {Andrew McCallum and Kedar Bellare and Fernando Pereira},
  title = {A Conditional Random Field for Discriminatively-trained Finite-state String Edit Distance},
  booktitle = {Proceedings of the 21st Conference on Uncertainty in Artificial Intelligence (UAI-2005)},
  year = {2005},
}

@inproceedings{parag:pkdd05,
  author = {Parag Singla and Pedro Domingos},
  title = {Object Identification with Attribute-Mediated Dependences},
  booktitle = {Proceedings of the 9th European Conference on Principles and Practice of Knowledge Discovery in Databases (PKDD-2005)},
  address = {Porto, Portugal},
  year = {2005},
}

@inproceedings{bilenko:icdm05,
  author = {Mikhail Bilenko and Sugato Basu and Mehran Sahami},
  title = {Adaptive Product Normalization: Using Online Learning for Record Linkage in Comparison Shopping},
  booktitle = {Proceedings of the 5th IEEE International Conference on Data Mining (ICDM-2005)},
  pages = {58--65},
  year = {2005},
}

@inproceedings{minton:icdm05,
  author = {Steven N. Minton and Claude Nanjo and Craig A. Knoblock and Martin Michalowski and Matthew Michelson},
  title = {A Heterogeneous Field Matching Method for Record Linkage},
  booktitle = {Proceedings of the 5th IEEE International Conference on Data Mining (ICDM-2005)},
  pages = {314--321},
  year = {2005},
}

@inproceedings{chaudhuri:icde05,
  author = {Surajit Chaudhuri and Venkatesh Ganti and Rajeev Motwani},
  title = {Robust identification of fuzzy duplicates},
  booktitle = {Proceedings of the 21st International Conference on Data Engineering (ICDE-2005)},
  address = {Tokyo, Japan},
  year = {2005},
}

@inproceedings{jin:vldb05,
  author = {Liang Jin and Chen Li},
  title = {Selectivity Estimation for Fuzzy String Predicates in Large Data Sets},
  booktitle = {Proceedings of the 31st International Conference on Very Large Data Bases (VLDB-2005)},
  year = {2005},
}

@article{camacho:rcs06,
  author = {Abdel Salhi and Horacio Camacho},
  title = {A String Metric Based on a One-to-one Greedy Matching Algorithm},
  journal = {Research in Computer Science},
  volume = {19},
  pages = {171--182},
  year = {2006},
}

@inproceedings{bhattacharya:sdm06,
  author = {Indrajit Bhattacharya and Lise Getoor},
  title = {A Latent {Dirichlet} Model for Unsupervised Entity Resolution},
  booktitle = {Proceedings of the 6th SIAM International Conference on Data Mining (SDM-2006)},
  address = {Bethesda, MD},
  year = {2006},
}

@incollection{bhattacharya:bkchapter06,
  author = {Indrajit Bhattacharya and Lise Getoor},
  title = {Entity Resolution in Graphs},
  booktitle = {Mining Graph Data},
  editor = {Lawrence B. Holder and Diane J. Cook},
  publisher = {Wiley},
  year = {2006},
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Proceedings of the KDD-2003 Workshop on Data Cleaning, Record Linkage, and Object Consolidation
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Every paper in this section shares one proceedings title and one location,
% so both are defined once here and referenced by macro name below.
@string{kdd03wkshp = {Proceedings of the 2003 ACM SIGKDD Workshop on Data Cleaning, Record Linkage, and Object Consolidation}}
@string{kdd03wkshpaddr = {Washington, DC}}

@inproceedings{winkler:kdd03-wkshp,
  author = {William E. Winkler},
  title = {Data Cleaning Methods},
  booktitle = kdd03wkshp,
  pages = {1--6},
  address = kdd03wkshpaddr,
  year = {2003},
}

@inproceedings{bilenko:kdd03-wkshp,
  author = {Mikhail Bilenko and Raymond J. Mooney},
  title = {On Evaluation and Training-Set Construction for Duplicate Detection},
  booktitle = kdd03wkshp,
  pages = {7--12},
  address = kdd03wkshpaddr,
  year = {2003},
}

@inproceedings{cohen:kdd03-wkshp,
  author = {William W. Cohen and Pradeep Ravikumar and Stephen E. Fienberg},
  title = {A Comparison of String Metrics for Matching Names and Records},
  booktitle = kdd03wkshp,
  pages = {13--18},
  address = kdd03wkshpaddr,
  year = {2003},
}

@inproceedings{mccallum:kdd03-wkshp,
  author = {Andrew McCallum and Ben Wellner},
  title = {Object Consolidation by Graph Partitioning with a Conditionally Trained Distance Metric},
  booktitle = kdd03wkshp,
  pages = {19--24},
  address = kdd03wkshpaddr,
  year = {2003},
}

@inproceedings{baxter:kdd03-wkshp,
  author = {Rohan Baxter and Peter Christen and Tim Churches},
  title = {A Comparison of Fast Blocking Methods for Record Linkage},
  booktitle = kdd03wkshp,
  pages = {25--27},
  address = kdd03wkshpaddr,
  year = {2003},
}

@inproceedings{borthwick:kdd03-wkshp,
  author = {A. Borthwick and M. Buechi and A. Goldberg},
  title = {Key Concepts in the {ChoiceMaker} 2 Record Matching System},
  booktitle = kdd03wkshp,
  pages = {28--30},
  address = kdd03wkshpaddr,
  year = {2003},
}

@inproceedings{verykios:kdd03-wkshp,
  author = {Mohamed G. Elfeky and Vassilios S. Verykios},
  title = {On Search Enhancement of the Record Linkage Process},
  booktitle = kdd03wkshp,
  pages = {31--33},
  address = kdd03wkshpaddr,
  year = {2003},
}

@inproceedings{michalowski:kdd03-wkshp,
  author = {M. Michalowski and S. Thakkar and C. Knoblock},
  title = {Exploiting Secondary Sources for Automatic Object Consolidation},
  booktitle = kdd03wkshp,
  pages = {34--36},
  address = kdd03wkshpaddr,
  year = {2003},
}

@inproceedings{neiling:kdd03-wkshp,
  author = {M. Neiling and S. Jurk},
  title = {The Object Identification Framework},
  booktitle = kdd03wkshp,
  pages = {37--39},
  address = kdd03wkshpaddr,
  year = {2003},
}

@inproceedings{quass:kdd03-wkshp,
  author = {D. Quass and P. Starkey},
  title = {Record Linkage for Genealogical Databases},
  booktitle = kdd03wkshp,
  pages = {40--42},
  address = kdd03wkshpaddr,
  year = {2003},
}