{"id":"https://openalex.org/W2153741584","doi":"https://doi.org/10.1109/ideas.2006.18","title":"Effective Incremental Clustering for Duplicate Detection in Large Databases","display_name":"Effective Incremental Clustering for Duplicate Detection in Large Databases","publication_year":2006,"publication_date":"2006-12-01","ids":{"openalex":"https://openalex.org/W2153741584","doi":"https://doi.org/10.1109/ideas.2006.18","mag":"2153741584"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/ideas.2006.18","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022488945","display_name":"Francesco Folino","orcid":"https://orcid.org/0000-0002-4952-1187"},"institutions":[],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Francesco Folino","raw_affiliation_strings":["ICAR-CNR, Rende, Cosenza, Italy"],"affiliations":[{"raw_affiliation_string":"ICAR-CNR, Rende, Cosenza, Italy","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004372595","display_name":"Giuseppe Manco","orcid":"https://orcid.org/0000-0001-9672-3833"},"institutions":[],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Giuseppe Manco","raw_affiliation_strings":["ICAR-CNR, Rende, Cosenza, Italy"],"affiliations":[{"raw_affiliation_string":"ICAR-CNR, Rende, Cosenza, Italy","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051833173","display_name":"Luigi Pontieri","orcid":"https://orcid.org/0000-0003-4513-0362"},"institutions":[],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Luigi Pontieri","raw_affiliation_strings":["ICAR-CNR, Rende, Cosenza, Italy"],"affiliations":[{"raw_affiliation_string":"ICAR-CNR, Rende, Cosenza, Italy","institution_ids":[]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.284,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":3,"citation_normalized_percentile":{"value":0.344507,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":72,"max":74},"biblio":{"volume":null,"issue":null,"first_page":"45","last_page":"52"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9984,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9984,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/database-index","display_name":"Database index","score":0.6089625}],"concepts":[{"id":"https://openalex.org/C118930307","wikidata":"https://www.wikidata.org/wiki/Q600590","display_name":"Tuple","level":2,"score":0.90297496},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7817831},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.7683213},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6352551},{"id":"https://openalex.org/C59276292","wikidata":"https://www.wikidata.org/wiki/Q580427","display_name":"Database index","level":3,"score":0.6089625},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.60119873},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5953876},{"id":"https://openalex.org/C99138194","wikidata":"https://www.wikidata.org/wiki/Q183427","display_name":"Hash function","level":2,"score":0.5396045},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.5042895},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.48748612},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.43650195},{"id":"https://openalex.org/C2777382242","wikidata":"https://www.wikidata.org/wiki/Q6017816","display_name":"Index (typography)","level":2,"score":0.41589448},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.21450543},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.111467004},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/ideas.2006.18","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":19,"referenced_works":["https://openalex.org/W130948412","https://openalex.org/W1502916507","https://openalex.org/W1559390933","https://openalex.org/W2024770506","https://openalex.org/W2036216970","https://openalex.org/W2043481183","https://openalex.org/W2067566391","https://openalex.org/W2081193615","https://openalex.org/W2105423800","https://openalex.org/W2121093772","https://openalex.org/W2147717514","https://openalex.org/W2150698190","https://openalex.org/W2152565070","https://openalex.org/W2154785834","https://openalex.org/W2161936973","https://openalex.org/W2169844574","https://openalex.org/W2998852864","https://openalex.org/W4254734767","https://openalex.org/W4254788633"],"related_works":["https://openalex.org/W4301000806","https://openalex.org/W4237510188","https://openalex.org/W2185250746","https://openalex.org/W2163864652","https://openalex.org/W2158269934","https://openalex.org/W2077014748","https://openalex.org/W2048379072","https://openalex.org/W1562055306","https://openalex.org/W1560164226","https://openalex.org/W1505866794"],"abstract_inverted_index":{"We":[0],"propose":[1],"an":[2,23],"incremental":[3],"algorithm":[4],"for":[5,27],"discovering":[6],"clusters":[7],"of":[8,16,22,39,101],"duplicate":[9],"tuples":[10,40],"in":[11,41],"large":[12],"databases.":[13],"The":[14,66],"core":[15],"the":[17,20,42,57,82,91,99],"approach":[18,74,93],"is":[19,62,69],"usage":[21],"indexing":[24],"technique":[25],"which,":[26],"any":[28],"newly":[29],"arrived":[30],"tuple":[31],"mu,":[32,49],"allows":[33],"to":[34,48,54,56,77,81],"efficiently":[35],"retrieve":[36],"a":[37,72],"set":[38],"database":[43],"which":[44,51,61,75],"are":[45,52],"mostly":[46],"similar":[47,79],"and":[50,86],"likely":[53],"refer":[55],"same":[58,83],"real-world":[59],"entity":[60],"associated":[63],"with":[64],"mu.":[65],"proposed":[67,92],"index":[68],"based":[70],"on":[71],"hashing":[73],"tends":[76],"assign":[78],"objects":[80],"buckets.":[84],"Empirical":[85],"analytical":[87],"evaluation":[88],"demonstrates":[89],"that":[90],"achieves":[94],"satisfactory":[95],"efficiency":[96],"results,":[97],"at":[98],"cost":[100],"low":[102],"accuracy":[103],"loss":[104]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2153741584","counts_by_year":[],"updated_date":"2024-12-09T22:16:47.289024","created_date":"2016-06-24"}