{"id":"https://openalex.org/W2136201726","doi":"https://doi.org/10.1109/spire.2001.989770","title":"A comparative study of topic identification on newspaper and e-mail","display_name":"A comparative study of topic identification on newspaper and e-mail","publication_year":2001,"publication_date":"2001-01-01","ids":{"openalex":"https://openalex.org/W2136201726","doi":"https://doi.org/10.1109/spire.2001.989770","mag":"2136201726"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/spire.2001.989770","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://inria.hal.science/inria-00107535/document","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041939827","display_name":"Brigitte Bigi","orcid":"https://orcid.org/0000-0003-1834-6918"},"institutions":[{"id":"https://openalex.org/I1326498283","display_name":"Institut national de recherche en informatique et en automatique","ror":"https://ror.org/02kvxyf05","country_code":"FR","type":"government","lineage":["https://openalex.org/I1326498283"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"B. Bigi","raw_affiliation_strings":["LORIA INRIA-Lorraine"],"affiliations":[{"raw_affiliation_string":"LORIA INRIA-Lorraine","institution_ids":["https://openalex.org/I1326498283"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103222074","display_name":"Armelle Brun","orcid":"https://orcid.org/0000-0002-9876-6906"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"A. Brun","raw_affiliation_strings":["Analysis, perception and recognition of speech"],"affiliations":[{"raw_affiliation_string":"Analysis, perception and recognition of speech","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111482011","display_name":"Jean\u2010Paul Haton","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"J.-P. Haton","raw_affiliation_strings":["Analysis, perception and recognition of speech"],"affiliations":[{"raw_affiliation_string":"Analysis, perception and recognition of speech","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090778317","display_name":"Kamel Sma\u0131\u0308li","orcid":"https://orcid.org/0000-0002-4237-7303"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"K. Smaili","raw_affiliation_strings":["Analysis, perception and recognition of speech"],"affiliations":[{"raw_affiliation_string":"Analysis, perception and recognition of speech","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5108365460","display_name":"Imed Zitouni","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"I. Zitouni","raw_affiliation_strings":["Analysis, perception and recognition of speech"],"affiliations":[{"raw_affiliation_string":"Analysis, perception and recognition of speech","institution_ids":[]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.862,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":34,"citation_normalized_percentile":{"value":0.860136,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":90},"biblio":{"volume":null,"issue":null,"first_page":"238","last_page":"241"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Multi-label Text Classification in Machine Learning","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Multi-label Text Classification in Machine Learning","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Extraction and Crawling Techniques","score":0.9993,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Automatic Keyword Extraction from Textual Data","score":0.9985,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.68351007},{"id":"https://openalex.org/keywords/tf\u2013idf","display_name":"tf\u2013idf","score":0.63350147},{"id":"https://openalex.org/keywords/textual-data","display_name":"Textual Data","score":0.520705},{"id":"https://openalex.org/keywords/web-data-extraction","display_name":"Web Data Extraction","score":0.51687}],"concepts":[{"id":"https://openalex.org/C201280247","wikidata":"https://www.wikidata.org/wiki/Q11032","display_name":"Newspaper","level":2,"score":0.8846524},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.84611946},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.68351007},{"id":"https://openalex.org/C81758059","wikidata":"https://www.wikidata.org/wiki/Q796584","display_name":"tf\u2013idf","level":3,"score":0.63350147},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.61118954},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5572936},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.5519374},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47691867},{"id":"https://openalex.org/C112698675","wikidata":"https://www.wikidata.org/wiki/Q37038","display_name":"Advertising","level":1,"score":0.111068785},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.08944982},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/spire.2001.989770","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://hal.inria.fr/inria-00107535","pdf_url":"https://inria.hal.science/inria-00107535/document","source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":["Centre National de la Recherche Scientifique"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://inria.hal.science/inria-00107535/file/A01-R-341.pdf","pdf_url":"https://inria.hal.science/inria-00107535/file/A01-R-341.pdf","source":null,"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://hal.inria.fr/inria-00107535/file/A01-R-341.pdf","pdf_url":"https://hal.inria.fr/inria-00107535/file/A01-R-341.pdf","source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":["Centre National de la Recherche Scientifique"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://hal.inria.fr/inria-00107535/document","pdf_url":"https://hal.inria.fr/inria-00107535/document","source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":["Centre National de la Recherche Scientifique"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://hal.inria.fr/inria-00107535","pdf_url":"https://inria.hal.science/inria-00107535/document","source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":["Centre National de la Recherche Scientifique"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality education","score":0.63}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":9,"referenced_works":["https://openalex.org/W1597533204","https://openalex.org/W2005422315","https://openalex.org/W2024780708","https://openalex.org/W20483661","https://openalex.org/W2071106922","https://openalex.org/W2102028744","https://openalex.org/W2133636047","https://openalex.org/W2142749559","https://openalex.org/W2441154163"],"related_works":["https://openalex.org/W644339423","https://openalex.org/W612150824","https://openalex.org/W3204681432","https://openalex.org/W2796524599","https://openalex.org/W2383443050","https://openalex.org/W2376554757","https://openalex.org/W2367702734","https://openalex.org/W2361959990","https://openalex.org/W2100945520","https://openalex.org/W1596512750"],"abstract_inverted_index":{"This":[0,52],"work":[1,39],"presents":[2],"several":[3],"statistical":[4],"methods":[5,20,44,63,78,126],"for":[6,57],"topic":[7,27,33,61,81],"identification":[8],"on":[9,23,67,72,85,95,104,112],"two":[10,25,134],"kinds":[11],"of":[12,76,83],"textual":[13],"data:":[14],"newspaper":[15,88,114],"articles":[16],"and":[17,35],"e-mails.":[18],"Five":[19],"are":[21],"tested":[22],"these":[24,43],"corpora:":[26],"unigrams,":[28],"cache":[29],"model,":[30],"TFIDF":[31],"classijier,":[32],"peqdexity,":[34],"weighted":[36],"model.":[37],"Our":[38],"aims":[40],"to":[41,48],"study":[42,53],"by":[45],"confronting":[46],"them":[47],"very":[49,55],"diferent":[50],"data.":[51],"is":[54],"fruitful":[56],"our":[58,125],"research.":[59],"Statistical":[60],"identiJication":[62],"depend":[64],"not":[65,92,108],"only":[66],"a":[68,80,86,113],"corpus,":[69],"but":[70,90,106],"also":[71,117],"its":[73],"type.":[74],"One":[75],"the":[77,101,109,132],"achieves":[79],"identiJcation":[82],"80%":[84],"general":[87],"corpus":[89],"does":[91],"exceed":[93],"30%":[94],"e-mail":[96],"corpus.":[97,115],"Another":[98],"method":[99],"gives":[100],"best":[102],"result":[103],"e-mails,":[105],"has":[107],"same":[110],"behavior":[111],"We":[116],"show":[118],"in":[119,130],"this":[120],"paper":[121],"that":[122],"almost":[123],"all":[124],"achieve":[127],"good":[128],"results":[129],"retrieving":[131],"first":[133],"manually":[135],"annotated":[136],"labels.":[137]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2136201726","counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2019,"cited_by_count":5},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":5},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":2}],"updated_date":"2024-10-20T19:18:05.825094","created_date":"2016-06-24"}