{"id":"https://openalex.org/W2128644237","doi":"https://doi.org/10.1109/icdm.2007.69","title":"Local Word Bag Model for Text Categorization","display_name":"Local Word Bag Model for Text Categorization","publication_year":2007,"publication_date":"2007-10-01","ids":{"openalex":"https://openalex.org/W2128644237","doi":"https://doi.org/10.1109/icdm.2007.69","mag":"2128644237"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/icdm.2007.69","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5009428029","display_name":"Wen Yu Pu","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wen Pu","raw_affiliation_strings":["Peking Univ., Peking"],"affiliations":[{"raw_affiliation_string":"Peking Univ., Peking","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100432418","display_name":"Ning Liu","orcid":"https://orcid.org/0000-0003-1129-9071"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ning Liu","raw_affiliation_strings":["Microsoft Research Asia, Beijing,"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing,","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100381753","display_name":"Shuicheng Yan","orcid":"https://orcid.org/0000-0001-8906-3777"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shuicheng Yan","raw_affiliation_strings":["University of Illinois Urbana Champaign > > > >"],"affiliations":[{"raw_affiliation_string":"University of Illinois Urbana Champaign > > > >","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030330619","display_name":"Jun Yan","orcid":"https://orcid.org/0000-0003-2497-5518"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jun Yan","raw_affiliation_strings":["University of Illinois Urbana Champaign > > > >"],"affiliations":[{"raw_affiliation_string":"University of Illinois Urbana Champaign > > > >","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100825074","display_name":"Kunqing Xie","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kunqing Xie","raw_affiliation_strings":["Peking Univ., Peking"],"affiliations":[{"raw_affiliation_string":"Peking Univ., Peking","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100665770","display_name":"Zheng Chen","orcid":"https://orcid.org/0000-0001-5252-4752"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zheng Chen","raw_affiliation_strings":["Microsoft Research Asia, Beijing,"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing,","institution_ids":["https://openalex.org/I4210113369"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.377,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":22,"citation_normalized_percentile":{"value":0.721589,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":88,"max":89},"biblio":{"volume":null,"issue":null,"first_page":"625","last_page":"630"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9998,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9998,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9981,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9965,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bag-of-words-model","display_name":"Bag-of-words model","score":0.6870436},{"id":"https://openalex.org/keywords/cosine-similarity","display_name":"Cosine similarity","score":0.5564881},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5102985},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.45331478},{"id":"https://openalex.org/keywords/paragraph","display_name":"Paragraph","score":0.43533647}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7618363},{"id":"https://openalex.org/C13672336","wikidata":"https://www.wikidata.org/wiki/Q3460803","display_name":"Bag-of-words model","level":2,"score":0.6870436},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6833295},{"id":"https://openalex.org/C2780762811","wikidata":"https://www.wikidata.org/wiki/Q1784941","display_name":"Cosine similarity","level":3,"score":0.5564881},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.52297926},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5102985},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.48651212},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.45331478},{"id":"https://openalex.org/C2777206241","wikidata":"https://www.wikidata.org/wiki/Q194431","display_name":"Paragraph","level":2,"score":0.43533647},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.4218788},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.41837317},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.38397443},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.14523038},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/icdm.2007.69","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.71,"display_name":"Reduced inequalities"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":12,"referenced_works":["https://openalex.org/W113396932","https://openalex.org/W1540550673","https://openalex.org/W2053463056","https://openalex.org/W2066680326","https://openalex.org/W2101087387","https://openalex.org/W2104978738","https://openalex.org/W2118020653","https://openalex.org/W2143668817","https://openalex.org/W2151103935","https://openalex.org/W2153635508","https://openalex.org/W3120421331","https://openalex.org/W69295931"],"related_works":["https://openalex.org/W4200355488","https://openalex.org/W3149865514","https://openalex.org/W2619127353","https://openalex.org/W2614038472","https://openalex.org/W2609539975","https://openalex.org/W2377059580","https://openalex.org/W2188650855","https://openalex.org/W2165805973","https://openalex.org/W2141479508","https://openalex.org/W2086485446"],"abstract_inverted_index":{"Many":[0],"text":[1,50,60],"processing":[2],"applications":[3],"adopted":[4],"the":[5,30,39,43,52,57,63,114,120,130,135,152,161,184,193],"bag":[6,138],"of":[7,12,23,66,88,116,123,144,167,195],"words":[8,67],"(BOW)":[9],"model":[10,55],"representation":[11],"documents,":[13],"in":[14,46,169],"which":[15],"each":[16],"document":[17,84,107],"is":[18,36],"represented":[19],"as":[20,38,85],"a":[21,78,83,86,106,142],"vector":[22],"weighted":[24],"terms":[25],"or":[26,70,92],"n-grams,":[27],"and":[28,49],"then":[29],"cosine":[31],"distance":[32],"between":[33,164],"two":[34,117,165],"vectors":[35],"used":[37],"similarity":[40,115],"measurement.":[41],"Although":[42],"great":[44],"success":[45],"information":[47,103],"retrieval":[48],"categorization,":[51],"conventional":[53],"BOW":[54],"ignores":[56],"detailed":[58],"local":[59,89,96,102,126,136,181],"information,":[61],"i.e.":[62],"co-occurrence":[64],"pattern":[65],"at":[68,183],"sentence":[69],"paragraph":[71],"level.":[72],"In":[73],"this":[74],"paper,":[75],"we":[76,94,111,133],"propose":[77],"novel":[79],"approach":[80],"to":[81,159],"represent":[82],"set":[87],"tf-idf":[90],"vectors,":[91],"what":[93],"called":[95],"word":[97,137],"bags":[98,182],"(LWB).":[99],"By":[100],"encapsulating":[101],"distributed":[104],"around":[105],"into":[108],"multiple":[109],"LWBs,":[110],"can":[112],"measure":[113],"documents":[118],"via":[119],"partial":[121,162],"match":[122,146],"their":[124],"corresponding":[125],"bags.":[127],"To":[128],"perform":[129],"matching":[131,163],"efficiently,":[132],"introduce":[134],"kernel":[139,150],"(LWB":[140],"kernel),":[141],"variant":[143],"VG-Pyramid":[145],"kernel.":[147],"The":[148],"new":[149,197],"enables":[151],"discriminative":[153],"machine":[154],"learning":[155],"methods":[156],"like":[157],"SVM":[158],"compute":[160],"sets":[166],"LWBs":[168],"linear":[170],"time":[171,175],"after":[172],"an":[173],"one":[174],"hierarchical":[176],"clustering":[177],"procedure":[178],"over":[179],"all":[180],"initialization":[185],"stage.":[186],"Experiments":[187],"on":[188],"real":[189],"world":[190],"datasets":[191],"demonstrate":[192],"effectiveness":[194],"our":[196],"approach.":[198]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2128644237","counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":2},{"year":2016,"cited_by_count":7}],"updated_date":"2024-12-08T10:42:41.322472","created_date":"2016-06-24"}