{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,22]],"date-time":"2024-10-22T20:08:31Z","timestamp":1729627711672,"version":"3.28.0"},"reference-count":27,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2007,10]]},"DOI":"10.1109\/icdm.2007.21","type":"proceedings-article","created":{"date-parts":[[2008,3,19]],"date-time":"2008-03-19T14:33:38Z","timestamp":1205937218000},"page":"3-12","source":"Crossref","is-referenced-by-count":48,"title":["How Much Noise Is Too Much: A Study in Automatic Text Classification"],"prefix":"10.1109","author":[{"given":"Sumeet","family":"Agarwal","sequence":"first","affiliation":[]},{"given":"Shantanu","family":"Godbole","sequence":"additional","affiliation":[]},{"given":"Diwakar","family":"Punjani","sequence":"additional","affiliation":[]},{"given":"Shourya","family":"Roy","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"year":"1998","author":"joachims","journal-title":"Advances in Kernel Methods Support Vector Machines","article-title":"Making large-scale support vector machine learning practical","key":"ref10"},{"doi-asserted-by":"publisher","key":"ref11","DOI":"10.1145\/146370.146380"},{"key":"ref12","article-title":"Binary codes capable of correcting deletions, insertions, and reversals","volume":"8","author":"levenshtein","year":"1966","journal-title":"Technical Report"},{"doi-asserted-by":"publisher","key":"ref13","DOI":"10.3115\/112405.112471"},{"year":"2003","author":"lewis","journal-title":"3rd Workshop on Operational Text Classification (OTC) in Conjunction with SIGKDD","key":"ref14"},{"year":"1996","author":"mccallum","journal-title":"Bow A Toolkit for Statistical Language Modeling Text Retrieval Classification and Clustering","key":"ref15"},{"key":"ref16","first-page":"1091","article-title":"Semantic annotation of unstructured and ungrammatical text","author":"michelson","year":"2005","journal-title":"Proc of IJCAI"},{"doi-asserted-by":"publisher","key":"ref17","DOI":"10.3115\/974147.974191"},{"key":"ref18","first-page":"103","article-title":"Ocr correction based on document level knowledge","author":"nartker","year":"2003","journal-title":"Proc Symp Electronic Imaging Science and Technology"},{"year":"2007","author":"nasukawa","journal-title":"Proc of AND07 Workshop in conjunction with IJCAI","article-title":"Adding sentence boundaries to conversational speech transcriptions using noisily labeled examples","key":"ref19"},{"year":"2007","author":"choudhury","journal-title":"Proc of AND07 Workshop in conjunction with IJCAI","article-title":"Investigation and modeling of the structure of texting language","key":"ref4"},{"year":"2000","author":"zhang","journal-title":"Proc of ICML","article-title":"A probability analysis on the value of unlabeled data for classification problems","key":"ref27"},{"key":"ref3","article-title":"Latent dirichlet allocation","volume":"14","author":"blei","year":"2002","journal-title":"Proc of NIPS"},{"year":"2000","author":"duda","journal-title":"Pattern Classification","key":"ref6"},{"doi-asserted-by":"publisher","key":"ref5","DOI":"10.1145\/363958.363994"},{"doi-asserted-by":"publisher","key":"ref8","DOI":"10.1109\/ICASSP.2003.1198860"},{"key":"ref7","first-page":"771","article-title":"A short introduction to boosting","volume":"14","author":"freund","year":"1999","journal-title":"Journal of Japan Society for AI"},{"doi-asserted-by":"publisher","key":"ref2","DOI":"10.1006\/cviu.1998.0687"},{"year":"1999","author":"hofmann","journal-title":"Proc Of UAI","article-title":"Probabilistic latent semantic analysis","key":"ref9"},{"doi-asserted-by":"publisher","key":"ref1","DOI":"10.1109\/ICASSP.1995.479268"},{"year":"1999","author":"nigam","journal-title":"Proc of IJCAI-99 Workshop on Machine Learning for Information Filtering","article-title":"Using maximum entropy for text classification","key":"ref20"},{"doi-asserted-by":"publisher","key":"ref22","DOI":"10.3115\/1220175.1220268"},{"year":"1993","author":"rabiner","journal-title":"Fundamentals of speech recognition","key":"ref21"},{"doi-asserted-by":"publisher","key":"ref24","DOI":"10.1109\/ICPR.2004.1334303"},{"doi-asserted-by":"publisher","key":"ref23","DOI":"10.3115\/1613984.1614006"},{"doi-asserted-by":"publisher","key":"ref26","DOI":"10.1145\/860435.860471"},{"year":"2006","author":"vlachos","journal-title":"EACL 2006 Workshop on Adaptive Text Extraction and Mining","article-title":"Active annotation","key":"ref25"}],"event":{"name":"Seventh IEEE International Conference on Data Mining (ICDM 2007)","start":{"date-parts":[[2007,10,28]]},"location":"Omaha, NE, USA","end":{"date-parts":[[2007,10,31]]}},"container-title":["Seventh IEEE International Conference on Data Mining (ICDM 2007)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/4470209\/4470210\/04470224.pdf?arnumber=4470224","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,3,16]],"date-time":"2017-03-16T18:10:53Z","timestamp":1489687853000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/4470224\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2007,10]]},"references-count":27,"URL":"https:\/\/doi.org\/10.1109\/icdm.2007.21","relation":{},"subject":[],"published":{"date-parts":[[2007,10]]}}}