iBet uBet web content aggregator. Adding the entire web to your favor.
iBet uBet web content aggregator. Adding the entire web to your favor.



Link to original content: https://api.openalex.org/works/doi:10.48550/ARXIV.2409.13152
{"id":"https://openalex.org/W4403754993","doi":"https://doi.org/10.48550/arxiv.2409.13152","title":"Leveraging Audio-Only Data for Text-Queried Target Sound Extraction","display_name":"Leveraging Audio-Only Data for Text-Queried Target Sound Extraction","publication_year":2024,"publication_date":"2024-09-19","ids":{"openalex":"https://openalex.org/W4403754993","doi":"https://doi.org/10.48550/arxiv.2409.13152"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.13152","pdf_url":"http://arxiv.org/pdf/2409.13152","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2409.13152","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5079146015","display_name":"Kohei Saijo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Saijo, Kohei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055580486","display_name":"Janek Ebbers","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ebbers, Janek","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102812631","display_name":"Fran\u00e7ois G. Germain","orcid":"https://orcid.org/0000-0002-8973-5315"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Germain, Fran\u00e7ois G.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008673783","display_name":"Sameer Khurana","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Khurana, Sameer","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086940921","display_name":"Gordon Wichern","orcid":"https://orcid.org/0000-0002-8597-6795"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wichern, Gordon","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5064097430","display_name":"Jonathan Le Roux","orcid":"https://orcid.org/0000-0002-0158-2837"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Roux, Jonathan Le","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":85},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Audio Signal Classification and Analysis","score":0.9989,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Audio Signal Classification and Analysis","score":0.9989,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition Technology","score":0.9889,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Statistical Machine Translation and Natural Language Processing","score":0.9627,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/audio-event-detection","display_name":"Audio Event Detection","score":0.651093},{"id":"https://openalex.org/keywords/melody-extraction","display_name":"Melody Extraction","score":0.620934},{"id":"https://openalex.org/keywords/environmental-sound-recognition","display_name":"Environmental Sound Recognition","score":0.602046},{"id":"https://openalex.org/keywords/music-information-retrieval","display_name":"Music Information Retrieval","score":0.600754},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature Extraction","score":0.587589}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6718979},{"id":"https://openalex.org/C203718221","wikidata":"https://www.wikidata.org/wiki/Q491713","display_name":"Sound (geography)","level":2,"score":0.50531965},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.41244698},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.05698967},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.13152","pdf_url":"http://arxiv.org/pdf/2409.13152","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.13152","pdf_url":"http://arxiv.org/pdf/2409.13152","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4396701345","https://openalex.org/W4391913857","https://openalex.org/W4391375266","https://openalex.org/W2909726438","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2376932109","https://openalex.org/W2358668433","https://openalex.org/W2001405890"],"abstract_inverted_index":{"The":[0,117],"goal":[1],"of":[2,37,43],"text-queried":[3,68,217],"target":[4],"sound":[5,15],"extraction":[6],"(TSE)":[7],"is":[8,24,84,199],"to":[9,26,29,33,59,71,81,85,130,163,165,172,215],"extract":[10],"from":[11,113],"a":[12,14,19,35,87,101,106],"mixture":[13],"source":[16],"specified":[17],"with":[18,196],"natural-language":[20],"caption.":[21],"While":[22,134],"it":[23],"preferable":[25],"have":[27,155],"access":[28],"large-scale":[30],"text-audio":[31,46],"pairs":[32,47],"address":[34],"variety":[36],"text":[38,123,132,143,204],"prompts,":[39],"the":[40,49,67,75,94,114,131,140,153,160],"limited":[41],"number":[42],"available":[44],"high-quality":[45],"hinders":[48],"data":[50,62,76,195,210],"scaling.":[51],"To":[52],"this":[53,55,135,187],"end,":[54],"work":[56,138],"explores":[57],"how":[58],"leverage":[60],"audio-only":[61,194,209],"without":[63],"any":[64],"captions":[65,205],"for":[66],"TSE":[69,107,118,161,218],"task":[70],"potentially":[72],"scale":[73],"up":[74],"amount.":[77],"A":[78],"straightforward":[79],"way":[80],"do":[82],"so":[83],"use":[86],"joint":[88],"audio-text":[89],"embedding":[90,144,197],"model,":[91,99],"such":[92,181],"as":[93,100,182,200,202],"contrastive":[95],"language-audio":[96],"pre-training":[97],"(CLAP)":[98],"query":[102],"encoder":[103],"and":[104,142,175,208],"train":[105],"model":[108,119,162],"using":[109,193,203],"audio":[110,141,166],"embeddings":[111,154],"obtained":[112],"ground-truth":[115],"audio.":[116],"can":[120,184,211],"then":[121],"accept":[122],"queries":[124],"at":[125],"inference":[126],"time":[127],"by":[128],"switching":[129],"encoder.":[133],"approach":[136],"should":[137],"if":[139],"spaces":[145],"in":[146,151],"CLAP":[147],"were":[148],"well":[149],"aligned,":[150],"practice,":[152],"domain-specific":[156],"information":[157],"that":[158,177,192],"causes":[159],"overfit":[164],"queries.":[167],"We":[168],"investigate":[169],"several":[170],"methods":[171,180],"avoid":[173],"overfitting":[174],"show":[176],"simple":[178],"embedding-manipulation":[179],"dropout":[183,198],"effectively":[185,213],"alleviate":[186],"issue.":[188],"Extensive":[189],"experiments":[190],"demonstrate":[191],"effective":[201],"during":[206],"training,":[207],"be":[212],"leveraged":[214],"improve":[216],"models.":[219]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4403754993","counts_by_year":[],"updated_date":"2024-11-27T07:00:31.066215","created_date":"2024-10-26"}