iBet uBet web content aggregator. Adding the entire web to your favor.
iBet uBet web content aggregator. Adding the entire web to your favor.



Link to original content: https://api.openalex.org/works/doi:10.1109/ICASSP40776.2020.9053915
{"id":"https://openalex.org/W3015478688","doi":"https://doi.org/10.1109/icassp40776.2020.9053915","title":"Transformer-Based Text-to-Speech with Weighted Forced Attention","display_name":"Transformer-Based Text-to-Speech with Weighted Forced Attention","publication_year":2020,"publication_date":"2020-04-09","ids":{"openalex":"https://openalex.org/W3015478688","doi":"https://doi.org/10.1109/icassp40776.2020.9053915","mag":"3015478688"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9053915","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022943637","display_name":"Takuma Okamoto","orcid":"https://orcid.org/0000-0001-9913-4647"},"institutions":[{"id":"https://openalex.org/I90023481","display_name":"National Institute of Information and Communications Technology","ror":"https://ror.org/016bgq349","country_code":"JP","type":"facility","lineage":["https://openalex.org/I90023481"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Takuma Okamoto","raw_affiliation_strings":["National Institute of Information and Communications Technology, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Information and Communications Technology, Japan","institution_ids":["https://openalex.org/I90023481"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078330211","display_name":"Tomoki Toda","orcid":"https://orcid.org/0000-0001-8146-1279"},"institutions":[{"id":"https://openalex.org/I60134161","display_name":"Nagoya University","ror":"https://ror.org/04chrp450","country_code":"JP","type":"education","lineage":["https://openalex.org/I60134161"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Tomoki Toda","raw_affiliation_strings":["Information Technology Center, Nagoya University, Japan"],"affiliations":[{"raw_affiliation_string":"Information Technology Center, Nagoya University, Japan","institution_ids":["https://openalex.org/I60134161"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109350986","display_name":"Yoshinori Shiga","orcid":null},"institutions":[{"id":"https://openalex.org/I90023481","display_name":"National Institute of Information and Communications Technology","ror":"https://ror.org/016bgq349","country_code":"JP","type":"facility","lineage":["https://openalex.org/I90023481"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yoshinori Shiga","raw_affiliation_strings":["National Institute of Information and Communications Technology, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Information and Communications Technology, Japan","institution_ids":["https://openalex.org/I90023481"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5114514387","display_name":"Hisashi Kawai","orcid":"https://orcid.org/0000-0002-0914-5092"},"institutions":[{"id":"https://openalex.org/I90023481","display_name":"National Institute of Information and Communications Technology","ror":"https://ror.org/016bgq349","country_code":"JP","type":"facility","lineage":["https://openalex.org/I90023481"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hisashi Kawai","raw_affiliation_strings":["National Institute of Information and Communications Technology, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Information and Communications Technology, Japan","institution_ids":["https://openalex.org/I90023481"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.745,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.839389,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":87,"max":88},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition Technology","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition Technology","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Audio Signal Classification and Analysis","score":0.999,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Statistical Machine Translation and Natural Language Processing","score":0.9973,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speaker-verification","display_name":"Speaker Verification","score":0.537499},{"id":"https://openalex.org/keywords/syntax-based-translation-models","display_name":"Syntax-based Translation Models","score":0.531718},{"id":"https://openalex.org/keywords/acoustic-modeling","display_name":"Acoustic Modeling","score":0.520895},{"id":"https://openalex.org/keywords/language-modeling","display_name":"Language Modeling","score":0.519826},{"id":"https://openalex.org/keywords/end-to-end-speech-recognition","display_name":"End-to-End Speech Recognition","score":0.519475},{"id":"https://openalex.org/keywords/fundamental-frequency","display_name":"Fundamental frequency","score":0.42913848}],"concepts":[{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.7506044},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7105489},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.632706},{"id":"https://openalex.org/C183115368","wikidata":"https://www.wikidata.org/wiki/Q856577","display_name":"Weighting","level":2,"score":0.58463436},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.55430883},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.5462238},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.48808348},{"id":"https://openalex.org/C10513763","wikidata":"https://www.wikidata.org/wiki/Q1331774","display_name":"Fundamental frequency","level":2,"score":0.42913848},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3582213},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.17893025},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.15329486},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.1017783},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9053915","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Gender equality","score":0.45,"id":"https://metadata.un.org/sdg/5"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":33,"referenced_works":["https://openalex.org/W2049686551","https://openalex.org/W2169711598","https://openalex.org/W2294797155","https://openalex.org/W2394662942","https://openalex.org/W2508073380","https://openalex.org/W2519091744","https://openalex.org/W2584032004","https://openalex.org/W2749651610","https://openalex.org/W2767052532","https://openalex.org/W2901997113","https://openalex.org/W2903739847","https://openalex.org/W2936103087","https://openalex.org/W2938102059","https://openalex.org/W2942807473","https://openalex.org/W2946200149","https://openalex.org/W2963300588","https://openalex.org/W2963403868","https://openalex.org/W2963609956","https://openalex.org/W2963691546","https://openalex.org/W2963736842","https://openalex.org/W2963945466","https://openalex.org/W2964121744","https://openalex.org/W2964243274","https://openalex.org/W2970730223","https://openalex.org/W2970971581","https://openalex.org/W2972569067","https://openalex.org/W2972789651","https://openalex.org/W2972831865","https://openalex.org/W2972885185","https://openalex.org/W3007328579","https://openalex.org/W3007859642","https://openalex.org/W4295312788","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W844516635","https://openalex.org/W4385573012","https://openalex.org/W4304195479","https://openalex.org/W3162414783","https://openalex.org/W3161109662","https://openalex.org/W2973019895","https://openalex.org/W2892009249","https://openalex.org/W2888778920","https://openalex.org/W2097900004","https://openalex.org/W1550041424"],"abstract_inverted_index":{"This":[0],"paper":[1],"investigates":[2],"state-of-the-art":[3],"Transformer-":[4],"and":[5,41,62,68,106,127],"FastSpeech-based":[6],"high-fidelity":[7],"neural":[8],"text-to-speech":[9],"(TTS)":[10],"with":[11,48,73,118],"full-context":[12],"label":[13],"input":[14],"for":[15],"pitch":[16],"accent":[17],"languages.":[18],"The":[19,97],"aim":[20],"is":[21,56,94],"to":[22,58],"realize":[23],"faster":[24],"training":[25],"than":[26],"conventional":[27,92],"Tacotron-based":[28,34],"models.":[29],"Introducing":[30],"phoneme":[31,54,86],"durations":[32,55,87],"into":[33],"TTS":[35],"models":[36],"improves":[37,134],"both":[38,65],"synthesis":[39,60,135,146],"quality":[40],"stability.":[42,147],"Therefore,":[43],"a":[44,74,80,102,119],"Transformer-based":[45],"acoustic":[46],"model":[47],"weighted":[49,140],"forced":[50,69,116,141],"attention":[51,67,70,117,142],"obtained":[52],"from":[53,132],"proposed":[57,113,139],"improve":[59,145],"accuracy":[61],"stability,":[63],"where":[64],"encoder-decoder":[66],"are":[71,88],"used":[72],"weighting":[75,120],"factor.":[76],"Furthermore,":[77],"FastSpeech":[78,133],"without":[79],"duration":[81,130],"predictor,":[82],"in":[83],"which":[84],"the":[85,107,112,129,138],"predicted":[89],"by":[90],"another":[91],"model,":[93],"also":[95],"investigated.":[96],"results":[98],"of":[99,122],"experiments":[100],"using":[101,115],"Japanese":[103],"female":[104],"corpus":[105],"WaveGlow":[108],"vocoder":[109],"indicate":[110],"that":[111],"Transformer":[114],"factor":[121],"0.5":[123],"outperforms":[124],"other":[125],"models,":[126],"removing":[128],"predictor":[131],"quality,":[136],"although":[137],"does":[143],"not":[144]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3015478688","counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":3}],"updated_date":"2024-11-07T17:06:35.342437","created_date":"2020-04-17"}