iBet uBet web content aggregator. Adding the entire web to your favor.
iBet uBet web content aggregator. Adding the entire web to your favor.



Link to original content: https://api.openalex.org/works/doi:10.1109/SLT48900.2021.9383573
{"id":"https://openalex.org/W3144366453","doi":"https://doi.org/10.1109/slt48900.2021.9383573","title":"Detecting Expressions with Multimodal Transformers","display_name":"Detecting Expressions with Multimodal Transformers","publication_year":2021,"publication_date":"2021-01-19","ids":{"openalex":"https://openalex.org/W3144366453","doi":"https://doi.org/10.1109/slt48900.2021.9383573","mag":"3144366453"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt48900.2021.9383573","pdf_url":null,"source":{"id":"https://openalex.org/S4363605953","display_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2012.00063","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103091951","display_name":"Srinivas Parthasarathy","orcid":"https://orcid.org/0000-0001-8928-9821"},"institutions":[{"id":"https://openalex.org/I4210089985","display_name":"Amazon (Germany)","ror":"https://ror.org/00b9ktm87","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210089985"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Srinivas Parthasarathy","raw_affiliation_strings":["Amazon"],"affiliations":[{"raw_affiliation_string":"Amazon","institution_ids":["https://openalex.org/I4210089985"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5113697636","display_name":"Shiva Sundaram","orcid":null},"institutions":[{"id":"https://openalex.org/I4210089985","display_name":"Amazon (Germany)","ror":"https://ror.org/00b9ktm87","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210089985"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Shiva Sundaram","raw_affiliation_strings":["Amazon"],"affiliations":[{"raw_affiliation_string":"Amazon","institution_ids":["https://openalex.org/I4210089985"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.832,"has_fulltext":false,"cited_by_count":20,"citation_normalized_percentile":{"value":0.997886,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":93,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"636","last_page":"643"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.999,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.998,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.5662282},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.55040914},{"id":"https://openalex.org/keywords/multimodal-learning","display_name":"Multimodal learning","score":0.5291648},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.45824525},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.41050678}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7830244},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.59388256},{"id":"https://openalex.org/C195704467","wikidata":"https://www.wikidata.org/wiki/Q327968","display_name":"Facial expression","level":2,"score":0.57715046},{"id":"https://openalex.org/C173853756","wikidata":"https://www.wikidata.org/wiki/Q86915","display_name":"Dialog box","level":2,"score":0.5742462},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.5662282},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.55040914},{"id":"https://openalex.org/C2780660688","wikidata":"https://www.wikidata.org/wiki/Q25052564","display_name":"Multimodal learning","level":2,"score":0.5291648},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.47729552},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.47353575},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46256125},{"id":"https://openalex.org/C2779916870","wikidata":"https://www.wikidata.org/wiki/Q14467155","display_name":"Gaze","level":2,"score":0.45896554},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.45824525},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.43460637},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.41050678},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.34728524},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.19880277},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.10960892},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt48900.2021.9383573","pdf_url":null,"source":{"id":"https://openalex.org/S4363605953","display_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2012.00063","pdf_url":"https://arxiv.org/pdf/2012.00063","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2012.00063","pdf_url":"https://arxiv.org/pdf/2012.00063","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"score":0.46,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":56,"referenced_works":["https://openalex.org/W147964346","https://openalex.org/W1501669607","https://openalex.org/W1964469912","https://openalex.org/W1965947362","https://openalex.org/W1974210421","https://openalex.org/W1978262206","https://openalex.org/W1997060370","https://openalex.org/W2003502731","https://openalex.org/W2074788634","https://openalex.org/W2075688085","https://openalex.org/W2078671978","https://openalex.org/W2103184652","https://openalex.org/W2110885456","https://openalex.org/W2144005487","https://openalex.org/W2145310492","https://openalex.org/W2149628368","https://openalex.org/W2156140659","https://openalex.org/W2156503193","https://openalex.org/W2156848952","https://openalex.org/W2158705122","https://openalex.org/W2217426128","https://openalex.org/W2244142460","https://openalex.org/W2325939864","https://openalex.org/W2610961739","https://openalex.org/W2625297138","https://openalex.org/W2648194195","https://openalex.org/W2655404332","https://openalex.org/W2745497104","https://openalex.org/W2747172199","https://openalex.org/W2767415038","https://openalex.org/W2798536775","https://openalex.org/W2805662932","https://openalex.org/W2885005742","https://openalex.org/W2886300652","https://openalex.org/W2889100420","https://openalex.org/W2896457183","https://openalex.org/W2901836079","https://openalex.org/W2963252191","https://openalex.org/W2963341956","https://openalex.org/W2963403868","https://openalex.org/W2963709343","https://openalex.org/W2972602947","https://openalex.org/W2980182114","https://openalex.org/W3004939989","https://openalex.org/W3011853006","https://openalex.org/W3034266838","https://openalex.org/W3034751874","https://openalex.org/W3035299099","https://openalex.org/W3106250896","https://openalex.org/W3114214226","https://openalex.org/W3122081138","https://openalex.org/W3126471623","https://openalex.org/W3128420506","https://openalex.org/W4214724298","https://openalex.org/W4288102735","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W4298396513","https://openalex.org/W2999894541","https://openalex.org/W2795961259","https://openalex.org/W2417260800","https://openalex.org/W2291113340","https://openalex.org/W2283130723","https://openalex.org/W2117933979","https://openalex.org/W2098987383","https://openalex.org/W2082717302","https://openalex.org/W1596203174"],"abstract_inverted_index":{"Developing":[0],"machine":[1],"learning":[2],"algorithms":[3,59],"to":[4,81,148],"understand":[5],"person-to-person":[6],"engagement":[7,49],"can":[8],"result":[9],"in":[10,52],"natural":[11],"user":[12],"experiences":[13],"for":[14,60,101,127,159],"communal":[15],"devices":[16],"such":[17,24],"as":[18,25,44],"Amazon":[19],"Alexa.":[20],"Among":[21],"other":[22],"cues":[23],"voice":[26,39],"activity":[27],"and":[28,40,129],"gaze,":[29],"a":[30,53],"person's":[31],"audio-visual":[32,61,70,99],"expression":[33,42,161],"that":[34,76,96,110],"includes":[35],"tone":[36],"of":[37,48,63,84,146,155],"the":[38,85,90,106,111,153,156,160,164],"facial":[41],"serves":[43],"an":[45,69],"implicit":[46],"signal":[47],"between":[50],"parties":[51],"dialog.":[54],"This":[55],"study":[56],"investigates":[57],"deep-learning":[58],"detection":[62,162],"user's":[64],"expression.":[65],"We":[66],"first":[67],"implement":[68],"baseline":[71,117],"model":[72],"with":[73,93,119,122,144],"recurrent":[74,120],"layers":[75,95,121],"shows":[77,109],"competitive":[78],"results":[79],"compared":[80],"current":[82],"state":[83],"art.":[86],"Next,":[87],"we":[88],"propose":[89],"transformer":[91],"architecture":[92,118],"encoder":[94],"better":[97,115],"integrate":[98],"features":[100],"expressions":[102],"tracking.":[103],"Performance":[104],"on":[105,141,163],"Aff-Wild2":[107,165],"database":[108],"proposed":[112],"methods":[113],"perform":[114],"than":[116],"absolute":[123],"gains":[124,145],"approximately":[125],"2%":[126],"arousal":[128],"valence":[130],"descriptors.":[131],"Further,":[132],"multimodal":[133],"architectures":[134],"show":[135,152],"significant":[136],"improvements":[137],"over":[138],"models":[139],"trained":[140],"single":[142],"modalities":[143],"up":[147],"3.6%.":[149],"Ablation":[150],"studies":[151],"significance":[154],"visual":[157],"modality":[158],"database.":[166]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3144366453","counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":8},{"year":2021,"cited_by_count":3}],"updated_date":"2024-12-09T23:58:35.210073","created_date":"2021-04-13"}