{"id":"https://openalex.org/W4393247592","doi":"https://doi.org/10.48550/arxiv.2403.16169","title":"Gaze-guided Hand-Object Interaction Synthesis: Benchmark and Method","display_name":"Gaze-guided Hand-Object Interaction Synthesis: Benchmark and Method","publication_year":2024,"publication_date":"2024-03-24","ids":{"openalex":"https://openalex.org/W4393247592","doi":"https://doi.org/10.48550/arxiv.2403.16169"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2403.16169","pdf_url":"https://arxiv.org/pdf/2403.16169","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2403.16169","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112284258","display_name":"Jie Tian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tian, Jie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114950263","display_name":"Lingxiao Yang","orcid":"https://orcid.org/0000-0001-8642-9614"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Lingxiao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5105471683","display_name":"Ran Ji","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ji, Ran","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102015139","display_name":"Yuexin Ma","orcid":"https://orcid.org/0000-0001-7237-988X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Yuexin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100777698","display_name":"Lan Xu","orcid":"https://orcid.org/0000-0002-8807-7787"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Lan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101500646","display_name":"Jingyi Yu","orcid":"https://orcid.org/0000-0001-9198-6853"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Jingyi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032717078","display_name":"Shi Ye","orcid":"https://orcid.org/0000-0002-1354-0650"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Ye","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100639522","display_name":"Jingya Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jingya","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.998288,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":84,"max":93},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11398","display_name":"Gesture Recognition in Human-Computer Interaction","score":0.9979,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11398","display_name":"Gesture Recognition in Human-Computer Interaction","score":0.9979,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11707","display_name":"Eye Tracking in Human-Computer Interaction","score":0.9957,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10914","display_name":"Tactile Perception and Cross-modal Plasticity","score":0.9891,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7403139}],"concepts":[{"id":"https://openalex.org/C2779916870","wikidata":"https://www.wikidata.org/wiki/Q14467155","display_name":"Gaze","level":2,"score":0.8617727},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7403139},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6158038},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5902645},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.53105},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5216277},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.51665},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.35126132},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.1679256},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.10590184}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2403.16169","pdf_url":"https://arxiv.org/pdf/2403.16169","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2403.16169","pdf_url":"https://arxiv.org/pdf/2403.16169","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4321353415","https://openalex.org/W4240909707","https://openalex.org/W3207760378","https://openalex.org/W3014378845","https://openalex.org/W2745001401","https://openalex.org/W2385108104","https://openalex.org/W2378211422","https://openalex.org/W2130974462","https://openalex.org/W2059546927","https://openalex.org/W1880689012"],"abstract_inverted_index":{"Gaze":[0],"plays":[1],"a":[2,57,74,115,120,205],"crucial":[3],"role":[4],"in":[5],"revealing":[6],"human":[7,18,35],"attention":[8],"and":[9,48,53,72,92,107,139,237],"intention,":[10],"shedding":[11],"light":[12],"on":[13,119,179,192],"the":[14,26,32,39,45,65,129,150,161,168,183,193,211,215,221,232,238],"cognitive":[15],"processes":[16],"behind":[17],"actions.":[19],"The":[20],"integration":[21],"of":[22,28,34,41,89,102,110,146,164,175,234,240],"gaze":[23,134,180],"guidance":[24],"with":[25,98],"dynamics":[27],"hand-object":[29,80,122,165],"interactions":[30],"boosts":[31],"accuracy":[33],"motion":[36,170,185],"prediction.":[37],"However,":[38],"lack":[40],"datasets":[42],"that":[43],"capture":[44],"intricate":[46],"relationship":[47],"consistency":[49],"among":[50],"gaze,":[51,90],"hand,":[52,91],"object":[54,93,169,176,195],"movements":[55],"remains":[56],"substantial":[58],"hurdle.":[59],"In":[60,128,214],"this":[61],"paper,":[62],"we":[63,132,203,219],"introduce":[64,204],"first":[66],"Gaze-guided":[67],"Hand-Object":[68],"Interaction":[69],"dataset,":[70,83],"GazeHOI,":[71,84],"present":[73],"novel":[75],"task":[76],"for":[77],"synthesizing":[78],"gaze-guided":[79,121],"interactions.":[81],"Our":[82,228],"features":[85,138],"simultaneous":[86],"3D":[87],"modeling":[88],"interactions,":[94],"comprising":[95],"479":[96],"sequences":[97,174],"an":[99],"average":[100],"duration":[101],"19.1":[103],"seconds,":[104],"812":[105],"sub-sequences,":[106],"33":[108],"objects":[109],"various":[111],"sizes.":[112],"We":[113],"propose":[114],"hierarchical":[116],"framework":[117],"centered":[118],"interaction":[123],"diffusion":[124,151,155,171,186],"model,":[125],"named":[126],"GHO-Diffusion.":[127],"pre-diffusion":[130],"phase,":[131,152,218],"separate":[133],"conditions":[135,142],"into":[136],"spatial-temporal":[137],"goal":[140,200],"pose":[141,201],"at":[143],"different":[144],"levels":[145],"information":[147],"granularity.":[148],"During":[149],"two":[153],"gaze-conditioned":[154],"models":[156],"are":[157],"stacked":[158],"to":[159,209],"simplify":[160],"complex":[162],"synthesis":[163],"motions.":[166],"Here,":[167],"model":[172,187],"generates":[173],"motions":[177,190,224],"based":[178,191],"conditions,":[181],"while":[182],"hand":[184,189,223],"produces":[188],"generated":[194,222],"motion.":[196],"To":[197],"improve":[198],"fine-grained":[199],"alignment,":[202],"Spherical":[206],"Gaussian":[207],"constraint":[208],"guide":[210],"denoising":[212],"step.":[213],"subsequent":[216],"post-diffusion":[217],"optimize":[220],"using":[225],"contact":[226],"consistency.":[227],"extensive":[229],"experiments":[230],"highlight":[231],"uniqueness":[233],"our":[235,241],"dataset":[236],"effectiveness":[239],"approach.":[242]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4393247592","counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2024-12-04T10:15:52.730342","created_date":"2024-03-28"}