{"id":"https://openalex.org/W4383987970","doi":"https://doi.org/10.48550/arxiv.2307.04106","title":"Parametric Depth Based Feature Representation Learning for Object Detection and Segmentation in Bird's Eye View","display_name":"Parametric Depth Based Feature Representation Learning for Object Detection and Segmentation in Bird's Eye View","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4383987970","doi":"https://doi.org/10.48550/arxiv.2307.04106"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2307.04106","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2307.04106","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100759136","display_name":"Jiayu Yang","orcid":"https://orcid.org/0000-0001-7475-1286"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Jiayu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041031140","display_name":"Enze Xie","orcid":"https://orcid.org/0000-0001-6890-1049"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xie, Enze","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101540588","display_name":"Jose M. \u00c1lvarez","orcid":"https://orcid.org/0000-0002-7535-6322"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alvarez, Jose M.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100379983","display_name":"Miaomiao Liu","orcid":"https://orcid.org/0000-0001-6485-3510"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Miaomiao","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":69},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10531","display_name":"Stereo Vision and Depth Estimation","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10531","display_name":"Stereo Vision and Depth Estimation","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Computational Modeling of Visual Saliency Detection","score":0.9988,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Image Feature Retrieval and Recognition Techniques","score":0.9963,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image Segmentation","score":0.585595},{"id":"https://openalex.org/keywords/video-object-segmentation","display_name":"Video Object Segmentation","score":0.578669},{"id":"https://openalex.org/keywords/depth-estimation","display_name":"Depth Estimation","score":0.555818},{"id":"https://openalex.org/keywords/object-recognition","display_name":"Object Recognition","score":0.545485},{"id":"https://openalex.org/keywords/feature-matching","display_name":"Feature Matching","score":0.542444},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.52660334},{"id":"https://openalex.org/keywords/visibility","display_name":"Visibility","score":0.5000205},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.46545842}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.77113366},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7466829},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6767774},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6118692},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.52660334},{"id":"https://openalex.org/C123403432","wikidata":"https://www.wikidata.org/wiki/Q654068","display_name":"Visibility","level":2,"score":0.5000205},{"id":"https://openalex.org/C117251300","wikidata":"https://www.wikidata.org/wiki/Q1849855","display_name":"Parametric statistics","level":2,"score":0.480204},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.46545842},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.44083667},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.12921944},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2307.04106","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2307.04106","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2307.04106","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W598185802","https://openalex.org/W4226107239","https://openalex.org/W4220843223","https://openalex.org/W4200176076","https://openalex.org/W2954738200","https://openalex.org/W2392812199","https://openalex.org/W2361471170","https://openalex.org/W2355516524","https://openalex.org/W2025616642","https://openalex.org/W1954972543"],"abstract_inverted_index":{"Recent":[0],"vision-only":[1],"perception":[2],"models":[3],"for":[4,85,100,110,142,178],"autonomous":[5],"driving":[6],"achieved":[7],"promising":[8],"results":[9],"by":[10],"encoding":[11],"multi-view":[12],"image":[13,31,93],"features":[14,32,94,141],"into":[15,33,170,216],"Bird's-Eye-View":[16],"(BEV)":[17],"space.":[18],"A":[19],"critical":[20],"step":[21],"and":[22,149,184,195,228],"the":[23,34,68,91,96,101,119,125,133,139,188,220,232],"main":[24],"bottleneck":[25],"of":[26],"these":[27],"methods":[28,155,241],"is":[29],"transforming":[30],"BEV":[35,134],"coordinate":[36],"frame.":[37,135],"This":[38,172],"paper":[39],"focuses":[40],"on":[41,56,124,225,231,242],"leveraging":[42],"geometry":[43,69],"information,":[44],"such":[45,50,145,181],"as":[46,146,163,182],"depth,":[47],"to":[48,62,71,79,95,132,206],"model":[49],"feature":[51,86,121],"transformation.":[52,87],"Existing":[53,152],"works":[54],"rely":[55],"non-parametric":[57],"depth":[58,82,108,131,193,204],"distribution":[59,83,109],"modeling":[60,84,205],"leading":[61],"significant":[63],"memory":[64],"consumption,":[65],"or":[66],"ignore":[67],"information":[70,169],"address":[72],"this":[73],"problem.":[74,222],"In":[75],"contrast,":[76],"we":[77,117,137],"propose":[78],"use":[80,138],"parametric":[81,107,203],"We":[88,199],"first":[89],"lift":[90],"2D":[92],"3D":[97,120,126],"space":[98,127],"defined":[99],"ego":[102],"vehicle":[103],"via":[104],"a":[105,208],"predicted":[106],"each":[111,114],"pixel":[112],"in":[113],"view.":[115],"Then,":[116],"aggregate":[118],"volume":[122],"based":[123],"occupancy":[128],"derived":[129],"from":[130,159],"Finally,":[136],"transformed":[140],"downstream":[143],"tasks":[144],"object":[147,226],"detection":[148,227],"semantic":[150,153,229],"segmentation.":[151],"segmentation":[154,230],"do":[156,165],"also":[157],"suffer":[158],"an":[160],"hallucination":[161,173,221],"problem":[162],"they":[164],"not":[166],"take":[167],"visibility":[168],"account.":[171],"can":[174,218],"be":[175],"particularly":[176],"problematic":[177],"subsequent":[179],"modules":[180],"control":[183],"planning.":[185],"To":[186],"mitigate":[187,219],"issue,":[189],"our":[190,202,237],"method":[191,238],"provides":[192],"uncertainty":[194],"reliable":[196],"visibility-aware":[197,210],"estimations.":[198],"further":[200],"leverage":[201],"present":[207],"novel":[209],"evaluation":[211],"metric":[212],"that,":[213],"when":[214],"taken":[215],"account,":[217],"Extensive":[223],"experiments":[224],"nuScenes":[233],"datasets":[234],"demonstrate":[235],"that":[236],"outperforms":[239],"existing":[240],"both":[243],"tasks.":[244]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4383987970","counts_by_year":[],"updated_date":"2024-11-22T08:02:52.911812","created_date":"2023-07-12"}