{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,16]],"date-time":"2024-09-16T11:00:01Z","timestamp":1726484401426},"reference-count":70,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2022,7,1]],"date-time":"2022-07-01T00:00:00Z","timestamp":1656633600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2022,7,1]],"date-time":"2022-07-01T00:00:00Z","timestamp":1656633600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2022,7,1]],"date-time":"2022-07-01T00:00:00Z","timestamp":1656633600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2022,7,1]],"date-time":"2022-07-01T00:00:00Z","timestamp":1656633600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2022,7,1]],"date-time":"2022-07-01T00:00:00Z","timestamp":1656633600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,7,1]],"date-time":"2022-07-01T00:00:00Z","timestamp":1656633600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Image and Vision Computing"],"published-print":{"date-parts":[[2022,7]]},"DOI":"10.1016\/j.imavis.2022.104474","type":"journal-article","created":{"date-parts":[[2022,5,17]],"date-time":"2022-05-17T16:45:30Z","timestamp":1652805930000},"page":"104474","update-policy":"http:\/\/dx.doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":5,"special_numbering":"C","title":["STCA: Utilizing a spatio-temporal cross-attention network for enhancing video person re-identification"],"prefix":"10.1016","volume":"123","author":[{"given":"Amran","family":"Bhuiyan","sequence":"first","affiliation":[]},{"given":"Jimmy Xiangji","family":"Huang","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"article-title":"An improved deep learning architecture for person re-identification","year":"2015","series-title":"In CVPR","author":"Ahmed","key":"10.1016\/j.imavis.2022.104474_bb0005"},{"key":"10.1016\/j.imavis.2022.104474_bb0010","doi-asserted-by":"crossref","DOI":"10.1016\/j.imavis.2019.07.009","article-title":"Improved person re-identification based on saliency and semantic parsing with deep neural network models","volume":"92","author":"Quispe","year":"2019","journal-title":"Image Vis. Comput."},{"key":"10.1016\/j.imavis.2022.104474_bb0015","doi-asserted-by":"crossref","DOI":"10.1016\/j.imavis.2020.103970","article-title":"Person search: new paradigm of person re-identification: a survey and outlook of recent works","volume":"101","author":"Islam","year":"2020","journal-title":"Image Vis. Comput."},{"key":"10.1016\/j.imavis.2022.104474_bb0020","doi-asserted-by":"crossref","DOI":"10.1016\/j.imavis.2020.103875","article-title":"Person re-identification with expanded neighborhoods distance re-ranking","volume":"95","author":"Lv","year":"2020","journal-title":"Image Vis. Comput."},{"article-title":"Deep residual learning for image recognition","year":"2016","series-title":"In CVPR","author":"He","key":"10.1016\/j.imavis.2022.104474_bb0025"},{"author":"Hermans","key":"10.1016\/j.imavis.2022.104474_bb0030"},{"key":"10.1016\/j.imavis.2022.104474_bb0035","series-title":"Proceedings of the European Conference on Computer Vision (ECCV)","first-page":"402","article-title":"Part-aligned bilinear representations for person re-identification","author":"Lee","year":"2018"},{"article-title":"Bag of tricks and a strong baseline for deep person re-identification","year":"2019","series-title":"CVPRWK","author":"Luo","key":"10.1016\/j.imavis.2022.104474_bb0040"},{"key":"10.1016\/j.imavis.2022.104474_bb0045","series-title":"2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"1179","article-title":"Mask-guided contrastive attention model for person re-identification","author":"Song","year":"2018"},{"key":"10.1016\/j.imavis.2022.104474_bb2000","doi-asserted-by":"crossref","unstructured":"Zhaohui Liang, Andrew Powell, Ilker Ersoy, Mahdieh Poostchi, Kamol- rat Silamut, Kannappan Palaniappan, Peng Guo, Md Amir Hossain, Antani Sameer, Richard James Maude, et al. Cnn-based image anal- ysis for malaria diagnosis. In 2016 IEEE international conference on bioinformatics and biomedicine (BIBM), pages 493\u2013496. IEEE, 2016.","DOI":"10.1109\/BIBM.2016.7822567"},{"key":"10.1016\/j.imavis.2022.104474_bb3000","doi-asserted-by":"crossref","unstructured":"Zhaohui Liang, Gang Zhang, Jimmy Xiangji Huang, and Qm- ming Vivian Hu. Deep learning for healthcare decision making with emrs. In 2014 IEEE International Conference on Bioinformatics and Biomedicine (BIBM), pages 556\u2013559. IEEE, 2014.","DOI":"10.1109\/BIBM.2014.6999219"},{"key":"10.1016\/j.imavis.2022.104474_bb0050","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"1983","article-title":"A two stream siamese convolutional neural network for person re-identification","author":"Chung","year":"2017"},{"article-title":"Gated siamese convolutional neural network architecture for human re-identification","year":"2016","series-title":"ECCV","author":"Rahul Rama Varior","key":"10.1016\/j.imavis.2022.104474_bb0055"},{"key":"10.1016\/j.imavis.2022.104474_bb0060","series-title":"The IEEE Winter Conference on Applications of Computer Vision","first-page":"2675","article-title":"Pose guided gated fusion for person re-identification","author":"Bhuiyan","year":"2020"},{"author":"Gao","key":"10.1016\/j.imavis.2022.104474_bb0065"},{"key":"10.1016\/j.imavis.2022.104474_bb0070","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"8287","article-title":"Sta: Spatial-temporal attention for large-scale video-based person re-identification","volume":"33","author":"Yang","year":"2019"},{"key":"10.1016\/j.imavis.2022.104474_bb0075","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"7183","article-title":"Vrstc: Occlusion-free video person re-identification","author":"Hou","year":"2019"},{"key":"10.1016\/j.imavis.2022.104474_bb0080","series-title":"Asian Conference on Computer Vision","first-page":"620","article-title":"Video-based person re-identification via 3d convolutional networks and non-local attention","author":"Liao","year":"2018"},{"key":"10.1016\/j.imavis.2022.104474_bb0085","series-title":"2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"1169","article-title":"Video person re-identification with competitive snippet-similarity aggregation and co-attentive snippet embedding","author":"Chen","year":"2018"},{"key":"10.1016\/j.imavis.2022.104474_bb0090","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"8618","article-title":"Multi-scale 3d convolution network for video based person re-identification","volume":"33","author":"Li","year":"2019"},{"key":"10.1016\/j.imavis.2022.104474_bb0095","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"10407","article-title":"Multi-granularity reference-aided attentive feature aggregation for video-based person re-identification","author":"Zhang","year":"2020"},{"year":"2020","series-title":"Appearance-Preserving 3d Convolution for Video-Based Person Re-Identification","author":"Xinqian","key":"10.1016\/j.imavis.2022.104474_bb0100"},{"key":"10.1016\/j.imavis.2022.104474_bb0105","doi-asserted-by":"crossref","first-page":"104246","DOI":"10.1016\/j.imavis.2021.104246","article-title":"Flow guided mutual attention for person re-identification","volume":"113","author":"Kiran","year":"2021","journal-title":"Image Vis. Comput."},{"key":"10.1016\/j.imavis.2022.104474_bb0110","first-page":"104356","article-title":"Spatial temporal and channel aware network for video-based person re-identification","author":"Hui","year":"2021","journal-title":"Image Vis. Comput."},{"key":"10.1016\/j.imavis.2022.104474_bb0115","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"562","article-title":"Co-segmentation inspired attention networks for video-based person re-identification","author":"Subramaniam","year":"2019"},{"first-page":"2899","article-title":"Learning multi-granular hypergraphs for video-based person re-identification","year":"2020","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","key":"10.1016\/j.imavis.2022.104474_bb0120"},{"key":"10.1016\/j.imavis.2022.104474_bb0125","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"369","article-title":"Diversity regularized spatiotemporal attention for video-based person re-identification","author":"Li","year":"2018"},{"article-title":"Beyond part models: Person retrieval with refined part pooling (and a strong convolutional baseline)","year":"2018","series-title":"ECCV","author":"Sun","key":"10.1016\/j.imavis.2022.104474_bb0130"},{"article-title":"Auto-reid: Searching for a part-aware convnet for person re-identification","year":"2019","series-title":"ICCV","author":"Quan","key":"10.1016\/j.imavis.2022.104474_bb0135"},{"year":"2019","series-title":"Mixed high-order attention network for person re-identification","author":"Chen","key":"10.1016\/j.imavis.2022.104474_bb0140"},{"article-title":"Re-identification with consistent attentive siamese networks","year":"2019","series-title":"CVPR","author":"Zheng","key":"10.1016\/j.imavis.2022.104474_bb0145"},{"article-title":"Aanet: Attribute attention network for person re-identifications","year":"2019","series-title":"CVPR","author":"Tay","key":"10.1016\/j.imavis.2022.104474_bb0150"},{"article-title":"Densely semantically aligned person re-identification","year":"2019","series-title":"CVPR","author":"Zhang","key":"10.1016\/j.imavis.2022.104474_bb0155"},{"article-title":"Unsupervised domain adaptation in the dissimilarity space for person re-identification","year":"2020","series-title":"ECCV","author":"Mekhazni","key":"10.1016\/j.imavis.2022.104474_bb0160"},{"key":"10.1016\/j.imavis.2022.104474_bb0165","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"1325","article-title":"Recurrent convolutional network for video-based person re-identification","author":"McLaughlin","year":"2016"},{"article-title":"Spatio-temporal lstm with trust gates for 3d human action recognition","year":"2016","series-title":"ECCV","author":"Liu","key":"10.1016\/j.imavis.2022.104474_bb0170"},{"issue":"10","key":"10.1016\/j.imavis.2022.104474_bb0175","doi-asserted-by":"crossref","first-page":"2788","DOI":"10.1109\/TCSVT.2017.2715499","article-title":"Video-based person re-identification with accumulative motion context","volume":"28","author":"Liu","year":"2017","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.imavis.2022.104474_bb0180","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"7794","article-title":"Non-local neural networks","author":"Wang","year":"2018"},{"key":"10.1016\/j.imavis.2022.104474_bb0185","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"4913","article-title":"Attribute-driven feature disentangling and temporal aggregation for video person re-identification","author":"Yiru Zhao","year":"2019"},{"first-page":"5363","article-title":"Dual attention matching network for context-aware feature sequence based person re-identification","year":"2018","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","key":"10.1016\/j.imavis.2022.104474_bb0190"},{"key":"10.1016\/j.imavis.2022.104474_bb0195","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"4733","article-title":"Jointly attentive spatial-temporal pooling networks for video-based person re-identification","author":"Xu","year":"2017"},{"key":"10.1016\/j.imavis.2022.104474_bb0200","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"4370","article-title":"Spatial-temporal correlation and topology learning for person re-identification in videos","author":"Liu","year":"2021"},{"key":"10.1016\/j.imavis.2022.104474_bb0205","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"1490","article-title":"Dense interaction learning for video-based person re-identification","author":"Hua","year":"2021"},{"key":"10.1016\/j.imavis.2022.104474_bb0210","doi-asserted-by":"crossref","first-page":"424","DOI":"10.1016\/j.neucom.2022.03.032","article-title":"Relation-based global-partial feature learning network for video-based person re-identification","volume":"488","author":"Yang","year":"2022","journal-title":"Neurocomputing"},{"key":"10.1016\/j.imavis.2022.104474_bb0215","doi-asserted-by":"crossref","first-page":"33","DOI":"10.1016\/j.neucom.2021.10.018","article-title":"What-where-when attention network for video-based person re-identification","volume":"468","author":"Zhang","year":"2022","journal-title":"Neurocomputing"},{"key":"10.1016\/j.imavis.2022.104474_bb0220","doi-asserted-by":"crossref","DOI":"10.1109\/TCSVT.2022.3157130","article-title":"Saliency and granularity: discovering temporal coherence for video-based person re-identification","author":"Chen","year":"2022","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.imavis.2022.104474_bb0225","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"152","article-title":"Spatio-temporal representation factorization for video-based person re-identification","author":"Aich","year":"2021"},{"key":"10.1016\/j.imavis.2022.104474_bb0230","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"13334","article-title":"Watching you: global-guided reciprocal learning for video-based person re-identification","author":"Liu","year":"2021"},{"article-title":"Quality aware network for set to set recognition","year":"2017","series-title":"IEEE Conference on Computer Vision and Pattern Recognition","author":"Liu","key":"10.1016\/j.imavis.2022.104474_bb0235"},{"key":"10.1016\/j.imavis.2022.104474_bb0240","series-title":"2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"6776","article-title":"See the forest for the trees: Joint spatial and temporal recurrent neural networks for video-based person re-identification","author":"Zhou","year":"2017"},{"author":"Simonyan","key":"10.1016\/j.imavis.2022.104474_bb0245"},{"key":"10.1016\/j.imavis.2022.104474_bb0250","series-title":"Proceedings of the European Conference on Computer Vision (ECCV)","first-page":"464","article-title":"Two at once: Enhancing learning and generalization capacities via ibn-net","author":"Pan","year":"2018"},{"key":"10.1016\/j.imavis.2022.104474_bb0255","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"7132","article-title":"Squeeze-and-excitation networks","author":"Jie","year":"2018"},{"key":"10.1016\/j.imavis.2022.104474_bb0260","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"2818","article-title":"Rethinking the inception architecture for computer vision","author":"Szegedy","year":"2016"},{"key":"10.1016\/j.imavis.2022.104474_bb0265","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"4489","article-title":"Learning spatiotemporal features with 3d convolutional networks","author":"Tran","year":"2015"},{"key":"10.1016\/j.imavis.2022.104474_bb0270","series-title":"proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"6299","article-title":"Quo vadis, action recognition? A new model and the kinetics dataset","author":"Carreira","year":"2017"},{"key":"10.1016\/j.imavis.2022.104474_bb0275","series-title":"Proceedings of the IEEE conference on Computer Vision and Pattern Recognition","first-page":"6546","article-title":"Can spatiotemporal 3d cnns retrace the history of 2d cnns and imagenet?","author":"Hara","year":"2018"},{"article-title":"Cross attention network for few-shot classification","year":"2019","series-title":"NeurIPS","author":"Hou","key":"10.1016\/j.imavis.2022.104474_bb0280"},{"article-title":"A discriminative feature learning approach for deep face recognition","year":"2016","series-title":"ECCV","author":"Wen","key":"10.1016\/j.imavis.2022.104474_bb0285"},{"article-title":"Mancs: A multi-task attentional network with curriculum sampling for person re-identification","year":"2018","series-title":"In ECCV","author":"Cheng","key":"10.1016\/j.imavis.2022.104474_bb0290"},{"first-page":"868","article-title":"Mars: A video benchmark for large-scale person re-identification","year":"2016","series-title":"European Conference on Computer Vision","key":"10.1016\/j.imavis.2022.104474_bb0295"},{"key":"10.1016\/j.imavis.2022.104474_bb0300","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"5177","article-title":"Exploit the unknown gradually: One-shot video-based person re-identification by stepwise learning","author":"Yu","year":"2018"},{"key":"10.1016\/j.imavis.2022.104474_bb0305","series-title":"European Conference on Computer Vision","first-page":"688","article-title":"Person re-identification by video ranking","author":"Wang","year":"2014"},{"issue":"3","key":"10.1016\/j.imavis.2022.104474_bb0310","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","article-title":"Imagenet large scale visual recognition challenge","volume":"115","author":"Russakovsky","year":"2015","journal-title":"Int. J. Comput. Vis."},{"author":"Kay","key":"10.1016\/j.imavis.2022.104474_bb0315"},{"article-title":"Spatially and temporally efficient non-local attention network for video-based person re-identification","year":"2019","series-title":"BMVC","author":"Liu","key":"10.1016\/j.imavis.2022.104474_bb0320"},{"key":"10.1016\/j.imavis.2022.104474_bb0325","doi-asserted-by":"crossref","DOI":"10.1016\/j.imavis.2020.104068","article-title":"Video-based person re-identification by intra-frame and inter-frame graph neural network","volume":"106","author":"Liu","year":"2021","journal-title":"Image Vis. Comput."},{"key":"10.1016\/j.imavis.2022.104474_bb0330","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"2014","article-title":"Bicnet-tks: Learning efficient spatial-temporal representation for video person re-identification","author":"Hou","year":"2021"},{"key":"10.1016\/j.imavis.2022.104474_bb0335","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"12036","article-title":"Video-based person re-identification with spatial and temporal memory networks","author":"Eom","year":"2021"},{"first-page":"3958","year":"2019","series-title":"In Proceedings of the IEEE International Conference on Computer Vision","author":"Li","key":"10.1016\/j.imavis.2022.104474_bb0340"}],"container-title":["Image and Vision Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0262885622001032?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0262885622001032?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2024,2,12]],"date-time":"2024-02-12T14:01:28Z","timestamp":1707746488000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0262885622001032"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,7]]},"references-count":70,"alternative-id":["S0262885622001032"],"URL":"https:\/\/doi.org\/10.1016\/j.imavis.2022.104474","relation":{},"ISSN":["0262-8856"],"issn-type":[{"type":"print","value":"0262-8856"}],"subject":[],"published":{"date-parts":[[2022,7]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"STCA: Utilizing a spatio-temporal cross-attention network for enhancing video person re-identification","name":"articletitle","label":"Article Title"},{"value":"Image and Vision Computing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.imavis.2022.104474","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2022 Elsevier B.V. All rights reserved.","name":"copyright","label":"Copyright"}],"article-number":"104474"}}