{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,16]],"date-time":"2024-09-16T11:24:15Z","timestamp":1726485855746},"reference-count":51,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T00:00:00Z","timestamp":1669852800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T00:00:00Z","timestamp":1669852800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2022,10,17]],"date-time":"2022-10-17T00:00:00Z","timestamp":1665964800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100005153","name":"China National Funds for Distinguished Young Scientists","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100005153","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002367","name":"Chinese Academy of Sciences","doi-asserted-by":"publisher","award":["67125105"],"id":[{"id":"10.13039\/501100002367","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["International Journal of Applied Earth Observation and Geoinformation"],"published-print":{"date-parts":[[2022,12]]},"DOI":"10.1016\/j.jag.2022.103071","type":"journal-article","created":{"date-parts":[[2022,11,3]],"date-time":"2022-11-03T11:55:43Z","timestamp":1667476543000},"page":"103071","update-policy":"http:\/\/dx.doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":10,"special_numbering":"C","title":["MCRN: A Multi-source Cross-modal Retrieval Network for remote sensing"],"prefix":"10.1016","volume":"115","author":[{"ORCID":"http:\/\/orcid.org\/0000-0003-2150-4067","authenticated-orcid":false,"given":"Zhiqiang","family":"Yuan","sequence":"first","affiliation":[]},{"given":"Wenkai","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Changyuan","family":"Tian","sequence":"additional","affiliation":[]},{"given":"Yongqiang","family":"Mao","sequence":"additional","affiliation":[]},{"given":"Ruixue","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Hongqi","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Kun","family":"Fu","sequence":"additional","affiliation":[]},{"given":"Xian","family":"Sun","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"issue":"3","key":"10.1016\/j.jag.2022.103071_b1","doi-asserted-by":"crossref","first-page":"405","DOI":"10.3390\/rs12030405","article-title":"TextRS: Deep bidirectional triplet network for matching text to remote sensing images","volume":"12","author":"Abdullah","year":"2020","journal-title":"Remote Sens."},{"key":"10.1016\/j.jag.2022.103071_b2","article-title":"Comprehensively analyzing optical and polarimetric SAR features for land-use\/land-cover classification and urban vegetation extraction in highly-dense urban area","volume":"103","author":"Bai","year":"2021","journal-title":"Int. J. Appl. Earth Obs. Geoinf."},{"issue":"2","key":"10.1016\/j.jag.2022.103071_b3","doi-asserted-by":"crossref","first-page":"1144","DOI":"10.1109\/TGRS.2017.2760909","article-title":"Multilabel remote sensing image retrieval using a semisupervised graph-theoretic method","volume":"56","author":"Chaudhuri","year":"2018","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"10.1016\/j.jag.2022.103071_b4","series-title":"International Conference on Machine Learning","first-page":"1597","article-title":"A simple framework for contrastive learning of visual representations","author":"Chen","year":"2020"},{"year":"2020","series-title":"Big self-supervised models are strong semi-supervised learners","author":"Chen","key":"10.1016\/j.jag.2022.103071_b5"},{"issue":"10","key":"10.1016\/j.jag.2022.103071_b6","doi-asserted-by":"crossref","first-page":"7049","DOI":"10.1109\/TGRS.2020.2979273","article-title":"Deep cross-modal image voice retrieval in remote sensing","volume":"58","author":"Chen","year":"2020","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"10.1016\/j.jag.2022.103071_b7","doi-asserted-by":"crossref","first-page":"4284","DOI":"10.1109\/JSTARS.2021.3070872","article-title":"A deep semantic alignment network for the cross-modal image-text retrieval in remote sensing","volume":"14","author":"Cheng","year":"2021","journal-title":"IEEE J. Sel. Top. Appl. Earth Observ. Remote Sens."},{"issue":"12","key":"10.1016\/j.jag.2022.103071_b8","doi-asserted-by":"crossref","first-page":"7405","DOI":"10.1109\/TGRS.2016.2601622","article-title":"Learning rotation-invariant convolutional neural networks for object detection in VHR optical remote sensing images","volume":"54","author":"Cheng","year":"2016","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"year":"2018","series-title":"Voxceleb2: Deep speaker recognition","author":"Chung","key":"10.1016\/j.jag.2022.103071_b9"},{"issue":"5","key":"10.1016\/j.jag.2022.103071_b10","doi-asserted-by":"crossref","first-page":"1431","DOI":"10.1109\/36.718847","article-title":"Spatial information retrieval from remote-sensing images, I. Information theoretical perspective","volume":"36","author":"Datcu","year":"1998","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"10.1016\/j.jag.2022.103071_b11","series-title":"2017 IEEE 60th International Midwest Symposium on Circuits and Systems","first-page":"1597","article-title":"Gate-variants of gated recurrent unit (GRU) neural networks","author":"Dey","year":"2017"},{"key":"10.1016\/j.jag.2022.103071_b12","series-title":"2017 IEEE International Conference on Computer Vision","first-page":"2070","article-title":"Multi-task self-supervised visual learning","author":"Doersch","year":"2017"},{"year":"2017","series-title":"Vse++: Improving visual-semantic embeddings with hard negatives","author":"Faghri","key":"10.1016\/j.jag.2022.103071_b13"},{"issue":"1","key":"10.1016\/j.jag.2022.103071_b14","first-page":"2030","article-title":"Domain-adversarial training of neural networks","volume":"17","author":"Ganin","year":"2016","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.jag.2022.103071_b15","article-title":"Generative adversarial nets","volume":"27","author":"Goodfellow","year":"2014","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.jag.2022.103071_b16","series-title":"2019 IEEE\/CVF International Conference on Computer Vision","first-page":"6390","article-title":"Scaling and benchmarking self-supervised visual representation learning","author":"Goyal","year":"2019"},{"year":"2020","series-title":"Bootstrap your own latent: A new approach to self-supervised learning","author":"Grill","key":"10.1016\/j.jag.2022.103071_b17"},{"issue":"11","key":"10.1016\/j.jag.2022.103071_b18","doi-asserted-by":"crossref","first-page":"4644","DOI":"10.1109\/JSTARS.2019.2949220","article-title":"Jointly learning of visual and auditory: A new approach for RS image and audio cross-modal retrieval","volume":"12","author":"Guo","year":"2019","journal-title":"IEEE J. Sel. Top. Appl. Earth Obs. Remote Sens."},{"key":"10.1016\/j.jag.2022.103071_b19","doi-asserted-by":"crossref","unstructured":"He,\u00a0K., Zhang,\u00a0X., Ren,\u00a0S., Sun,\u00a0J., 2016. Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. pp. 770\u2013778).","DOI":"10.1109\/CVPR.2016.90"},{"issue":"3","key":"10.1016\/j.jag.2022.103071_b20","doi-asserted-by":"crossref","first-page":"1047","DOI":"10.1109\/TCYB.2018.2879846","article-title":"MHTN: Modal-adversarial hybrid transfer network for cross-modal retrieval","volume":"50","author":"Huang","year":"2018","journal-title":"IEEE Trans. Cybern."},{"article-title":"Deep visual-semantic alignments for generating image descriptions","year":"2015","series-title":"Computer Vision & Pattern Recognition","author":"Karpathy","key":"10.1016\/j.jag.2022.103071_b21"},{"year":"2017","series-title":"Unsupervised machine translation using monolingual corpora only","author":"Lample","key":"10.1016\/j.jag.2022.103071_b22"},{"key":"10.1016\/j.jag.2022.103071_b23","doi-asserted-by":"crossref","unstructured":"Lee,\u00a0K.H., Chen,\u00a0X., Hua,\u00a0G., Hu,\u00a0H., He,\u00a0X., 2018. Stacked cross attention for image-text matching. In: Proceedings of the European Conference on Computer Vision. ECCV, pp. 201\u2013216).","DOI":"10.1007\/978-3-030-01225-0_13"},{"issue":"11","key":"10.1016\/j.jag.2022.103071_b24","doi-asserted-by":"crossref","first-page":"6521","DOI":"10.1109\/TGRS.2018.2839705","article-title":"Learning source-invariant deep hashing convolutional neural networks for cross-source remote sensing image retrieval","volume":"56","author":"Li","year":"2018","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"issue":"2","key":"10.1016\/j.jag.2022.103071_b25","doi-asserted-by":"crossref","first-page":"950","DOI":"10.1109\/TGRS.2017.2756911","article-title":"Large-scale remote sensing image retrieval by deep hashing neural networks","volume":"56","author":"Li","year":"2018","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"10.1016\/j.jag.2022.103071_b26","doi-asserted-by":"crossref","unstructured":"Lin,\u00a0R., Xiao,\u00a0J., Fan,\u00a0J., 2018. Nextvlad: An efficient neural network to aggregate frame-level features for large-scale video classification. In: Proceedings of the European Conference on Computer Vision (ECCV) Workshops.","DOI":"10.1007\/978-3-030-11018-5_19"},{"issue":"4","key":"10.1016\/j.jag.2022.103071_b27","doi-asserted-by":"crossref","first-page":"3420","DOI":"10.1109\/TGRS.2020.3007533","article-title":"Deep hash learning for remote sensing image retrieval","volume":"59","author":"Liu","year":"2021","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"year":"2018","series-title":"Efficient low-rank multimodal fusion with modality-specific factors","author":"Liu","key":"10.1016\/j.jag.2022.103071_b28"},{"issue":"19","key":"10.1016\/j.jag.2022.103071_b29","doi-asserted-by":"crossref","first-page":"4834","DOI":"10.3390\/rs14194834","article-title":"Diffusion model with detail complement for super-resolution of remote sensing","volume":"14","author":"Liu","year":"2022","journal-title":"Remote Sens."},{"issue":"4","key":"10.1016\/j.jag.2022.103071_b30","doi-asserted-by":"crossref","first-page":"2183","DOI":"10.1109\/TGRS.2017.2776321","article-title":"Exploring models and data for remote sensing image caption generation","volume":"56","author":"Lu","year":"2017","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"10.1016\/j.jag.2022.103071_b31","doi-asserted-by":"crossref","first-page":"45","DOI":"10.1016\/j.isprsjprs.2022.03.019","article-title":"Beyond single receptive field: A receptive field fusion-and-stratification network for airborne laser scanning point cloud classification","volume":"188","author":"Mao","year":"2022","journal-title":"ISPRS J. Photogramm. Remote Sens."},{"year":"2022","series-title":"Bidirectional feature globalization for few-shot semantic segmentation of 3D point cloud scenes","author":"Mao","key":"10.1016\/j.jag.2022.103071_b32"},{"key":"10.1016\/j.jag.2022.103071_b33","series-title":"2018 10th IAPR Workshop on Pattern Recognition in Remote Sensing","first-page":"1","article-title":"Deep cross-modal retrieval for remote sensing image and audio","author":"Mao","year":"2018"},{"year":"2018","series-title":"Representation learning with contrastive predictive coding","author":"Oord","key":"10.1016\/j.jag.2022.103071_b34"},{"key":"10.1016\/j.jag.2022.103071_b35","first-page":"1","article-title":"Historical information-guided class-incremental semantic segmentation in remote sensing images","volume":"60","author":"Rong","year":"2022","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"issue":"1","key":"10.1016\/j.jag.2022.103071_b36","doi-asserted-by":"crossref","first-page":"248","DOI":"10.1109\/TGRS.2016.2604680","article-title":"Structure tensor Riemannian statistical models for CBIR and classification of remote sensing images","volume":"55","author":"Rosu","year":"2017","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"10.1016\/j.jag.2022.103071_b37","series-title":"IGARSS 2018-2018 IEEE International Geoscience and Remote Sensing Symposium","first-page":"4539","article-title":"Deep metric and hash-code learning for content-based retrieval of remote sensing images","author":"Roy","year":"2018"},{"issue":"5","key":"10.1016\/j.jag.2022.103071_b38","doi-asserted-by":"crossref","first-page":"2288","DOI":"10.1109\/36.868886","article-title":"Interactive learning and probabilistic retrieval in remote sensing image archives","volume":"38","author":"Schroder","year":"2000","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"10.1016\/j.jag.2022.103071_b39","series-title":"2018 IEEE International Conference on Acoustics, Speech and Signal Processing","first-page":"4779","article-title":"Natural TTS synthesis by conditioning wavenet on mel spectrogram predictions","author":"Shen","year":"2018"},{"key":"10.1016\/j.jag.2022.103071_b40","article-title":"Asymmetric hash code learning for remote sensing image retrieval","volume":"60","author":"Song","year":"2022","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"10.1016\/j.jag.2022.103071_b41","article-title":"Joint optic disc and cup segmentation based on multi-scale feature analysis and attention pyramid architecture for glaucoma screening","volume":"103","author":"Sun","year":"2021","journal-title":"Neural Comput. Appl."},{"year":"2019","series-title":"BERT rediscovers the classical NLP pipeline","author":"Tenney","key":"10.1016\/j.jag.2022.103071_b42"},{"key":"10.1016\/j.jag.2022.103071_b43","first-page":"30","article-title":"Attention is all you need","author":"Vaswani","year":"2017","journal-title":"Adv. Neural Inform. Process. Syst."},{"key":"10.1016\/j.jag.2022.103071_b44","doi-asserted-by":"crossref","unstructured":"Wang,\u00a0Z., Liu,\u00a0X., Li,\u00a0H., Sheng,\u00a0L., Yan,\u00a0J., Wang,\u00a0X., Shao,\u00a0J., 2019. Camp: Cross-modal adaptive message passing for text-image retrieval. In: Proceedings of the IEEE International Conference on Computer Vision. pp. 5764\u20135773).","DOI":"10.1109\/ICCV.2019.00586"},{"key":"10.1016\/j.jag.2022.103071_b45","series-title":"2021 IEEE International Geoscience and Remote Sensing Symposium","first-page":"2855","article-title":"Cross-modal feature fusion retrieval for remote sensing image-voice retrieval","author":"Yang","year":"2021"},{"key":"10.1016\/j.jag.2022.103071_b46","series-title":"2021 6th International Conference on Computational Intelligence and Applications","first-page":"149","article-title":"Speech emotion recognition based on secondary feature reconstruction","author":"Yuan","year":"2021"},{"key":"10.1016\/j.jag.2022.103071_b47","doi-asserted-by":"crossref","DOI":"10.1109\/TGRS.2022.3207171","article-title":"Learning to evaluate performance of multi-modal semantic localization","author":"Yuan","year":"2022","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"journal-title":"IEEE Trans. Geosci. Remote Sens.","article-title":"A lightweight multi-scale crossmodal text-image retrieval method in remote sensing","year":"2022","author":"Yuan","key":"10.1016\/j.jag.2022.103071_b48"},{"journal-title":"IEEE Trans. Geosci. Remote Sens.","article-title":"Remote sensing cross-modal text-image retrieval based on global and local information","year":"2022","author":"Yuan","key":"10.1016\/j.jag.2022.103071_b49"},{"journal-title":"IEEE Trans. Geosci. Remote Sens.","article-title":"Exploring a fine-grained multi-scale method for cross-modal remote sensing image retrieval","year":"2022","author":"Yuan","key":"10.1016\/j.jag.2022.103071_b50"},{"issue":"9","key":"10.1016\/j.jag.2022.103071_b51","doi-asserted-by":"crossref","first-page":"390","DOI":"10.3390\/ijgi8090390","article-title":"Using vehicle synthesis generative adversarial networks to improve vehicle detection in remote sensing images","volume":"8","author":"Zheng","year":"2019","journal-title":"ISPRS Int. J. Geo-Inf."}],"container-title":["International Journal of Applied Earth Observation and Geoinformation"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S156984322200259X?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S156984322200259X?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2024,5,25]],"date-time":"2024-05-25T04:43:05Z","timestamp":1716612185000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S156984322200259X"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12]]},"references-count":51,"alternative-id":["S156984322200259X"],"URL":"https:\/\/doi.org\/10.1016\/j.jag.2022.103071","relation":{},"ISSN":["1569-8432"],"issn-type":[{"type":"print","value":"1569-8432"}],"subject":[],"published":{"date-parts":[[2022,12]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"MCRN: A Multi-source Cross-modal Retrieval Network for remote sensing","name":"articletitle","label":"Article Title"},{"value":"International Journal of Applied Earth Observation and Geoinformation","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.jag.2022.103071","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2022 The Author(s). Published by Elsevier B.V.","name":"copyright","label":"Copyright"}],"article-number":"103071"}}