{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,11,4]],"date-time":"2024-11-04T05:40:23Z","timestamp":1730698823062,"version":"3.28.0"},"publisher-location":"New York, NY, USA","reference-count":59,"publisher":"ACM","funder":[{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100004731","name":"Natural Science Foundation of Zhejiang Province","doi-asserted-by":"publisher","award":["LQ22F020007,LD24F020005,LDT23F0202,LDT23F02021F02"],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100004731","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62206250,62036009,62276237,62001418,62371421"],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Ten Thousand Talent Program of Zhejiang Province"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3680598","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:41Z","timestamp":1729925981000},"page":"9970-9979","update-policy":"http:\/\/dx.doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Semantic-Aware and Quality-Aware Interaction Network for Blind Video Quality Assessment"],"prefix":"10.1145","author":[{"ORCID":"http:\/\/orcid.org\/0000-0002-9025-9520","authenticated-orcid":false,"given":"Jianjun","family":"Xiang","sequence":"first","affiliation":[{"name":"Zhejiang University of Technology, Hangzhou, China"}]},{"ORCID":"http:\/\/orcid.org\/0000-0002-8302-1338","authenticated-orcid":false,"given":"Yuanjie","family":"Dang","sequence":"additional","affiliation":[{"name":"Zhejiang University of Technology, Hangzhou, China"}]},{"ORCID":"http:\/\/orcid.org\/0000-0001-6122-0574","authenticated-orcid":false,"given":"Peng","family":"Chen","sequence":"additional","affiliation":[{"name":"Zhejiang University of Technology, Hangzhou, China"}]},{"ORCID":"http:\/\/orcid.org\/0000-0003-2077-9608","authenticated-orcid":false,"given":"Ronghua","family":"Liang","sequence":"additional","affiliation":[{"name":"Zhejiang University of Technology, Hangzhou, China"}]},{"ORCID":"http:\/\/orcid.org\/0000-0003-2555-343X","authenticated-orcid":false,"given":"Ruohong","family":"Huan","sequence":"additional","affiliation":[{"name":"Zhejiang University of Technology, Hangzhou, China"}]},{"ORCID":"http:\/\/orcid.org\/0000-0003-4545-7197","authenticated-orcid":false,"given":"Nan","family":"Gao","sequence":"additional","affiliation":[{"name":"Zhejiang University of Technology, Hangzhou, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"International Conference on Machine Learning","volume":"2","author":"Bertasius Gedas","year":"2021","unstructured":"Gedas Bertasius, Heng Wang, and Lorenzo Torresani. 2021. Is space-time attention all you need for video understanding?. In International Conference on Machine Learning, Vol. 2. 4."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3088505"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413717"},{"volume-title":"Caglar Gulcehre, Dzmitry Bahdanau, Fethi Bougares, Holger Schwenk, and Yoshua Bengio.","year":"2014","author":"Cho Kyunghyun","key":"e_1_3_2_1_5_1","unstructured":"Kyunghyun Cho, Bart Van Merri\u00ebnboer, Caglar Gulcehre, Dzmitry Bahdanau, Fethi Bougares, Holger Schwenk, and Yoshua Bengio. 2014. Learning phrase representations using RNN encoder-decoder for statistical machine translation. arXiv preprint arXiv:1406.1078 (2014)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.2984879"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"volume-title":"International Conference on Learning Representations.","year":"2020","author":"Dosovitskiy Alexey","key":"e_1_3_2_1_8_1","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, et al. 2020. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00630"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413502"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2017.2707479"},{"volume-title":"VQEG meeting","year":"2000","author":"Experts Video Quality","key":"e_1_3_2_1_12_1","unstructured":"Video Quality Experts Group et al. 2000. Final report from the video quality experts group on the validation of objective models of video quality assessment. In VQEG meeting, Ottawa, Canada, March, 2000."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/QoMEX.2017.7965673"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.2967829"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.167"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10618-020-00710-y"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_14"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2019.2923051"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413845"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611860"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2022.3164467"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3351028"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-020-01408-w"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2016.2568752"},{"volume-title":"Proceedings of the Asian Conference on Computer Vision. 2562--2579","year":"2022","author":"Li Zutong","key":"e_1_3_2_1_26_1","unstructured":"Zutong Li and Lei Yang. 2022. DCVQE: A Hierarchical Transformer for Video Quality Assessment. In Proceedings of the Asian Conference on Computer Vision. 2562--2579."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvcir.2011.01.005"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00320"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/485"},{"volume-title":"Conviqt: Contrastive video quality estimator","year":"2023","author":"Madhusudana Pavan C","key":"e_1_3_2_1_31_1","unstructured":"Pavan C Madhusudana, Neil Birkbeck, Yilin Wang, Balu Adsumilli, and Alan C Bovik. 2023. Conviqt: Contrastive video quality estimator. IEEE Transactions on Image Processing (2023)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2015.2502725"},{"key":"e_1_3_2_1_33_1","volume-title":"CVD2014?A database for evaluating no-reference video quality assessment algorithms. IEEE Transactions on Image Processing","volume":"25","author":"Nuutinen Mikko","year":"2016","unstructured":"Mikko Nuutinen, Toni Virtanen, Mikko Vaahteranoksa, Tero Vuori, Pirkko Oittinen, and Jukka H\u00e4kkinen. 2016. CVD2014?A database for evaluating no-reference video quality assessment algorithms. IEEE Transactions on Image Processing, Vol. 25, 7 (2016), 3073--3086."},{"key":"e_1_3_2_1_34_1","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, et al. 2019. Pytorch: An imperative style, high-performance deep learning library. Advances in Neural Information Processing Systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2014.2299154"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2011.5946613"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2869673"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548329"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3632178"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3072221"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/OJSP.2021.3090333"},{"key":"e_1_3_2_1_42_1","volume-title":"Advances in Neural Information Processing Systems","volume":"30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in Neural Information Processing Systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01323"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20068-7_31"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3319332"},{"volume-title":"Discovqa: temporal distortion-content transformers for video quality assessment","year":"2023","author":"Wu Haoning","key":"e_1_3_2_1_46_1","unstructured":"Haoning Wu, Chaofeng Chen, Liang Liao, Jingwen Hou, Wenxiu Sun, Qiong Yan, and Weisi Lin. 2023. Discovqa: temporal distortion-content transformers for video quality assessment. IEEE Transactions on Circuits and Systems for Video Technology (2023)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01843"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611737"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2020.3030049"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW56347.2022.00126"},{"volume-title":"STAN: Spatio-Temporal Alignment Network for No-Reference Video Quality Assessment. In International Conference on Artificial Neural Networks. 160--171","year":"2023","author":"Yang Zhengyi","key":"e_1_3_2_1_51_1","unstructured":"Zhengyi Yang, Yuanjie Dang, Jianjun Xiang, and Peng Chen. 2023. STAN: Spatio-Temporal Alignment Network for No-Reference Video Quality Assessment. In International Conference on Artificial Neural Networks. 160--171."},{"volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 14019--14029","year":"2021","author":"Ying Zhenqiang","key":"e_1_3_2_1_52_1","unstructured":"Zhenqiang Ying, Maniratnam Mandal, Deepti Ghadiyaram, and Alan Bovik. 2021. Patch-VQ:'Patching Up'the video quality problem. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 14019--14029."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475368"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2011.2172591"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612023"},{"volume-title":"A spatial-temporal video quality assessment method via comprehensive HVS simulation","year":"2023","author":"Zhang Ao-Xiang","key":"e_1_3_2_1_56_1","unstructured":"Ao-Xiang Zhang, Yuan-Gen Wang, Weixuan Tang, Leida Li, and Sam Kwong. 2023. A spatial-temporal video quality assessment method via comprehensive HVS simulation. IEEE Transactions on Cybernetics (2023)."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3061932"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00174"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW59228.2023.00137"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Melbourne VIC Australia","acronym":"MM '24"},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3680598","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,4]],"date-time":"2024-11-04T05:05:42Z","timestamp":1730696742000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3680598"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":59,"alternative-id":["10.1145\/3664647.3680598","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3680598","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}