{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,23]],"date-time":"2024-09-23T04:06:27Z","timestamp":1727064387627},"publisher-location":"Cham","reference-count":46,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030012397"},{"type":"electronic","value":"9783030012403"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-030-01240-3_37","type":"book-chapter","created":{"date-parts":[[2018,10,6]],"date-time":"2018-10-06T04:36:08Z","timestamp":1538800568000},"page":"609-625","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":61,"title":["Flow-Grounded Spatial-Temporal Video Prediction from Still Images"],"prefix":"10.1007","author":[{"given":"Yijun","family":"Li","sequence":"first","affiliation":[]},{"given":"Chen","family":"Fang","sequence":"additional","affiliation":[]},{"given":"Jimei","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Zhaowen","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Xin","family":"Lu","sequence":"additional","affiliation":[]},{"given":"Ming-Hsuan","family":"Yang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,10,5]]},"reference":[{"key":"37_CR1","doi-asserted-by":"publisher","first-page":"15276","DOI":"10.1038\/ncomms15276","volume":"8","author":"M Ekman","year":"2017","unstructured":"Ekman, M., Kok, P., de Lange, F.P.: Time-compressed preplay of anticipated events in human primary visual cortex. Nat. Commun. 8, 15276 (2017)","journal-title":"Nat. Commun."},{"unstructured":"Mathieu, M., Couprie, C., LeCun, Y.: Deep multi-scale video prediction beyond mean square error. In: ICLR (2016)","key":"37_CR2"},{"unstructured":"Xingjian, S., Chen, Z., Wang, H., Yeung, D.Y., Wong, W.K., Woo, W.C.: Convolutional LSTM network: a machine learning approach for precipitation nowcasting. In: NIPS (2015)","key":"37_CR3"},{"unstructured":"Villegas, R., Yang, J., Hong, S., Lin, X., Lee, H.: Decomposing motion and content for natural video sequence prediction. In: ICLR (2017)","key":"37_CR4"},{"unstructured":"Denton, E., Birodkar, V.: Unsupervised learning of disentangled representations from video. In: NIPS (2017)","key":"37_CR5"},{"doi-asserted-by":"crossref","unstructured":"Tran, D., Bourdev, L., Fergus, R., Torresani, L., Paluri, M.: Learning spatiotemporal features with 3D convolutional networks. In: ICCV (2015)","key":"37_CR6","DOI":"10.1109\/ICCV.2015.510"},{"key":"37_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"835","DOI":"10.1007\/978-3-319-46478-7_51","volume-title":"Computer Vision \u2013 ECCV 2016","author":"J Walker","year":"2016","unstructured":"Walker, J., Doersch, C., Gupta, A., Hebert, M.: An uncertain future: forecasting from static images using variational autoencoders. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9911, pp. 835\u2013851. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46478-7_51"},{"unstructured":"Xue, T., Wu, J., Bouman, K., Freeman, B.: Visual dynamics: probabilistic future frame synthesis via cross convolutional networks. In: NIPS (2016)","key":"37_CR8"},{"key":"37_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"707","DOI":"10.1007\/978-3-642-15552-9_51","volume-title":"Computer Vision \u2013 ECCV 2010","author":"J Yuen","year":"2010","unstructured":"Yuen, J., Torralba, A.: A data-driven approach for event prediction. In: Daniilidis, K., Maragos, P., Paragios, N. (eds.) ECCV 2010. LNCS, vol. 6312, pp. 707\u2013720. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-642-15552-9_51"},{"key":"37_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"689","DOI":"10.1007\/978-3-319-10578-9_45","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T Lan","year":"2014","unstructured":"Lan, T., Chen, T.-C., Savarese, S.: A hierarchical representation for future action prediction. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8691, pp. 689\u2013704. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10578-9_45"},{"issue":"2","key":"37_CR11","doi-asserted-by":"publisher","first-page":"191","DOI":"10.1007\/s11263-013-0683-3","volume":"107","author":"M Hoai","year":"2014","unstructured":"Hoai, M., De la Torre, F.: Max-margin early event detectors. IJCV 107(2), 191\u2013202 (2014)","journal-title":"IJCV"},{"key":"37_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"201","DOI":"10.1007\/978-3-642-33765-9_15","volume-title":"Computer Vision \u2013 ECCV 2012","author":"KM Kitani","year":"2012","unstructured":"Kitani, K.M., Ziebart, B.D., Bagnell, J.A., Hebert, M.: Activity forecasting. In: Fitzgibbon, A., Lazebnik, S., Perona, P., Sato, Y., Schmid, C. (eds.) ECCV 2012. LNCS, vol. 7575, pp. 201\u2013214. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-33765-9_15"},{"doi-asserted-by":"crossref","unstructured":"Vondrick, C., Pirsiavash, H., Torralba, A.: Anticipating visual representations from unlabeled video. In: CVPR (2016)","key":"37_CR13","DOI":"10.1109\/CVPR.2016.18"},{"doi-asserted-by":"crossref","unstructured":"Walker, J., Gupta, A., Hebert, M.: Dense optical flow prediction from a static image. In: ICCV (2015)","key":"37_CR14","DOI":"10.1109\/ICCV.2015.281"},{"unstructured":"Srivastava, N., Mansimov, E., Salakhudinov, R.: Unsupervised learning of video representations using LSTMS. In: ICML (2015)","key":"37_CR15"},{"unstructured":"Oh, J., Guo, X., Lee, H., Lewis, R.L., Singh, S.: Action-conditional video prediction using deep networks in atari games. In: NIPS (2015)","key":"37_CR16"},{"unstructured":"Babaeizadeh, M., Finn, C., Erhan, D., Campbell, R.H., Levine, S.: Stochastic variational video prediction. In: ICLR (2018)","key":"37_CR17"},{"doi-asserted-by":"crossref","unstructured":"Finn, C., Levine, S.: Deep visual foresight for planning robot motion. In: ICRA (2017)","key":"37_CR18","DOI":"10.1109\/ICRA.2017.7989324"},{"unstructured":"Vondrick, C., Pirsiavash, H., Torralba, A.: Generating videos with scene dynamics. In: NIPS (2016)","key":"37_CR19"},{"doi-asserted-by":"crossref","unstructured":"Liang, X., Lee, L., Dai, W., Xing, E.P.: Dual motion GAN for future-flow embedded video prediction. In: ICCV (2017)","key":"37_CR20","DOI":"10.1109\/ICCV.2017.194"},{"doi-asserted-by":"crossref","unstructured":"Tulyakov, S., Liu, M.Y., Yang, X., Kautz, J.: Mocogan: decomposing motion and content for video generation. arXiv preprint arXiv:1707.04993 (2017)","key":"37_CR21","DOI":"10.1109\/CVPR.2018.00165"},{"unstructured":"Finn, C., Goodfellow, I., Levine, S.: Unsupervised learning for physical interaction through video prediction. In: NIPS (2016)","key":"37_CR22"},{"doi-asserted-by":"crossref","unstructured":"Vondrick, C., Torralba, A.: Generating the future with adversarial transformers. In: CVPR (2017)","key":"37_CR23","DOI":"10.1109\/CVPR.2017.319"},{"doi-asserted-by":"crossref","unstructured":"Chao, Y.W., Yang, J., Price, B., Cohen, S., Deng, J.: Forecasting human dynamics from static images. In: CVPR (2017)","key":"37_CR24","DOI":"10.1109\/CVPR.2017.388"},{"unstructured":"Villegas, R., Yang, J., Zou, Y., Sohn, S., Lin, X., Lee, H.: Learning to generate long-term future via hierarchical prediction. In: ICML (2017)","key":"37_CR25"},{"issue":"2","key":"37_CR26","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1023\/A:1021669406132","volume":"51","author":"G Doretto","year":"2003","unstructured":"Doretto, G., Chiuso, A., Wu, Y.N., Soatto, S.: Dynamic textures. IJCV 51(2), 91\u2013109 (2003)","journal-title":"IJCV"},{"key":"37_CR27","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"603","DOI":"10.1007\/978-3-540-24671-8_48","volume-title":"Computer Vision - ECCV 2004","author":"L Yuan","year":"2004","unstructured":"Yuan, L., Wen, F., Liu, C., Shum, H.-Y.: Synthesizing dynamic texture with closed-loop linear dynamic system. In: Pajdla, T., Matas, J. (eds.) ECCV 2004. LNCS, vol. 3022, pp. 603\u2013616. Springer, Heidelberg (2004). https:\/\/doi.org\/10.1007\/978-3-540-24671-8_48"},{"doi-asserted-by":"crossref","unstructured":"Xie, J., Zhu, S.C., Wu, Y.N.: Synthesizing dynamic patterns by spatial-temporal generative convnet. In: CVPR (2017)","key":"37_CR28","DOI":"10.1109\/CVPR.2017.119"},{"unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes. In: ICLR (2014)","key":"37_CR29"},{"unstructured":"Sohn, K., Lee, H., Yan, X.: Learning structured output representation using deep conditional generative models. In: NIPS (2015)","key":"37_CR30"},{"unstructured":"Reed, S.E., Zhang, Y., Zhang, Y., Lee, H.: Deep visual analogy-making. In: NIPS (2015)","key":"37_CR31"},{"key":"37_CR32","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"286","DOI":"10.1007\/978-3-319-46493-0_18","volume-title":"Computer Vision \u2013 ECCV 2016","author":"T Zhou","year":"2016","unstructured":"Zhou, T., Tulsiani, S., Sun, W., Malik, J., Efros, A.A.: View synthesis by appearance flow. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9908, pp. 286\u2013301. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46493-0_18"},{"doi-asserted-by":"crossref","unstructured":"Liu, Z., Yeh, R., Tang, X., Liu, Y., Agarwala, A.: Video frame synthesis using deep voxel flow. In: ICCV (2017)","key":"37_CR33","DOI":"10.1109\/ICCV.2017.478"},{"doi-asserted-by":"crossref","unstructured":"Park, E., Yang, J., Yumer, E., Ceylan, D., Berg, A.C.: Transformation-grounded image generation network for novel 3D view synthesis. In: CVPR (2017)","key":"37_CR34","DOI":"10.1109\/CVPR.2017.82"},{"unstructured":"Chopra, S., Hadsell, R., LeCun, Y.: Learning a similarity metric discriminatively, with application to face verification. In: CVPR (2005)","key":"37_CR35"},{"key":"37_CR36","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"694","DOI":"10.1007\/978-3-319-46475-6_43","volume-title":"Computer Vision \u2013 ECCV 2016","author":"J Johnson","year":"2016","unstructured":"Johnson, J., Alahi, A., Fei-Fei, L.: Perceptual losses for real-time style transfer and super-resolution. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9906, pp. 694\u2013711. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46475-6_43"},{"unstructured":"Dosovitskiy, A., Brox, T.: Generating images with perceptual similarity metrics based on deep networks. In: NIPS (2016)","key":"37_CR37"},{"unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. In: ICLR (2015)","key":"37_CR38"},{"issue":"Nov","key":"37_CR39","first-page":"2579","volume":"9","author":"L van der Maaten","year":"2008","unstructured":"van der Maaten, L., Hinton, G.: Visualizing data using t-SNE. JMLR 9(Nov), 2579\u20132605 (2008)","journal-title":"JMLR"},{"unstructured":"Wu, Z., et al.: 3D shapenets: a deep representation for volumetric shapes. In: CVPR (2015)","key":"37_CR40"},{"doi-asserted-by":"crossref","unstructured":"Ranjan, A., Black, M.J.: Optical flow estimation using a spatial pyramid network. In: CVPR (2017)","key":"37_CR41","DOI":"10.1109\/CVPR.2017.291"},{"doi-asserted-by":"crossref","unstructured":"Schuldt, C., Laptev, I., Caputo, B.: Recognizing human actions: a local SVM approach. In: ICPR (2004)","key":"37_CR42","DOI":"10.1109\/ICPR.2004.1334462"},{"doi-asserted-by":"crossref","unstructured":"Gao, R., Xiong, B., Grauman, K.: Im2Flow: motion hallucination from static images for action recognition. arXiv preprint arXiv:1712.04109 (2017)","key":"37_CR43","DOI":"10.1109\/CVPR.2018.00622"},{"doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A.A., Shechtman, E., Wang, O.: The unreasonable effectiveness of deep networks as a perceptual metric. In: CVPR (2018)","key":"37_CR44","DOI":"10.1109\/CVPR.2018.00068"},{"unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. In: NIPS (2012)","key":"37_CR45"},{"issue":"4","key":"37_CR46","doi-asserted-by":"publisher","first-page":"149","DOI":"10.1145\/2897824.2925942","volume":"35","author":"YH Tsai","year":"2016","unstructured":"Tsai, Y.H., Shen, X., Lin, Z., Sunkavalli, K., Yang, M.H.: Sky is not the limit: semantic-aware sky replacement. ACM Trans. Graph. 35(4), 149\u2013159 (2016)","journal-title":"ACM Trans. Graph."}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2018"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-01240-3_37","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,6]],"date-time":"2022-10-06T00:05:28Z","timestamp":1665014728000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-01240-3_37"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783030012397","9783030012403"],"references-count":46,"URL":"http:\/\/dx.doi.org\/10.1007\/978-3-030-01240-3_37","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2018]]},"assertion":[{"value":"5 October 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Munich","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 September 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 September 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2018.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}