{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,3,8]],"date-time":"2024-03-08T02:39:39Z","timestamp":1709865579947},"reference-count":41,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2018,4,30]],"date-time":"2018-04-30T00:00:00Z","timestamp":1525046400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2018,7]]},"DOI":"10.1007\/s11227-018-2391-9","type":"journal-article","created":{"date-parts":[[2018,4,30]],"date-time":"2018-04-30T15:11:25Z","timestamp":1525101085000},"page":"3415-3440","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["Load balancing in reducers for skewed data in MapReduce systems by using scalable simple random sampling"],"prefix":"10.1007","volume":"74","author":[{"given":"Elaheh","family":"Gavagsaz","sequence":"first","affiliation":[]},{"given":"Ali","family":"Rezaee","sequence":"additional","affiliation":[]},{"given":"Hamid","family":"Haj Seyyed Javadi","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,4,30]]},"reference":[{"key":"2391_CR1","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1016\/j.csi.2017.01.004","volume":"54","author":"J Akoka","year":"2017","unstructured":"Akoka J, Comyn-Wattiau I, Laoufi N (2017) Research on big data\u2014a systematic mapping study. Comput Stand Interfaces 54:105\u2013115. 
\n https:\/\/doi.org\/10.1016\/j.csi.2017.01.004","journal-title":"Comput Stand Interfaces"},{"issue":"3","key":"2391_CR2","doi-asserted-by":"publisher","first-page":"285","DOI":"10.1016\/j.bushor.2017.01.002","volume":"60","author":"A Alharthi","year":"2017","unstructured":"Alharthi A, Krotov V, Bowman M (2017) Addressing barriers to big data. Bus Horizons 60(3):285\u2013292. \n https:\/\/doi.org\/10.1016\/j.bushor.2017.01.002","journal-title":"Bus Horizons"},{"issue":"3","key":"2391_CR3","doi-asserted-by":"publisher","first-page":"267","DOI":"10.1109\/TETC.2014.2330519","volume":"2","author":"A Fahad","year":"2014","unstructured":"Fahad A, Alshatri N, Tari Z, Alamri A, Khalil I, Zomaya AY, Foufou S, Bouras A (2014) A survey of clustering algorithms for big data: taxonomy and empirical analysis. IEEE Trans Emerg Top Comput 2(3):267\u2013279. \n https:\/\/doi.org\/10.1109\/TETC.2014.2330519","journal-title":"IEEE Trans Emerg Top Comput"},{"issue":"3","key":"2391_CR4","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1016\/j.bushor.2017.01.004","volume":"60","author":"I Lee","year":"2017","unstructured":"Lee I (2017) Big data: dimensions, evolution, impacts, and challenges. Bus Horizons 60(3):293\u2013303. \n https:\/\/doi.org\/10.1016\/j.bushor.2017.01.004","journal-title":"Bus Horizons"},{"key":"2391_CR5","unstructured":"Big Data (2018) \n https:\/\/en.wikipedia.org\/wiki\/Big_data"},{"key":"2391_CR6","doi-asserted-by":"crossref","unstructured":"Wu H (2017) Big data management the mass weather logs. In: Smart Computing and Communication, pp 122\u2013132","DOI":"10.1007\/978-3-319-52015-5_13"},{"key":"2391_CR7","doi-asserted-by":"publisher","first-page":"167","DOI":"10.5121\/ijdps.2012.3113","volume":"3","author":"M Vaidya","year":"2012","unstructured":"Vaidya M (2012) Parallel processing of cluster by MapReduce. Int J Distrib Parallel Syst 3:167\u2013179. 
\n https:\/\/doi.org\/10.5121\/ijdps.2012.3113","journal-title":"Int J Distrib Parallel Syst"},{"issue":"2","key":"2391_CR8","doi-asserted-by":"publisher","first-page":"675","DOI":"10.1016\/j.compeleceng.2013.07.001","volume":"40","author":"Y Xu","year":"2014","unstructured":"Xu Y, Qu W, Li Z, Liu Z, Ji C, Li Y, Li H (2014) Balancing reducer workload for skewed data using sampling-based partitioning. Comput Electr Eng 40(2):675\u2013687. \n https:\/\/doi.org\/10.1016\/j.compeleceng.2013.07.001","journal-title":"Comput Electr Eng"},{"key":"2391_CR9","doi-asserted-by":"publisher","unstructured":"Gufler B, Augsten N, Reiser A, Kemper A (2012) Load balancing in MapReduce based on scalable cardinality estimates. In: IEEE 28th International Conference on Data Engineering, pp 522\u2013533. \n https:\/\/doi.org\/10.1109\/icde.2012.58","DOI":"10.1109\/icde.2012.58"},{"key":"2391_CR10","unstructured":"Meng X (2013) Scalable simple random sampling and stratified sampling. In: Proceedings of the 30th International Conference on International Conference on Machine Learning, Vol. 28, pp III-531\u2013III-539"},{"key":"2391_CR11","unstructured":"DeWitt DJ, Naughton JF, Schneider DA, Seshadri S (1992) Practical skew handling in parallel joins. In: Proceedings of the 18th International Conference on Very Large Data Bases, pp 27\u201340"},{"issue":"12","key":"2391_CR12","doi-asserted-by":"publisher","first-page":"1345","DOI":"10.1109\/71.250116","volume":"4","author":"JW Stamos","year":"1993","unstructured":"Stamos JW, Young HC (1993) A symmetric fragment and replicate algorithm for distributed joins. IEEE Trans Parallel Distrib Syst 4(12):1345\u20131354. \n https:\/\/doi.org\/10.1109\/71.250116","journal-title":"IEEE Trans Parallel Distrib Syst"},{"key":"2391_CR13","doi-asserted-by":"publisher","unstructured":"Le Y, Liu J, Erg\u00fcn F, Wang D (2014) Online load balancing for MapReduce with skewed data input. 
In: IEEE Conference on Computer Communications IEEE INFOCOM 2014, pp 2004\u20132012. \n https:\/\/doi.org\/10.1109\/infocom.2014.6848141","DOI":"10.1109\/infocom.2014.6848141"},{"issue":"1","key":"2391_CR14","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2830544.2830546","volume":"17","author":"D Karapiperis","year":"2015","unstructured":"Karapiperis D, Verykios VS (2015) Load-balancing the distance computations in record linkage. SIGKDD Explor Newsl 17(1):1\u20137. \n https:\/\/doi.org\/10.1145\/2830544.2830546","journal-title":"SIGKDD Explor Newsl"},{"key":"2391_CR15","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2017.03.013","author":"J Li","year":"2017","unstructured":"Li J, Liu Y, Pan J, Zhang P, Chen W, Wang L (2017) Map-balance-reduce: an improved parallel programming model for load balancing of MapReduce. Future Gener Comput Syst. \n https:\/\/doi.org\/10.1016\/j.future.2017.03.013","journal-title":"Future Gener Comput Syst"},{"key":"2391_CR16","unstructured":"Vu L, Alaghband G (2015) A load balancing parallel method for frequent pattern mining on multi-core cluster. In: Proceedings of the Symposium on High Performance Computing, pp 49\u201358"},{"key":"2391_CR17","doi-asserted-by":"publisher","unstructured":"Kwon Y, Balazinska M, Howe B, Rolia J (2010) Skew-resistant parallel processing of feature-extracting scientific user-defined functions. In: Proceedings of the 1st ACM Symposium on Cloud Computing, pp 75\u201386. \n https:\/\/doi.org\/10.1145\/1807128.1807140","DOI":"10.1145\/1807128.1807140"},{"key":"2391_CR18","doi-asserted-by":"publisher","unstructured":"Ramakrishnan SR, Swart G, Urmanov A (2012) Balancing reducer skew in MapReduce work-loads using progressive sampling. In: Proceedings of the Third ACM Symposium on Cloud Computing, pp 1\u201314. 
\n https:\/\/doi.org\/10.1145\/2391229.2391245","DOI":"10.1145\/2391229.2391245"},{"key":"2391_CR19","unstructured":"Gufler B, Augsten N, Reiser A, Kemper A (2011) Handling data skew in MapReduce. In: Proceedings of the 1st International Conference on Cloud Computing and Services Science, CLOSER 2011, pp 1\u20136"},{"key":"2391_CR20","doi-asserted-by":"publisher","unstructured":"Ibrahim S, Jin H, Lu L, Wu S, He B, Qi L (2010) LEEN: locality\/fairness-aware key partitioning for MapReduce in the Cloud. In: 2010 IEEE Second International Conference on Cloud Computing Technology and Science, pp 17\u201324. \n https:\/\/doi.org\/10.1109\/cloudcom.2010.25","DOI":"10.1109\/cloudcom.2010.25"},{"key":"2391_CR21","doi-asserted-by":"publisher","unstructured":"Kwon Y, Balazinska M, Howe B, Rolia J (2012) SkewTune: mitigating skew in mapreduce applications. In: Proceedings of the 2012 ACM SIGMOD International Conference on Management of Data, pp 25\u201336. \n https:\/\/doi.org\/10.1145\/2213836.2213840","DOI":"10.1145\/2213836.2213840"},{"key":"2391_CR22","doi-asserted-by":"publisher","unstructured":"Martha VS, Zhao W, Xu X (2013) h-MapReduce: a framework for workload balancing in MapReduce. In: 2013 IEEE 27th International Conference on Advanced Information Networking and Applications (AINA), pp 637\u2013644. \n https:\/\/doi.org\/10.1109\/aina.2013.48","DOI":"10.1109\/aina.2013.48"},{"issue":"9","key":"2391_CR23","doi-asserted-by":"publisher","first-page":"2520","DOI":"10.1109\/TPDS.2014.2350972","volume":"26","author":"Q Chen","year":"2015","unstructured":"Chen Q, Yao J, Xiao Z (2015) LIBRA: lightweight data skew mitigation in MapReduce. IEEE Trans Parallel Distrib Syst 26(9):2520\u20132533. \n https:\/\/doi.org\/10.1109\/TPDS.2014.2350972","journal-title":"IEEE Trans Parallel Distrib Syst"},{"key":"2391_CR24","doi-asserted-by":"publisher","unstructured":"Xu Y, Zou P, Qu W, Li Z, Li K, Cui X (2012) Sampling-based partitioning in MapReduce for skewed data. 
In: 2012 Seventh China Grid Annual Conference, pp 1\u20138. \n https:\/\/doi.org\/10.1109\/chinagrid.2012.18","DOI":"10.1109\/chinagrid.2012.18"},{"key":"2391_CR25","doi-asserted-by":"publisher","first-page":"287","DOI":"10.1016\/j.future.2016.06.027","volume":"78","author":"Z Tang","year":"2018","unstructured":"Tang Z, Zhang X, Li K, Li K (2018) An intermediate data placement algorithm for load balancing in Spark computing environment. Future Gener Comput Syst 78:287\u2013301. \n https:\/\/doi.org\/10.1016\/j.future.2016.06.027","journal-title":"Future Gener Comput Syst"},{"key":"2391_CR26","volume-title":"Probability and statistics for engineering and the sciences","author":"JL Devore","year":"2011","unstructured":"Devore JL (2011) Probability and statistics for engineering and the sciences. Nelson Education, Scarborough"},{"key":"2391_CR27","unstructured":"Estimating a Proportion for a small, finite population (2018) \n https:\/\/onlinecourses.science.psu.edu\/stat414\/node\/264"},{"key":"2391_CR28","volume-title":"Probability statistics for engineers and scientists","author":"REMRH Walpole","year":"2011","unstructured":"Walpole REMRH, Myers SL, Ye K (2011) Probability statistics for engineers and scientists. Pearson Prentice Hall, Upper Saddle River"},{"issue":"1","key":"2391_CR29","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1145\/3147.3165","volume":"11","author":"JS Vitter","year":"1985","unstructured":"Vitter JS (1985) Random sampling with a reservoir. ACM Trans Math Softw 11(1):37\u201357. \n https:\/\/doi.org\/10.1145\/3147.3165","journal-title":"ACM Trans Math Softw"},{"issue":"3","key":"2391_CR30","doi-asserted-by":"publisher","first-page":"261","DOI":"10.2307\/2346966","volume":"26","author":"AB Sunter","year":"1977","unstructured":"Sunter AB (1977) List sequential sampling with equal or unequal probabilities without placement. J R Stat Soc Ser C (Appl Stat) 26(3):261\u2013268. 
\n https:\/\/doi.org\/10.2307\/2346966","journal-title":"J R Stat Soc Ser C (Appl Stat)"},{"issue":"4","key":"2391_CR31","doi-asserted-by":"publisher","first-page":"448","DOI":"10.1016\/S0022-0000(73)80033-9","volume":"7","author":"M Blum","year":"1973","unstructured":"Blum M, Floyd RW, Pratt V, Rivest RL, Tarjan RE (1973) Time bounds for selection. J Comput Syst Sci 7(4):448\u2013461. \n https:\/\/doi.org\/10.1016\/S0022-0000(73)80033-9","journal-title":"J Comput Syst Sci"},{"key":"2391_CR32","doi-asserted-by":"publisher","first-page":"287","DOI":"10.1016\/S0167-5060(08)70356-X","volume-title":"Annals of discrete mathematics","author":"RL Graham","year":"1979","unstructured":"Graham RL, Lawler EL, Lenstra JK, Kan AHGR (1979) Optimization and approximation in deterministic sequencing and scheduling: a survey. In: Hammer PL, Johnson EL, Korte BH (eds) Annals of discrete mathematics, vol 5. Elsevier, Amsterdam, pp 287\u2013326. \n https:\/\/doi.org\/10.1016\/S0167-5060(08)70356-X"},{"issue":"2","key":"2391_CR33","doi-asserted-by":"publisher","first-page":"416","DOI":"10.1137\/0117039","volume":"17","author":"R Graham","year":"1969","unstructured":"Graham R (1969) Bounds on multiprocessing timing anomalies. SIAM J Appl Math 17(2):416\u2013429. \n https:\/\/doi.org\/10.1137\/0117039","journal-title":"SIAM J Appl Math"},{"key":"2391_CR34","volume-title":"Algorithm design","author":"J Kleinberg","year":"2006","unstructured":"Kleinberg J, Tardos \u00c9 (2006) Algorithm design. Pearson\/Addison-Wesley, Boston"},{"key":"2391_CR35","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511921735","volume-title":"The design of approximation algorithms","author":"DP Williamson","year":"2011","unstructured":"Williamson DP, Shmoys DB (2011) The design of approximation algorithms. Cambridge University Press, Cambridge"},{"key":"2391_CR36","unstructured":"Jimmy L (2009) The curse of Zipf and limits to parallelization: a look at the stragglers problem in MapReduce. 
In: Proceedings of LSDS-IR Workshop"},{"key":"2391_CR37","volume-title":"Human behavior and the principle of least effort: an introduction to human ecology","author":"GK Zipf","year":"1949","unstructured":"Zipf GK (1949) Human behavior and the principle of least effort: an introduction to human ecology. Addison-Wesley Press, Boston"},{"key":"2391_CR38","unstructured":"Apache Spark Examples (2017) \n https:\/\/spark.apache.org\/examples.html"},{"key":"2391_CR39","unstructured":"Range Partitioner (2017) \n https:\/\/spark.apache.org\/docs\/2.0.0\/api\/java\/org\/apache\/spark\/RangePartitioner.html"},{"issue":"7066","key":"2391_CR40","doi-asserted-by":"publisher","first-page":"1200","DOI":"10.1136\/bmj.313.7066.1200","volume":"313","author":"DG Altman","year":"1996","unstructured":"Altman DG, Bland JM (1996) Statistics notes: detecting skewness from summary information. BMJ 313(7066):1200","journal-title":"BMJ"},{"key":"2391_CR41","doi-asserted-by":"publisher","unstructured":"Khatami Z, Hong S, Lee J, Depner S, Chafi H, Ramanujam J, Kaiser H (2017) A load-balanced parallel and distributed sorting algorithm implemented with PGX.D. In: 2017 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW), pp 1317\u20131324. 
\n https:\/\/doi.org\/10.1109\/IPDPSW.2017.30","DOI":"10.1109\/IPDPSW.2017.30"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11227-018-2391-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-018-2391-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-018-2391-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,4,30]],"date-time":"2019-04-30T12:48:59Z","timestamp":1556628539000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11227-018-2391-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,4,30]]},"references-count":41,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2018,7]]}},"alternative-id":["2391"],"URL":"http:\/\/dx.doi.org\/10.1007\/s11227-018-2391-9","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"value":"0920-8542","type":"print"},{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,4,30]]},"assertion":[{"value":"30 April 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}