iBet uBet web content aggregator. Adding the entire web to your favor.
iBet uBet web content aggregator. Adding the entire web to your favor.



Link to original content: https://api.crossref.org/works/10.1145/3644815.3644950
{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,11,22]],"date-time":"2024-11-22T05:25:23Z","timestamp":1732253123727,"version":"3.28.0"},"publisher-location":"New York, NY, USA","reference-count":46,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4,14]]},"DOI":"10.1145\/3644815.3644950","type":"proceedings-article","created":{"date-parts":[[2024,6,11]],"date-time":"2024-06-11T17:28:38Z","timestamp":1718126918000},"page":"166-171","update-policy":"http:\/\/dx.doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["(Why) Is My Prompt Getting Worse? Rethinking Regression Testing for Evolving LLM APIs"],"prefix":"10.1145","author":[{"ORCID":"http:\/\/orcid.org\/0009-0002-3376-179X","authenticated-orcid":false,"given":"Wanqin","family":"Ma","sequence":"first","affiliation":[{"name":"The Hong Kong University of Science and Technology, Hong Kong, Hong Kong"}]},{"ORCID":"http:\/\/orcid.org\/0000-0001-5016-7296","authenticated-orcid":false,"given":"Chenyang","family":"Yang","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, Pennsylvania, USA"}]},{"ORCID":"http:\/\/orcid.org\/0000-0002-4450-4572","authenticated-orcid":false,"given":"Christian","family":"K\u00e4stner","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, Pennsylvania, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,6,11]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.5555\/3495724.3495784"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/2702123.2702509"},{"key":"e_1_3_2_1_3_1","unstructured":"Anthropic. 2023. Claude. https:\/\/claude.ai\/"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3338906.3340442"},{"key":"e_1_3_2_1_5_1","volume-title":"Lin (Eds.)","volume":"33","author":"Brown Tom","year":"2020","unstructured":"Tom Brown, Benjamin Mann, et al. 2020. Language Models are Few-Shot Learners. In Advances in Neural Information Processing Systems, H. Larochelle, M. Ranzato, R. Hadsell, M.F. Balcan, and H. Lin (Eds.), Vol. 33. Curran Associates, Inc., 1877--1901."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3581268"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","unstructured":"Lingjiao Chen Matei Zaharia and James Zou. 2023. How is ChatGPT's behavior changing over time? arXiv:2307.09009 [cs.CL]","DOI":"10.1162\/99608f92.5317da47"},{"volume-title":"Jeffrey Sorensen, Lucas Dixon, Lucy Vasserman, and nithum.","year":"2019","author":"Borkan Daniel","key":"e_1_3_2_1_8_1","unstructured":"cjadams, Daniel Borkan, inversion, Jeffrey Sorensen, Lucas Dixon, Lucy Vasserman, and nithum. 2019. Jigsaw Unintended Bias in Toxicity Classification. https:\/\/kaggle.com\/competitions\/jigsaw-unintended-bias-in-toxicity-classification"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3368089.3409688"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSME.2019.00051"},{"volume-title":"Domino: Discovering systematic errors with cross-modal embeddings. arXiv preprint arXiv:2203.14960","year":"2022","author":"Eyuboglu Sabri","key":"e_1_3_2_1_11_1","unstructured":"Sabri Eyuboglu, Maya Varma, Khaled Saab, Jean-Benoit Delbrouck, Christopher Lee-Messer, Jared Dunnmon, James Zou, and Christopher R\u00e9. 2022. Domino: Discovering systematic errors with cross-modal embeddings. arXiv preprint arXiv:2203.14960 (2022)."},{"volume-title":"InCoder: A Generative Model for Code Infilling and Synthesis. In The Eleventh International Conference on Learning Representations.","year":"2023","author":"Fried Daniel","key":"e_1_3_2_1_12_1","unstructured":"Daniel Fried, Armen Aghajanyan, Jessy Lin, Sida Wang, Eric Wallace, Freda Shi, Ruiqi Zhong, Scott Yih, Luke Zettlemoyer, and Mike Lewis. 2023. InCoder: A Generative Model for Code Infilling and Synthesis. In The Eleventh International Conference on Learning Representations."},{"key":"e_1_3_2_1_13_1","unstructured":"Google. 2023. Bard. https:\/\/bard.google.com\/chat"},{"key":"e_1_3_2_1_14_1","unstructured":"Google. 2023. Using machine learning to reduce toxicity online. https:\/\/www.perspectiveapi.com\/"},{"volume-title":"Deceiving google's perspective api built for detecting toxic comments. arXiv preprint arXiv:1702.08138","year":"2017","author":"Hosseini Hossein","key":"e_1_3_2_1_15_1","unstructured":"Hossein Hosseini, Sreeram Kannan, Baosen Zhang, and Radha Poovendran. 2017. Deceiving google's perspective api built for detecting toxic comments. arXiv preprint arXiv:1702.08138 (2017)."},{"volume-title":"Challenges and applications of large language models. arXiv preprint arXiv:2307.10169","year":"2023","author":"Kaddour Jean","key":"e_1_3_2_1_16_1","unstructured":"Jean Kaddour, Joshua Harris, Maximilian Mozes, Herbie Bradley, Roberta Raileanu, and Robert McHardy. 2023. Challenges and applications of large language models. arXiv preprint arXiv:2307.10169 (2023)."},{"key":"e_1_3_2_1_17_1","unstructured":"Christian K\u00e4stner. 2022. Machine Learning in Production: From Models to Products."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICWS.2013.48"},{"volume-title":"Article 195 (jan","year":"2023","author":"Liu Pengfei","key":"e_1_3_2_1_19_1","unstructured":"Pengfei Liu, Weizhe Yuan, Jinlan Fu, Zhengbao Jiang, Hiroaki Hayashi, and Graham Neubig. 2023. Pre-Train, Prompt, and Predict: A Systematic Survey of Prompting Methods in Natural Language Processing. ACM Comput. Surv. 55, 9, Article 195 (jan 2023), 35 pages."},{"volume-title":"Instruction Position Matters in Sequence Generation with Large Language Models. ArXiv abs\/2308.12097","year":"2023","author":"Liu Yanjun","key":"e_1_3_2_1_20_1","unstructured":"Yanjun Liu, Xianfeng Zeng, Fandong Meng, and Jie Zhou. 2023. Instruction Position Matters in Sequence Generation with Large Language Models. ArXiv abs\/2308.12097 (2023). https:\/\/api.semanticscholar.org\/CorpusID:261076308"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.556"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3510003.3510111"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"crossref","unstructured":"Abhishek Mishra. 2019. Machine learning in the AWS cloud: Add intelligence to applications with Amazon Sagemaker and Amazon Rekognition. https:\/\/aws.amazon.com\/rekognition\/","DOI":"10.1002\/9781119556749"},{"volume-title":"Bum Chul Kwon, and Chris Bryan","year":"2023","author":"Mishra Aditi","key":"e_1_3_2_1_24_1","unstructured":"Aditi Mishra, Utkarsh Soni, Anjana Arunkumar, Jinbin Huang, Bum Chul Kwon, and Chris Bryan. 2023. PromptAid: Prompt Exploration, Perturbation, Testing and Iteration using Visual Analytics for Large Language Models. arXiv preprint arXiv:2304.01964 (2023)."},{"volume-title":"Proceedings of the 27th International Conference on Computational Linguistics. Association for Computational Linguistics, 2340--2353","year":"2018","author":"Naik Aakanksha","key":"e_1_3_2_1_25_1","unstructured":"Aakanksha Naik, Abhilasha Ravichander, Norman Sadeh, Carolyn Rose, and Graham Neubig. 2018. Stress Test Evaluation for Natural Language Inference. In Proceedings of the 27th International Conference on Computational Linguistics. Association for Computational Linguistics, 2340--2353."},{"key":"e_1_3_2_1_26_1","unstructured":"OpenAI. 2023. ChatGPT. https:\/\/chat.openai.com\/"},{"key":"e_1_3_2_1_27_1","unstructured":"OpenAI. 2023. Deprecations - OpenAI API. https:\/\/platform.openai.com\/docs\/deprecations"},{"key":"e_1_3_2_1_28_1","unstructured":"OpenAI. 2023. GPT-3.5 Documentation. Retrieved from. https:\/\/platform.openai.com\/docs\/models\/gpt-3-5"},{"volume-title":"LLM is Like a Box of Chocolates: the Non-determinism of ChatGPT in Code Generation. arXiv preprint arXiv:2308.02828","year":"2023","author":"Ouyang Shuyin","key":"e_1_3_2_1_29_1","unstructured":"Shuyin Ouyang, Jie M Zhang, Mark Harman, and Meng Wang. 2023. LLM is Like a Box of Chocolates: the Non-determinism of ChatGPT in Code Generation. arXiv preprint arXiv:2308.02828 (2023)."},{"key":"e_1_3_2_1_30_1","unstructured":"radiator57. 2023. Experiencing Decreased Performance with ChatGPT-4. https:\/\/community.openai.com\/t\/experiencing-decreased-performance-with-chatgpt-4\/234269"},{"volume-title":"Testing language models (and prompts) like we test software. Medium (May","year":"2023","author":"Ribeiro Marco Tulio","key":"e_1_3_2_1_31_1","unstructured":"Marco Tulio Ribeiro. 2023. Testing language models (and prompts) like we test software. Medium (May 2023). https:\/\/towardsdatascience.com\/testing-large-language-models-like-we-test-software-92745d28a359"},{"volume-title":"Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","author":"Ribeiro Marco Tulio","key":"e_1_3_2_1_32_1","unstructured":"Marco Tulio Ribeiro and Scott Lundberg. 2022. Adaptive Testing and Debugging of NLP Models. In Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), Smaranda Muresan, Preslav Nakov, and Aline Villavicencio (Eds.). Association for Computational Linguistics, 3253--3267."},{"volume-title":"Beyond Accuracy: Behavioral Testing of NLP Models with CheckList. In Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics","year":"2020","author":"Ribeiro Marco Tulio","key":"e_1_3_2_1_33_1","unstructured":"Marco Tulio Ribeiro, Tongshuang Wu, Carlos Guestrin, and Sameer Singh. 2020. Beyond Accuracy: Behavioral Testing of NLP Models with CheckList. In Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, Dan Jurafsky, Joyce Chai, Natalie Schluter, and Joel Tetreault (Eds.). Association for Computational Linguistics, 4902--4912."},{"edition":"10","volume-title":"Software Engineering","author":"Sommerville Ian","key":"e_1_3_2_1_34_1","unstructured":"Ian Sommerville. 2015. Software Engineering (10th ed.). Pearson."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3377811.3380420"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3510003.3510206"},{"volume-title":"ChatLog: Recording and Analyzing ChatGPT Across Time. arXiv preprint arXiv:2304.14106","year":"2023","author":"Tu Shangqing","key":"e_1_3_2_1_37_1","unstructured":"Shangqing Tu, Chunyang Li, Jifan Yu, Xiaozhi Wang, Lei Hou, and Juanzi Li. 2023. ChatLog: Recording and Analyzing ChatGPT Across Time. arXiv preprint arXiv:2304.14106 (2023)."},{"volume-title":"Toxicity detection with generative prompt-based inference. arXiv preprint arXiv:2205.12390","year":"2022","author":"Wang Yau-Shian","key":"e_1_3_2_1_38_1","unstructured":"Yau-Shian Wang and Yingshan Chang. 2022. Toxicity detection with generative prompt-based inference. arXiv preprint arXiv:2205.12390 (2022)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1073"},{"volume-title":"Proceedings of the AAAI-23 Workshop on Artificial Intelligence Safety (SafeAI)","year":"2023","author":"Yang Chenyang","key":"e_1_3_2_1_40_1","unstructured":"Chenyang Yang, Rachel A Brower-Sinning, Grace Lewis, Christian K\u00e4stner, and Tongshuang Wu. 2023. Capabilities for Better ML Engineering. In Proceedings of the AAAI-23 Workshop on Artificial Intelligence Safety (SafeAI) (Washington, DC)."},{"volume-title":"Beyond Testers' Biases: Guiding Model Testing with Knowledge Bases using LLMs. (12","year":"2023","author":"Yang Chenyang","key":"e_1_3_2_1_41_1","unstructured":"Chenyang Yang, Rishabh Rustogi, Rachel Brower-Sinning, Grace A Lewis, Christian K\u00e4stner, and Tongshuang Wu. 2023. Beyond Testers' Biases: Guiding Model Testing with Knowledge Bases using LLMs. (12 2023). http:\/\/arxiv.org\/abs\/2310.09668"},{"volume-title":"Case study research: Design and methods","author":"Yin Robert K","key":"e_1_3_2_1_42_1","unstructured":"Robert K Yin. 2009. Case study research: Design and methods. Vol. 5. sage."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3581388"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1316"},{"key":"e_1_3_2_1_45_1","volume-title":"Proceedings of the 38th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"12706","author":"Zhao Zihao","year":"2021","unstructured":"Zihao Zhao, Eric Wallace, Shi Feng, Dan Klein, and Sameer Singh. 2021. Calibrate Before Use: Improving Few-shot Performance of Language Models. In Proceedings of the 38th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 139), Marina Meila and Tong Zhang (Eds.). PMLR, 12697--12706."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.131"}],"event":{"name":"CAIN 2024: IEEE\/ACM 3rd International Conference on AI Engineering - Software Engineering for AI","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering"],"location":"Lisbon Portugal","acronym":"CAIN 2024"},"container-title":["Proceedings of the IEEE\/ACM 3rd International Conference on AI Engineering - Software Engineering for AI"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3644815.3644950","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T16:26:30Z","timestamp":1732206390000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3644815.3644950"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,14]]},"references-count":46,"alternative-id":["10.1145\/3644815.3644950","10.1145\/3644815"],"URL":"http:\/\/dx.doi.org\/10.1145\/3644815.3644950","relation":{},"subject":[],"published":{"date-parts":[[2024,4,14]]},"assertion":[{"value":"2024-06-11","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}