@inproceedings{le-etal-2023-reducing,
title = "Reducing cohort bias in natural language understanding systems with targeted self-training scheme",
author = "Le, Dieu-thu and
Hernandez, Gabriela and
Chen, Bei and
Bradford, Melanie",
editor = "Sitaram, Sunayana and
Beigman Klebanov, Beata and
Williams, Jason D.",
booktitle = "Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 5: Industry Track)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.acl-industry.53",
doi = "10.18653/v1/2023.acl-industry.53",
pages = "552--560",
abstract = "Bias in machine learning models can be an issue when the models are trained on particular types of data that do not generalize well, causing under performance in certain groups of users. In this work, we focus on reducing the bias related to new customers in a digital voice assistant system. It is observed that natural language understanding models often have lower performance when dealing with requests coming from new users rather than experienced users. To mitigate this problem, we propose a framework that consists of two phases (1) a fixing phase with four active learning strategies used to identify important samples coming from new users, and (2) a self training phase where a teacher model trained from the first phase is used to annotate semi-supervised samples to expand the training data with relevant cohort utterances. We explain practical strategies that involve an identification of representative cohort-based samples through density clustering as well as employing implicit customer feedbacks to improve new customers{'} experience. We demonstrate the effectiveness of our approach in a real world large scale voice assistant system for two languages, German and French through both offline experiments as well as A/B testings.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="le-etal-2023-reducing">
<titleInfo>
<title>Reducing cohort bias in natural language understanding systems with targeted self-training scheme</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dieu-thu</namePart>
<namePart type="family">Le</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gabriela</namePart>
<namePart type="family">Hernandez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bei</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Melanie</namePart>
<namePart type="family">Bradford</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 5: Industry Track)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sunayana</namePart>
<namePart type="family">Sitaram</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Beata</namePart>
<namePart type="family">Beigman Klebanov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jason</namePart>
<namePart type="given">D</namePart>
<namePart type="family">Williams</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Bias in machine learning models can be an issue when the models are trained on particular types of data that do not generalize well, causing under performance in certain groups of users. In this work, we focus on reducing the bias related to new customers in a digital voice assistant system. It is observed that natural language understanding models often have lower performance when dealing with requests coming from new users rather than experienced users. To mitigate this problem, we propose a framework that consists of two phases (1) a fixing phase with four active learning strategies used to identify important samples coming from new users, and (2) a self training phase where a teacher model trained from the first phase is used to annotate semi-supervised samples to expand the training data with relevant cohort utterances. We explain practical strategies that involve an identification of representative cohort-based samples through density clustering as well as employing implicit customer feedbacks to improve new customers’ experience. We demonstrate the effectiveness of our approach in a real world large scale voice assistant system for two languages, German and French through both offline experiments as well as A/B testings.</abstract>
<identifier type="citekey">le-etal-2023-reducing</identifier>
<identifier type="doi">10.18653/v1/2023.acl-industry.53</identifier>
<location>
<url>https://aclanthology.org/2023.acl-industry.53</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>552</start>
<end>560</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Reducing cohort bias in natural language understanding systems with targeted self-training scheme
%A Le, Dieu-thu
%A Hernandez, Gabriela
%A Chen, Bei
%A Bradford, Melanie
%Y Sitaram, Sunayana
%Y Beigman Klebanov, Beata
%Y Williams, Jason D.
%S Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 5: Industry Track)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F le-etal-2023-reducing
%X Bias in machine learning models can be an issue when the models are trained on particular types of data that do not generalize well, causing under performance in certain groups of users. In this work, we focus on reducing the bias related to new customers in a digital voice assistant system. It is observed that natural language understanding models often have lower performance when dealing with requests coming from new users rather than experienced users. To mitigate this problem, we propose a framework that consists of two phases (1) a fixing phase with four active learning strategies used to identify important samples coming from new users, and (2) a self training phase where a teacher model trained from the first phase is used to annotate semi-supervised samples to expand the training data with relevant cohort utterances. We explain practical strategies that involve an identification of representative cohort-based samples through density clustering as well as employing implicit customer feedbacks to improve new customers’ experience. We demonstrate the effectiveness of our approach in a real world large scale voice assistant system for two languages, German and French through both offline experiments as well as A/B testings.
%R 10.18653/v1/2023.acl-industry.53
%U https://aclanthology.org/2023.acl-industry.53
%U https://doi.org/10.18653/v1/2023.acl-industry.53
%P 552-560
Markdown (Informal)
[Reducing cohort bias in natural language understanding systems with targeted self-training scheme](https://aclanthology.org/2023.acl-industry.53) (Le et al., ACL 2023)
ACL