@inproceedings{kaing-etal-2024-robust,
title = "Robust Neural Machine Translation for Abugidas by Glyph Perturbation",
author = "Kaing, Hour and
Ding, Chenchen and
Tanaka, Hideki and
Utiyama, Masao",
editor = "Graham, Yvette and
Purver, Matthew",
booktitle = "Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 2: Short Papers)",
month = mar,
year = "2024",
address = "St. Julian{'}s, Malta",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.eacl-short.27/",
pages = "311--318",
abstract = "Neural machine translation (NMT) systems are vulnerable when trained on limited data. This is a common scenario in low-resource tasks in the real world. To increase robustness, a solution is to intently add realistic noise in the training phase. Noise simulation using text perturbation has been proven to be efficient in writing systems that use Latin letters. In this study, we further explore perturbation techniques on more complex abugida writing systems, for which the visual similarity of complex glyphs is considered to capture the essential nature of these writing systems. Besides the generated noise, we propose a training strategy to improve robustness. We conducted experiments on six languages: Bengali, Hindi, Myanmar, Khmer, Lao, and Thai. By overcoming the introduced noise, we obtained non-degenerate NMT systems with improved robustness for low-resource tasks for abugida glyphs."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kaing-etal-2024-robust">
<titleInfo>
<title>Robust Neural Machine Translation for Abugidas by Glyph Perturbation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hour</namePart>
<namePart type="family">Kaing</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chenchen</namePart>
<namePart type="family">Ding</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hideki</namePart>
<namePart type="family">Tanaka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Masao</namePart>
<namePart type="family">Utiyama</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 2: Short Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yvette</namePart>
<namePart type="family">Graham</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthew</namePart>
<namePart type="family">Purver</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">St. Julian’s, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Neural machine translation (NMT) systems are vulnerable when trained on limited data. This is a common scenario in low-resource tasks in the real world. To increase robustness, a solution is to intently add realistic noise in the training phase. Noise simulation using text perturbation has been proven to be efficient in writing systems that use Latin letters. In this study, we further explore perturbation techniques on more complex abugida writing systems, for which the visual similarity of complex glyphs is considered to capture the essential nature of these writing systems. Besides the generated noise, we propose a training strategy to improve robustness. We conducted experiments on six languages: Bengali, Hindi, Myanmar, Khmer, Lao, and Thai. By overcoming the introduced noise, we obtained non-degenerate NMT systems with improved robustness for low-resource tasks for abugida glyphs.</abstract>
<identifier type="citekey">kaing-etal-2024-robust</identifier>
<location>
<url>https://aclanthology.org/2024.eacl-short.27/</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>311</start>
<end>318</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Robust Neural Machine Translation for Abugidas by Glyph Perturbation
%A Kaing, Hour
%A Ding, Chenchen
%A Tanaka, Hideki
%A Utiyama, Masao
%Y Graham, Yvette
%Y Purver, Matthew
%S Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 2: Short Papers)
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julian’s, Malta
%F kaing-etal-2024-robust
%X Neural machine translation (NMT) systems are vulnerable when trained on limited data. This is a common scenario in low-resource tasks in the real world. To increase robustness, a solution is to intently add realistic noise in the training phase. Noise simulation using text perturbation has been proven to be efficient in writing systems that use Latin letters. In this study, we further explore perturbation techniques on more complex abugida writing systems, for which the visual similarity of complex glyphs is considered to capture the essential nature of these writing systems. Besides the generated noise, we propose a training strategy to improve robustness. We conducted experiments on six languages: Bengali, Hindi, Myanmar, Khmer, Lao, and Thai. By overcoming the introduced noise, we obtained non-degenerate NMT systems with improved robustness for low-resource tasks for abugida glyphs.
%U https://aclanthology.org/2024.eacl-short.27/
%P 311-318
Markdown (Informal)
[Robust Neural Machine Translation for Abugidas by Glyph Perturbation](https://aclanthology.org/2024.eacl-short.27/) (Kaing et al., EACL 2024)
ACL
- Hour Kaing, Chenchen Ding, Hideki Tanaka, and Masao Utiyama. 2024. Robust Neural Machine Translation for Abugidas by Glyph Perturbation. In Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 2: Short Papers), pages 311–318, St. Julian’s, Malta. Association for Computational Linguistics.