@inproceedings{zheng-etal-2021-alignment-agnostic,
title = "An Alignment-Agnostic Model for {C}hinese Text Error Correction",
author = "Zheng, Liying and
Deng, Yue and
Song, Weishun and
Xu, Liang and
Xiao, Jing",
editor = "Moens, Marie-Francine and
Huang, Xuanjing and
Specia, Lucia and
Yih, Scott Wen-tau",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2021",
month = nov,
year = "2021",
address = "Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.findings-emnlp.30",
doi = "10.18653/v1/2021.findings-emnlp.30",
pages = "321--326",
abstract = "This paper investigates how to correct Chinese text errors with types of mistaken, missing and redundant characters, which are common for Chinese native speakers. Most existing models based on detect-correct framework can correct mistaken characters, but cannot handle missing or redundant characters due to inconsistency between model inputs and outputs. Although Seq2Seq-based or sequence tagging methods provide solutions to the three error types and achieved relatively good results in English context, they do not perform well in Chinese context according to our experiments. In our work, we propose a novel alignment-agnostic detect-correct framework that can handle both text aligned and non-aligned situations and can serve as a cold start model when no annotation data are provided. Experimental results on three datasets demonstrate that our method is effective and achieves a better performance than most recent published models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zheng-etal-2021-alignment-agnostic">
<titleInfo>
<title>An Alignment-Agnostic Model for Chinese Text Error Correction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Liying</namePart>
<namePart type="family">Zheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Deng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Weishun</namePart>
<namePart type="family">Song</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liang</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jing</namePart>
<namePart type="family">Xiao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2021</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marie-Francine</namePart>
<namePart type="family">Moens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xuanjing</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucia</namePart>
<namePart type="family">Specia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Scott</namePart>
<namePart type="given">Wen-tau</namePart>
<namePart type="family">Yih</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Punta Cana, Dominican Republic</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper investigates how to correct Chinese text errors with types of mistaken, missing and redundant characters, which are common for Chinese native speakers. Most existing models based on detect-correct framework can correct mistaken characters, but cannot handle missing or redundant characters due to inconsistency between model inputs and outputs. Although Seq2Seq-based or sequence tagging methods provide solutions to the three error types and achieved relatively good results in English context, they do not perform well in Chinese context according to our experiments. In our work, we propose a novel alignment-agnostic detect-correct framework that can handle both text aligned and non-aligned situations and can serve as a cold start model when no annotation data are provided. Experimental results on three datasets demonstrate that our method is effective and achieves a better performance than most recent published models.</abstract>
<identifier type="citekey">zheng-etal-2021-alignment-agnostic</identifier>
<identifier type="doi">10.18653/v1/2021.findings-emnlp.30</identifier>
<location>
<url>https://aclanthology.org/2021.findings-emnlp.30</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>321</start>
<end>326</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T An Alignment-Agnostic Model for Chinese Text Error Correction
%A Zheng, Liying
%A Deng, Yue
%A Song, Weishun
%A Xu, Liang
%A Xiao, Jing
%Y Moens, Marie-Francine
%Y Huang, Xuanjing
%Y Specia, Lucia
%Y Yih, Scott Wen-tau
%S Findings of the Association for Computational Linguistics: EMNLP 2021
%D 2021
%8 November
%I Association for Computational Linguistics
%C Punta Cana, Dominican Republic
%F zheng-etal-2021-alignment-agnostic
%X This paper investigates how to correct Chinese text errors with types of mistaken, missing and redundant characters, which are common for Chinese native speakers. Most existing models based on detect-correct framework can correct mistaken characters, but cannot handle missing or redundant characters due to inconsistency between model inputs and outputs. Although Seq2Seq-based or sequence tagging methods provide solutions to the three error types and achieved relatively good results in English context, they do not perform well in Chinese context according to our experiments. In our work, we propose a novel alignment-agnostic detect-correct framework that can handle both text aligned and non-aligned situations and can serve as a cold start model when no annotation data are provided. Experimental results on three datasets demonstrate that our method is effective and achieves a better performance than most recent published models.
%R 10.18653/v1/2021.findings-emnlp.30
%U https://aclanthology.org/2021.findings-emnlp.30
%U https://doi.org/10.18653/v1/2021.findings-emnlp.30
%P 321-326
Markdown (Informal)
[An Alignment-Agnostic Model for Chinese Text Error Correction](https://aclanthology.org/2021.findings-emnlp.30) (Zheng et al., Findings 2021)
ACL