@inproceedings{hu-etal-2018-detecting,
title = "Detecting Syntactic Features of Translated {C}hinese",
author = {Hu, Hai and
Li, Wen and
K{\"u}bler, Sandra},
editor = "Brooke, Julian and
Flekova, Lucie and
Koppel, Moshe and
Solorio, Thamar",
booktitle = "Proceedings of the Second Workshop on Stylistic Variation",
month = jun,
year = "2018",
address = "New Orleans",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-1603",
doi = "10.18653/v1/W18-1603",
pages = "20--28",
abstract = "We present a machine learning approach to distinguish texts translated to Chinese (by humans) from texts originally written in Chinese, with a focus on a wide range of syntactic features. Using Support Vector Machines (SVMs) as classifier on a genre-balanced corpus in translation studies of Chinese, we find that constituent parse trees and dependency triples as features without lexical information perform very well on the task, with an F-measure above 90{\%}, close to the results of lexical n-gram features, without the risk of learning topic information rather than translation features. Thus, we claim syntactic features alone can accurately distinguish translated from original Chinese. Translated Chinese exhibits an increased use of determiners, subject position pronouns, NP + {``}的{''} as NP modifiers, multiple NPs or VPs conjoined by ''、'', among other structures. We also interpret the syntactic features with reference to previous translation studies in Chinese, particularly the usage of pronouns.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hu-etal-2018-detecting">
<titleInfo>
<title>Detecting Syntactic Features of Translated Chinese</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hai</namePart>
<namePart type="family">Hu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wen</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sandra</namePart>
<namePart type="family">Kübler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Stylistic Variation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Julian</namePart>
<namePart type="family">Brooke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucie</namePart>
<namePart type="family">Flekova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Moshe</namePart>
<namePart type="family">Koppel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thamar</namePart>
<namePart type="family">Solorio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">New Orleans</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present a machine learning approach to distinguish texts translated to Chinese (by humans) from texts originally written in Chinese, with a focus on a wide range of syntactic features. Using Support Vector Machines (SVMs) as classifier on a genre-balanced corpus in translation studies of Chinese, we find that constituent parse trees and dependency triples as features without lexical information perform very well on the task, with an F-measure above 90%, close to the results of lexical n-gram features, without the risk of learning topic information rather than translation features. Thus, we claim syntactic features alone can accurately distinguish translated from original Chinese. Translated Chinese exhibits an increased use of determiners, subject position pronouns, NP + “的” as NP modifiers, multiple NPs or VPs conjoined by ”、”, among other structures. We also interpret the syntactic features with reference to previous translation studies in Chinese, particularly the usage of pronouns.</abstract>
<identifier type="citekey">hu-etal-2018-detecting</identifier>
<identifier type="doi">10.18653/v1/W18-1603</identifier>
<location>
<url>https://aclanthology.org/W18-1603</url>
</location>
<part>
<date>2018-06</date>
<extent unit="page">
<start>20</start>
<end>28</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Detecting Syntactic Features of Translated Chinese
%A Hu, Hai
%A Li, Wen
%A Kübler, Sandra
%Y Brooke, Julian
%Y Flekova, Lucie
%Y Koppel, Moshe
%Y Solorio, Thamar
%S Proceedings of the Second Workshop on Stylistic Variation
%D 2018
%8 June
%I Association for Computational Linguistics
%C New Orleans
%F hu-etal-2018-detecting
%X We present a machine learning approach to distinguish texts translated to Chinese (by humans) from texts originally written in Chinese, with a focus on a wide range of syntactic features. Using Support Vector Machines (SVMs) as classifier on a genre-balanced corpus in translation studies of Chinese, we find that constituent parse trees and dependency triples as features without lexical information perform very well on the task, with an F-measure above 90%, close to the results of lexical n-gram features, without the risk of learning topic information rather than translation features. Thus, we claim syntactic features alone can accurately distinguish translated from original Chinese. Translated Chinese exhibits an increased use of determiners, subject position pronouns, NP + “的” as NP modifiers, multiple NPs or VPs conjoined by ”、”, among other structures. We also interpret the syntactic features with reference to previous translation studies in Chinese, particularly the usage of pronouns.
%R 10.18653/v1/W18-1603
%U https://aclanthology.org/W18-1603
%U https://doi.org/10.18653/v1/W18-1603
%P 20-28
Markdown (Informal)
[Detecting Syntactic Features of Translated Chinese](https://aclanthology.org/W18-1603) (Hu et al., Style-Var 2018)
ACL