@inproceedings{fomicheva-bel-2016-using,
    title = "Using {Contextual} Information for {Machine Translation} Evaluation",
    author = "Fomicheva, Marina and
      Bel, N{\'u}ria",
    editor = "Calzolari, Nicoletta and
      Choukri, Khalid and
      Declerck, Thierry and
      Goggi, Sara and
      Grobelnik, Marko and
      Maegaard, Bente and
      Mariani, Joseph and
      Mazo, Helene and
      Moreno, Asuncion and
      Odijk, Jan and
      Piperidis, Stelios",
    booktitle = "Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)",
    month = may,
    year = "2016",
    address = "Portoro{\v{z}}, Slovenia",
    publisher = "European Language Resources Association (ELRA)",
    url = "https://aclanthology.org/L16-1437",
    pages = "2755--2761",
    abstract = "Automatic evaluation of Machine Translation (MT) is typically approached by measuring similarity between the candidate MT and a human reference translation. An important limitation of existing evaluation systems is that they are unable to distinguish candidate-reference differences that arise due to acceptable linguistic variation from the differences induced by MT errors. In this paper we present a new metric, UPF-Cobalt, that addresses this issue by taking into consideration the syntactic contexts of candidate and reference words. The metric applies a penalty when the words are similar but the contexts in which they occur are not equivalent. In this way, Machine Translations (MTs) that are different from the human translation but still essentially correct are distinguished from those that share high number of words with the reference but alter the meaning of the sentence due to translation errors. The results show that the method proposed is indeed beneficial for automatic MT evaluation. We report experiments based on two different evaluation tasks with various types of manual quality assessment. The metric significantly outperforms state-of-the-art evaluation systems in varying evaluation settings.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fomicheva-bel-2016-using">
<titleInfo>
<title>Using Contextual Information for Machine Translation Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marina</namePart>
<namePart type="family">Fomicheva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Núria</namePart>
<namePart type="family">Bel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marko</namePart>
<namePart type="family">Grobelnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helene</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Portorož, Slovenia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Automatic evaluation of Machine Translation (MT) is typically approached by measuring similarity between the candidate MT and a human reference translation. An important limitation of existing evaluation systems is that they are unable to distinguish candidate-reference differences that arise due to acceptable linguistic variation from the differences induced by MT errors. In this paper we present a new metric, UPF-Cobalt, that addresses this issue by taking into consideration the syntactic contexts of candidate and reference words. The metric applies a penalty when the words are similar but the contexts in which they occur are not equivalent. In this way, Machine Translations (MTs) that are different from the human translation but still essentially correct are distinguished from those that share high number of words with the reference but alter the meaning of the sentence due to translation errors. The results show that the method proposed is indeed beneficial for automatic MT evaluation. We report experiments based on two different evaluation tasks with various types of manual quality assessment. The metric significantly outperforms state-of-the-art evaluation systems in varying evaluation settings.</abstract>
<identifier type="citekey">fomicheva-bel-2016-using</identifier>
<location>
<url>https://aclanthology.org/L16-1437</url>
</location>
<part>
<date>2016-05</date>
<extent unit="page">
<start>2755</start>
<end>2761</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Using Contextual Information for Machine Translation Evaluation
%A Fomicheva, Marina
%A Bel, Núria
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Grobelnik, Marko
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Helene
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)
%D 2016
%8 May
%I European Language Resources Association (ELRA)
%C Portorož, Slovenia
%F fomicheva-bel-2016-using
%X Automatic evaluation of Machine Translation (MT) is typically approached by measuring similarity between the candidate MT and a human reference translation. An important limitation of existing evaluation systems is that they are unable to distinguish candidate-reference differences that arise due to acceptable linguistic variation from the differences induced by MT errors. In this paper we present a new metric, UPF-Cobalt, that addresses this issue by taking into consideration the syntactic contexts of candidate and reference words. The metric applies a penalty when the words are similar but the contexts in which they occur are not equivalent. In this way, Machine Translations (MTs) that are different from the human translation but still essentially correct are distinguished from those that share high number of words with the reference but alter the meaning of the sentence due to translation errors. The results show that the method proposed is indeed beneficial for automatic MT evaluation. We report experiments based on two different evaluation tasks with various types of manual quality assessment. The metric significantly outperforms state-of-the-art evaluation systems in varying evaluation settings.
%U https://aclanthology.org/L16-1437
%P 2755-2761
Markdown (Informal)
[Using Contextual Information for Machine Translation Evaluation](https://aclanthology.org/L16-1437) (Fomicheva & Bel, LREC 2016)
ACL