@inproceedings{zhou-kubler-2021-delexicalized,
title = "Delexicalized Cross-lingual Dependency Parsing for {X}ibe",
author = {Zhou, He and
K{\"u}bler, Sandra},
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)",
month = sep,
year = "2021",
address = "Held Online",
publisher = "INCOMA Ltd.",
url = "https://aclanthology.org/2021.ranlp-1.182",
pages = "1626--1635",
abstract = "Manually annotating a treebank is time-consuming and labor-intensive. We conduct delexicalized cross-lingual dependency parsing experiments, where we train the parser on one language and test on our target language. As our test case, we use Xibe, a severely under-resourced Tungusic language. We assume that choosing a closely related language as the source language will provide better results than more distant relatives. However, it is not clear how to determine those closely related languages. We investigate three different methods: choosing the typologically closest language, using LangRank, and choosing the most similar language based on perplexity. We train parsing models on the selected languages using UDify and test on different genres of Xibe data. The results show that languages selected based on typology and perplexity scores outperform those predicted by LangRank; Japanese is the optimal source language. In determining the source language, proximity to the target language is more important than large training sizes. Parsing is also influenced by genre differences, but they have little influence as long as the training data is at least as complex as the target.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhou-kubler-2021-delexicalized">
<titleInfo>
<title>Delexicalized Cross-lingual Dependency Parsing for Xibe</title>
</titleInfo>
<name type="personal">
<namePart type="given">He</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sandra</namePart>
<namePart type="family">Kübler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd.</publisher>
<place>
<placeTerm type="text">Held Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Manually annotating a treebank is time-consuming and labor-intensive. We conduct delexicalized cross-lingual dependency parsing experiments, where we train the parser on one language and test on our target language. As our test case, we use Xibe, a severely under-resourced Tungusic language. We assume that choosing a closely related language as the source language will provide better results than more distant relatives. However, it is not clear how to determine those closely related languages. We investigate three different methods: choosing the typologically closest language, using LangRank, and choosing the most similar language based on perplexity. We train parsing models on the selected languages using UDify and test on different genres of Xibe data. The results show that languages selected based on typology and perplexity scores outperform those predicted by LangRank; Japanese is the optimal source language. In determining the source language, proximity to the target language is more important than large training sizes. Parsing is also influenced by genre differences, but they have little influence as long as the training data is at least as complex as the target.</abstract>
<identifier type="citekey">zhou-kubler-2021-delexicalized</identifier>
<location>
<url>https://aclanthology.org/2021.ranlp-1.182</url>
</location>
<part>
<date>2021-09</date>
<extent unit="page">
<start>1626</start>
<end>1635</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Delexicalized Cross-lingual Dependency Parsing for Xibe
%A Zhou, He
%A Kübler, Sandra
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)
%D 2021
%8 September
%I INCOMA Ltd.
%C Held Online
%F zhou-kubler-2021-delexicalized
%X Manually annotating a treebank is time-consuming and labor-intensive. We conduct delexicalized cross-lingual dependency parsing experiments, where we train the parser on one language and test on our target language. As our test case, we use Xibe, a severely under-resourced Tungusic language. We assume that choosing a closely related language as the source language will provide better results than more distant relatives. However, it is not clear how to determine those closely related languages. We investigate three different methods: choosing the typologically closest language, using LangRank, and choosing the most similar language based on perplexity. We train parsing models on the selected languages using UDify and test on different genres of Xibe data. The results show that languages selected based on typology and perplexity scores outperform those predicted by LangRank; Japanese is the optimal source language. In determining the source language, proximity to the target language is more important than large training sizes. Parsing is also influenced by genre differences, but they have little influence as long as the training data is at least as complex as the target.
%U https://aclanthology.org/2021.ranlp-1.182
%P 1626-1635
Markdown (Informal)
[Delexicalized Cross-lingual Dependency Parsing for Xibe](https://aclanthology.org/2021.ranlp-1.182) (Zhou & Kübler, RANLP 2021)
ACL