% RANLP 2017 conference paper: similarity-based assignment of sentences to genre experts for POS tagging and dependency parsing.
@inproceedings{mukherjee-kubler-2017-similarity,
  author    = {Mukherjee, Atreyee and K{\"u}bler, Sandra},
  title     = {Similarity Based Genre Identification for {POS} Tagging Experts {\&} Dependency Parsing},
  editor    = {Mitkov, Ruslan and Angelova, Galia},
  booktitle = {Proceedings of the International Conference Recent Advances in Natural Language Processing, {RANLP} 2017},
  year      = {2017},
  month     = sep,
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd.},
  pages     = {519--526},
  doi       = {10.26615/978-954-452-049-6_068},
  url       = {https://doi.org/10.26615/978-954-452-049-6_068},
  abstract  = {POS tagging and dependency parsing achieve good results for homogeneous datasets. However, these tasks are much more difficult on heterogeneous datasets. In (Mukherjee et al. 2016, 2017), we address this issue by creating genre experts for both POS tagging and parsing. We use topic modeling to automatically separate training and test data into genres and to create annotation experts per genre by training separate models for each topic. However, this approach assumes that topic modeling is performed jointly on training and test sentences each time a new test sentence is encountered. We extend this work by assigning new test sentences to their genre expert by using similarity metrics. We investigate three different types of methods: 1) based on words highly associated with a genre by the topic modeler, 2) using a k-nearest neighbor classification approach, and 3) using perplexity to determine the closest topic. The results show that the choice of similarity metric has an effect on results and that we can reach comparable accuracies to the joint topic modeling in POS tagging and dependency parsing, thus providing a viable and efficient approach to POS tagging and parsing a sentence by its genre expert.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mukherjee-kubler-2017-similarity">
<titleInfo>
<title>Similarity Based Genre Identification for POS Tagging Experts &amp; Dependency Parsing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atreyee</namePart>
<namePart type="family">Mukherjee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sandra</namePart>
<namePart type="family">Kübler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the International Conference Recent Advances in Natural Language Processing, RANLP 2017</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd.</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>POS tagging and dependency parsing achieve good results for homogeneous datasets. However, these tasks are much more difficult on heterogeneous datasets. In (Mukherjee et al. 2016, 2017), we address this issue by creating genre experts for both POS tagging and parsing. We use topic modeling to automatically separate training and test data into genres and to create annotation experts per genre by training separate models for each topic. However, this approach assumes that topic modeling is performed jointly on training and test sentences each time a new test sentence is encountered. We extend this work by assigning new test sentences to their genre expert by using similarity metrics. We investigate three different types of methods: 1) based on words highly associated with a genre by the topic modeler, 2) using a k-nearest neighbor classification approach, and 3) using perplexity to determine the closest topic. The results show that the choice of similarity metric has an effect on results and that we can reach comparable accuracies to the joint topic modeling in POS tagging and dependency parsing, thus providing a viable and efficient approach to POS tagging and parsing a sentence by its genre expert.</abstract>
<identifier type="citekey">mukherjee-kubler-2017-similarity</identifier>
<identifier type="doi">10.26615/978-954-452-049-6_068</identifier>
<part>
<date>2017-09</date>
<extent unit="page">
<start>519</start>
<end>526</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Similarity Based Genre Identification for POS Tagging Experts & Dependency Parsing
%A Mukherjee, Atreyee
%A Kübler, Sandra
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference Recent Advances in Natural Language Processing, RANLP 2017
%D 2017
%8 September
%I INCOMA Ltd.
%C Varna, Bulgaria
%F mukherjee-kubler-2017-similarity
%X POS tagging and dependency parsing achieve good results for homogeneous datasets. However, these tasks are much more difficult on heterogeneous datasets. In (Mukherjee et al. 2016, 2017), we address this issue by creating genre experts for both POS tagging and parsing. We use topic modeling to automatically separate training and test data into genres and to create annotation experts per genre by training separate models for each topic. However, this approach assumes that topic modeling is performed jointly on training and test sentences each time a new test sentence is encountered. We extend this work by assigning new test sentences to their genre expert by using similarity metrics. We investigate three different types of methods: 1) based on words highly associated with a genre by the topic modeler, 2) using a k-nearest neighbor classification approach, and 3) using perplexity to determine the closest topic. The results show that the choice of similarity metric has an effect on results and that we can reach comparable accuracies to the joint topic modeling in POS tagging and dependency parsing, thus providing a viable and efficient approach to POS tagging and parsing a sentence by its genre expert.
%R 10.26615/978-954-452-049-6_068
%U https://doi.org/10.26615/978-954-452-049-6_068
%P 519-526
Markdown (Informal)
[Similarity Based Genre Identification for POS Tagging Experts & Dependency Parsing](https://doi.org/10.26615/978-954-452-049-6_068) (Mukherjee & Kübler, RANLP 2017)
ACL