@inproceedings{weegar-etal-2016-impact,
title = "The impact of simple feature engineering in multilingual medical {NER}",
author = "Weegar, Rebecka and
Casillas, Arantza and
Diaz de Ilarraza, Arantza and
Oronoz, Maite and
P{\'e}rez, Alicia and
Gojenola, Koldo",
editor = "Rumshisky, Anna and
Roberts, Kirk and
Bethard, Steven and
Naumann, Tristan",
booktitle = "Proceedings of the Clinical Natural Language Processing Workshop ({C}linical{NLP})",
month = dec,
year = "2016",
address = "Osaka, Japan",
publisher = "The COLING 2016 Organizing Committee",
url = "https://aclanthology.org/W16-4201",
pages = "1--6",
abstract = "The goal of this paper is to examine the impact of simple feature engineering mechanisms before applying more sophisticated techniques to the task of medical NER. Sometimes papers using scientifically sound techniques present raw baselines that could be improved adding simple and cheap features. This work focuses on entity recognition for the clinical domain for three languages: English, Swedish and Spanish. The task is tackled using simple features, starting from the window size, capitalization, prefixes, and moving to POS and semantic tags. This work demonstrates that a simple initial step of feature engineering can improve the baseline results significantly. Hence, the contributions of this paper are: first, a short list of guidelines well supported with experimental results on three languages and, second, a detailed description of the relevance of these features for medical NER.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="weegar-etal-2016-impact">
<titleInfo>
<title>The impact of simple feature engineering in multilingual medical NER</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rebecka</namePart>
<namePart type="family">Weegar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arantza</namePart>
<namePart type="family">Casillas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arantza</namePart>
<namePart type="family">Diaz de Ilarraza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maite</namePart>
<namePart type="family">Oronoz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alicia</namePart>
<namePart type="family">Pérez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Koldo</namePart>
<namePart type="family">Gojenola</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Clinical Natural Language Processing Workshop (ClinicalNLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rumshisky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kirk</namePart>
<namePart type="family">Roberts</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tristan</namePart>
<namePart type="family">Naumann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>The COLING 2016 Organizing Committee</publisher>
<place>
<placeTerm type="text">Osaka, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The goal of this paper is to examine the impact of simple feature engineering mechanisms before applying more sophisticated techniques to the task of medical NER. Sometimes papers using scientifically sound techniques present raw baselines that could be improved adding simple and cheap features. This work focuses on entity recognition for the clinical domain for three languages: English, Swedish and Spanish. The task is tackled using simple features, starting from the window size, capitalization, prefixes, and moving to POS and semantic tags. This work demonstrates that a simple initial step of feature engineering can improve the baseline results significantly. Hence, the contributions of this paper are: first, a short list of guidelines well supported with experimental results on three languages and, second, a detailed description of the relevance of these features for medical NER.</abstract>
<identifier type="citekey">weegar-etal-2016-impact</identifier>
<location>
<url>https://aclanthology.org/W16-4201</url>
</location>
<part>
<date>2016-12</date>
<extent unit="page">
<start>1</start>
<end>6</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The impact of simple feature engineering in multilingual medical NER
%A Weegar, Rebecka
%A Casillas, Arantza
%A Diaz de Ilarraza, Arantza
%A Oronoz, Maite
%A Pérez, Alicia
%A Gojenola, Koldo
%Y Rumshisky, Anna
%Y Roberts, Kirk
%Y Bethard, Steven
%Y Naumann, Tristan
%S Proceedings of the Clinical Natural Language Processing Workshop (ClinicalNLP)
%D 2016
%8 December
%I The COLING 2016 Organizing Committee
%C Osaka, Japan
%F weegar-etal-2016-impact
%X The goal of this paper is to examine the impact of simple feature engineering mechanisms before applying more sophisticated techniques to the task of medical NER. Sometimes papers using scientifically sound techniques present raw baselines that could be improved adding simple and cheap features. This work focuses on entity recognition for the clinical domain for three languages: English, Swedish and Spanish. The task is tackled using simple features, starting from the window size, capitalization, prefixes, and moving to POS and semantic tags. This work demonstrates that a simple initial step of feature engineering can improve the baseline results significantly. Hence, the contributions of this paper are: first, a short list of guidelines well supported with experimental results on three languages and, second, a detailed description of the relevance of these features for medical NER.
%U https://aclanthology.org/W16-4201
%P 1-6
Markdown (Informal)
[The impact of simple feature engineering in multilingual medical NER](https://aclanthology.org/W16-4201) (Weegar et al., ClinicalNLP 2016)
ACL