@inproceedings{pinnis-etal-2016-designing,
title = "Designing a Speech Corpus for the Development and Evaluation of Dictation Systems in {L}atvian",
author = "Pinnis, M{\=a}rcis and
Salimbajevs, Askars and
Auzi{\c{n}}a, Ilze",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Goggi, Sara and
Grobelnik, Marko and
Maegaard, Bente and
Mariani, Joseph and
Mazo, Helene and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)",
month = may,
year = "2016",
address = "Portoro{\v{z}}, Slovenia",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/L16-1124",
pages = "775--780",
abstract = "In this paper the authors present a speech corpus designed and created for the development and evaluation of dictation systems in Latvian. The corpus consists of over nine hours of orthographically annotated speech from 30 different speakers. The corpus features spoken commands that are common for dictation systems for text editors. The corpus is evaluated in an automatic speech recognition scenario. Evaluation results in an ASR dictation scenario show that the addition of the corpus to the acoustic model training data in combination with language model adaptation allows to decrease the WER by up to relative 41.36{\%} (or 16.83{\%} in absolute numbers) compared to a baseline system without language model adaptation. Contribution of acoustic data augmentation is at relative 12.57{\%} (or 3.43{\%} absolute).",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pinnis-etal-2016-designing">
<titleInfo>
<title>Designing a Speech Corpus for the Development and Evaluation of Dictation Systems in Latvian</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mārcis</namePart>
<namePart type="family">Pinnis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Askars</namePart>
<namePart type="family">Salimbajevs</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ilze</namePart>
<namePart type="family">Auziņa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marko</namePart>
<namePart type="family">Grobelnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helene</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Portorož, Slovenia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper the authors present a speech corpus designed and created for the development and evaluation of dictation systems in Latvian. The corpus consists of over nine hours of orthographically annotated speech from 30 different speakers. The corpus features spoken commands that are common for dictation systems for text editors. The corpus is evaluated in an automatic speech recognition scenario. Evaluation results in an ASR dictation scenario show that the addition of the corpus to the acoustic model training data in combination with language model adaptation allows to decrease the WER by up to relative 41.36% (or 16.83% in absolute numbers) compared to a baseline system without language model adaptation. Contribution of acoustic data augmentation is at relative 12.57% (or 3.43% absolute).</abstract>
<identifier type="citekey">pinnis-etal-2016-designing</identifier>
<location>
<url>https://aclanthology.org/L16-1124</url>
</location>
<part>
<date>2016-05</date>
<extent unit="page">
<start>775</start>
<end>780</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Designing a Speech Corpus for the Development and Evaluation of Dictation Systems in Latvian
%A Pinnis, Mārcis
%A Salimbajevs, Askars
%A Auziņa, Ilze
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Grobelnik, Marko
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Helene
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)
%D 2016
%8 May
%I European Language Resources Association (ELRA)
%C Portorož, Slovenia
%F pinnis-etal-2016-designing
%X In this paper the authors present a speech corpus designed and created for the development and evaluation of dictation systems in Latvian. The corpus consists of over nine hours of orthographically annotated speech from 30 different speakers. The corpus features spoken commands that are common for dictation systems for text editors. The corpus is evaluated in an automatic speech recognition scenario. Evaluation results in an ASR dictation scenario show that the addition of the corpus to the acoustic model training data in combination with language model adaptation allows to decrease the WER by up to relative 41.36% (or 16.83% in absolute numbers) compared to a baseline system without language model adaptation. Contribution of acoustic data augmentation is at relative 12.57% (or 3.43% absolute).
%U https://aclanthology.org/L16-1124
%P 775-780
Markdown (Informal)
[Designing a Speech Corpus for the Development and Evaluation of Dictation Systems in Latvian](https://aclanthology.org/L16-1124) (Pinnis et al., LREC 2016)
ACL