@inproceedings{mcconnaughey-etal-2017-labeled,
title = "The Labeled Segmentation of Printed Books",
author = "McConnaughey, Lara and
Dai, Jennifer and
Bamman, David",
editor = "Palmer, Martha and
Hwa, Rebecca and
Riedel, Sebastian",
booktitle = "Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing",
month = sep,
year = "2017",
address = "Copenhagen, Denmark",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D17-1077",
doi = "10.18653/v1/D17-1077",
pages = "737--747",
abstract = "We introduce the task of book structure labeling: segmenting and assigning a fixed category (such as Table of Contents, Preface, Index) to the document structure of printed books. We manually annotate the page-level structural categories for a large dataset totaling 294,816 pages in 1,055 books evenly sampled from 1750-1922, and present empirical results comparing the performance of several classes of models. The best-performing model, a bidirectional LSTM with rich features, achieves an overall accuracy of 95.8 and a class-balanced macro F-score of 71.4.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mcconnaughey-etal-2017-labeled">
<titleInfo>
<title>The Labeled Segmentation of Printed Books</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lara</namePart>
<namePart type="family">McConnaughey</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jennifer</namePart>
<namePart type="family">Dai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Bamman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Martha</namePart>
<namePart type="family">Palmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rebecca</namePart>
<namePart type="family">Hwa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Riedel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Copenhagen, Denmark</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We introduce the task of book structure labeling: segmenting and assigning a fixed category (such as Table of Contents, Preface, Index) to the document structure of printed books. We manually annotate the page-level structural categories for a large dataset totaling 294,816 pages in 1,055 books evenly sampled from 1750-1922, and present empirical results comparing the performance of several classes of models. The best-performing model, a bidirectional LSTM with rich features, achieves an overall accuracy of 95.8 and a class-balanced macro F-score of 71.4.</abstract>
<identifier type="citekey">mcconnaughey-etal-2017-labeled</identifier>
<identifier type="doi">10.18653/v1/D17-1077</identifier>
<location>
<url>https://aclanthology.org/D17-1077</url>
</location>
<part>
<date>2017-09</date>
<extent unit="page">
<start>737</start>
<end>747</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The Labeled Segmentation of Printed Books
%A McConnaughey, Lara
%A Dai, Jennifer
%A Bamman, David
%Y Palmer, Martha
%Y Hwa, Rebecca
%Y Riedel, Sebastian
%S Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing
%D 2017
%8 September
%I Association for Computational Linguistics
%C Copenhagen, Denmark
%F mcconnaughey-etal-2017-labeled
%X We introduce the task of book structure labeling: segmenting and assigning a fixed category (such as Table of Contents, Preface, Index) to the document structure of printed books. We manually annotate the page-level structural categories for a large dataset totaling 294,816 pages in 1,055 books evenly sampled from 1750-1922, and present empirical results comparing the performance of several classes of models. The best-performing model, a bidirectional LSTM with rich features, achieves an overall accuracy of 95.8 and a class-balanced macro F-score of 71.4.
%R 10.18653/v1/D17-1077
%U https://aclanthology.org/D17-1077
%U https://doi.org/10.18653/v1/D17-1077
%P 737-747
Markdown (Informal)
[The Labeled Segmentation of Printed Books](https://aclanthology.org/D17-1077) (McConnaughey et al., EMNLP 2017)
ACL
- Lara McConnaughey, Jennifer Dai, and David Bamman. 2017. The Labeled Segmentation of Printed Books. In Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing, pages 737–747, Copenhagen, Denmark. Association for Computational Linguistics.