Language & Literature
Spring 2009 - Present
Description
The study of intertextuality, the shaping of a text’s meaning by other texts, remains a labor-intensive process for the literary critic. Julia Kristeva, who coined the term intertext, suggested, "Any text is constructed as a mosaic of quotations; any text is the absorption and transformation of another". Such transformations range from direct quotations, representing a simple and overt intertextuality, to more complex references that are intentionally or subconsciously absorbed into a text. In the years since Kristeva first drew attention to the phenomenon, the field of its study has become increasingly - in some cases debilitatingly - complex. As this theoretical complexity grows, so does the burden upon the practicing literary critic to verify suspected instances of intertextuality. The critic must command a large corpus of possible contributing works; meanwhile, objective criteria by which intertext may be measured are lacking. Since, in many cases, the problem is one of pattern recognition, the task of locating new relationships between texts and validating suspected ones is a good candidate for automated assistance by computers.
In this work, we propose the use of machine learning and related statistical methods to improve the process by which intertextuality is studied. Specifically, we bring to bear computational techniques from the field of stylistics in order to examine instances where an author who is familiar with a particular corpus deliberately or subconsciously reflects that familiarity in discrete passages within their own work. In the feature space, we are particularly interested in the repetitive stylistic nature of sound-oriented texts. Through our analysis, we have established that authors make extensive use of repetitive sound to emphasize ideas or phrases, or to construct poetic forms.
A second avenue of research is the application of AI to historical document analysis to produce machine-readable representations of text from digital images. In archives scattered around the globe, old manuscripts can be found piled up to the ceiling and spread out as far as the eye can see. The amount of writing produced on physical media since antiquity is staggering, and very little of it has been digitized and transcribed into plain text for researchers to study using modern data mining tools. Work in the digital humanities has sought to address this problem by deploying everything from off-the-shelf optical character recognition (OCR) tools to state-of-the-art convolutional neural network-based transcription pipelines. However, such work has been underpinned by the long-standing, yet incorrect, belief that computer vision has solved handwritten document transcription. The open nature of this problem, coupled with a difficult data domain that has largely remained the realm of specialist scholars, makes it a fascinating case study for testing the capabilities of artificial intelligence.
This work is supported by NEH Digital Humanities Start-Up Grant Award No. HD-51570-12, NEH Digital Humanities Advancement Grant No. HAA-258767-18, and the Andrew W. Mellon Foundation.
Publications
- "The Paleographer’s Eye ex machina:, , , , ,
Using Computer Vision to Assist Humanists in Scribal Hand Identification,"Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision
(WACV),January 2024.[pdf][bibtex]
@inproceedings{grieggs2024paleographer,
  title     = {The Paleographer's Eye ex machina: Using Computer Vision To Assist Humanists in Scribal Hand Identification},
  author    = {Grieggs, Samuel and
               Henderson, C. E. M. and
               Sobecki, Sebastian and
               Gillespie, Alexandra and
               Scheirer, Walter},
  booktitle = {Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision (WACV)},
  pages     = {7177--7186},
  month     = jan,
  year      = {2024}
}
- "Automated Transcription of Gə'əz Manuscripts Using Deep Learning,", , , ,
, , , , ,
, ,Digital Humanities Quarterly,August 2023.[pdf] [code][bibtex]
@article{Grieggs_DHQ2023,
  author  = {Samuel Grieggs and
             Jessica Lockhart and
             Alexandra Atiya and
             Gelila Tilahun and
             Suzanne Akbari and
             Eyob Derillo and
             Jarod Jacobs and
             Christine Kwon and
             Michael Gervers and
             Steve Delamarter and
             Walter J. Scheirer},
  title   = {Automated Transcription of {Gə'əz} Manuscripts Using Deep Learning},
  journal = {Digital Humanities Quarterly},
  volume  = {17},
  number  = {3},
  month   = aug,
  year    = {2023}
}
- "The Tesserae Intertext Service,", , , , ,Digital Humanities Quarterly,April 2022.
- "Measuring Human Perception to Improve Handwritten Document Transcription,", , , , , ,
, ,IEEE Transactions on Pattern Analysis and Machine Intelligence (T-PAMI),Accepted for Publication in June 2021.[pdf][bibtex]
@article{DBLP:journals/corr/abs-1904-03734,
  author        = {Samuel Grieggs and
                   Bingyu Shen and
                   Greta Rauch and
                   Pei Li and
                   Jiaqi Ma and
                   David Chiang and
                   Brian Price and
                   Walter J. Scheirer},
  title         = {Measuring Human Perception to Improve Handwritten Document Transcription},
  journal       = {CoRR},
  volume        = {abs/1904.03734},
  year          = {2020},
  url           = {http://arxiv.org/abs/1904.03734},
  archivePrefix = {arXiv},
  eprint        = {1904.03734},
  note          = {Accepted for publication in IEEE Transactions on Pattern Analysis and Machine Intelligence (T-PAMI), June 2021}
}
- "Practical Text Phylogeny for Real-World Settings,", , ,IEEE Access,December 2018.
- "Coupling Story to Visualization: Using Textual Analysis as a Bridge Between Data, , , ,
and Interpretation,"Proceedings of the ACM International Conference on Intelligent User Interfaces (IUI),March 2018.[pdf][bibtex]
@inproceedings{MetoyerA18,
  author    = {Ronald Metoyer and
               Qiyu Zhi and
               Bart Janczuk and
               Walter J. Scheirer},
  title     = {Coupling Story to Visualization: Using Textual Analysis as a Bridge Between Data and Interpretation},
  booktitle = {Proceedings of the {ACM} International Conference on Intelligent User Interfaces ({IUI})},
  month     = mar,
  year      = {2018}
}
- "Authorship Attribution for Social Media Forensics,", , , , , , , ,IEEE Transactions on Information Forensics and Security (T-IFS),January 2017.[pdf][bibtex]
@article{Scheirer_2017_TIFS,
  author  = {Anderson Rocha and
             Walter J. Scheirer and
             Christopher W. Forstall and
             Thiago Cavalcante and
             Antonio Theophilo and
             Bingyu Shen and
             Ariadne R. B. Carvalho and
             Efstathios Stamatatos},
  title   = {Authorship Attribution for Social Media Forensics},
  journal = {IEEE Transactions on Information Forensics and Security (T-IFS)},
  volume  = {12},
  number  = {1},
  month   = jan,
  year    = {2017}
}
- "The Sense of a Connection: Automatic Tracing of Intertextuality by Meaning,", , ,Digital Scholarship in the Humanities (DSH),April 2016.[pdf][bibtex]
@article{Scheirer_2016_LLC,
  author  = {Walter J. Scheirer and
             Christopher W. Forstall and
             Neil Coffee},
  title   = {The Sense of a Connection: Automatic Tracing of Intertextuality by Meaning},
  journal = {Digital Scholarship in the Humanities (DSH)},
  volume  = {31},
  number  = {1},
  month   = apr,
  year    = {2016}
}
- "Evidence of Intertextuality: Investigating Paul the Deacon's Angustae Vitae,", , ,Literary & Linguistic Computing (LLC),September 2011.[pdf][bibtex]
@article{Forstall_2011_LLC,
  author  = {Christopher W. Forstall and
             Sarah Jacobson and
             Walter J. Scheirer},
  title   = {Evidence of Intertextuality: Investigating {Paul} the {Deacon's} {Angustae} {Vitae}},
  journal = {Literary \& Linguistic Computing (LLC)},
  volume  = {26},
  number  = {3},
  pages   = {285--296},
  month   = sep,
  year    = {2011}
}
- "Features from Frequency: Authorship and Stylistic Analysis Using Repetitive Sound,", ,Proceedings of the 4th Annual Chicago Colloquium on Digital Humanities and Computer Science (DHCS),November 2009.[pdf][bibtex]
@inproceedings{Forstall_2009_DHCS,
  author    = {Christopher W. Forstall and
               Walter J. Scheirer},
  title     = {Features from Frequency: Authorship and Stylistic Analysis Using Repetitive Sound},
  booktitle = {The 4th Annual Chicago Colloquium on Digital Humanities and Computer Science (DHCS)},
  month     = nov,
  year      = {2009}
}
Abstracts
- "Verba Volant, Scripta Manent: Approaching the Automatic Transcription of Medieval Manuscripts,", , , , , , ,
, ,Digital Humanities 2018 (DH),July 2018.[html][bibtex]
@inproceedings{Grieggs_2018_DH,
  author    = {Samuel Grieggs and
               Bingyu Shen and
               Hildegund M{\"u}ller and
               Christine Ascik and
               Erik Ellis and
               Mihow McKenny and
               Nikolas Churik and
               Emily Mahan and
               Walter J. Scheirer},
  title     = {Verba Volant, Scripta Manent: Approaching the Automatic Transcription of Medieval Manuscripts},
  booktitle = {Digital Humanities 2018 (DH)},
  month     = jul,
  year      = {2018}
}
- "Euterpe's Hidden Song: Patterns in Elegy,", ,Digital Humanities 2014 (DH),July 2014.
- "Modelling the Interpretation of Literary Allusion with Machine Learning Techniques,", , , , , , ,Digital Humanities 2013 (DH),July 2013.[pdf][bibtex]
@inproceedings{Coffee_2013_DH,
  author    = {Neil Coffee and
               James Gawley and
               Christopher W. Forstall and
               Walter J. Scheirer and
               David Johnson and
               Jason J. Corso and
               Brian Parks},
  title     = {Modelling the Interpretation of Literary Allusion with Machine Learning Techniques},
  booktitle = {Digital Humanities 2013 (DH)},
  month     = jul,
  year      = {2013}
}
- "Revealing Hidden Patterns in the Meter of Homer's Iliad,", ,The 7th Annual Chicago Colloquium on Digital Humanities and Computer Science (DHCS),November 2012.[pdf][bibtex]
@inproceedings{Forstall_2012_DHCS,
  author    = {Christopher W. Forstall and
               Walter J. Scheirer},
  title     = {Revealing Hidden Patterns in the Meter of {Homer's} {Iliad}},
  booktitle = {The 7th Annual Chicago Colloquium on Digital Humanities and Computer Science (DHCS)},
  month     = nov,
  year      = {2012}
}
- "Visualizing Sound as Functional n-grams in Homeric Greek Poetry,", ,Digital Humanities 2011 (DH),June 2011.
- "A Statistical Study of Latin Elegiac Couplets,", ,The 5th Annual Chicago Colloquium on Digital Humanities and Computer Science (DHCS),November 2010.[pdf][bibtex]
@inproceedings{Forstall_2010_DHCS,
  author    = {Christopher W. Forstall and
               Walter J. Scheirer},
  title     = {A Statistical Study of {Latin} Elegiac Couplets},
  booktitle = {The 5th Annual Chicago Colloquium on Digital Humanities and Computer Science (DHCS)},
  month     = nov,
  year      = {2010}
}
Presentations
Posters
Code
- The Tesserae code is available on GitHub