% Workshop paper in the HIP '15 proceedings (ACM). In the title, "&" is escaped for LaTeX and the acronyms 2D/3D are brace-protected against style recasing; pages use the ASCII "--" range form.
@inproceedings{mara15c,
  author    = {Bogacz, Bartosz and Massa, Judith and Mara, Hubert},
  title     = {Homogenization of {2D} \& {3D} Document Formats for Cuneiform Script Analysis},
  year      = {2015},
  isbn      = {9781450336024},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/2809544.2809549},
  doi       = {10.1145/2809544.2809549},
  abstract  = {In the Digital Humanities, text sources can be digitized using various methods resulting in different data representations of related documents. This presents an increased challenge for clay tablets with cuneiform script, which is one of the oldest scripts written by hand, used in the ancient Middle East for more than three millennia. Using a 3D-Scanner to acquire a cuneiform tablet or a manually created line tracing are two completely different representations of the same type of text source. Additionally, a line tracing can be born-digital as a vector graphic or it can be a raster image of a drawing with ink on paper. Each representation is typically processed with its own tool-set and the textual analysis is therefore limited to a certain type of digital representation. In this work we present a work-flow for unification of the three most common graphical representations of cuneiform tablets. The first approach vectorizes the manually created retro-digitized tracings by skeletonization and applies pattern matching to extract the wedges, which are the radical elements of cuneiform script. Secondly, the born-digital drawings also require pattern matching as the curved lines are set differently by each draftsperson. Due to the density of wedges, a subsequent conflict resolution is applied to both types of line drawings. As cuneiform characters are written by hand in 3D, we show the segmentation and extraction of wedges from high-resolution 3D-models. The result is one representation exported as Scalable Vector Graphic (SVG), which is used for character retrieval for a future Optical Character Recognition (OCR) as ultimate goal.},
  booktitle = {Proceedings of the 3rd International Workshop on Historical Document Imaging and Processing},
  pages     = {115--122},
  numpages  = {8},
  keywords  = {3D-Scanning, Conflict Resolution, Cuneiform script, Data Homogenization, Data Normalization, Digital Humanities, Feature Extraction, Segmentation, Pattern Matching, Skeletonization, 3D Handwritten Script},
  location  = {Gammarth, Tunisia},
  series    = {HIP '15}
}