% pubman genre = conference-paper
% Conference paper introducing the cldfbench framework (LREC 2020 proceedings).
@inproceedings{item_3259068,
  title     = {{CLDFBench}: Give your cross-linguistic data a lift},
  author    = {Forkel, Robert and List, Johann-Mattis},
  booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation ({LREC} 2020)},
  editor    = {Calzolari, Nicoletta and B{\'e}chet, Fr{\'e}d{\'e}ric and Blache, Philippe and Choukri, Khalid and Cieri, Christopher and Declerck, Thierry and Goggi, Sara and Isahara, Hitoshi and Maegaard, Bente and Mariani, Joseph and Mazo, H{\'e}l{\`e}ne and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Marseille},
  year      = {2020},
  pages     = {6995--7002},
  isbn      = {979-10-95546-34-4},
  doi       = {10.17613/8t0e-w639},
  language  = {eng},
  note      = {12th Conference on Language Resources and Evaluation [postponed due to Corona]},
  abstract  = {While the amount of cross-linguistic data is constantly increasing, most datasets produced today and in the past cannot be considered FAIR (findable, accessible, interoperable, and reproducible). To remedy this and to increase the comparability of cross-linguistic resources, it is not enough to set up standards and best practices for data to be collected in the future. We also need consistent workflows for the {\textquotedblleft}retro-standardization{\textquotedblright} of data that has been published during the past decades and centuries. With the Cross-Linguistic Data Formats initiative, first standards for cross-linguistic data have been presented and successfully tested. So far, however, CLDF creation was hampered by the fact that it required a considerable degree of computational proficiency. With cldfbench, we introduce a framework for the retro-standardization of legacy data and the curation of new datasets that drastically simplifies the creation of CLDF by providing a consistent, reproducible workflow that rigorously supports version control and long term archiving of research data and code. The framework is distributed in form of a Python package along with usage information and examples for best practice.
    This study introduces the new framework and illustrates how it can be applied by showing how a resource containing structural and lexical data for Sinitic languages can be efficiently retro-standardized and analyzed.},
}