% pubman genre = article
%
% Journal article describing the IE-CoR dataset (Scientific Data 12, article no. 1541).
% Notes on this record:
%   - Names are in "Last, First" form, one per line. The surname "{\'O} Muircheartaigh"
%     is kept together so that it is not split into surname "Muircheartaigh" plus
%     given names "Peadar {\'O}".
%   - Only the proper-noun phrase in the title is brace-protected; the remaining casing
%     is left to the bibliography style.
%   - `year` alone carries the publication date (a duplicate `date = {2025}` was dropped).
%   - `eid` holds the article number; the journal does not use page ranges here.
%   - `contents` and `abstract` are non-standard fields that standard styles ignore.
@article{item_3670091,
  title     = {The {Indo-European Cognate Relationships} dataset},
  author    = {Anderson, Cormac
               and Scarborough, Matthew
               and Jocz, Lechos{\l}aw
               and K{\"u}mmel, Martin Joachim
               and J{\"u}gel, Thomas
               and Irslinger, Britta
               and Pooth, Roland
               and Liljegren, Henrik
               and Strand, Richard F.
               and Haig, Geoffrey
               and Geupel, Ulrich
               and Macak, Martin
               and Kim, Ronald I.
               and Anonby, Erik
               and Pronk, Tijmen
               and Belyaev, Oleg
               and Dewey-Findell, Tonya Kim
               and Boutilier, Matthew
               and Freiberg, Cassandra
               and Tegethoff, Robert
               and Serangeli, Matilde
               and Stro{\'n}ski, Krzysztof
               and Falileyev, Alexander
               and Liosis, Nikos
               and Schulte, Kim
               and Gupta, Ganesh Kumar
               and Izadifar, Raheleh
               and Markus, Patrycja
               and Williams, Nicholas
               and Loi, Simone
               and Sims-Williams, Nicholas
               and Findell, Martin
               and Adibifar, Shirin
               and Abete, Giovanni
               and Atanasov, Petar
               and Baiwir, Esther
               and Bastardas, Maria-Reina
               and Benkato, Adam
               and Bevevino, Lisa Shugert
               and Buchi, {\'E}va
               and Cadorini, Giorgio
               and Cathcart, Chundra
               and Cheveau, Lo{\"\i}c
               and Christodoulou, Charalambos
               and Delorme, J{\'e}r{\'e}mie
               and Dworkin, Steven N.
               and Ekici, Deniz
               and Farridnejad, Shervin
               and Gheitasi, Mojtaba
               and Hammarstr{\"o}m, Harald
               and Hewitt, Steve
               and Khan, Afsar Ali
               and Khan, Muhammad Kamal
               and Khokhlova, Liudmila
               and Kim, Deborah
               and Lewin, Christopher
               and Lushaj, Borana
               and Mahmoudveysi, Parvin
               and Mahommadirad, Masoud
               and Mersch, Sam
               and Mustafa, Baydaa
               and Nemati, Fatemeh
               and Nourzaei, Maryam
               and {\'O} Muircheartaigh, Peadar
               and Oogjen, Virginia
               and Ourang, Muhammed
               and Pagan, Heather
               and Palmer, Timothy S.
               and Pepper, Steve
               and Purandare, Mandar
               and Rehman, Khwaja
               and Rhys, Guto
               and R{\o}yneland, Unn
               and Sagar, Muhammad Zaman
               and Sandstedt, Jade J{\o}rgen
               and Steensland, Lars
               and Taheri-Ardali, Mortaza
               and Talebi-Dastenaei, Mahnaz
               and Tittel, Sabine
               and Tresoldi, Tiago
               and de Vaan, Michiel
               and Verkerk, Annemarie
               and Versloot, Arjen
               and Videsott, Paul
               and Vuleti{\'c}, Nikola
               and Widmer, Manuel
               and Zeini, Arash
               and Bibiko, Hans-J{\"o}rg
               and Runge, Fiona
               and Gray, Russell D.
               and Heggarty, Paul},
  journal   = {Scientific Data},
  volume    = {12},
  eid       = {1541},
  year      = {2025},
  publisher = {Nature Publishing Group},
  address   = {London, United Kingdom},
  issn      = {2052-4463},
  doi       = {10.1038/s41597-025-05445-3},
  language  = {eng},
  abstract  = {The Indo-European Cognate Relationships (IE-CoR) dataset is an open-access relational dataset showing how related, inherited words ({\textquoteleft}cognates{\textquoteright}) pattern across 160 languages of the Indo-European family. IE-CoR is intended as a benchmark dataset for computational research into the evolution of the Indo-European languages. It is structured around 170 reference meanings in core lexicon, and contains 25731 lexeme entries, analysed into 4981 cognate sets. Novel, dedicated structures are used to code all known cases of horizontal transfer. All 13 main documented clades of Indo-European, and their main subclades, are well represented. Time calibration data for each language are also included, as are relevant geographical and social metadata. Data collection was performed by an expert consortium of 89 linguists drawing on 355 cited sources. The dataset is extendable to further languages and meanings and follows the Cross-Linguistic Data Format (CLDF) protocols for linguistic data. It is designed to be interoperable with other cross-linguistic datasets and catalogues, and provides a reference framework for similar initiatives for other language families.},
  contents  = {Background {\&} Summary Background: the Indo-European languages and phylogenetic research Summary of the IE-CoR Dataset. Background: research in cognacy, etymology and lexical semantics. Methods Overview of methodology Language sample. Meaning sample. Lexeme determination: Overview. Lexeme determination: Synonymy. Lexeme determination: Meaning definitions. Cognate determination. Loanwords. Dataset Data Records Technical Validation Usage Notes Code availability},
}