% pubman genre = article
% Journal article record; cite with the key item_3588234.
% The title is stored without an extra outer brace pair so that the
% bibliography style stays in control of title casing.
@article{item_3588234,
  author   = {Koile, Ezequiel and Moroz, George},
  title    = {Detecting linguistic variation with geographic sampling},
  journal  = {Journal of Linguistic Geography},
  year     = {2024},
  pages    = {1--8},
  issn     = {2049-7547},
  doi      = {10.1017/jlg.2024.8},
  abstract = {Geolectal variation is often present in settings where one language is spoken across a vast geographic area. This can be found in phonological, morphosyntactic, and lexical features. For practical reasons, it is not always possible to conduct fieldwork in every single location of interest in order to obtain the full pattern of variation, and a sample of them must be chosen. We propose and test a method for sampling these locations, with the goal of obtaining a distribution of typological features representative of the whole area. We apply k-means and hierarchical clustering algorithms for defining this sample, based on their geographic distribution. We test our methods against simulated data with several spatial configurations, and also against real data from Circassian dialects (Northwest Caucasian). Our results show an efficiency significantly higher than random sampling for detecting this variation, which makes our method profitable to fieldworkers when designing their research.},
}