% Journal article record (Alzheimer Disease and Associated Disorders, vol. 32, no. 1).
% The note field carries the funding acknowledgement exactly once, followed by the
% publisher copyright line; most styles print note verbatim in the bibliography.
@article{27543cd5054b4148b33d9728a758facb,
  author    = {Lin, Ming and Gong, Pinghua and Yang, Tao and Ye, Jieping and Albin, Roger L. and Dodge, Hiroko H.},
  title     = {Big Data Analytical Approaches to the {NACC} Dataset: Aiding Preclinical Trial Enrichment},
  journal   = {Alzheimer Disease and Associated Disorders},
  year      = {2018},
  volume    = {32},
  number    = {1},
  pages     = {18--27},
  doi       = {10.1097/WAD.0000000000000228},
  issn      = {0893-0341},
  publisher = {Lippincott Williams and Wilkins},
  language  = {English (US)},
  keywords  = {National Alzheimer's Coordinating Center Uniform Data Set (NACC UDS), ROC AUC, bootstrap, dementia, incidence, machine learning, mild cognitive impairment, prediction, study enrichment},
  abstract  = {Background: Clinical trials increasingly aim to retard disease progression during presymptomatic phases of Mild Cognitive Impairment (MCI) and thus recruiting study participants at high risk for developing MCI is critical for cost-effective prevention trials. However, accurately identifying those who are destined to develop MCI is difficult. Collecting biomarkers is often expensive. Methods: We used only noninvasive clinical variables collected in the National Alzheimer's Coordinating Center (NACC) Uniform Data Sets version 2.0 and applied machine learning techniques to build a low-cost and accurate Mild Cognitive Impairment (MCI) conversion prediction calculator. Cross-validation and bootstrap were used to select as few variables as possible accurately predicting MCI conversion within 4 years. Results: A total of 31,872 unique subjects, 748 clinical variables, and additional 128 derived variables in NACC data sets were used. About 15 noninvasive clinical variables are identified for predicting MCI/aMCI/naMCI converters, respectively. Over 75% Receiver Operating Characteristic Area Under the Curves (ROC AUC) was achieved. By bootstrap we created a simple spreadsheet calculator which estimates the probability of developing MCI within 4 years with a 95% confidence interval. Conclusions: We achieved reasonably high prediction accuracy using only clinical variables. The approach used here could be useful for study enrichment in preclinical trials where enrolling participants at risk of cognitive decline is critical for proving study efficacy, and also for developing a shorter assessment battery.},
  note      = {Funding Information: The NACC database is funded by NIA/NIH Grant U01 AG016976. NACC data are contributed by the NIA funded ADCs: P30 AG019610 (PI Eric Reiman, MD), P30 AG013846 (PI Neil Kowall, MD), P50 AG008702 (PI Scott Small, MD), P50 AG025688 (PI Allan Levey, MD, PhD), P50 AG047266 (PI Todd Golde, MD, PhD), P30 AG010133 (PI Andrew Saykin, PsyD), P50 AG005146 (PI Marilyn Albert, PhD), P50 AG005134 (PI Bradley Hyman, MD, PhD), P50 AG016574 (PI Ronald Petersen, MD, PhD), P50 AG005138 (PI Mary Sano, PhD), P30 AG008051 (PI Thomas Wisniewski, MD), P30 AG013854 (PI M. Marsel Mesulam, MD), P30 AG008017 (PI Jeffrey Kaye, MD), P30 AG010161 (PI David Bennett, MD), P50 AG047366 (PI Victor Henderson, MD, MS), P30 AG010129 (PI Charles DeCarli, MD), P50 AG016573 (PI Frank LaFerla, PhD), P50 AG005131 (PI James Brewer, MD, PhD), P50 AG023501 (PI Bruce Miller, MD), P30 AG035982 (PI Russell Swerdlow, MD), P30 AG028383 (PI Linda Van Eldik, PhD), P30 AG053760 (PI Henry Paulson, MD, PhD), P30 AG010124 (PI John Trojanowski, MD, PhD), P50 AG005133 (PI Oscar Lopez, MD), P50 AG005142 (PI Helena Chui, MD), P30 AG012300 (PI Roger Rosenberg, MD), P30 AG049638 (PI Suzanne Craft, PhD), P50 AG005136 (PI Thomas Grabowski, MD), P50 AG033514 (PI Sanjay Asthana, MD, FRCP), P50 AG005681 (PI John Morris, MD), and P50 AG047270 (PI Stephen Strittmatter, MD, PhD). The following grants supported the current study. National Institute on Aging (P30AG053760, P30AG008017 and R01AG051710) and National Science Foundation (III-1539991 and III-1539722). The authors also thank Dr Lilah M. Besser at NACC in generating an appropriate data set for this study. Publisher Copyright: {\textcopyright} 2017 Wolters Kluwer Health, Inc. All rights reserved.},
}