% Journal article: Nikolova, Zola and Aluru, J. Parallel Distrib. Comput. 73(8), 2013.
% Cleaned-up repository export. The funding acknowledgement and author biography
% that the exporter placed in the printed "note" field are kept in "annote",
% which standard bibliography styles do not typeset. Math in the abstract was
% restored from flattened superscripts (e.g. "n2 . 2n" -> n^2 * 2^n).
@article{3dfc1ce138e94b6bb75bedfd5568c354,
  author    = {Nikolova, Olga and Zola, Jaroslaw and Aluru, Srinivas},
  title     = {Parallel globally optimal structure learning of {Bayesian} networks},
  journal   = {Journal of Parallel and Distributed Computing},
  year      = {2013},
  volume    = {73},
  number    = {8},
  pages     = {1039--1048},
  doi       = {10.1016/j.jpdc.2013.04.001},
  issn      = {0743-7315},
  publisher = {Academic Press Inc.},
  language  = {English (US)},
  keywords  = {Bayesian networks, Graphical models, Machine learning, Parallel algorithm, Structure learning},
  abstract  = {Given $n$ random variables and a set of $m$ observations of each of the $n$ variables, the Bayesian network structure learning problem is to learn a directed acyclic graph (DAG) on the $n$ variables such that the implied joint probability distribution best explains the set of observations. Bayesian networks are widely used in many fields including data mining and computational biology. Globally optimal (exact) structure learning of Bayesian networks takes $O(n^2 \cdot 2^n)$ time plus the cost of $O(n \cdot 2^n)$ evaluations of an application-specific scoring function whose run-time is at least linear in $m$. In this paper, we present a parallel algorithm for exact structure learning of a Bayesian network that is communication-efficient and work-optimal up to $O(\frac{1}{n} \cdot 2^n)$ processors. We further extend this algorithm to the important restricted case of structure learning with bounded node in-degree and investigate the performance gains achievable because of limiting node in-degree. We demonstrate the applicability of our method by implementation on an IBM Blue Gene/P system and an AMD Opteron InfiniBand cluster and present experimental results that characterize run-time behavior with respect to the number of variables, number of observations, and the bound on in-degree.},
  annote    = {Funding Information: The authors would like to thank Brian Smith from IBM Corporation for carrying out the experimental runs on the Blue Gene/P system located at IBM Rochester. This work is supported in part by the US National Science Foundation under CCF-0811840, CCF-0751157, a Swarnajayanti Fellowship from the Government of India, as well as, through TeraGrid resources provided by TACC under TG-ASC110004. Author biography (labelled ``Funding Information'' in the original export): Srinivas Aluru is the Ross Martin Mehl and Marylyne Munas Mehl Professor of Computer Engineering at Iowa State University, and Professor of Computer Science and Engineering at Indian Institute of Technology Bombay. Earlier, he held faculty positions at Syracuse University and New Mexico State University. Aluru conducts research in high performance computing, parallel algorithms and applications, bioinformatics and systems biology, combinatorial scientific computing, and applied algorithms. He is a recipient of the NSF Career award, Iowa State University Foundation award for outstanding achievement in research, Swarnajayanti Fellowship from Government of India, two best paper awards (IPDPS 2006 and CSB 2005), and two best paper finalist recognitions (SC 2007 and SC 2002). He serves on the editorial boards of the Journal of Parallel and Distributed Computing, IEEE Transactions on Parallel and Distributed Systems, International Journal of Data Mining and Bioinformatics, and Journal of Computing by the Computer Society of India. He served on numerous program committees in parallel processing and computational biology, including serving as program chair for BCB 2013, IC3 2011, HiPC 2007, program co-chair for BiCoB 2008, and program vice chair for ICPP 2012, BIBM 2009, SC 2008, IPDPS 2007, ICPP 2007 and HiPC 2006. He co-chairs an annual workshop on High Performance Computational Biology (www.hicomb.org) and edited a comprehensive handbook on computational molecular biology, published in 2005. He is a Fellow of the American Association for the Advancement of Science (AAAS) and the Institute of Electrical and Electronics Engineers (IEEE).},
}