% Genome Biology 19(1), 2018: crowdsourced benchmark of somatic structural
% variant detection (doi 10.1186/s13059-018-1539-5).
% - The consortium author keeps its own brace group so BibTeX treats it as a
%   single indivisible name instead of splitting it into first/last parts.
% - Personal names use the "Last, First" form without extra braces so that
%   styles can derive initials from the given names.
% - In the note field, the hash sign is written as \# because LaTeX rejects a
%   bare hash in running text, and the dash is written as --- to stay ASCII.
@article{55ac9af02ced4559aa2cfea35872cadb,
  author    = {{ICGC-TCGA DREAM Somatic Mutation Calling Challenge Participants}
               and Lee, Anna Y.
               and Ewing, Adam D.
               and Ellrott, Kyle
               and Hu, Yin
               and Houlahan, Kathleen E.
               and Bare, J. Christopher
               and Espiritu, Shadrielle Melijah G.
               and Huang, Vincent
               and Dang, Kristen
               and Chong, Zechen
               and Caloian, Cristian
               and Yamaguchi, Takafumi N.
               and Kellen, Michael R.
               and Chen, Ken
               and Norman, Thea C.
               and Friend, Stephen H.
               and Guinney, Justin
               and Stolovitzky, Gustavo
               and Haussler, David
               and Margolin, Adam A.
               and Stuart, Joshua M.
               and Boutros, Paul C.
               and Barnes, Bret D.
               and Birol, Inanc
               and Chen, Xiaoyu
               and Chiu, Readman
               and Cox, Anthony J.
               and Ding, Li
               and Fritz, Markus H. Y.
               and Grigoriev, Andrey
               and Hach, Faraz
               and Kawash, Joseph K.
               and Korbel, Jan O.
               and Kruglyak, Semyon
               and Liao, Yang
               and McPherson, Andrew
               and Nip, Ka Ming
               and Rausch, Tobias
               and Sahinalp, S. Cenk
               and Sarrafi, Iman
               and Saunders, Christopher T.
               and Schulz-Trieglaff, Ole
               and Shaw, Richard
               and Shi, Wei
               and Smith, Sean D.
               and Song, Lei
               and Wang, Difei
               and Ye, Kai},
  title     = {Combining accurate tumor genome simulation with crowdsourcing to benchmark somatic structural variant detection},
  journal   = {Genome Biology},
  year      = {2018},
  month     = nov,
  day       = {6},
  volume    = {19},
  number    = {1},
  doi       = {10.1186/s13059-018-1539-5},
  issn      = {1474-7596},
  publisher = {BioMed Central},
  language  = {English (US)},
  keywords  = {Benchmarking, Cancer genomics, Crowdsourcing, Simulation, Somatic mutations, Structural variants, Whole-genome sequencing},
  abstract  = {Background: The phenotypes of cancer cells are driven in part by somatic structural variants. Structural variants can initiate tumors, enhance their aggressiveness, and provide unique therapeutic opportunities. Whole-genome sequencing of tumors can allow exhaustive identification of the specific structural variants present in an individual cancer, facilitating both clinical diagnostics and the discovery of novel mutagenic mechanisms. A plethora of somatic structural variant detection algorithms have been created to enable these discoveries; however, there are no systematic benchmarks of them. Rigorous performance evaluation of somatic structural variant detection methods has been challenged by the lack of gold standards, extensive resource requirements, and difficulties arising from the need to share personal genomic information. Results: To facilitate structural variant detection algorithm evaluations, we create a robust simulation framework for somatic structural variants by extending the BAMSurgeon algorithm. We then organize and enable a crowdsourced benchmarking within the ICGC-TCGA DREAM Somatic Mutation Calling Challenge (SMC-DNA). We report here the results of structural variant benchmarking on three different tumors, comprising 204 submissions from 15 teams. In addition to ranking methods, we identify characteristic error profiles of individual algorithms and general trends across them. Surprisingly, we find that ensembles of analysis pipelines do not always outperform the best individual method, indicating a need for new ways to aggregate somatic structural variant detection approaches. Conclusions: The synthetic tumors and somatic structural variant detection leaderboards remain available as a community benchmarking resource, and BAMSurgeon is available at https://github.com/adamewing/bamsurgeon.},
  note      = {Funding Information: This study was conducted with the support of the Ontario Institute for Cancer Research to P.C.B. through funding provided by the Government of Ontario. This work was supported by Prostate Cancer Canada and is proudly funded by the Movember Foundation---Grant \#RS2014-01. This study was conducted with the support of Movember funds through Prostate Cancer Canada and with the additional support of the Ontario Institute for Cancer Research, funded by the Government of Ontario. This project was supported by Genome Canada through a Large-Scale Applied Project contract to P.C.B., S.P. Shah, and R.D. Morin. This work was supported by the Discovery Frontiers: Advancing Big Data Science in Genomics Research program, which is jointly funded by the Natural Sciences and Engineering Research Council (NSERC) of Canada, the Canadian Institutes of Health Research (CIHR), Genome Canada, and the Canada Foundation for Innovation (CFI). P.C.B. was supported by a Terry Fox Research Institute New Investigator Award and a CIHR New Investigator Award. K.E.H. was supported by a CIHR Computational Biology Undergraduate Summer Student Health Research Award. A.D.E was supported by an Australian Research Council Discovery Early Career Researcher Award DE150101117 and by the Mater Foundation. The following National Institutes of Health (NIH) grants supported this work: R01-CA180778 (J.M.S.) and U24-CA143858 (J.M.S.). The funders played no role in study design, data collection, data analysis, data interpretation, or in writing of this manuscript. Publisher Copyright: {\textcopyright} 2018 The Author(s).},
}