forked from compgenomr/book
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbook.bib
executable file
·5144 lines (4930 loc) · 347 KB
/
book.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
% Textbook: The Elements of Statistical Learning (2001).
% The former volume/number pair was dropped: a book entry may carry only one
% of them, and the fused publisher string is split into its proper fields.
@book{friedman2001elements,
  author    = {Friedman, Jerome and Hastie, Trevor and Tibshirani, Robert},
  title     = {The Elements of Statistical Learning},
  series    = {Springer Series in Statistics},
  publisher = {Springer},
  address   = {New York},
  year      = {2001}
}
% Hsu, Chang and Lin (2003): practical guide to support vector classification.
% Recorded as a technical report because the work has no journal.
@techreport{hsu2003practical,
  author      = {Hsu, Chih-Wei and Chang, Chih-Chung and Lin, Chih-Jen},
  title       = {A practical guide to support vector classification},
  institution = {Department of Computer Science, National Taiwan University},
  address     = {Taipei},
  year        = {2003}
}
% LeCun, Bengio and Hinton (2015): the "Deep learning" review article.
@article{lecun2015deep,
  author    = {LeCun, Yann and Bengio, Yoshua and Hinton, Geoffrey},
  title     = {Deep learning},
  journal   = {Nature},
  volume    = {521},
  number    = {7553},
  pages     = {436--444},
  year      = {2015},
  publisher = {Nature Publishing Group}
}
% Boser, Guyon and Vapnik (1992): training algorithm for optimal margin classifiers.
@inproceedings{boser1992svm,
  author    = {Boser, Bernhard E and Guyon, Isabelle M and Vapnik, Vladimir N},
  title     = {A training algorithm for optimal margin classifiers},
  booktitle = {Proceedings of the Fifth Annual Workshop on Computational Learning Theory},
  pages     = {144--152},
  year      = {1992},
  publisher = {ACM}
}
% Friedman and Meulman (2003): multiple additive regression trees in epidemiology.
@article{friedman2003mart,
  author    = {Friedman, Jerome H and Meulman, Jacqueline J},
  title     = {Multiple additive regression trees with application in epidemiology},
  journal   = {Statistics in Medicine},
  volume    = {22},
  number    = {9},
  pages     = {1365--1381},
  year      = {2003},
  publisher = {Wiley Online Library}
}
% Elith, Leathwick and Hastie (2008): working guide to boosted regression trees.
@article{elith2008brt,
  author    = {Elith, Jane and Leathwick, John R and Hastie, Trevor},
  title     = {A working guide to boosted regression trees},
  journal   = {Journal of Animal Ecology},
  year      = {2008},
  volume    = {77},
  number    = {4},
  pages     = {802--813},
  publisher = {Wiley Online Library}
}
% Friedman (2001): greedy function approximation, the gradient boosting machine.
@article{friedman2001gbm,
  author    = {Friedman, Jerome H},
  title     = {Greedy function approximation: a gradient boosting machine},
  journal   = {The Annals of Statistics},
  volume    = {29},
  number    = {5},
  pages     = {1189--1232},
  year      = {2001},
  publisher = {JSTOR}
}
% Chen and Guestrin (2016): XGBoost, a scalable tree boosting system.
@inproceedings{chen2016xgboost,
  author    = {Chen, Tianqi and Guestrin, Carlos},
  title     = {{XGBoost}: A scalable tree boosting system},
  booktitle = {Proceedings of the 22nd {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining},
  pages     = {785--794},
  year      = {2016},
  publisher = {ACM}
}
% Horvath (2013): DNA methylation age of human tissues and cell types.
@article{horvath2013dna,
  author    = {Horvath, Steve},
  title     = {{DNA} methylation age of human tissues and cell types},
  journal   = {Genome Biology},
  volume    = {14},
  number    = {10},
  pages     = {3156},
  year      = {2013},
  publisher = {BioMed Central}
}
% Numata et al. (2012): DNA methylation signatures in the human prefrontal cortex.
@article{numata2012dna,
  author    = {Numata, Shusuke and Ye, Tianzhang and Hyde, Thomas M and Guitart-Navarro, Xavier and Tao, Ran and Wininger, Michael and Colantuoni, Carlo and Weinberger, Daniel R and Kleinman, Joel E and Lipska, Barbara K},
  title     = {{DNA} methylation signatures in development and aging of the human prefrontal cortex},
  journal   = {The American Journal of Human Genetics},
  volume    = {90},
  number    = {2},
  pages     = {260--272},
  year      = {2012},
  publisher = {Elsevier}
}
% Zou and Hastie (2005): regularization and variable selection via the elastic net.
@article{zou2005regularization,
  author    = {Zou, Hui and Hastie, Trevor},
  title     = {Regularization and variable selection via the elastic net},
  journal   = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
  volume    = {67},
  number    = {2},
  pages     = {301--320},
  year      = {2005},
  publisher = {Wiley Online Library}
}
% Friedman, Hastie and Tibshirani (2010): regularization paths for GLMs via coordinate descent.
@article{friedman2010regularization,
  author    = {Friedman, Jerome and Hastie, Trevor and Tibshirani, Rob},
  title     = {Regularization paths for generalized linear models via coordinate descent},
  journal   = {Journal of Statistical Software},
  volume    = {33},
  number    = {1},
  pages     = {1--22},
  year      = {2010},
  publisher = {NIH Public Access}
}
% Tibshirani (1996): regression shrinkage and selection via the lasso.
@article{tibshirani1996regression,
  author    = {Tibshirani, Robert},
  title     = {Regression shrinkage and selection via the lasso},
  journal   = {Journal of the Royal Statistical Society: Series B (Methodological)},
  year      = {1996},
  volume    = {58},
  number    = {1},
  pages     = {267--288},
  publisher = {Wiley Online Library}
}
% Hoerl and Kennard (1970): ridge regression for nonorthogonal problems.
@article{hoerl1970ridge,
  author    = {Hoerl, Arthur E and Kennard, Robert W},
  title     = {Ridge regression: Biased estimation for nonorthogonal problems},
  journal   = {Technometrics},
  year      = {1970},
  volume    = {12},
  number    = {1},
  pages     = {55--67},
  publisher = {Taylor \& Francis Group}
}
% Breiman (2001): random forests.
@article{breiman2001random,
  author    = {Breiman, Leo},
  title     = {Random forests},
  journal   = {Machine Learning},
  volume    = {45},
  number    = {1},
  pages     = {5--32},
  year      = {2001},
  publisher = {Springer}
}
% Chawla et al. (2002): SMOTE, synthetic minority over-sampling technique.
@article{smote,
  author  = {Chawla, Nitesh V and Bowyer, Kevin W and Hall, Lawrence O and Kegelmeyer, W Philip},
  title   = {{SMOTE}: synthetic minority over-sampling technique},
  journal = {Journal of Artificial Intelligence Research},
  volume  = {16},
  pages   = {321--357},
  year    = {2002}
}
% Libbrecht and Noble (2015): machine learning applications in genetics and genomics.
@article{enhancerImbalance,
  author  = {Libbrecht, M. W. and Noble, W. S.},
  title   = {Machine learning applications in genetics and genomics},
  journal = {Nat. Rev. Genet.},
  year    = {2015},
  volume  = {16},
  number  = {6},
  pages   = {321--332},
  month   = jun
}
% Fisher, Rudin and Dominici (2018): variable importance via Model Class Reliance.
@article{mcr,
  author  = {Fisher, Aaron and Rudin, Cynthia and Dominici, Francesca},
  title   = {All Models are Wrong but many are Useful: Variable Importance for Black-Box, Proprietary, or Misspecified Prediction Models, using Model Class Reliance},
  journal = {arXiv preprint arXiv:1801.01489},
  year    = {2018}
}
% Biecek (2018): DALEX, explainers for complex predictive models in R.
@article{dalex,
  author  = {Biecek, Przemyslaw},
  title   = {{DALEX}: Explainers for Complex Predictive Models in {R}},
  journal = {Journal of Machine Learning Research},
  year    = {2018},
  volume  = {19},
  number  = {84},
  pages   = {1--5},
  url     = {http://jmlr.org/papers/v19/18-416.html}
}
% Noushmehr et al. (2010): CpG island methylator phenotype defining a glioma subgroup.
@article{pmid20399149,
  author  = {Noushmehr, H. and Weisenberger, D. J. and Diefes, K. and Phillips, H. S. and Pujara, K. and Berman, B. P. and Pan, F. and Pelloski, C. E. and Sulman, E. P. and Bhat, K. P. and Verhaak, R. G. and Hoadley, K. A. and Hayes, D. N. and Perou, C. M. and Schmidt, H. K. and Ding, L. and Wilson, R. K. and Van Den Berg, D. and Shen, H. and Bengtsson, H. and Neuvial, P. and Cope, L. M. and Buckley, J. and Herman, J. G. and Baylin, S. B. and Laird, P. W. and Aldape, K.},
  title   = {Identification of a {CpG} island methylator phenotype that defines a distinct subgroup of glioma},
  journal = {Cancer Cell},
  year    = {2010},
  volume  = {17},
  number  = {5},
  pages   = {510--522},
  month   = may
}
% Kourou et al. (2015): machine learning applications in cancer prognosis and prediction.
@article{pmid25750696,
  author  = {Kourou, K. and Exarchos, T. P. and Exarchos, K. P. and Karamouzis, M. V. and Fotiadis, D. I.},
  title   = {Machine learning applications in cancer prognosis and prediction},
  journal = {Comput Struct Biotechnol J},
  year    = {2015},
  volume  = {13},
  pages   = {8--17}
}
% Poplin et al. (2018): universal SNP and small-indel variant caller using deep neural networks.
@article{pmid30247488,
  author  = {Poplin, R. and Chang, P. C. and Alexander, D. and Schwartz, S. and Colthurst, T. and Ku, A. and Newburger, D. and Dijamco, J. and Nguyen, N. and Afshar, P. T. and Gross, S. S. and Dorfman, L. and McLean, C. Y. and DePristo, M. A.},
  title   = {A universal {SNP} and small-indel variant caller using deep neural networks},
  journal = {Nat. Biotechnol.},
  year    = {2018},
  volume  = {36},
  number  = {10},
  pages   = {983--987},
  month   = nov
}
% 26301843
% Zhou and Troyanskaya (2015): predicting effects of noncoding variants with deep learning.
@article{pmid26301843,
  author  = {Zhou, J. and Troyanskaya, O. G.},
  title   = {Predicting effects of noncoding variants with deep learning-based sequence model},
  journal = {Nat. Methods},
  year    = {2015},
  volume  = {12},
  number  = {10},
  pages   = {931--934},
  month   = oct
}
% Wang, McLeod and Weinshilboum (2011): genomics and drug response.
@article{pmid21428770,
  author  = {Wang, L. and McLeod, H. L. and Weinshilboum, R. M.},
  title   = {Genomics and drug response},
  journal = {N. Engl. J. Med.},
  year    = {2011},
  volume  = {364},
  number  = {12},
  pages   = {1144--1153},
  month   = mar
}
% Fernandez and Miranda-Saavedra (2012): genome-wide enhancer prediction with GA-optimized SVMs.
@article{pmid22328731,
  author  = {Fernandez, M. and Miranda-Saavedra, D.},
  title   = {Genome-wide enhancer prediction from epigenetic signatures using genetic algorithm-optimized support vector machines},
  journal = {Nucleic Acids Res.},
  year    = {2012},
  volume  = {40},
  number  = {10},
  pages   = {e77},
  month   = may
}
% Dong et al. (2012): modeling gene expression using chromatin features.
@article{pmid22950368,
  author  = {Dong, X. and Greven, M. C. and Kundaje, A. and Djebali, S. and Brown, J. B. and Cheng, C. and Gingeras, T. R. and Gerstein, M. and Guigo, R. and Birney, E. and Weng, Z.},
  title   = {Modeling gene expression using chromatin features in various cellular contexts},
  journal = {Genome Biol.},
  year    = {2012},
  volume  = {13},
  number  = {9},
  pages   = {R53},
  month   = jun
}
% Mathe et al. (2002): current methods of gene prediction, strengths and weaknesses.
@article{pmid12364589,
  author  = {Mathe, C. and Sagot, M. F. and Schiex, T. and Rouze, P.},
  title   = {Current methods of gene prediction, their strengths and weaknesses},
  journal = {Nucleic Acids Res.},
  year    = {2002},
  volume  = {30},
  number  = {19},
  pages   = {4103--4117},
  month   = oct
}
% Park et al. (2014): MethylSig, a whole genome DNA methylation analysis pipeline.
@article{Park2014-sr,
  author   = {Park, Yongseok and Figueroa, Maria E and Rozek, Laura S and
              Sartor, Maureen A},
  title    = {{MethylSig}: a whole genome {DNA} methylation analysis pipeline},
  journal  = {Bioinformatics},
  year     = {2014},
  month    = sep,
  volume   = {30},
  number   = {17},
  pages    = {2414--2422},
  language = {en},
  abstract = {MOTIVATION: DNA methylation plays critical roles in gene
              regulation and cellular specification without altering DNA
              sequences. The wide application of reduced representation
              bisulfite sequencing (RRBS) and whole genome bisulfite sequencing
              (bis-seq) opens the door to study DNA methylation at single CpG
              site resolution. One challenging question is how best to test for
              significant methylation differences between groups of biological
              samples in order to minimize false positive findings. RESULTS: We
              present a statistical analysis package, methylSig, to analyse
              genome-wide methylation differences between samples from
              different treatments or disease groups. MethylSig takes into
              account both read coverage and biological variation by utilizing
              a beta-binomial approach across biological samples for a CpG site
              or region, and identifies relevant differences in CpG
              methylation. It can also incorporate local information to improve
              group methylation level and/or variance estimation for
              experiments with small sample size. A permutation study based on
              data from enhanced RRBS samples shows that methylSig maintains a
              well-calibrated type-I error when the number of samples is three
              or more per group. Our simulations show that methylSig has higher
              sensitivity compared with several alternative methods. The use of
              methylSig is illustrated with a comparison of different subtypes
              of acute leukemia and normal bone marrow samples. AVAILABILITY:
              methylSig is available as an R package at
              http://sartorlab.ccmb.med.umich.edu/software. SUPPLEMENTARY
              INFORMATION: Supplementary data are available at Bioinformatics
              online.}
}
% Xie et al. (2013): epigenomic analysis of multilineage differentiation of human ES cells.
@article{Xie2013-cf,
  author   = {Xie, Wei and Schultz, Matthew D and Lister, Ryan and Hou,
              Zhonggang and Rajagopal, Nisha and Ray, Pradipta and Whitaker,
              John W and Tian, Shulan and Hawkins, R David and Leung, Danny and
              Yang, Hongbo and Wang, Tao and Lee, Ah Young and Swanson, Scott A
              and Zhang, Jiuchun and Zhu, Yun and Kim, Audrey and Nery, Joseph
              R and Urich, Mark A and Kuan, Samantha and Yen, Chia-An and
              Klugman, Sarit and Yu, Pengzhi and Suknuntha, Kran and Propson,
              Nicholas E and Chen, Huaming and Edsall, Lee E and Wagner, Ulrich
              and Li, Yan and Ye, Zhen and Kulkarni, Ashwinikumar and Xuan,
              Zhenyu and Chung, Wen-Yu and Chi, Neil C and Antosiewicz-Bourget,
              Jessica E and Slukvin, Igor and Stewart, Ron and Zhang, Michael Q
              and Wang, Wei and Thomson, James A and Ecker, Joseph R and Ren,
              Bing},
  title    = {Epigenomic analysis of multilineage differentiation of human
              embryonic stem cells},
  journal  = {Cell},
  year     = {2013},
  month    = may,
  volume   = {153},
  number   = {5},
  pages    = {1134--1148},
  language = {en},
  abstract = {Epigenetic mechanisms have been proposed to play crucial roles in
              mammalian development, but their precise functions are only
              partially understood. To investigate epigenetic regulation of
              embryonic development, we differentiated human embryonic stem
              cells into mesendoderm, neural progenitor cells, trophoblast-like
              cells, and mesenchymal stem cells and systematically
              characterized DNA methylation, chromatin modifications, and the
              transcriptome in each lineage. We found that promoters that are
              active in early developmental stages tend to be CG rich and
              mainly engage H3K27me3 upon silencing in nonexpressing lineages.
              By contrast, promoters for genes expressed preferentially at
              later stages are often CG poor and primarily employ DNA
              methylation upon repression. Interestingly, the early
              developmental regulatory genes are often located in large genomic
              domains that are generally devoid of DNA methylation in most
              lineages, which we termed DNA methylation valleys (DMVs). Our
              results suggest that distinct epigenetic mechanisms regulate
              early and late stages of ES cell differentiation.}
}
% Xie et al. (2013): dynamic trans-acting factor colocalization in human cells.
@article{Xie2013-ol,
  author   = {Xie, Dan and Boyle, Alan P and Wu, Linfeng and Zhai, Jie and
              Kawli, Trupti and Snyder, Michael},
  title    = {Dynamic trans-acting factor colocalization in human cells},
  journal  = {Cell},
  year     = {2013},
  month    = oct,
  volume   = {155},
  number   = {3},
  pages    = {713--724},
  language = {en},
  abstract = {Different trans-acting factors (TFs) collaborate and act in
              concert at distinct loci to perform accurate regulation of their
              target genes. To date, the cobinding of TF pairs has been
              investigated in a limited context both in terms of the number of
              factors within a cell type and across cell types and the extent
              of combinatorial colocalizations. Here, we use an approach to
              analyze TF colocalization within a cell type and across multiple
              cell lines at an unprecedented level. We extend this approach
              with large-scale mass spectrometry analysis of
              immunoprecipitations of 50 TFs. Our combined approach reveals
              large numbers of interesting TF-TF associations. We observe
              extensive change in TF colocalizations both within a cell type
              exposed to different conditions and across multiple cell types.
              We show distinct functional annotations and properties of
              different TF cobinding patterns and provide insights into the
              complex regulatory landscape of the cell.}
}
% Landan et al. (2012): epigenetic polymorphism and stochastic formation of DMRs.
@article{Landan2012-id,
  author  = {Landan, Gilad and Cohen, Netta Mendelson and Mukamel, Zohar and
             Bar, Amir and Molchadsky, Alina and Brosh, Ran and Horn-Saban,
             Shirley and Zalcenstein, Daniela Amann and Goldfinger, Naomi and
             Zundelevich, Adi and Gal-Yam, Einav Nili and Rotter, Varda and
             Tanay, Amos},
  title   = {Epigenetic polymorphism and the stochastic formation of
             differentially methylated regions in normal and cancerous tissues},
  journal = {Nat. Genet.},
  year    = {2012},
  volume  = {44},
  number  = {11},
  pages   = {1207--1214}
}
% Bock (2012): review on analysing and interpreting DNA methylation data.
@article{Bock2012-zm,
  author   = {Bock, Christoph},
  title    = {Analysing and interpreting {DNA} methylation data},
  journal  = {Nat. Rev. Genet.},
  year     = {2012},
  month    = oct,
  volume   = {13},
  number   = {10},
  pages    = {705--719},
  language = {en},
  abstract = {DNA methylation is an epigenetic mark that has suspected
              regulatory roles in a broad range of biological processes and
              diseases. The technology is now available for studying DNA
              methylation genome-wide, at a high resolution and in a large
              number of samples. This Review discusses relevant concepts,
              computational methods and software tools for analysing and
              interpreting DNA methylation data. It focuses not only on the
              bioinformatic challenges of large epigenome-mapping projects and
              epigenome-wide association studies but also highlights software
              tools that make genome-wide DNA methylation mapping more
              accessible for laboratories with limited bioinformatics
              experience.}
}
% Akalin et al. (2015): genomation, a toolkit for genomic intervals.
% NOTE(review): the abstract previously ended with a dangling CONTACT: label
% and no closing delimiter, which broke parsing of this entry. The empty label
% was dropped; restore the contact text if the upstream record has it.
@article{Akalin2015-yk,
  author   = {Akalin, Altuna and Franke, Vedran and Vlahovi{\v c}ek, Kristian
              and Mason, Christopher E and Sch{\"u}beler, Dirk},
  title    = {{genomation}: a toolkit to summarize, annotate and visualize
              genomic intervals},
  journal  = {Bioinformatics},
  year     = {2015},
  month    = apr,
  volume   = {31},
  number   = {7},
  pages    = {1127--1129},
  abstract = {UNLABELLED: Biological insights can be obtained through
              computational integration of genomics data sets consisting of
              diverse types of information. The integration is often hampered
              by a large variety of existing file formats, often containing
              similar information, and the necessity to use complicated tools
              to achieve the desired results. We have built an R package,
              genomation, to expedite the extraction of biological information
              from high throughput data. The package works with a variety of
              genomic interval file types and enables easy summarization and
              annotation of high throughput data sets with given genomic
              annotations. AVAILABILITY AND IMPLEMENTATION: The software is
              currently distributed under MIT artistic license and freely
              available at http://bioinformatics.mdc-berlin.de/genomation, and
              through the Bioconductor framework.}
}
% Wreczycka et al. (2017): examining the basis of high-occupancy target (HOT) regions.
@misc{Wreczycka2017-yt,
  author = {Wreczycka, Katarzyna and Franke, Vedran and Uyar, Bora and Wurmus,
            Ricardo and Akalin, Altuna},
  title  = {{HOT} or not: Examining the basis of high-occupancy target regions},
  year   = {2017}
}
% Akalin et al. (2012): base-pair resolution DNA methylation sequencing in AML.
@article{Akalin2012-ve,
  author   = {Akalin, Altuna and Garrett-Bakelman, Francine E and Kormaksson,
              Matthias and Busuttil, Jennifer and Zhang, Lu and Khrebtukova,
              Irina and Milne, Thomas A and Huang, Yongsheng and Biswas,
              Debabrata and Hess, Jay L and Allis, C David and Roeder, Robert G
              and Valk, Peter J M and L{\"o}wenberg, Bob and Delwel, Ruud and
              Fernandez, Hugo F and Paietta, Elisabeth and Tallman, Martin S
              and Schroth, Gary P and Mason, Christopher E and Melnick, Ari and
              Figueroa, Maria E},
  title    = {Base-pair resolution {DNA} methylation sequencing reveals
              profoundly divergent epigenetic landscapes in acute myeloid
              leukemia},
  journal  = {PLoS Genet.},
  year     = {2012},
  month    = jun,
  volume   = {8},
  number   = {6},
  pages    = {e1002781},
  abstract = {We have developed an enhanced form of reduced representation
              bisulfite sequencing with extended genomic coverage, which
              resulted in greater capture of DNA methylation information of
              regions lying outside of traditional CpG islands. Applying this
              method to primary human bone marrow specimens from patients with
              Acute Myelogeneous Leukemia (AML), we demonstrated that
              genetically distinct AML subtypes display diametrically opposed
              DNA methylation patterns. As compared to normal controls, we
              observed widespread hypermethylation in IDH mutant AMLs,
              preferentially targeting promoter regions and CpG islands
              neighboring the transcription start sites of genes. In contrast,
              AMLs harboring translocations affecting the MLL gene displayed
              extensive loss of methylation of an almost mutually exclusive set
              of CpGs, which instead affected introns and distal intergenic CpG
              islands and shores. When analyzed in conjunction with gene
              expression profiles, it became apparent that these specific
              patterns of DNA methylation result in differing roles in gene
              expression regulation. However, despite this subtype-specific DNA
              methylation patterning, a much smaller set of CpG sites are
              consistently affected in both AML subtypes. Most CpG sites in
              this common core of aberrantly methylated CpGs were
              hypermethylated in both AML subtypes. Therefore, aberrant DNA
              methylation patterns in AML do not occur in a stereotypical
              manner but rather are highly specific and associated with
              specific driving genetic lesions.}
}
% Saito, Tsuji and Mituyama (2014): Bisulfighter, detection of methylated cytosines and DMRs.
@article{Saito2014-ij,
  author   = {Saito, Yutaka and Tsuji, Junko and Mituyama, Toutai},
  title    = {Bisulfighter: accurate detection of methylated cytosines and
              differentially methylated regions},
  journal  = {Nucleic Acids Res.},
  year     = {2014},
  month    = apr,
  volume   = {42},
  number   = {6},
  pages    = {e45},
  language = {en},
  abstract = {Analysis of bisulfite sequencing data usually requires two tasks:
              to call methylated cytosines (mCs) in a sample, and to detect
              differentially methylated regions (DMRs) between paired samples.
              Although numerous tools have been proposed for mC calling,
              methods for DMR detection have been largely limited. Here, we
              present Bisulfighter, a new software package for detecting mCs
              and DMRs from bisulfite sequencing data. Bisulfighter combines
              the LAST alignment tool for mC calling, and a novel framework for
              DMR detection based on hidden Markov models (HMMs). Unlike
              previous attempts that depend on empirical parameters,
              Bisulfighter can use the expectation-maximization algorithm for
              HMMs to adjust parameters for each data set. We conduct extensive
              experiments in which accuracy of mC calling and DMR detection is
              evaluated on simulated data with various mC contexts, read
              qualities, sequencing depths and DMR lengths, as well as on real
              data from a wide range of biological processes. We demonstrate
              that Bisulfighter consistently achieves better accuracy than
              other published tools, providing greater sensitivity for mCs with
              fewer false positives, more precise estimates of mC levels, more
              exact locations of DMRs and better agreement of DMRs with gene
              expression and DNase I hypersensitivity. The source code is
              available at http://epigenome.cbrc.jp/bisulfighter.}
}
% Frith, Mori and Asai (2012): improved alignment of bisulfite-converted DNA.
@article{Frith2012-ne,
  author   = {Frith, Martin C and Mori, Ryota and Asai, Kiyoshi},
  title    = {A mostly traditional approach improves alignment of
              bisulfite-converted {DNA}},
  journal  = {Nucleic Acids Res.},
  year     = {2012},
  month    = jul,
  volume   = {40},
  number   = {13},
  pages    = {e100},
  language = {en},
  abstract = {Cytosines in genomic DNA are sometimes methylated. This affects
              many biological processes and diseases. The standard way of
              measuring methylation is to use bisulfite, which converts
              unmethylated cytosines to thymines, then sequence the DNA and
              compare it to a reference genome sequence. We describe a method
              for the critical step of aligning the DNA reads to the correct
              genomic locations. Our method builds on classic alignment
              techniques, including likelihood-ratio scores and spaced seeds.
              In a realistic benchmark, our method has a better combination of
              sensitivity, specificity and speed than nine other
              high-throughput bisulfite aligners. This study enables more
              accurate and rational analysis of DNA methylation. It also
              illustrates how to adapt general-purpose alignment methods to a
              special case with distorted base patterns: this should be
              informative for other special cases such as ancient DNA and
              AT-rich genomes.}
}
% Hovestadt et al. (2014): regulatory landscape of medulloblastoma via DNA methylation sequencing.
@article{Hovestadt2014-kd,
  author   = {Hovestadt, Volker and Jones, David T W and Picelli, Simone and
              Wang, Wei and Kool, Marcel and Northcott, Paul A and Sultan, Marc
              and Stachurski, Katharina and Ryzhova, Marina and Warnatz,
              Hans-J{\"o}rg and Ralser, Meryem and Brun, Sonja and Bunt, Jens
              and J{\"a}ger, Natalie and Kleinheinz, Kortine and Erkek, Serap
              and Weber, Ursula D and Bartholomae, Cynthia C and von Kalle,
              Christof and Lawerenz, Chris and Eils, J{\"u}rgen and Koster, Jan
              and Versteeg, Rogier and Milde, Till and Witt, Olaf and Schmidt,
              Sabine and Wolf, Stephan and Pietsch, Torsten and Rutkowski,
              Stefan and Scheurlen, Wolfram and Taylor, Michael D and Brors,
              Benedikt and Felsberg, J{\"o}rg and Reifenberger, Guido and
              Borkhardt, Arndt and Lehrach, Hans and Wechsler-Reya, Robert J
              and Eils, Roland and Yaspo, Marie-Laure and Landgraf, Pablo and
              Korshunov, Andrey and Zapatka, Marc and Radlwimmer, Bernhard and
              Pfister, Stefan M and Lichter, Peter},
  title    = {Decoding the regulatory landscape of medulloblastoma using {DNA}
              methylation sequencing},
  journal  = {Nature},
  year     = {2014},
  month    = jun,
  volume   = {510},
  number   = {7506},
  pages    = {537--541},
  language = {en},
  abstract = {Epigenetic alterations, that is, disruption of DNA methylation
              and chromatin architecture, are now acknowledged as a universal
              feature of tumorigenesis. Medulloblastoma, a clinically
              challenging, malignant childhood brain tumour, is no exception.
              Despite much progress from recent genomics studies, with
              recurrent changes identified in each of the four distinct tumour
              subgroups (WNT-pathway-activated, SHH-pathway-activated, and the
              less-well-characterized Group 3 and Group 4), many cases still
              lack an obvious genetic driver. Here we present whole-genome
              bisulphite-sequencing data from thirty-four human and five murine
              tumours plus eight human and three murine normal controls,
              augmented with matched whole-genome, RNA and chromatin
              immunoprecipitation sequencing data. This comprehensive data set
              allowed us to decipher several features underlying the interplay
              between the genome, epigenome and transcriptome, and its effects
              on medulloblastoma pathophysiology. Most notable were highly
              prevalent regions of hypomethylation correlating with increased
              gene expression, extending tens of kilobases downstream of
              transcription start sites. Focal regions of low methylation
              linked to transcription-factor-binding sites shed light on
              differential transcriptional networks between subgroups, whereas
              increased methylation due to re-normalization of repressed
              chromatin in DNA methylation valleys was positively correlated
              with gene expression. Large, partially methylated domains
              affecting up to one-third of the genome showed increased mutation
              rates and gene silencing in a subgroup-specific fashion.
              Epigenetic alterations also affected novel medulloblastoma
              candidate genes (for example, LIN28B), resulting in alternative
              promoter usage and/or differential messenger RNA/microRNA
              expression. Analysis of mouse medulloblastoma and precursor-cell
              methylation demonstrated a somatic origin for many alterations.
              Our data provide insights into the epigenetic regulation of
              transcription and genome organization in medulloblastoma
              pathogenesis, which are probably also of importance in a wider
              developmental and disease context.}
}
% Stirzaker et al. (2014): mining cancer methylomes, prospects and challenges.
@article{Stirzaker2014-ao,
  author   = {Stirzaker, Clare and Taberlay, Phillippa C and Statham, Aaron L
              and Clark, Susan J},
  title    = {Mining cancer methylomes: prospects and challenges},
  journal  = {Trends Genet.},
  year     = {2014},
  month    = feb,
  volume   = {30},
  number   = {2},
  pages    = {75--84},
  keywords = {DNA methylation; cancer methylome; epigenetics},
  language = {en},
  abstract = {There are over 28 million CpG sites in the human genome.
              Assessing the methylation status of each of these sites will be
              required to understand fully the role of DNA methylation in
              health and disease. Genome-wide analysis, using arrays and
              high-throughput sequencing, has enabled assessment of large
              fractions of the methylome, but each protocol comes with unique
              advantages and disadvantages. Notably, except for whole-genome
              bisulfite sequencing, most commonly used genome-wide methods
              detect <5\% of all CpG sites. Here, we discuss approaches for
              methylome studies and compare genome coverage of promoters,
              genes, and intergenic regions, and capacity to quantitate
              individual CpG methylation states. Finally, we examine the extent
              of published cancer methylomes that have been generated using
              genome-wide approaches.}
}
% Baubec and Akalin (2016): book chapter on genome-wide DNA methylation analysis.
% The publisher field is required for this entry type and was missing.
@incollection{Baubec2016-pt,
  author    = {Baubec, Tuncay and Akalin, Altuna},
  title     = {{Genome-Wide} Analysis of {DNA} Methylation Patterns by
               {High-Throughput} Sequencing},
  booktitle = {Field Guidelines for Genetic Experimental Designs in
               {High-Throughput} Sequencing},
  publisher = {Springer International Publishing},
  pages     = {197--221},
  year      = {2016}
}
% Wang et al. (2015): swDMR, a sliding window approach to identify DMRs from WGBS data.
@article{Wang2015-of,
  author   = {Wang, Zhen and Li, Xianfeng and Jiang, Yi and Shao, Qianzhi and
              Liu, Qi and Chen, Bingyu and Huang, Dongsheng},
  title    = {{swDMR}: A Sliding Window Approach to Identify Differentially
              Methylated Regions Based on Whole Genome Bisulfite Sequencing},
  journal  = {PLoS One},
  year     = {2015},
  month    = jul,
  volume   = {10},
  number   = {7},
  pages    = {e0132866},
  language = {en},
  abstract = {DNA methylation is a widespread epigenetic modification that
              plays an essential role in gene expression through
              transcriptional regulation and chromatin remodeling. The
              emergence of whole genome bisulfite sequencing (WGBS) represents
              an important milestone in the detection of DNA methylation.
              Characterization of differential methylated regions (DMRs) is
              fundamental as well for further functional analysis. In this
              study, we present swDMR (http://sourceforge.net/projects/swDMR/)
              for the comprehensive analysis of DMRs from whole genome
              methylation profiles by a sliding window approach. It is an
              integrated tool designed for WGBS data, which not only implements
              accessible statistical methods to perform hypothesis test adapted
              to two or more samples without replicates, but false discovery
              rate was also controlled by multiple test correction. Downstream
              analysis tools were also provided, including cluster, annotation
              and visualization modules. In summary, based on WGBS data, swDMR
              can produce abundant information of differential methylated
              regions. As a convenient and flexible tool, we believe swDMR will
              bring us closer to unveil the potential functional regions
              involved in epigenetic regulation.}
}
@ARTICLE{Booth2012-nl,
  author   = {Booth, Michael J and Branco, Miguel R and Ficz, Gabriella and
              Oxley, David and Krueger, Felix and Reik, Wolf and
              Balasubramanian, Shankar},
  title    = {Quantitative sequencing of 5-methylcytosine and
              5-hydroxymethylcytosine at single-base resolution},
  journal  = {Science},
  volume   = {336},
  number   = {6083},
  pages    = {934--937},
  month    = may,
  year     = {2012},
  language = {en},
  abstract = {5-Methylcytosine can be converted to 5-hydroxymethylcytosine
              (5hmC) in mammalian DNA by the ten-eleven translocation (TET)
              enzymes. We introduce oxidative bisulfite sequencing (oxBS-Seq),
              the first method for quantitative mapping of 5hmC in genomic DNA
              at single-nucleotide resolution. Selective chemical oxidation of
              5hmC to 5-formylcytosine (5fC) enables bisulfite conversion of
              5fC to uracil. We demonstrate the utility of oxBS-Seq to map and
              quantify 5hmC at CpG islands (CGIs) in mouse embryonic stem (ES)
              cells and identify 800 5hmC-containing CGIs that have on average
              3.3\% hydroxymethylation. High levels of 5hmC were found in CGIs
              associated with transcriptional regulators and in long
              interspersed nuclear elements, suggesting that these regions
              might undergo epigenetic reprogramming in ES cells. Our results
              open new questions on 5hmC dynamics and sequence-specific
              targeting by TETs.}
}
@ARTICLE{Harris2012-hz,
  author   = {Harris, Elena Y and Ponts, Nadia and Le Roch, Karine G and
              Lonardi, Stefano},
  title    = {{BRAT-BW}: efficient and accurate mapping of bisulfite-treated
              reads},
  journal  = {Bioinformatics},
  volume   = {28},
  number   = {13},
  pages    = {1795--1796},
  month    = jul,
  year     = {2012},
  language = {en},
  abstract = {SUMMARY: We introduce BRAT-BW, a fast, accurate and
              memory-efficient tool that maps bisulfite-treated short reads
              (BS-seq) to a reference genome using the FM-index
              (Burrows-Wheeler transform). BRAT-BW is significantly more memory
              efficient and faster on longer reads than current
              state-of-the-art tools for BS-seq data, without compromising on
              accuracy. BRAT-BW is a part of a software suite for genome-wide
              single base-resolution methylation data analysis that supports
              single and paired-end reads and includes a tool for estimation of
              methylation level at each cytosine. AVAILABILITY: The software is
              available in the public domain at
              http://compbio.cs.ucr.edu/brat/.}
}
@ARTICLE{Yu2012-wm,
  author   = {Yu, Miao and Hon, Gary C and Szulwach, Keith E and Song,
              Chun-Xiao and Jin, Peng and Ren, Bing and He, Chuan},
  title    = {Tet-assisted bisulfite sequencing of 5-hydroxymethylcytosine},
  journal  = {Nat. Protoc.},
  volume   = {7},
  number   = {12},
  pages    = {2159--2170},
  month    = dec,
  year     = {2012},
  language = {en},
  abstract = {A complete understanding of the potential function of
              5-hydroxymethylcytosine (5-hmC), a DNA cytosine modification in
              mammalian cells, requires an accurate single-base resolution
              sequencing method. Here we describe a modified
              bisulfite-sequencing method, Tet-assisted bisulfite sequencing
              (TAB-seq), which can identify 5-hmC at single-base resolution, as
              well as determine its abundance at each modification site. This
              protocol involves $\beta$-glucosyltransferase
              ($\beta$-GT)-mediated protection of 5-hmC (glucosylation) and
              recombinant mouse Tet1(mTet1)-mediated oxidation of
              5-methylcytosine (5-mC) to 5-carboxylcytosine (5-caC). After the
              subsequent bisulfite treatment and PCR amplification, both
              cytosine and 5-caC (derived from 5-mC) are converted to thymine
              (T), whereas 5-hmC reads as C. The treated genomic DNA is
              suitable for both whole-genome and locus-specific sequencing. The
              entire procedure (which does not include data analysis) can be
              completed in 14 d for whole-genome sequencing or 7 d for
              locus-specific sequencing.}
}
% Bioconductor is brace-protected in the title so that bibliography styles
% which convert titles to sentence case keep its capital letter.
@ARTICLE{Zhu2010-ny,
  author   = {Zhu, Lihua J and Gazin, Claude and Lawson, Nathan D and
              Pag{\`e}s, Herv{\'e} and Lin, Simon M and Lapointe, David S and
              Green, Michael R},
  title    = {{ChIPpeakAnno}: a {Bioconductor} package to annotate {ChIP-seq}
              and {ChIP-chip} data},
  journal  = {BMC Bioinformatics},
  volume   = {11},
  pages    = {237},
  month    = may,
  year     = {2010},
  language = {en},
  abstract = {BACKGROUND: Chromatin immunoprecipitation (ChIP) followed by
              high-throughput sequencing (ChIP-seq) or ChIP followed by genome
              tiling array analysis (ChIP-chip) have become standard
              technologies for genome-wide identification of DNA-binding
              protein target sites. A number of algorithms have been developed
              in parallel that allow identification of binding sites from
              ChIP-seq or ChIP-chip datasets and subsequent visualization in
              the University of California Santa Cruz (UCSC) Genome Browser as
              custom annotation tracks. However, summarizing these tracks can
              be a daunting task, particularly if there are a large number of
              binding sites or the binding sites are distributed widely across
              the genome. RESULTS: We have developed ChIPpeakAnno as a
              Bioconductor package within the statistical programming
              environment R to facilitate batch annotation of enriched peaks
              identified from ChIP-seq, ChIP-chip, cap analysis of gene
              expression (CAGE) or any experiments resulting in a large number
              of enriched genomic regions. The binding sites annotated with
              ChIPpeakAnno can be viewed easily as a table, a pie chart or
              plotted in histogram form, i.e., the distribution of distances to
              the nearest genes for each set of peaks. In addition, we have
              implemented functionalities for determining the significance of
              overlap between replicates or binding sites among transcription
              factors within a complex, and for drawing Venn diagrams to
              visualize the extent of the overlap between replicates.
              Furthermore, the package includes functionalities to retrieve
              sequences flanking putative binding sites for PCR amplification,
              cloning, or motif discovery, and to identify Gene Ontology (GO)
              terms associated with adjacent genes. CONCLUSIONS: ChIPpeakAnno
              enables batch annotation of the binding sites identified from
              ChIP-seq, ChIP-chip, CAGE or any technology that results in a
              large number of enriched genomic regions within the statistical
              programming environment R. Allowing users to pass their own
              annotation data such as a different Chromatin immunoprecipitation
              (ChIP) preparation and a dataset from literature, or existing
              annotation packages, such as GenomicFeatures and BSgenome,
              provides flexibility. Tight integration to the biomaRt package
              enables up-to-date annotation retrieval from the BioMart
              database.}
}
% This article is identified by a single article number, so the pages field
% holds that number once instead of a range whose two ends are identical.
@ARTICLE{Lister2013-vs,
  author   = {Lister, R and Mukamel, E A and Nery, J R and Urich, M and
              Puddifoot, C A and Johnson, N D and Lucero, J and Huang, Y and
              Dwork, A J and Schultz, M D and Yu, M and Tonti-Filippini, J and
              Heyn, H and Hu, S and Wu, J C and Rao, A and Esteller, M and
              He, C and Haghighi, F G and Sejnowski, T J and Behrens, M M and
              Ecker, J R},
  title    = {Global Epigenomic Reconfiguration During Mammalian Brain
              Development},
  journal  = {Science},
  volume   = {341},
  number   = {6146},
  pages    = {1237905},
  month    = aug,
  year     = {2013},
  language = {en}
}
% Bayesian is brace-protected in the title so that bibliography styles which
% convert titles to sentence case keep its capital letter.
@ARTICLE{Feng2014-pd,
  author   = {Feng, Hao and Conneely, Karen N and Wu, Hao},
  title    = {A {Bayesian} hierarchical model to detect differentially
              methylated loci from single nucleotide resolution sequencing
              data},
  journal  = {Nucleic Acids Res.},
  volume   = {42},
  number   = {8},
  pages    = {e69},
  month    = apr,
  year     = {2014},
  language = {en},
  abstract = {DNA methylation is an important epigenetic modification that has
              essential roles in cellular processes including gene regulation,
              development and disease and is widely dysregulated in most types
              of cancer. Recent advances in sequencing technology have enabled
              the measurement of DNA methylation at single nucleotide
              resolution through methods such as whole-genome bisulfite
              sequencing and reduced representation bisulfite sequencing. In
              DNA methylation studies, a key task is to identify differences
              under distinct biological contexts, for example, between tumor
              and normal tissue. A challenge in sequencing studies is that the
              number of biological replicates is often limited by the costs of
              sequencing. The small number of replicates leads to unstable
              variance estimation, which can reduce accuracy to detect
              differentially methylated loci (DML). Here we propose a novel
              statistical method to detect DML when comparing two treatment
              groups. The sequencing counts are described by a
              lognormal-beta-binomial hierarchical model, which provides a
              basis for information sharing across different CpG sites. A Wald
              test is developed for hypothesis testing at each CpG site.
              Simulation results show that the proposed method yields improved
              DML detection compared to existing methods, particularly when the
              number of replicates is low. The proposed method is implemented
              in the Bioconductor package DSS.}
}
@ARTICLE{Ryan2014-im,
  author   = {Ryan, Devon Patrick and Ehninger, Dan},
  title    = {Bison: bisulfite alignment on nodes of a cluster},
  journal  = {BMC Bioinformatics},
  volume   = {15},
  pages    = {337},
  month    = oct,
  year     = {2014},
  language = {en},
  abstract = {BACKGROUND: DNA methylation changes are associated with a wide
              array of biological processes. Bisulfite conversion of DNA
              followed by high-throughput sequencing is increasingly being used
              to assess genome-wide methylation at single-base resolution. The
              relative slowness of most commonly used aligners for processing
              such data introduces an unnecessarily long delay between receipt
              of raw data and statistical analysis. While this process can be
              sped-up by using computer clusters, current tools are not
              designed with them in mind and end-users must create such
              implementations themselves. RESULTS: Here, we present a novel
              BS-seq aligner, Bison, which exploits multiple nodes of a
              computer cluster to speed up this process and also has increased
              accuracy. Bison is accompanied by a variety of helper programs
              and scripts to ease, as much as possible, the process of quality
              control and preparing results for statistical analysis by a
              variety of popular R packages. Bison is also accompanied by
              bison\_herd, a variant of Bison with the same output but that can
              scale to a semi-arbitrary number of nodes, with concomitant
              increased demands on the underlying message passing interface
              implementation. CONCLUSIONS: Bison is a new bisulfite-converted
              short-read aligner providing end users easier scalability for
              performance gains, more accurate alignments, and a convenient
              pathway for quality controlling alignments and converting
              methylation calls into a form appropriate for statistical
              analysis. Bison and the more scalable bison\_herd are natively
              able to utilize multiple nodes of a computer cluster
              simultaneously and serve to simplify to the process of creating
              analysis pipelines.}
}
@ARTICLE{Song2013-cr,
  author   = {Song, Qiang and Decato, Benjamin and Hong, Elizabeth E and Zhou,
              Meng and Fang, Fang and Qu, Jianghan and Garvin, Tyler and
              Kessler, Michael and Zhou, Jun and Smith, Andrew D},
  title    = {A reference methylome database and analysis pipeline to
              facilitate integrative and comparative epigenomics},
  journal  = {PLoS One},
  volume   = {8},
  number   = {12},
  pages    = {e81148},
  month    = dec,
  year     = {2013},
  language = {en},
  abstract = {DNA methylation is implicated in a surprising diversity of
              regulatory, evolutionary processes and diseases in eukaryotes.
              The introduction of whole-genome bisulfite sequencing has enabled
              the study of DNA methylation at a single-base resolution,
              revealing many new aspects of DNA methylation and highlighting
              the usefulness of methylome data in understanding a variety of
              genomic phenomena. As the number of publicly available
              whole-genome bisulfite sequencing studies reaches into the
              hundreds, reliable and convenient tools for comparing and
              analyzing methylomes become increasingly important. We present
              MethPipe, a pipeline for both low and high-level methylome
              analysis, and MethBase, an accompanying database of annotated
              methylomes from the public domain. Together these resources
              enable researchers to extract interesting features from
              methylomes and compare them with those identified in public
              methylomes in our database.}
}
@ARTICLE{Schubeler2015-ai,
  author   = {Sch{\"u}beler, Dirk},
  title    = {Function and information content of {DNA} methylation},
  journal  = {Nature},
  volume   = {517},
  number   = {7534},
  pages    = {321--326},
  month    = jan,
  year     = {2015},
  language = {en},
  abstract = {Cytosine methylation is a DNA modification generally associated
              with transcriptional silencing. Factors that regulate methylation
              have been linked to human disease, yet how they contribute to
              malignances remains largely unknown. Genomic maps of DNA
              methylation have revealed unexpected dynamics at gene regulatory
              regions, including active demethylation by TET proteins at
              binding sites for transcription factors. These observations
              indicate that the underlying DNA sequence largely accounts for
              local patterns of methylation. As a result, this mark is highly
              informative when studying gene regulation in normal and diseased
              cells, and it can potentially function as a biomarker. Although
              these findings challenge the view that methylation is generally
              instructive for gene silencing, several open questions remain,
              including how methylation is targeted and recognized and in what
              context it affects genome readout.}
}
@ARTICLE{Rampal2014-lw,
title = "{DNA} hydroxymethylation profiling reveals that {WT1} mutations
result in loss of {TET2} function in acute myeloid leukemia",
author = "Rampal, Raajit and Alkalin, Altuna and Madzo, Jozef and
Vasanthakumar, Aparna and Pronier, Elodie and Patel, Jay and Li,
Yushan and Ahn, Jihae and Abdel-Wahab, Omar and Shih, Alan and