-
Notifications
You must be signed in to change notification settings - Fork 0
/
methyl_bam_mtx_pipeline.py
87 lines (68 loc) · 2.43 KB
/
methyl_bam_mtx_pipeline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import argparse
from multiprocessing import Pool
import methyl_utils
# Number of sub-batches iterated by the aggregation and matrix stages; their
# ids are the zero-padded strings "001" .. "100".
# NOTE(review): presumably has to agree with the sub-batch count used inside
# methyl_utils -- confirm before changing.
sub_batch_N = 100


def build_arg_parser():
    """Return the command-line parser for this pipeline script.

    ``--cores`` is parsed as an int and is required: every stage sizes its
    worker pool from it, so a missing or non-numeric value is rejected by
    argparse up front instead of failing later inside ``int(...)``.
    The remaining options are forwarded unchanged to ``methyl_utils``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--cores", type=int, required=True)
    parser.add_argument("-i", "--indir", type=str)
    parser.add_argument("-s", "--sample", type=str)
    parser.add_argument("-w", "--window_size", type=int)
    parser.add_argument("-m", "--methylation_context", type=str)
    parser.add_argument("-r", "--reference_genome_index", type=str)
    parser.add_argument("-l", "--limit", default=False, action="store_true")
    return parser


def _sub_batch_ids(count):
    """Return the 1-based sub-batch ids as 3-character zero-padded strings."""
    return [str(number).zfill(3) for number in range(1, count + 1)]


def _run_stage(worker, task_args, cores):
    """Call ``worker(*task)`` for every tuple in *task_args* on a fresh pool.

    Blocks until all tasks have finished. On success the pool is closed and
    joined; if a task raises, leaving the ``with`` block terminates the
    workers before the exception propagates.
    """
    with Pool(cores) as pool:
        pool.starmap(worker, task_args)
        pool.close()
        pool.join()


def main(argv=None):
    """Run all pipeline stages, in order, for one sample.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Each parallel stage uses its own pool of ``--cores`` processes and must
    finish before the next stage starts.
    """
    options = build_arg_parser().parse_args(argv)
    cores = options.cores
    indir = options.indir
    sample = options.sample
    window_size = options.window_size
    methylation_context = options.methylation_context
    reference_genome_index = options.reference_genome_index
    limit = options.limit

    batch_ids = _sub_batch_ids(sub_batch_N)

    # Stage 1: one save_quad_batch_from_bam task per part found for the sample.
    # Alternative first stage, disabled in the original script and kept here
    # for reference: methyl_utils.save_quad_batch_json called with
    # (indir, sample, part, methylation_context, limit) per part.
    parts = methyl_utils.find_sub_fastq_parts(indir, sample)
    _run_stage(
        methyl_utils.save_quad_batch_from_bam,
        [(indir, sample, part, limit) for part in parts],
        cores,
    )

    # Stage 2: one aggregate_quad_parts task per sub-batch id.
    _run_stage(
        methyl_utils.aggregate_quad_parts,
        [(indir, sample, batch_id, methylation_context) for batch_id in batch_ids],
        cores,
    )

    # Stage 3: build the window index once, then one
    # make_count_sparse_mtx_batch_windows task per sub-batch id.
    chr_idx_dict = methyl_utils.fasta_index_to_windows(
        reference_genome_index, window_size
    )
    print("chr_idx_dict lenght = ", len(chr_idx_dict))
    _run_stage(
        methyl_utils.make_count_sparse_mtx_batch_windows,
        [
            (indir, sample, batch_id, window_size, chr_idx_dict, methylation_context)
            for batch_id in batch_ids
        ],
        cores,
    )

    # Stage 4: stack_mtx is called directly and receives the core count itself.
    methyl_utils.stack_mtx(
        indir, sample, window_size, chr_idx_dict, methylation_context, cores
    )

    # Stage 5: the part list is looked up again (as in the original script)
    # before running one tag_bam_with_barcodes task per part.
    parts = methyl_utils.find_sub_fastq_parts(indir, sample)
    _run_stage(
        methyl_utils.tag_bam_with_barcodes,
        [(indir, sample, part, limit) for part in parts],
        cores,
    )


if __name__ == "__main__":
    # Guarded so that worker processes which re-import this module (spawn and
    # forkserver start methods) do not re-run the pipeline themselves.
    main()