forked from Ensembl/postgap
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpostgap_html_report.py
executable file
·136 lines (99 loc) · 4.19 KB
/
postgap_html_report.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#! /usr/bin/env python
"""
Copyright [1999-2019] EMBL-European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the "License")
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
"""
Please email comments or questions to the public Ensembl
developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
Questions may also be sent to the Ensembl help desk at
<http://www.ensembl.org/Help/Contact>.
"""
import sys
import argparse
from argparse import RawTextHelpFormatter
import os
import csv
from jinja2 import Template
import requests
def main():
#get options
options = get_options()
#assert that files exists
assert os.path.exists(options.result_file), "result file " + options.result_file + " can't be found."
assert os.path.exists(options.template), "HTML template file " + options.template + " can't be found."
#get top 10 genes
top_10_genes, top_10_snps = get_top_10_genes_snps(options.result_file)
#get top 10 pathways
top_10_pathways = get_top_10_pathways(top_10_genes)
#load template and render the html file out
template_file = open(options.template, 'r')
template_data = str(template_file.read())
t = Template(template_data)
output_html=open(options.output, "w+")
output_html.write(t.render(gene_list=top_10_genes, snp_list=top_10_snps, pathway_list=top_10_pathways))
output_html.close
def get_options():
parser = argparse.ArgumentParser(description="Creates a HTML report based on POSTGAP (--output2) results", formatter_class = RawTextHelpFormatter)
requiredNamed = parser.add_argument_group('required arguments')
requiredNamed.add_argument('--output', help='Name of the HTML output file', required=True)
requiredNamed.add_argument('--template', help='Name of the HTML template file', required=True)
requiredNamed.add_argument('--result_file', help='Name of the POSTGAP (--output2) results file', required=True)
options = parser.parse_args()
return options
def get_top_10_genes_snps(result_file):
top_10_genes = [[0 for x in range(4)] for y in range(10)]
top_10_snps = [[0 for x in range(4)] for y in range(10)]
source_file = open(result_file, 'r')
reader = csv.reader(source_file, delimiter='\t')
for row in reader:
gene = row[0]
cluster = row[1].replace('GWAS_Cluster_', '')
snp = row[2]
snp_gene_tissue_posterior = row[3]
tissue = row[4]
gene_cluster_tissue_posterior = row[5]
for i in range(10):
if top_10_genes[i][0] != gene or top_10_genes[i][1] != cluster or top_10_genes[i][2] != tissue:
if top_10_genes[i][3] < gene_cluster_tissue_posterior:
item = [gene, cluster, tissue, gene_cluster_tissue_posterior]
top_10_genes.insert(i, item)
del top_10_genes[-1]
break
for i in range(10):
if top_10_snps[i][0] != snp or top_10_snps[i][1] != gene or top_10_snps[i][2] != tissue:
if top_10_snps[i][3] < snp_gene_tissue_posterior:
item = [snp, gene, tissue, snp_gene_tissue_posterior]
top_10_snps.insert(i, item)
del top_10_snps[-1]
break
return top_10_genes, top_10_snps
def get_top_10_pathways(top_10_genes):
#get the list of genes
gene_list = []
for gene in top_10_genes:
if not gene[0] in gene_list:
gene_list.append(gene[0])
#call reactome
genes_to_analyse = '\n'.join([str(i) for i in gene_list])
headers = {'Content-Type': 'text/plain',}
res = requests.post('https://reactome.org/AnalysisService/identifiers/projection/?pageSize=10&page=1', headers=headers, data=genes_to_analyse)
pathways_jdata = res.json()
top_10_pathways = []
for i in range(10):
pathway_stdi = pathways_jdata['pathways'][i]['stId']
pathway_name = pathways_jdata['pathways'][i]['name']
pathway_score = 1 - float(pathways_jdata['pathways'][i]['entities']['fdr'])
item = [pathway_stdi, pathway_name, pathway_score]
top_10_pathways.insert(i, item)
return top_10_pathways
if __name__ == "__main__":
main()