Skip to content
This repository has been archived by the owner on Feb 28, 2023. It is now read-only.

Commit

Permalink
Clarified BCF filenames
Browse files Browse the repository at this point in the history
  • Loading branch information
dzerbino committed Nov 1, 2016
1 parent 106fba9 commit 1ddadbe
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 4 deletions.
5 changes: 3 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
DEST_DIR=~/lustre2/CTTV24/databases_test


default: download process
download: create_dir d_GRASP d_Phewas_Catalog d_GWAS_DB d_Fantom5 d_DHS d_Regulome d_1000Genomes
process: GRASP Phewas_Catalog GWAS_DB Fantom5 DHS Regulome tabix 1000Genomes
Expand Down Expand Up @@ -73,13 +74,13 @@ define process_1000Genomes_file
gzip -dc $(1) \
| vcfkeepsamples - `cat ./scripts/preprocessing/CEPH_samples.txt`\
| bcftools convert -Ob \
> ${DEST_DIR}/1000Genomes/CEPH/`basename $(1) | sed -e 's/vcf.gz/bcf.gz/'`;
> ${DEST_DIR}/1000Genomes/CEPH/`basename $(1) | sed -e 's/vcf.gz/bcf/'`;
endef

.PHONY: 1000Genomes

1000Genomes:
$(eval vcf_files := $(wildcard ${DEST_DIR}/raw/1000Genomes/*.vcf.gz))
$(foreach file, $(vcf_files), $(call process_1000Genomes_file, $(file)))
$(eval bcf_files := $(wildcard ${DEST_DIR}/1000Genomes/*.bcf.gz))
$(eval bcf_files := $(wildcard ${DEST_DIR}/1000Genomes/*.bcf))
$(foreach file, $(bcf_files), bcftools index $(file);)
4 changes: 2 additions & 2 deletions scripts/gwas_to_genes.py
Original file line number Diff line number Diff line change
Expand Up @@ -829,7 +829,7 @@ def calculate_LD_window(snp, window_len=500000,population='CEPH',cutoff=0.5,db=0
chromosome = snp.chrom
region = '{}:{}-{}'.format(chromosome,from_pos,to_pos)

chrom_file = "CEPH.chr%s.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.bcf.gz" % (chromosome)
chrom_file = "ALL.chr%s.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.bcf" % (chromosome)

### Extract this region out from the 1000 genomes BCF

Expand Down Expand Up @@ -1048,7 +1048,7 @@ def get_lds_from_top_gwas(gwas_snp, ld_snps, population='CEPH', region=None,db=0


### Extract the required region from the VCF
chrom_file = '%s.chr%s.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.bcf.gz' % (population, chromosome)
chrom_file = 'ALL.chr%s.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.bcf' % (chromosome)

extract_region_comm = "bcftools view -r %s %s -O z -o region.vcf.gz" % (region, os.path.join(DATABASES_DIR, '1000Genomes', population, chrom_file))
subprocess.call(extract_region_comm.split(" "))
Expand Down

0 comments on commit 1ddadbe

Please sign in to comment.