Skip to content
This repository has been archived by the owner on Jan 27, 2020. It is now read-only.

Commit

Permalink
Merge pull request #607 from szilvajuhos/master
Browse files Browse the repository at this point in the history
GATK4 first round without MuTect1 and indel realignment
  • Loading branch information
maxulysse authored Aug 14, 2018
2 parents 3bc9585 + 9d20c68 commit 924b90a
Show file tree
Hide file tree
Showing 23 changed files with 219 additions and 585 deletions.
16 changes: 10 additions & 6 deletions annotate.nf
Original file line number Diff line number Diff line change
Expand Up @@ -73,23 +73,23 @@ vcfToAnnotate = Channel.create()
vcfNotToAnnotate = Channel.create()

if (annotateVCF == []) {
// by default, we annotate all available VCFs that we can find in the VariantCalling directory
Channel.empty().mix(
Channel.fromPath("${directoryMap.haplotypecaller}/*.vcf.gz")
.flatten().map{vcf -> ['haplotypecaller', vcf]},
Channel.fromPath("${directoryMap.manta}/*SV.vcf.gz")
.flatten().map{vcf -> ['manta', vcf]},
Channel.fromPath("${directoryMap.mutect1}/*.vcf.gz")
.flatten().map{vcf -> ['mutect1', vcf]},
Channel.fromPath("${directoryMap.mutect2}/*.vcf.gz")
.flatten().map{vcf -> ['mutect2', vcf]},
Channel.fromPath("${directoryMap.strelka}/*{somatic,variants}*.vcf.gz")
Channel.fromPath("${directoryMap.strelka}/*{somatic,variants}*.vcf.gz") // Strelka only
.flatten().map{vcf -> ['strelka', vcf]},
Channel.fromPath("${directoryMap.strelkabp}/*{somatic,variants}*.vcf.gz")
Channel.fromPath("${directoryMap.strelkabp}/*{somatic,variants}*.vcf.gz") // Strelka with Manta indel candidates
.flatten().map{vcf -> ['strelkabp', vcf]}
).choice(vcfToAnnotate, vcfNotToAnnotate) {
annotateTools == [] || (annotateTools != [] && it[0] in annotateTools) ? 0 : 1
}
} else if (annotateTools == []) {
// alternatively, annotate user-submitted VCFs
list = ""
annotateVCF.each{ list += ",${it}" }
list = list.substring(1)
Expand All @@ -101,6 +101,10 @@ if (annotateVCF == []) {

vcfNotToAnnotate.close()

// as we now have the list of VCFs to annotate, the first step is to annotate with allele frequencies, if there are any



(vcfForBCFtools, vcfForVCFtools, vcfForSnpeff, vcfForVep) = vcfToAnnotate.into(4)

vcfForVep = vcfForVep.map {
Expand Down Expand Up @@ -224,11 +228,12 @@ process RunVEP {
finalannotator = annotator == "snpeff" ? 'merge' : 'vep'
genome = params.genome == 'smallGRCh37' ? 'GRCh37' : params.genome
"""
vep \
vep --dir /opt/vep/.vep/ \
-i ${vcf} \
-o ${vcf.simpleName}_VEP.ann.vcf \
--assembly ${genome} \
--cache \
--cache_version 91 \
--database \
--everything \
--filter_common \
Expand Down Expand Up @@ -346,7 +351,6 @@ def helpMessage() {
log.info " Possible values are:"
log.info " haplotypecaller (Annotate HaplotypeCaller output)"
log.info " manta (Annotate Manta output)"
log.info " mutect1 (Annotate MuTect1 output)"
log.info " mutect2 (Annotate MuTect2 output)"
log.info " strelka (Annotate Strelka output)"
log.info " --annotateVCF"
Expand Down
10 changes: 1 addition & 9 deletions buildContainers.nf
Original file line number Diff line number Diff line change
Expand Up @@ -177,13 +177,6 @@ def grabRevision() {
def defineContainersList(){
// Return list of authorized containers
return [
'freebayes',
'gatk',
'gatk4',
'igvtools',
'mutect1',
'picard',
'qctools',
'r-base',
'runallelecount',
'sarek',
Expand All @@ -208,8 +201,7 @@ def helpMessage() {
log.info " --containers: Choose which containers to build"
log.info " Default: all"
log.info " Possible values:"
log.info " all, freebayes, gatk, gatk4, igvtools, mutect1, picard"
log.info " qctools, r-base, runallelecount, sarek, snpeff"
log.info " all, r-base, runallelecount, sarek, snpeff"
log.info " snpeffgrch37, snpeffgrch38, vepgrch37, vepgrch38"
log.info " --docker: Build containers using Docker"
log.info " --help"
Expand Down
25 changes: 12 additions & 13 deletions buildReferences.nf
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ kate: syntax groovy; space-indent on; indent-width 2;
- ProcessReference - Download all references if needed
- DecompressFile - Extract files if needed
- BuildBWAindexes - Build indexes for BWA
- BuildPicardIndex - Build index with Picard
- BuildReferenceIndex - Build index for FASTA refs
- BuildSAMToolsIndex - Build index with SAMTools
- BuildVCFIndex - Build index for VCF files
================================================================================
Expand Down Expand Up @@ -98,7 +98,7 @@ if (params.verbose) ch_decompressedFiles = ch_decompressedFiles.view {

ch_fastaFile = Channel.create()
ch_fastaForBWA = Channel.create()
ch_fastaForPicard = Channel.create()
ch_fastaReference = Channel.create()
ch_fastaForSAMTools = Channel.create()
ch_otherFile = Channel.create()
ch_vcfFile = Channel.create()
Expand All @@ -108,7 +108,7 @@ ch_decompressedFiles
it =~ ".fasta" ? 0 :
it =~ ".vcf" ? 1 : 2}

(ch_fastaForBWA, ch_fastaForPicard, ch_fastaForSAMTools, ch_fastaFileToKeep) = ch_fastaFile.into(4)
(ch_fastaForBWA, ch_fastaReference, ch_fastaForSAMTools, ch_fastaFileToKeep) = ch_fastaFile.into(4)
(ch_vcfFile, ch_vcfFileToKeep) = ch_vcfFile.into(2)

ch_notCompressedfiles
Expand Down Expand Up @@ -137,29 +137,28 @@ if (params.verbose) bwaIndexes.flatten().view {
"BWA index : ${it.fileName}"
}

process BuildPicardIndex {
process BuildReferenceIndex {
tag {f_reference}

publishDir params.outDir, mode: 'link'

input:
file(f_reference) from ch_fastaForPicard
file(f_reference) from ch_fastaReference

output:
file("*.dict") into ch_picardIndex
file("*.dict") into ch_referenceIndex

script:
"""
java -Xmx${task.memory.toGiga()}g \
-jar \$PICARD_HOME/picard.jar \
gatk --java-options "-Xmx${task.memory.toGiga()}g" \
CreateSequenceDictionary \
REFERENCE=${f_reference} \
OUTPUT=${f_reference.baseName}.dict
--REFERENCE ${f_reference} \
--OUTPUT ${f_reference.baseName}.dict
"""
}

if (params.verbose) ch_picardIndex.view {
"Picard index : ${it.fileName}"
if (params.verbose) ch_referenceIndex.view {
"Reference index : ${it.fileName}"
}

process BuildSAMToolsIndex {
Expand Down Expand Up @@ -196,7 +195,7 @@ process BuildVCFIndex {

script:
"""
\$IGVTOOLS_HOME/igvtools index ${f_reference}
igvtools index ${f_reference}
"""
}

Expand Down
42 changes: 19 additions & 23 deletions configuration/containers.config
Original file line number Diff line number Diff line change
Expand Up @@ -9,52 +9,48 @@

process {
$BuildBWAindexes.container = "${params.repository}/sarek:${params.tag}"
$BuildPicardIndex.container = "${params.repository}/picard:${params.tag}"
$BuildReferenceIndex.container = "${params.repository}/sarek:${params.tag}"
$BuildSAMToolsIndex.container = "${params.repository}/sarek:${params.tag}"
$BuildVCFIndex.container = "${params.repository}/igvtools:${params.tag}"
$BuildVCFIndex.container = "${params.repository}/sarek:${params.tag}"
$CompressVCF.container = "${params.repository}/sarek:${params.tag}"
$ConcatVCF.container = "${params.repository}/sarek:${params.tag}"
$CreateRecalibrationTable.container = "${params.repository}/gatk:${params.tag}"
$GetVersionAll.container = "${params.repository}/qctools:${params.tag}"
$CreateRecalibrationTable.container = "${params.repository}/sarek:${params.tag}"
$GetVersionAll.container = "${params.repository}/sarek:${params.tag}"
$GetVersionAlleleCount.container = "${params.repository}/runallelecount:${params.tag}"
$GetVersionASCAT.container = "${params.repository}/r-base:${params.tag}"
$GetVersionBamQC.container = "${params.repository}/qctools:${params.tag}"
$GetVersionBamQC.container = "${params.repository}/sarek:${params.tag}"
$GetVersionBCFtools.container = "${params.repository}/sarek:${params.tag}"
$GetVersionBWAsamtools.container = "${params.repository}/sarek:${params.tag}"
$GetVersionFastQC.container = "${params.repository}/qctools:${params.tag}"
$GetVersionFreeBayes.container = "${params.repository}/freebayes:${params.tag}"
$GetVersionGATK.container = "${params.repository}/gatk:${params.tag}"
$GetVersionFastQC.container = "${params.repository}/sarek:${params.tag}"
$GetVersionFreeBayes.container = "${params.repository}/sarek:${params.tag}"
$GetVersionGATK.container = "${params.repository}/sarek:${params.tag}"
$GetVersionManta.container = "${params.repository}/sarek:${params.tag}"
$GetVersionPicard.container = "${params.repository}/picard:${params.tag}"
$GetVersionSnpeff.container = {params.genome == 'GRCh38' ? "${params.repository}/snpeffgrch38:${params.tag}" : "${params.repository}/snpeffgrch37:${params.tag}"}
$GetVersionStrelka.container = "${params.repository}/sarek:${params.tag}"
$GetVersionVCFtools.container = "${params.repository}/qctools:${params.tag}"
$GetVersionVCFtools.container = "${params.repository}/sarek:${params.tag}"
$GetVersionVEP.container = {params.genome == 'GRCh38' ? "${params.repository}/vepgrch38:${params.tag}" : "${params.repository}/vepgrch37:${params.tag}"}
$IndelRealigner.container = "${params.repository}/gatk:${params.tag}"
$MapReads.container = "${params.repository}/sarek:${params.tag}"
$MarkDuplicates.container = "${params.repository}/picard:${params.tag}"
$MarkDuplicates.container = "${params.repository}/sarek:${params.tag}"
$MergeBams.container = "${params.repository}/sarek:${params.tag}"
$RealignerTargetCreator.container = "${params.repository}/gatk:${params.tag}"
$RecalibrateBam.container = "${params.repository}/gatk:${params.tag}"
$RecalibrateBam.container = "${params.repository}/sarek:${params.tag}"
$RunAlleleCount.container = "${params.repository}/runallelecount:${params.tag}"
$RunAscat.container = "${params.repository}/r-base:${params.tag}"
$RunBamQC.container = "${params.repository}/qctools:${params.tag}"
$RunBamQC.container = "${params.repository}/sarek:${params.tag}"
$RunBcftoolsStats.container = "${params.repository}/sarek:${params.tag}"
$RunConvertAlleleCounts.container = "${params.repository}/r-base:${params.tag}"
$RunFastQC.container = "${params.repository}/qctools:${params.tag}"
$RunFreeBayes.container = "${params.repository}/freebayes:${params.tag}"
$RunGenotypeGVCFs.container = "${params.repository}/gatk:${params.tag}"
$RunHaplotypecaller.container = "${params.repository}/gatk:${params.tag}"
$RunFastQC.container = "${params.repository}/sarek:${params.tag}"
$RunFreeBayes.container = "${params.repository}/sarek:${params.tag}"
$RunGenotypeGVCFs.container = "${params.repository}/sarek:${params.tag}"
$RunHaplotypecaller.container = "${params.repository}/sarek:${params.tag}"
$RunManta.container = "${params.repository}/sarek:${params.tag}"
$RunMultiQC.container = "${params.repository}/qctools:${params.tag}"
$RunMutect1.container = "${params.repository}/mutect1:${params.tag}"
$RunMutect2.container = "${params.repository}/gatk:${params.tag}"
$RunMultiQC.container = "${params.repository}/sarek:${params.tag}"
$RunMutect2.container = "${params.repository}/sarek:${params.tag}"
$RunSamtoolsStats.container = "${params.repository}/sarek:${params.tag}"
$RunSingleManta.container = "${params.repository}/sarek:${params.tag}"
$RunSingleStrelka.container = "${params.repository}/sarek:${params.tag}"
$RunSnpeff.container = {params.genome == 'GRCh38' ? "${params.repository}/snpeffgrch38:${params.tag}" : "${params.repository}/snpeffgrch37:${params.tag}"}
$RunStrelka.container = "${params.repository}/sarek:${params.tag}"
$RunStrelkaBP.container = "${params.repository}/sarek:${params.tag}"
$RunVcftools.container = "${params.repository}/qctools:${params.tag}"
$RunVcftools.container = "${params.repository}/sarek:${params.tag}"
$RunVEP.container = {params.genome == 'GRCh38' ? "${params.repository}/vepgrch38:${params.tag}" : "${params.repository}/vepgrch37:${params.tag}"}
}
3 changes: 3 additions & 0 deletions configuration/genomes.config
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ params {
knownIndels = "${params.genome_base}/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz"
knownIndelsIndex = "${params.genome_base}/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi"
snpeffDb = "GRCh38.86"
// This is a nasty-looking list of allele-frequency files. Add/remove files to match your sets
//AF_files = "${params.genome_base}/{00-All.dbsnp_151.hg38.CAF.TOPMED.alternate.allele.freq,hapmap_3.3_grch38_pop_stratified_af.HMAF,SweGen_hg38_stratified.SWAF}.vcf"
//AF_indexes = "${params.genome_base}/{00-All.dbsnp_151.hg38.CAF.TOPMED.alternate.allele.freq,hapmap_3.3_grch38_pop_stratified_af.HMAF,SweGen_hg38_stratified.SWAF}.vcf.idx"
}
'smallGRCh37' {
acLoci = "${params.genome_base}/1000G_phase3_20130502_SNP_maf0.3.small.loci"
Expand Down
44 changes: 21 additions & 23 deletions configuration/singularity-path.config
Original file line number Diff line number Diff line change
Expand Up @@ -14,52 +14,50 @@ singularity {

process {
$BuildBWAindexes.container = "${params.containerPath}/sarek-${params.tag}.img"
$BuildPicardIndex.container = "${params.containerPath}/picard-${params.tag}.img"
$BuildReferenceIndex.container = "${params.containerPath}/sarek-${params.tag}.img"
$BuildSAMToolsIndex.container = "${params.containerPath}/sarek-${params.tag}.img"
$BuildVCFIndex.container = "${params.containerPath}/igvtools-${params.tag}.img"
$BuildVCFIndex.container = "${params.containerPath}/sarek-${params.tag}.img"
$CompressVCF.container = "${params.containerPath}/sarek-${params.tag}.img"
$ConcatVCF.container = "${params.containerPath}/sarek-${params.tag}.img"
$CreateRecalibrationTable.container = "${params.containerPath}/gatk-${params.tag}.img"
$GetVersionAll.container = "${params.containerPath}/qctools-${params.tag}.img"
$CreateRecalibrationTable.container = "${params.containerPath}/sarek-${params.tag}.img"
$GetVersionAll.container = "${params.containerPath}/sarek-${params.tag}.img"
$GetVersionAlleleCount.container = "${params.containerPath}/runallelecount-${params.tag}.img"
$GetVersionASCAT.container = "${params.containerPath}/r-base-${params.tag}.img"
$GetVersionBamQC.container = "${params.containerPath}/qctools-${params.tag}.img"
$GetVersionBamQC.container = "${params.containerPath}/sarek-${params.tag}.img"
$GetVersionBCFtools.container = "${params.containerPath}/sarek-${params.tag}.img"
$GetVersionBWAsamtools.container = "${params.containerPath}/sarek-${params.tag}.img"
$GetVersionFastQC.container = "${params.containerPath}/qctools-${params.tag}.img"
$GetVersionFreeBayes.container = "${params.containerPath}/freebayes-${params.tag}.img"
$GetVersionGATK.container = "${params.containerPath}/gatk-${params.tag}.img"
$GetVersionFastQC.container = "${params.containerPath}/sarek-${params.tag}.img"
$GetVersionFreeBayes.container = "${params.containerPath}/sarek-${params.tag}.img"
$GetVersionGATK.container = "${params.containerPath}/sarek-${params.tag}.img"
$GetVersionManta.container = "${params.containerPath}/sarek-${params.tag}.img"
$GetVersionPicard.container = "${params.containerPath}/picard-${params.tag}.img"
$GetVersionSnpeff.container = {params.genome == 'GRCh38' ? "${params.containerPath}/snpeffgrch38-${params.tag}.img" : "${params.containerPath}/snpeffgrch37-${params.tag}.img"}
$GetVersionStrelka.container = "${params.containerPath}/sarek-${params.tag}.img"
$GetVersionVCFtools.container = "${params.containerPath}/qctools-${params.tag}.img"
$GetVersionVCFtools.container = "${params.containerPath}/sarek-${params.tag}.img"
$GetVersionVEP.container = {params.genome == 'GRCh38' ? "${params.containerPath}/vepgrch38-${params.tag}.img" : "${params.containerPath}/vepgrch37-${params.tag}.img"}
$IndelRealigner.container = "${params.containerPath}/gatk-${params.tag}.img"
$IndelRealigner.container = "${params.containerPath}/sarek-${params.tag}.img"
$MapReads.container = "${params.containerPath}/sarek-${params.tag}.img"
$MarkDuplicates.container = "${params.containerPath}/picard-${params.tag}.img"
$MarkDuplicates.container = "${params.containerPath}/sarek-${params.tag}.img"
$MergeBams.container = "${params.containerPath}/sarek-${params.tag}.img"
$RealignerTargetCreator.container = "${params.containerPath}/gatk-${params.tag}.img"
$RecalibrateBam.container = "${params.containerPath}/gatk-${params.tag}.img"
$RealignerTargetCreator.container = "${params.containerPath}/sarek-${params.tag}.img"
$RecalibrateBam.container = "${params.containerPath}/sarek-${params.tag}.img"
$RunAlleleCount.container = "${params.containerPath}/runallelecount-${params.tag}.img"
$RunAscat.container = "${params.containerPath}/r-base-${params.tag}.img"
$RunBamQC.container = "${params.containerPath}/qctools-${params.tag}.img"
$RunBamQC.container = "${params.containerPath}/sarek-${params.tag}.img"
$RunBcftoolsStats.container = "${params.containerPath}/sarek-${params.tag}.img"
$RunConvertAlleleCounts.container = "${params.containerPath}/r-base-${params.tag}.img"
$RunFastQC.container = "${params.containerPath}/qctools-${params.tag}.img"
$RunFreeBayes.container = "${params.containerPath}/freebayes-${params.tag}.img"
$RunGenotypeGVCFs.container = "${params.containerPath}/gatk-${params.tag}.img"
$RunHaplotypecaller.container = "${params.containerPath}/gatk-${params.tag}.img"
$RunFastQC.container = "${params.containerPath}/sarek-${params.tag}.img"
$RunFreeBayes.container = "${params.containerPath}/sarek-${params.tag}.img"
$RunGenotypeGVCFs.container = "${params.containerPath}/sarek-${params.tag}.img"
$RunHaplotypecaller.container = "${params.containerPath}/sarek-${params.tag}.img"
$RunManta.container = "${params.containerPath}/sarek-${params.tag}.img"
$RunMultiQC.container = "${params.containerPath}/qctools-${params.tag}.img"
$RunMutect1.container = "${params.containerPath}/mutect1-${params.tag}.img"
$RunMutect2.container = "${params.containerPath}/gatk-${params.tag}.img"
$RunMultiQC.container = "${params.containerPath}/sarek-${params.tag}.img"
$RunMutect2.container = "${params.containerPath}/sarek-${params.tag}.img"
$RunSamtoolsStats.container = "${params.containerPath}/sarek-${params.tag}.img"
$RunSingleManta.container = "${params.containerPath}/sarek-${params.tag}.img"
$RunSingleStrelka.container = "${params.containerPath}/sarek-${params.tag}.img"
$RunSnpeff.container = {params.genome == 'GRCh38' ? "${params.containerPath}/snpeffgrch38-${params.tag}.img" : "${params.containerPath}/snpeffgrch37-${params.tag}.img"}
$RunStrelka.container = "${params.containerPath}/sarek-${params.tag}.img"
$RunStrelkaBP.container = "${params.containerPath}/sarek-${params.tag}.img"
$RunVcftools.container = "${params.containerPath}/qctools-${params.tag}.img"
$RunVcftools.container = "${params.containerPath}/sarek-${params.tag}.img"
$RunVEP.container = {params.genome == 'GRCh38' ? "${params.containerPath}/vepgrch38-${params.tag}.img" : "${params.containerPath}/vepgrch37-${params.tag}.img"}
}
10 changes: 4 additions & 6 deletions configuration/uppmax-localhost.config
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ process {
$BuildBWAindexes {
memory = {params.totalMemory} // TODO This is likely too high
}
$BuildPicardIndex {
$BuildReferenceIndex {
memory = {params.totalMemory} // TODO This is likely too high
}
$BuildSAMToolsIndex {
Expand All @@ -70,7 +70,9 @@ process {
memory = {params.totalMemory}
}
$MarkDuplicates {
memory = {params.singleCPUMem * 2 * task.attempt}
// Actually the -Xmx value should be kept lower
cpus = 16
memory = {2 * params.singleCPUMem}
}
$MergeBams {
cpus = 16
Expand Down Expand Up @@ -117,10 +119,6 @@ process {
}
$RunMultiQC {
}
$RunMutect1 {
cpus = 1
memory = {params.singleCPUMem * task.attempt}
}
$RunMutect2 {
cpus = 1
memory = {params.singleCPUMem * task.attempt}
Expand Down
Loading

0 comments on commit 924b90a

Please sign in to comment.