From 05e1d00f698ae327d624480310509e3c693a3684 Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Fri, 16 Mar 2018 10:15:57 +0100 Subject: [PATCH 01/36] change tag to latest --- scripts/do_all.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/do_all.sh b/scripts/do_all.sh index 72f5d98bcb..56f42821c3 100755 --- a/scripts/do_all.sh +++ b/scripts/do_all.sh @@ -4,7 +4,7 @@ set -xeuo pipefail PROFILE=singularity PUSH='' REPOSITORY=maxulysse -TAG=1.3 +TAG=latest TOOL=docker while [[ $# -gt 0 ]] From 3cb3ddef382a7600e9d2105f42be8ceeffebdcab Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Fri, 16 Mar 2018 10:16:56 +0100 Subject: [PATCH 02/36] update CHECKLIST --- .github/RELEASE_CHECKLIST.md | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/.github/RELEASE_CHECKLIST.md b/.github/RELEASE_CHECKLIST.md index 21378aebba..5698f49b9f 100644 --- a/.github/RELEASE_CHECKLIST.md +++ b/.github/RELEASE_CHECKLIST.md @@ -3,19 +3,18 @@ This checklist is for our own reference 1. Check that everything is up to date and ready to go 2. Increase version numbers. -3. Update version numbers in code: `main.nf`, `buildContainers.nf`, `buildReferences.nf` -4. If any changes on any containers, match the tag to current version `docker.config`, `singularity.config`, `singularity-path.config`. -5. Build, and get the containers. - - `./scripts/do_all.sh --push` - - `./scripts/do_all.sh --pull` -6. Test against sample data. +3. Update version numbers in code: `annotate.nf`, `buildContainers.nf`, `buildReferences.nf`, `germlineVC.nf`, `main.nf` and `somaticVC.nf` +4. Build, and get the containers. + - `./scripts/do_all.sh --push --tag ` + - `./scripts/do_all.sh --pull --tag ` +5. Test against sample data. - Check for any command line errors - Check version numbers are printed correctly - - `./scripts/test.sh -p docker` - - `./scripts/test.sh -p singularity` - - `./scripts/test.sh -p singularityPath` -7. 
Commit and push version updates -8. Make a [release](https://github.com/SciLifeLab/CAW/releases) on GitHub - list PRs as changelog. -9. Tweet that new version is released -10. Commit and push. Continue making more awesome :metal: -11. Have fika :cake: + - `./scripts/test.sh -p docker --tag ` + - `./scripts/test.sh -p singularity --tag ` + - `./scripts/test.sh -p singularityPath --tag ` +6. Commit and push version updates +7. Make a [release](https://github.com/SciLifeLab/Sarek/releases) on GitHub - list PRs as changelog. +8. Tweet that new version is released +9. Commit and push. Continue making more awesome :metal: +10. Have fika :cake: From eae2ad9c89db4026ed64207dc7dafaf0280c20bd Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Fri, 16 Mar 2018 16:01:59 +0100 Subject: [PATCH 03/36] correct --outDir params --- annotate.nf | 2 +- buildReferences.nf | 2 +- germlineVC.nf | 2 +- main.nf | 2 +- runMultiQC.nf | 2 +- somaticVC.nf | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/annotate.nf b/annotate.nf index 6f33461dcc..5a75c2a0db 100644 --- a/annotate.nf +++ b/annotate.nf @@ -71,7 +71,7 @@ params.noReports = false // Run Sarek in onlyQC mode params.onlyQC = false // outDir is current directory -params.outDir = baseDir +params.outDir = '.' // Step is annotate step = 'annotate' // Not testing diff --git a/buildReferences.nf b/buildReferences.nf index f525bb9a8a..d21a8e3302 100644 --- a/buildReferences.nf +++ b/buildReferences.nf @@ -68,7 +68,7 @@ if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! 
Use --project Date: Mon, 19 Mar 2018 16:43:49 +0100 Subject: [PATCH 04/36] remove UPPMAX specific dirs --- configuration/singularity-path.config | 1 - containers/fastqc/Dockerfile | 3 --- containers/freebayes/Dockerfile | 3 --- containers/gatk/Dockerfile | 3 --- containers/igvtools/Dockerfile | 3 --- containers/multiqc/Dockerfile | 3 --- containers/mutect1/Dockerfile | 3 --- containers/picard/Dockerfile | 3 --- containers/qualimap/Dockerfile | 3 --- containers/r-base/Dockerfile | 3 --- containers/runallelecount/Dockerfile | 3 --- containers/sarek/build.sh | 3 --- containers/snpeff/Dockerfile | 3 --- containers/vepgrch37/Dockerfile | 4 ---- containers/vepgrch38/Dockerfile | 4 ---- 15 files changed, 45 deletions(-) diff --git a/configuration/singularity-path.config b/configuration/singularity-path.config index 2ec1fba63d..f31faeb501 100644 --- a/configuration/singularity-path.config +++ b/configuration/singularity-path.config @@ -12,7 +12,6 @@ vim: syntax=groovy singularity { enabled = true - runOptions = "--bind /scratch" } params { diff --git a/containers/fastqc/Dockerfile b/containers/fastqc/Dockerfile index 567fba22ef..85237cb0d1 100644 --- a/containers/fastqc/Dockerfile +++ b/containers/fastqc/Dockerfile @@ -22,6 +22,3 @@ RUN \ && chmod 755 /opt/FastQC/fastqc \ && ln -s /opt/FastQC/fastqc /usr/local/bin/fastqc \ && rm fastqc_v${FASTQC_VERSION}.zip - -# Create UPPMAX directories -RUN mkdir /pica /proj /scratch /sw diff --git a/containers/freebayes/Dockerfile b/containers/freebayes/Dockerfile index f93335064d..efacdde8ee 100644 --- a/containers/freebayes/Dockerfile +++ b/containers/freebayes/Dockerfile @@ -28,6 +28,3 @@ RUN \ && make install \ && cd .. 
\ && rm -rf freebayes - -# Create UPPMAX directories -RUN mkdir /pica /proj /scratch /sw diff --git a/containers/gatk/Dockerfile b/containers/gatk/Dockerfile index a13300bf9a..ae8ad2ed0c 100644 --- a/containers/gatk/Dockerfile +++ b/containers/gatk/Dockerfile @@ -6,6 +6,3 @@ LABEL \ maintainer="maxime.garcia@scilifelab.se" ENV GATK_HOME=/usr - -# Create UPPMAX directories -RUN mkdir /pica /proj /scratch /sw diff --git a/containers/igvtools/Dockerfile b/containers/igvtools/Dockerfile index fb96d256fd..1629b64cf0 100644 --- a/containers/igvtools/Dockerfile +++ b/containers/igvtools/Dockerfile @@ -23,6 +23,3 @@ RUN \ && unzip igvtools_${IGVTOOLS_VERSION}.zip \ && rm igvtools_${IGVTOOLS_VERSION}.zip \ && mv IGVTools $IGVTOOLS_HOME - -# Create UPPMAX directories -RUN mkdir /pica /proj /scratch /sw diff --git a/containers/multiqc/Dockerfile b/containers/multiqc/Dockerfile index 0411767998..2b85974906 100644 --- a/containers/multiqc/Dockerfile +++ b/containers/multiqc/Dockerfile @@ -4,6 +4,3 @@ LABEL \ author="Maxime Garcia" \ description="MultiQC image used in Sarek" \ maintainer="maxime.garcia@scilifelab.se" - -# Create UPPMAX directories -RUN mkdir /pica /proj /scratch /sw diff --git a/containers/mutect1/Dockerfile b/containers/mutect1/Dockerfile index 88f38bbf9c..7068fb963c 100644 --- a/containers/mutect1/Dockerfile +++ b/containers/mutect1/Dockerfile @@ -23,6 +23,3 @@ RUN \ && unzip muTect-${MUTECT_VERSION}-bin.zip -d ${MUTECT_HOME} \ && rm muTect-${MUTECT_VERSION}-bin.zip \ && mv ${MUTECT_HOME}/muTect-${MUTECT_VERSION}.jar ${MUTECT_HOME}/muTect.jar - -# Create UPPMAX directories -RUN mkdir /pica /proj /scratch /sw diff --git a/containers/picard/Dockerfile b/containers/picard/Dockerfile index 5deafe3d6a..8a558102d5 100644 --- a/containers/picard/Dockerfile +++ b/containers/picard/Dockerfile @@ -23,6 +23,3 @@ RUN \ && unzip picard-tools-${PICARD_VERSION}.zip \ && mv picard-tools-${PICARD_VERSION} ${PICARD_HOME} \ && rm picard-tools-${PICARD_VERSION}.zip - -# Create 
UPPMAX directories -RUN mkdir /pica /proj /scratch /sw diff --git a/containers/qualimap/Dockerfile b/containers/qualimap/Dockerfile index c4be0f8249..4702b84b73 100644 --- a/containers/qualimap/Dockerfile +++ b/containers/qualimap/Dockerfile @@ -23,6 +23,3 @@ RUN \ && unzip qualimap_v${QUALIMAP_VERSION}.zip -d /opt/ \ && rm qualimap_v${QUALIMAP_VERSION}.zip \ && mv /opt/qualimap_v${QUALIMAP_VERSION} /opt/qualimap - -# Create UPPMAX directories -RUN mkdir /pica /proj /scratch /sw diff --git a/containers/r-base/Dockerfile b/containers/r-base/Dockerfile index a2263230c3..2f38953fa6 100644 --- a/containers/r-base/Dockerfile +++ b/containers/r-base/Dockerfile @@ -7,6 +7,3 @@ maintainer="maxime.garcia@scilifelab.se" # Install libraries RUN echo "r <- getOption('repos'); r['CRAN'] <- 'http://cran.us.r-project.org'; options(repos = r);" > ~/.Rprofile \ && Rscript -e "install.packages('RColorBrewer')" - -# Create UPPMAX directories -RUN mkdir /pica /proj /scratch /sw diff --git a/containers/runallelecount/Dockerfile b/containers/runallelecount/Dockerfile index 5b7bf6c1b8..ac4b26494a 100644 --- a/containers/runallelecount/Dockerfile +++ b/containers/runallelecount/Dockerfile @@ -32,6 +32,3 @@ RUN \ && cd /opt/alleleCount-${ALLELECOUNT_VERSION} \ && ./setup.sh /opt/ \ && rm /opt/v${ALLELECOUNT_VERSION}.tar.gz - -# Create UPPMAX directories -RUN mkdir /pica /proj /scratch /sw diff --git a/containers/sarek/build.sh b/containers/sarek/build.sh index 67aa36275d..fbe57b5aee 100755 --- a/containers/sarek/build.sh +++ b/containers/sarek/build.sh @@ -101,6 +101,3 @@ apt-get remove -y \ zlib1g-dev apt-get clean rm -rf /build /var/lib/apt/lists/* /opt/get-pip.py - -# Create UPPMAX directories -mkdir /pica /proj /scratch /sw diff --git a/containers/snpeff/Dockerfile b/containers/snpeff/Dockerfile index 4181e4bf69..bcd7e8f5c5 100644 --- a/containers/snpeff/Dockerfile +++ b/containers/snpeff/Dockerfile @@ -27,6 +27,3 @@ RUN \ 
http://downloads.sourceforge.net/project/snpeff/snpEff_v${SNPEFF_VERSION}_core.zip \ && unzip snpEff_v${SNPEFF_VERSION}_core.zip -d /opt/ \ && rm snpEff_v${SNPEFF_VERSION}_core.zip - -# Create UPPMAX directories -RUN mkdir /pica /proj /scratch /sw diff --git a/containers/vepgrch37/Dockerfile b/containers/vepgrch37/Dockerfile index af8108b89c..472b1ab96f 100644 --- a/containers/vepgrch37/Dockerfile +++ b/containers/vepgrch37/Dockerfile @@ -18,7 +18,3 @@ RUN \ ftp://ftp.ensembl.org/pub/release-${VEP_VERSION}/variation/VEP/homo_sapiens_vep_${VEP_VERSION}_${GENOME}.tar.gz \ && tar xzf homo_sapiens_vep_${VEP_VERSION}_${GENOME}.tar.gz \ && rm homo_sapiens_vep_${VEP_VERSION}_${GENOME}.tar.gz - -# Create UPPMAX directories -USER root -RUN mkdir /pica /proj /scratch /sw diff --git a/containers/vepgrch38/Dockerfile b/containers/vepgrch38/Dockerfile index 03334d11c8..a7cfc79dc0 100644 --- a/containers/vepgrch38/Dockerfile +++ b/containers/vepgrch38/Dockerfile @@ -18,7 +18,3 @@ RUN \ ftp://ftp.ensembl.org/pub/release-${VEP_VERSION}/variation/VEP/homo_sapiens_vep_${VEP_VERSION}_${GENOME}.tar.gz \ && tar xzf homo_sapiens_vep_${VEP_VERSION}_${GENOME}.tar.gz \ && rm homo_sapiens_vep_${VEP_VERSION}_${GENOME}.tar.gz - -# Create UPPMAX directories -USER root -RUN mkdir /pica /proj /scratch /sw From 2e9a6e232963fdf911d20fda2205cb0d8a54bef0 Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Tue, 20 Mar 2018 13:50:26 +0100 Subject: [PATCH 05/36] update Checklist --- .github/RELEASE_CHECKLIST.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/RELEASE_CHECKLIST.md b/.github/RELEASE_CHECKLIST.md index 5698f49b9f..772ca58d94 100644 --- a/.github/RELEASE_CHECKLIST.md +++ b/.github/RELEASE_CHECKLIST.md @@ -3,7 +3,7 @@ This checklist is for our own reference 1. Check that everything is up to date and ready to go 2. Increase version numbers. -3. 
Update version numbers in code: `annotate.nf`, `buildContainers.nf`, `buildReferences.nf`, `germlineVC.nf`, `main.nf` and `somaticVC.nf` +3. Update version numbers in code: `configuration/base.config` 4. Build, and get the containers. - `./scripts/do_all.sh --push --tag ` - `./scripts/do_all.sh --pull --tag ` From 4a411b74c08c7bd2c725d78670bc99c52a3c4b06 Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Tue, 20 Mar 2018 13:51:33 +0100 Subject: [PATCH 06/36] move all configuration into base.config + use MyUtils lib for some shared functions --- annotate.nf | 203 ++++---------------------------------- buildContainers.nf | 121 +++-------------------- configuration/base.config | 24 ++++- germlineVC.nf | 164 ++++-------------------------- lib/MyUtils.groovy | 93 +++++++++++++++++ main.nf | 169 +++++-------------------------- runMultiQC.nf | 125 ++--------------------- somaticVC.nf | 182 +++++----------------------------- 8 files changed, 228 insertions(+), 853 deletions(-) create mode 100644 lib/MyUtils.groovy diff --git a/annotate.nf b/annotate.nf index 5a75c2a0db..ffe312fd2d 100644 --- a/annotate.nf +++ b/annotate.nf @@ -37,63 +37,33 @@ kate: syntax groovy; space-indent on; indent-width 2; ================================================================================ */ -version = '2.0.0' - // Check that Nextflow version is up to date enough // try / throw / catch works for NF versions < 0.25 when this was implemented -nf_required_version = '0.25.0' try { - if( ! nextflow.version.matches(">= ${nf_required_version}") ){ + if( ! nextflow.version.matches(">= ${params.nfRequiredVersion}") ){ throw GroovyException('Nextflow version too old') } } catch (all) { log.error "====================================================\n" + - " Nextflow version ${nf_required_version} required! You are running v${workflow.nextflow.version}.\n" + + " Nextflow version ${params.nfRequiredVersion} required! 
You are running v${workflow.nextflow.version}.\n" + " Pipeline execution will continue, but things may break.\n" + " Please update Nextflow.\n" + "============================================================" } if (params.help) exit 0, helpMessage() -if (params.version) exit 0, versionMessage() -if (!isAllowedParams(params)) exit 1, "params unknown, see --help for more information" +if (params.more) exit 0, moreMessage() +if (!MyUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project " -// Default params: -// Such params are overridden by command line or configuration definitions - -// No tools to annotate -params.annotateTools = '' -// No vcf to annotare -params.annotateVCF = '' -// Reports are generated -params.noReports = false -// Run Sarek in onlyQC mode -params.onlyQC = false -// outDir is current directory -params.outDir = '.' -// Step is annotate -step = 'annotate' -// Not testing -params.test = '' -// No tools to be used -params.tools = '' -// Params are defined in config files -params.containerPath = '' -params.repository = '' -params.tag = '' - tools = params.tools ? params.tools.split(',').collect{it.trim().toLowerCase()} : [] annotateTools = params.annotateTools ? params.annotateTools.split(',').collect{it.trim().toLowerCase()} : [] annotateVCF = params.annotateVCF ? 
params.annotateVCF.split(',').collect{it.trim()} : [] directoryMap = defineDirectoryMap() toolList = defineToolList() -reports = !params.noReports -onlyQC = params.onlyQC -verbose = params.verbose -if (!checkParameterList(tools,toolList)) exit 1, 'Unknown tool(s), see --help for more information' +if (!MyUtils.checkParameterList(tools,toolList)) exit 1, 'Unknown tool(s), see --help for more information' /* ================================================================================ @@ -106,7 +76,7 @@ startMessage() vcfToAnnotate = Channel.create() vcfNotToAnnotate = Channel.create() -if (step == 'annotate' && annotateVCF == []) { +if (annotateVCF == []) { Channel.empty().mix( Channel.fromPath("${params.outDir}/VariantCalling/HaplotypeCaller/*.vcf.gz") .flatten().map{vcf -> ['haplotypecaller',vcf]}, @@ -118,9 +88,10 @@ if (step == 'annotate' && annotateVCF == []) { .flatten().map{vcf -> ['mutect2',vcf]}, Channel.fromPath("${params.outDir}/VariantCalling/Strelka/*{somatic,variants}*.vcf.gz") .flatten().map{vcf -> ['strelka',vcf]} - ).choice(vcfToAnnotate, vcfNotToAnnotate) { annotateTools == [] || (annotateTools != [] && it[0] in annotateTools) ? 0 : 1 } - -} else if (step == 'annotate' && annotateTools == [] && annotateVCF != []) { + ).choice(vcfToAnnotate, vcfNotToAnnotate) { + annotateTools == [] || (annotateTools != [] && it[0] in annotateTools) ? 
0 : 1 + } +} else if (annotateTools == []) { list = "" annotateVCF.each{ list += ",${it}" } list = list.substring(1) @@ -128,8 +99,7 @@ if (step == 'annotate' && annotateVCF == []) { .map{vcf -> ['userspecified',vcf]} else vcfToAnnotate = Channel.fromPath("{$list}") .map{vcf -> ['userspecified',vcf]} - -}else exit 1, "specify only tools or files to annotate, bot both" +} else exit 1, "specify only tools or files to annotate, not both" vcfNotToAnnotate.close() @@ -146,7 +116,7 @@ process RunBcftoolsStats { output: file ("${vcf.baseName}.bcf.tools.stats.out") into bcfReport - when: reports + when: !params.noReports script: """ @@ -154,7 +124,7 @@ process RunBcftoolsStats { """ } -if (verbose) bcfReport = bcfReport.view { +if (params.verbose) bcfReport = bcfReport.view { "BCFTools stats report:\n\ File : [${it.fileName}]" } @@ -189,7 +159,7 @@ process RunSnpeff { """ } -if (verbose) snpeffReport = snpeffReport.view { +if (params.verbose) snpeffReport = snpeffReport.view { "snpEff report:\n\ File : ${it.fileName}" } @@ -226,7 +196,7 @@ process RunVEP { """ } -if (verbose) vepReport = vepReport.view { +if (params.verbose) vepReport = vepReport.view { "VEP report:\n\ Files : ${it.fileName}" } @@ -239,122 +209,7 @@ if (verbose) vepReport = vepReport.view { def sarekMessage() { // Display Sarek message - log.info "Sarek ~ ${version} - " + this.grabRevision() + (workflow.commitId ? 
" [${workflow.commitId}]" : "") -} - -def checkParameterExistence(it, list) { - // Check parameter existence - if (!list.contains(it)) { - println("Unknown parameter: ${it}") - return false - } - return true -} - -def checkParameterList(list, realList) { - // Loop through all parameters to check their existence and spelling - return list.every{ checkParameterExistence(it, realList) } -} - -def checkParamReturnFile(item) { - params."${item}" = params.genomes[params.genome]."${item}" - return file(params."${item}") -} - -def checkParams(it) { - // Check if params is in this given list - return it in [ - 'ac-loci', - 'acLoci', - 'annotate-tools', - 'annotate-VCF', - 'annotateTools', - 'annotateVCF', - 'build', - 'bwa-index', - 'bwaIndex', - 'call-name', - 'callName', - 'contact-mail', - 'contactMail', - 'container-path', - 'containerPath', - 'containers', - 'cosmic-index', - 'cosmic', - 'cosmicIndex', - 'dbsnp-index', - 'dbsnp', - 'docker', - 'genome_base', - 'genome-dict', - 'genome-file', - 'genome-index', - 'genome', - 'genomeDict', - 'genomeFile', - 'genomeIndex', - 'genomes', - 'help', - 'intervals', - 'known-indels-index', - 'known-indels', - 'knownIndels', - 'knownIndelsIndex', - 'max_cpus', - 'max_memory', - 'max_time', - 'no-BAMQC', - 'no-GVCF', - 'no-reports', - 'noBAMQC', - 'noGVCF', - 'noReports', - 'only-QC', - 'onlyQC', - 'out-dir', - 'outDir', - 'params', - 'project', - 'push', - 'repository', - 'run-time', - 'runTime', - 'sample-dir', - 'sample', - 'sampleDir', - 'single-CPUMem', - 'singleCPUMem', - 'singularity', - 'step', - 'tag', - 'test', - 'tools', - 'total-memory', - 'totalMemory', - 'vcflist', - 'verbose', - 'version'] -} - -def checkReferenceMap(referenceMap) { - // Loop through all the references files to check their existence - referenceMap.every { - referenceFile, fileToCheck -> - checkRefExistence(referenceFile, fileToCheck) - } -} - -def checkRefExistence(referenceFile, fileToCheck) { - if (fileToCheck instanceof List) return 
fileToCheck.every{ checkRefExistence(referenceFile, it) } - def f = file(fileToCheck) - // this is an expanded wildcard: we can assume all files exist - if (f instanceof List && f.size() > 0) return true - else if (!f.exists()) { - log.info "Missing references: ${referenceFile} ${fileToCheck}" - return false - } - return true + log.info "Sarek ~ ${params.version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "") } def checkUppmaxProject() { @@ -370,12 +225,6 @@ def defineDirectoryMap() { ] } -def defineStepList() { - return [ - 'annotate' - ] -} - def defineToolList() { return [ 'snpeff', @@ -433,22 +282,10 @@ def helpMessage() { log.info " you're reading it" log.info " --verbose" log.info " Adds more verbosity to workflow" - log.info " --version" + log.info " --more" log.info " displays version number" } -def isAllowedParams(params) { - // Compare params to list of verified params - final test = true - params.each{ - if (!checkParams(it.toString().split('=')[0])) { - println "params ${it.toString().split('=')[0]} is unknown" - test = false - } - } - return test -} - def minimalInformationMessage() { // Minimal information message log.info "Command Line: " + workflow.commandLine @@ -457,10 +294,8 @@ def minimalInformationMessage() { log.info "Launch Dir : " + workflow.launchDir log.info "Work Dir : " + workflow.workDir log.info "Out Dir : " + params.outDir - if (step != 'annotate') log.info "TSV file : ${tsvFile}" log.info "Genome : " + params.genome log.info "Genome_base : " + params.genome_base - log.info "Step : " + step if (tools) log.info "Tools : " + tools.join(', ') if (annotateTools) log.info "Annotate on : " + annotateTools.join(', ') if (annotateVCF) log.info "VCF files : " +annotateVCF.join(',\n ') @@ -483,10 +318,10 @@ def startMessage() { this.minimalInformationMessage() } -def versionMessage() { +def moreMessage() { // Display version message log.info "Sarek" - log.info " version : " + version + log.info " version : " + 
params.version log.info workflow.commitId ? "Git info : ${workflow.repository} - ${workflow.revision} [${workflow.commitId}]" : " revision : " + this.grabRevision() } diff --git a/buildContainers.nf b/buildContainers.nf index 23d0bc17c4..590fe4ea63 100644 --- a/buildContainers.nf +++ b/buildContainers.nf @@ -37,45 +37,25 @@ New Germline (+ Somatic) Analysis Workflow. Started March 2016. ================================================================================ */ -version = '2.0.0' - // Check that Nextflow version is up to date enough // try / throw / catch works for NF versions < 0.25 when this was implemented -nf_required_version = '0.25.0' try { - if( ! nextflow.version.matches(">= ${nf_required_version}") ){ + if( ! nextflow.version.matches(">= ${params.nfRequiredVersion}") ){ throw GroovyException('Nextflow version too old') } } catch (all) { log.error "====================================================\n" + - " Nextflow version ${nf_required_version} required! You are running v${workflow.nextflow.version}.\n" + + " Nextflow version ${params.nfRequiredVersion} required! You are running v${workflow.nextflow.version}.\n" + " Pipeline execution will continue, but things may break.\n" + " Please update Nextflow.\n" + "============================================================" } if (params.help) exit 0, helpMessage() -if (params.version) exit 0, versionMessage() -if (!isAllowedParams(params)) exit 1, "params unknown, see --help for more information" +if (params.more) exit 0, moreMessage() +if (!MyUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! 
Use --project " -// Default params: -// Such params are overridden by command line or configuration definitions - -// containerPath is current Directory -params.containerPath = "${baseDir}" -// all containers to be build -params.containers = 'all' -// Docker will not be used -params.docker = false -// Containers will not be pushed on DockerHub -params.push = false -// DockerHub repository is maxulysse -// TODO Change to a SciLifeLab repository -params.repository = 'maxulysse' -// Singularity will not be used -params.singularity = false - // Define containers to handle (build/push or pull) containersList = defineContainersList() containers = params.containers.split(',').collect {it.trim()} @@ -84,12 +64,6 @@ containers = containers == ['all'] ? containersList : containers // push only to DockerHub, so only when using Docker push = params.docker && params.push ? true : false -// by default the tag will be the current version -tag = params.tag ? params.tag : version - -// to simplify verbose mode -verbose = params.verbose - if (!params.docker && !params.singularity) exit 1, 'No container technology choosed, specify --docker or --singularity, see --help for more information' if (!checkContainers(containers,containersList)) exit 1, 'Unknown container(s), see --help for more information' @@ -122,7 +96,7 @@ process BuildDockerContainers { """ } -if (verbose) containersBuilt = containersBuilt.view { +if (params.verbose) containersBuilt = containersBuilt.view { "Docker container: ${params.repository}/${it}:${tag} built." } @@ -145,7 +119,7 @@ process PullSingularityContainers { """ } -if (verbose) imagePulled = imagePulled.view { +if (params.verbose) imagePulled = imagePulled.view { "Singularity image: ${it.fileName} pulled." } @@ -166,7 +140,7 @@ process PushDockerContainers { """ } -if (verbose) containersPushed = containersPushed.view { +if (params.verbose) containersPushed = containersPushed.view { "Docker container: ${params.repository}/${it}:${tag} pushed." 
} @@ -178,7 +152,7 @@ if (verbose) containersPushed = containersPushed.view { def sarekMessage() { // Display Sarek message - log.info "Sarek ~ ${version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "") + log.info "Sarek ~ ${params.version} - " + MyUtils.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "") } def checkContainerExistence(container, list) { @@ -199,58 +173,6 @@ def checkContainers(containers, containersList) { return containerExists ? true : false } -def checkParams(it) { - // Check if params is in this given list - return it in [ - 'annotate-tools', - 'annotate-VCF', - 'annotateTools', - 'annotateVCF', - 'build', - 'call-name', - 'callName', - 'contact-mail', - 'contactMail', - 'container-path', - 'containerPath', - 'containers', - 'docker', - 'genome_base', - 'genome', - 'genomes', - 'help', - 'max_cpus', - 'max_memory', - 'max_time', - 'no-GVCF', - 'no-reports', - 'noGVCF', - 'noReports', - 'only-QC', - 'onlyQC', - 'out-dir', - 'outDir', - 'params', - 'project', - 'push', - 'repository', - 'sample-dir', - 'sample', - 'sampleDir', - 'single-CPUMem', - 'singleCPUMem', - 'singularity', - 'step', - 'tag', - 'test', - 'tools', - 'total-memory', - 'totalMemory', - 'vcflist', - 'verbose', - 'version'] -} - def checkUppmaxProject() { // check if UPPMAX project number is specified return !(workflow.profile == 'slurm' && !params.project) @@ -278,11 +200,6 @@ def defineContainersList(){ ] } -def grabRevision() { - // Return the same string executed from github or not - return workflow.revision ?: workflow.commitId ?: workflow.scriptId.substring(0,10) -} - def helpMessage() { // Display help message this.sarekMessage() @@ -309,23 +226,11 @@ def helpMessage() { log.info " --containerPath: Select where to download images" log.info " Default: \$PWD" log.info " --tag`: Choose the tag for the containers" - log.info " Default (version number): " + version - log.info " --version" + log.info " Default (version number): 
" + params.version + log.info " --more" log.info " displays version number and more informations" } -def isAllowedParams(params) { - // Compare params to list of verified params - final test = true - params.each{ - if (!checkParams(it.toString().split('=')[0])) { - println "params ${it.toString().split('=')[0]} is unknown" - test = false - } - } - return test -} - def minimalInformationMessage() { // Minimal information message log.info "Command Line: " + workflow.commandLine @@ -349,11 +254,11 @@ def startMessage() { this.minimalInformationMessage() } -def versionMessage() { +def moreMessage() { // Display version message log.info "Sarek - Workflow For Somatic And Germline Variations" - log.info " version : " + version - log.info workflow.commitId ? "Git info : ${workflow.repository} - ${workflow.revision} [${workflow.commitId}]" : " revision : " + this.grabRevision() + log.info " version : " + params.version + log.info workflow.commitId ? "Git info : ${workflow.repository} - ${workflow.revision} [${workflow.commitId}]" : " revision : " + MyUtils.grabRevision() } workflow.onComplete { diff --git a/configuration/base.config b/configuration/base.config index 9ec7de20b0..de9d17deb7 100644 --- a/configuration/base.config +++ b/configuration/base.config @@ -13,14 +13,36 @@ wf_repository = 'maxulysse' wf_tag = 'latest' params { + annotateTools = '' + annotateVCF = '' + containerPath = '.' + containers = '.' + docker = false + explicitBqsrNeeded = true genome = 'GRCh38' + genome_base = '' help = false + more = false + nfRequiredVersion = '0.25.0' + noBAMQC = false + noGVCF = false + noReports = false + nucleotidesPerSecond = 1000.0 + onlyQC = false outDir = '.' 
project = '' + push = false + refDir = '' repository = wf_repository + sample = '' + sampleDir = '' + singularity = false + step = 'mapping' tag = wf_tag + test = '' + tools = '' verbose = false - version = false + version = '2.0.0' } process { diff --git a/germlineVC.nf b/germlineVC.nf index 04b46e4c71..aab5763357 100644 --- a/germlineVC.nf +++ b/germlineVC.nf @@ -43,68 +43,35 @@ kate: syntax groovy; space-indent on; indent-width 2; ================================================================================ */ -version = '2.0.0' - // Check that Nextflow version is up to date enough // try / throw / catch works for NF versions < 0.25 when this was implemented -nf_required_version = '0.25.0' try { - if( ! nextflow.version.matches(">= ${nf_required_version}") ){ + if( ! nextflow.version.matches(">= ${params.nfRequiredVersion}") ){ throw GroovyException('Nextflow version too old') } } catch (all) { log.error "====================================================\n" + - " Nextflow version ${nf_required_version} required! You are running v${workflow.nextflow.version}.\n" + + " Nextflow version ${params.nfRequiredVersion} required! You are running v${workflow.nextflow.version}.\n" + " Pipeline execution will continue, but things may break.\n" + " Please update Nextflow.\n" + "============================================================" } if (params.help) exit 0, helpMessage() -if (params.version) exit 0, versionMessage() -if (!isAllowedParams(params)) exit 1, "params unknown, see --help for more information" +if (params.more) exit 0, moreMessage() +if (!MyUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! 
Use --project " -// Default params: -// Such params are overridden by command line or configuration definitions - -// GVCF are generated -params.noGVCF = false -// Reports are generated -params.noReports = false -// BAMQC is used -params.noBAMQC = false -// Run Sarek in onlyQC mode -params.onlyQC = false -// outDir is current directory -params.outDir = '.' -// No sample is defined -params.sample = '' -// Step is variantcalling -step = 'variantcalling' -// Not testing -params.test = '' -// No tools to be used -params.tools = '' -// Params are defined in config files -params.containerPath = '' -params.repository = '' -params.tag = '' - tools = params.tools ? params.tools.split(',').collect{it.trim().toLowerCase()} : [] directoryMap = defineDirectoryMap() referenceMap = defineReferenceMap() -stepList = defineStepList() toolList = defineToolList() nucleotidesPerSecond = 1000.0 // used to estimate variant calling runtime gvcf = !params.noGVCF reports = !params.noReports onlyQC = params.onlyQC -verbose = params.verbose -if (!checkParameterExistence(step, stepList)) exit 1, 'Unknown step, see --help for more information' -if (step.contains(',')) exit 1, 'You can choose only one step, see --help for more information' if (!checkReferenceMap(referenceMap)) exit 1, 'Missing Reference file(s), see --help for more information' if (!checkParameterList(tools,toolList)) exit 1, 'Unknown tool(s), see --help for more information' @@ -141,7 +108,7 @@ if (tsvPath) { startMessage() -if (verbose) bamFiles = bamFiles.view { +if (params.verbose) bamFiles = bamFiles.view { "BAMs to process:\n\ ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\ Files : [${it[3].fileName}, ${it[4].fileName}]" @@ -154,7 +121,7 @@ recalTables = recalTables.map{ it + [null] } // null recalibration table means: recalTables = recalTables.map { [it[0]] + it[2..-1] } // remove status -if (verbose) recalibratedBam = recalibratedBam.view { +if (params.verbose) recalibratedBam = recalibratedBam.view { "Recalibrated 
BAM for variant Calling:\n\ ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\ Files : [${it[3].fileName}, ${it[4].fileName}]" @@ -179,7 +146,7 @@ process RunSamtoolsStats { """ } -if (verbose) samtoolsStatsReport = samtoolsStatsReport.view { +if (params.verbose) samtoolsStatsReport = samtoolsStatsReport.view { "SAMTools stats report:\n\ File : [${it.fileName}]" } @@ -207,7 +174,7 @@ process RunBamQC { """ } -if (verbose) bamQCreport = bamQCreport.view { +if (params.verbose) bamQCreport = bamQCreport.view { "BamQC report:\n\ Dir : [${it.fileName}]" } @@ -307,7 +274,7 @@ bedIntervals = bedIntervals .flatten().collate(2) .map{duration, intervalFile -> intervalFile} -if (verbose) bedIntervals = bedIntervals.view { +if (params.verbose) bedIntervals = bedIntervals.view { " Interv: ${it.baseName}" } @@ -423,7 +390,7 @@ hcGenotypedVCF = hcGenotypedVCF.groupTuple(by:[0,1,2,3]) // so we can have a single sorted VCF containing all the calls for a given caller vcfsToMerge = hcGenomicVCF.mix(hcGenotypedVCF) -if (verbose) vcfsToMerge = vcfsToMerge.view { +if (params.verbose) vcfsToMerge = vcfsToMerge.view { "VCFs To be merged:\n\ Tool : ${it[0]}\tID : ${it[1]}\tSample: [${it[3]}, ${it[2]}]\n\ Files : ${it[4].fileName}" @@ -491,7 +458,7 @@ process ConcatVCF { """ } -if (verbose) vcfConcatenated = vcfConcatenated.view { +if (params.verbose) vcfConcatenated = vcfConcatenated.view { "Variant Calling output:\n\ Tool : ${it[0]}\tID : ${it[1]}\tSample: [${it[3]}, ${it[2]}]\n\ File : ${it[4].fileName}" @@ -534,7 +501,7 @@ process RunSingleStrelka { """ } -if (verbose) singleStrelkaOutput = singleStrelkaOutput.view { +if (params.verbose) singleStrelkaOutput = singleStrelkaOutput.view { "Variant Calling output:\n\ Tool : ${it[0]}\tID : ${it[1]}\tSample: ${it[2]}\n\ Files : ${it[3].fileName}\n\ @@ -582,7 +549,7 @@ process RunSingleManta { """ } -if (verbose) singleMantaOutput = singleMantaOutput.view { +if (params.verbose) singleMantaOutput = singleMantaOutput.view { "Variant Calling 
output:\n\ Tool : ${it[0]}\tID : ${it[1]}\tSample: ${it[2]}\n\ Files : ${it[3].fileName}\n\ @@ -618,7 +585,7 @@ process RunBcftoolsStats { """ } -if (verbose) bcfReport = bcfReport.view { +if (params.verbose) bcfReport = bcfReport.view { "BCFTools stats report:\n\ File : [${it.fileName}]" } @@ -633,7 +600,7 @@ bcfReport.close() def sarekMessage() { // Display Sarek message - log.info "Sarek ~ ${version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "") + log.info "Sarek ~ ${params.version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "") } def checkFileExtension(it, extension) { @@ -660,82 +627,6 @@ def checkParamReturnFile(item) { return file(params."${item}") } -def checkParams(it) { - // Check if params is in this given list - return it in [ - 'ac-loci', - 'acLoci', - 'annotate-tools', - 'annotate-VCF', - 'annotateTools', - 'annotateVCF', - 'build', - 'bwa-index', - 'bwaIndex', - 'call-name', - 'callName', - 'contact-mail', - 'contactMail', - 'container-path', - 'containerPath', - 'containers', - 'cosmic-index', - 'cosmic', - 'cosmicIndex', - 'dbsnp-index', - 'dbsnp', - 'docker', - 'genome_base', - 'genome-dict', - 'genome-file', - 'genome-index', - 'genome', - 'genomeDict', - 'genomeFile', - 'genomeIndex', - 'genomes', - 'help', - 'intervals', - 'known-indels-index', - 'known-indels', - 'knownIndels', - 'knownIndelsIndex', - 'max_cpus', - 'max_memory', - 'max_time', - 'no-BAMQC', - 'no-GVCF', - 'no-reports', - 'noBAMQC', - 'noGVCF', - 'noReports', - 'only-QC', - 'onlyQC', - 'out-dir', - 'outDir', - 'params', - 'project', - 'push', - 'repository', - 'run-time', - 'runTime', - 'sample-dir', - 'sample', - 'sampleDir', - 'single-CPUMem', - 'singleCPUMem', - 'singularity', - 'step', - 'tag', - 'test', - 'tools', - 'total-memory', - 'totalMemory', - 'vcflist', - 'verbose', - 'version'] -} - def checkReferenceMap(referenceMap) { // Loop through all the references files to check their existence 
referenceMap.every { @@ -805,12 +696,6 @@ def defineReferenceMap() { ] } -def defineStepList() { - return [ - 'variantcalling' - ] -} - def defineToolList() { return [ 'ascat', @@ -931,22 +816,10 @@ def helpMessage() { log.info " you're reading it" log.info " --verbose" log.info " Adds more verbosity to workflow" - log.info " --version" + log.info " --more" log.info " displays version number" } -def isAllowedParams(params) { - // Compare params to list of verified params - final test = true - params.each{ - if (!checkParams(it.toString().split('=')[0])) { - println "params ${it.toString().split('=')[0]} is unknown" - test = false - } - } - return test -} - def minimalInformationMessage() { // Minimal information message log.info "Command Line: " + workflow.commandLine @@ -958,7 +831,6 @@ def minimalInformationMessage() { log.info "TSV file : ${tsvFile}" log.info "Genome : " + params.genome log.info "Genome_base : " + params.genome_base - log.info "Step : " + step log.info "Tools : " + tools.join(', ') log.info "Containers :" if (params.repository) log.info " Repository : ${params.repository}" @@ -1008,10 +880,10 @@ def startMessage() { this.minimalInformationMessage() } -def versionMessage() { +def moreMessage() { // Display version message log.info "Sarek" - log.info " version : " + version + log.info " version : " + params.version log.info workflow.commitId ? 
"Git info : ${workflow.repository} - ${workflow.revision} [${workflow.commitId}]" : " revision : " + this.grabRevision() } diff --git a/lib/MyUtils.groovy b/lib/MyUtils.groovy new file mode 100644 index 0000000000..51bf1dbb9e --- /dev/null +++ b/lib/MyUtils.groovy @@ -0,0 +1,93 @@ +class MyUtils { + static def checkParams(it) { + // Check if params is in this given list + return it in [ + 'annotate-tools', + 'annotate-VCF', + 'annotateTools', + 'annotateVCF', + 'build', + 'call-name', + 'callName', + 'contact-mail', + 'contactMail', + 'container-path', + 'containerPath', + 'containers', + 'docker', + 'download', + 'explicit-bqsr-needed', + 'explicitBqsrNeeded', + 'genome_base', + 'genome', + 'genomes', + 'help', + 'max_cpus', + 'max_memory', + 'max_time', + 'more', + 'nf-required-version', + 'nfRequiredVersion', + 'no-BAMQC', + 'no-GVCF', + 'no-reports', + 'noBAMQC', + 'noGVCF', + 'noReports', + 'nucleotides-per-second', + 'nucleotidesPerSecond', + 'only-QC', + 'onlyQC', + 'out-dir', + 'outDir', + 'params', + 'project', + 'push', + 'ref-dir', + 'refDir', + 'repository', + 'run-time', + 'runTime', + 'sample-dir', + 'sample', + 'sampleDir', + 'single-CPUMem', + 'singleCPUMem', + 'singularity', + 'step', + 'tag', + 'test', + 'tools', + 'total-memory', + 'totalMemory', + 'vcflist', + 'verbose', + 'version'] + } + + static def checkParameterList(list, realList) { + // Loop through all parameters to check their existence and spelling + return list.every{ checkParameterExistence(it, realList) } + } + + static def checkParameterExistence(it, list) { + // Check parameter existence + if (!list.contains(it)) { + println("Unknown parameter: ${it}") + return false + } + return true + } + + static def isAllowedParams(params) { + // Compare params to list of verified params + final test = true + params.each{ + if (!checkParams(it.toString().split('=')[0])) { + println "params ${it.toString().split('=')[0]} is unknown" + test = false + } + } + return test + } +} diff --git 
a/main.nf b/main.nf index df041fecdb..8628539bf1 100644 --- a/main.nf +++ b/main.nf @@ -44,54 +44,25 @@ kate: syntax groovy; space-indent on; indent-width 2; ================================================================================ */ -version = '2.0.0' - // Check that Nextflow version is up to date enough // try / throw / catch works for NF versions < 0.25 when this was implemented -nf_required_version = '0.25.0' try { - if( ! nextflow.version.matches(">= ${nf_required_version}") ){ + if( ! nextflow.version.matches(">= ${params.nfRequiredVersion}") ){ throw GroovyException('Nextflow version too old') } } catch (all) { log.error "====================================================\n" + - " Nextflow version ${nf_required_version} required! You are running v${workflow.nextflow.version}.\n" + + " Nextflow version ${params.nfRequiredVersion} required! You are running v${workflow.nextflow.version}.\n" + " Pipeline execution will continue, but things may break.\n" + " Please update Nextflow.\n" + "============================================================" } if (params.help) exit 0, helpMessage() -if (params.version) exit 0, versionMessage() -if (!isAllowedParams(params)) exit 1, "params unknown, see --help for more information" +if (params.more) exit 0, moreMessage() +if (!MyUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project " -// Default params: -// Such params are overridden by command line or configuration definitions - -// Reports are generated -params.noReports = false -// BQSR are explicitly asked -params.explicitBqsrNeeded = true -// BAMQC is used -params.noBAMQC = false -// Run Sarek in onlyQC mode -params.onlyQC = false -// outDir is current directory -params.outDir = '.' 
-// No sample is defined -params.sample = '' -// No sampleDir is defined -params.sampleDir = '' -// Step is mapping -params.step = 'mapping' -// No testing -params.test = '' -// Params are defined in config files -params.containerPath = '' -params.repository = '' -params.tag = '' - step = params.step.toLowerCase() if (step == 'preprocessing') step = 'mapping' @@ -100,7 +71,6 @@ referenceMap = defineReferenceMap() stepList = defineStepList() reports = !params.noReports onlyQC = params.onlyQC -verbose = params.verbose if (!checkParameterExistence(step, stepList)) exit 1, 'Unknown step, see --help for more information' if (step.contains(',')) exit 1, 'You can choose only one step, see --help for more information' @@ -162,13 +132,13 @@ startMessage() (fastqFiles, fastqFilesforFastQC) = fastqFiles.into(2) -if (verbose) fastqFiles = fastqFiles.view { +if (params.verbose) fastqFiles = fastqFiles.view { "FASTQs to preprocess:\n\ ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\tRun : ${it[3]}\n\ Files : [${it[4].fileName}, ${it[5].fileName}]" } -if (verbose) bamFiles = bamFiles.view { +if (params.verbose) bamFiles = bamFiles.view { "BAMs to process:\n\ ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\ Files : [${it[3].fileName}, ${it[4].fileName}]" @@ -193,7 +163,7 @@ process RunFastQC { """ } -if (verbose) fastQCreport = fastQCreport.view { +if (params.verbose) fastQCreport = fastQCreport.view { "FastQC report:\n\ Files : [${it[0].fileName}, ${it[1].fileName}]" } @@ -221,7 +191,7 @@ process MapReads { """ } -if (verbose) mappedBam = mappedBam.view { +if (params.verbose) mappedBam = mappedBam.view { "Mapped BAM (single or to be merged):\n\ ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\tRun : ${it[3]}\n\ File : [${it[4].fileName}]" @@ -256,13 +226,13 @@ process MergeBams { """ } -if (verbose) singleBam = singleBam.view { +if (params.verbose) singleBam = singleBam.view { "Single BAM:\n\ ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\ File : [${it[3].fileName}]" 
} -if (verbose) mergedBam = mergedBam.view { +if (params.verbose) mergedBam = mergedBam.view { "Merged BAM:\n\ ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\ File : [${it[3].fileName}]" @@ -270,7 +240,7 @@ if (verbose) mergedBam = mergedBam.view { mergedBam = mergedBam.mix(singleBam) -if (verbose) mergedBam = mergedBam.view { +if (params.verbose) mergedBam = mergedBam.view { "BAM for MarkDuplicates:\n\ ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\ File : [${it[3].fileName}]" @@ -328,21 +298,21 @@ else if (step == 'realign') duplicatesGrouped = bamFiles.map{ // and the other to the IndelRealigner process (duplicatesInterval, duplicatesRealign) = duplicatesGrouped.into(2) -if (verbose) duplicatesInterval = duplicatesInterval.view { +if (params.verbose) duplicatesInterval = duplicatesInterval.view { "BAMs for RealignerTargetCreator:\n\ ID : ${it[0]}\n\ Files : ${it[1].fileName}\n\ Files : ${it[2].fileName}" } -if (verbose) duplicatesRealign = duplicatesRealign.view { +if (params.verbose) duplicatesRealign = duplicatesRealign.view { "BAMs to phase:\n\ ID : ${it[0]}\n\ Files : ${it[1].fileName}\n\ Files : ${it[2].fileName}" } -if (verbose) markDuplicatesReport = markDuplicatesReport.view { +if (params.verbose) markDuplicatesReport = markDuplicatesReport.view { "MarkDuplicates report:\n\ File : [${it.fileName}]" } @@ -385,7 +355,7 @@ process RealignerTargetCreator { """ } -if (verbose) intervals = intervals.view { +if (params.verbose) intervals = intervals.view { "Intervals to phase:\n\ ID : ${it[0]}\n\ File : [${it[1].fileName}]" @@ -401,7 +371,7 @@ bamsAndIntervals = duplicatesRealign intervals[1] )} -if (verbose) bamsAndIntervals = bamsAndIntervals.view { +if (params.verbose) bamsAndIntervals = bamsAndIntervals.view { "BAMs and Intervals phased for IndelRealigner:\n\ ID : ${it[0]}\n\ Files : ${it[1].fileName}\n\ @@ -450,7 +420,7 @@ realignedBam = realignedBam.map { [idPatient, status, idSample, bam, bai] } -if (verbose) realignedBam = realignedBam.view { 
+if (params.verbose) realignedBam = realignedBam.view { "Realigned BAM to CreateRecalibrationTable:\n\ ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\ Files : [${it[3].fileName}, ${it[4].fileName}]" @@ -508,7 +478,7 @@ recalibrationTableTSV.map { idPatient, status, idSample, bam, bai, recalTable -> if (step == 'recalibrate') recalibrationTable = bamFiles -if (verbose) recalibrationTable = recalibrationTable.view { +if (params.verbose) recalibrationTable = recalibrationTable.view { "Base recalibrated table for RecalibrateBam:\n\ ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\ Files : [${it[3].fileName}, ${it[4].fileName}, ${it[5].fileName}]" @@ -565,7 +535,7 @@ recalibratedBamTSV.map { idPatient, status, idSample, bam, bai -> name: 'recalibrated.tsv', sort: true, storeDir: "${params.outDir}/${directoryMap.recalibrated}" ) -if (verbose) recalibratedBam = recalibratedBam.view { +if (params.verbose) recalibratedBam = recalibratedBam.view { "Recalibrated BAM for variant Calling:\n\ ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\ Files : [${it[3].fileName}, ${it[4].fileName}]" @@ -590,7 +560,7 @@ process RunSamtoolsStats { """ } -if (verbose) samtoolsStatsReport = samtoolsStatsReport.view { +if (params.verbose) samtoolsStatsReport = samtoolsStatsReport.view { "SAMTools stats report:\n\ File : [${it.fileName}]" } @@ -618,7 +588,7 @@ process RunBamQC { """ } -if (verbose) bamQCreport = bamQCreport.view { +if (params.verbose) bamQCreport = bamQCreport.view { "BamQC report:\n\ Dir : [${it.fileName}]" } @@ -631,7 +601,7 @@ if (verbose) bamQCreport = bamQCreport.view { def sarekMessage() { // Display Sarek message - log.info "Sarek ~ ${version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "") + log.info "Sarek ~ ${params.version} - " + this.grabRevision() + (workflow.commitId ? 
" [${workflow.commitId}]" : "") } def checkFileExtension(it, extension) { @@ -658,83 +628,6 @@ def checkParamReturnFile(item) { return file(params."${item}") } -def checkParams(it) { - // Check if params is in this given list - return it in [ - 'ac-loci', - 'acLoci', - 'annotate-tools', - 'annotate-VCF', - 'annotateTools', - 'annotateVCF', - 'build', - 'bwa-index', - 'bwaIndex', - 'call-name', - 'callName', - 'contact-mail', - 'contactMail', - 'container-path', - 'containerPath', - 'containers', - 'cosmic-index', - 'cosmic', - 'cosmicIndex', - 'dbsnp-index', - 'dbsnp', - 'docker', - 'explicitBqsrNeeded', - 'genome_base', - 'genome-dict', - 'genome-file', - 'genome-index', - 'genome', - 'genomeDict', - 'genomeFile', - 'genomeIndex', - 'genomes', - 'help', - 'intervals', - 'known-indels-index', - 'known-indels', - 'knownIndels', - 'knownIndelsIndex', - 'max_cpus', - 'max_memory', - 'max_time', - 'no-BAMQC', - 'no-GVCF', - 'no-reports', - 'noBAMQC', - 'noGVCF', - 'noReports', - 'only-QC', - 'onlyQC', - 'out-dir', - 'outDir', - 'params', - 'project', - 'push', - 'repository', - 'run-time', - 'runTime', - 'sample-dir', - 'sample', - 'sampleDir', - 'single-CPUMem', - 'singleCPUMem', - 'singularity', - 'step', - 'tag', - 'test', - 'tools', - 'total-memory', - 'totalMemory', - 'vcflist', - 'verbose', - 'version'] -} - def checkReferenceMap(referenceMap) { // Loop through all the references files to check their existence referenceMap.every { @@ -989,22 +882,10 @@ def helpMessage() { log.info " you're reading it" log.info " --verbose" log.info " Adds more verbosity to workflow" - log.info " --version" + log.info " --more" log.info " displays version number" } -def isAllowedParams(params) { - // Compare params to list of verified params - final test = true - params.each{ - if (!checkParams(it.toString().split('=')[0])) { - println "params ${it.toString().split('=')[0]} is unknown" - test = false - } - } - return test -} - def minimalInformationMessage() { // Minimal 
information message log.info "Command Line: " + workflow.commandLine @@ -1065,10 +946,10 @@ def startMessage() { this.minimalInformationMessage() } -def versionMessage() { +def moreMessage() { // Display version message log.info "Sarek" - log.info " version : " + version + log.info " version : " + params.version log.info workflow.commitId ? "Git info : ${workflow.repository} - ${workflow.revision} [${workflow.commitId}]" : " revision : " + this.grabRevision() } diff --git a/runMultiQC.nf b/runMultiQC.nf index db79d949f4..eec09fdef9 100644 --- a/runMultiQC.nf +++ b/runMultiQC.nf @@ -36,44 +36,28 @@ kate: syntax groovy; space-indent on; indent-width 2; ================================================================================ */ -version = '2.0.0' - // Check that Nextflow version is up to date enough // try / throw / catch works for NF versions < 0.25 when this was implemented -nf_required_version = '0.25.0' try { - if( ! nextflow.version.matches(">= ${nf_required_version}") ){ + if( ! nextflow.version.matches(">= ${params.nfRequiredVersion}") ){ throw GroovyException('Nextflow version too old') } } catch (all) { log.error "====================================================\n" + - " Nextflow version ${nf_required_version} required! You are running v${workflow.nextflow.version}.\n" + + " Nextflow version ${params.nfRequiredVersion} required! You are running v${workflow.nextflow.version}.\n" + " Pipeline execution will continue, but things may break.\n" + " Please update Nextflow.\n" + "============================================================" } if (params.help) exit 0, helpMessage() -if (params.version) exit 0, versionMessage() -if (!isAllowedParams(params)) exit 1, "params unknown, see --help for more information" +if (params.more) exit 0, moreMessage() +if (!MyUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! 
Use --project " -// Default params: -// Such params are overridden by command line or configuration definitions - -// Reports are generated -params.noReports = false -// outDir is current directory -params.outDir = '.' -// Params are defined in config files -params.containerPath = '' -params.repository = '' -params.tag = '' - directoryMap = defineDirectoryMap() reports = !params.noReports -verbose = params.verbose /* ================================================================================ @@ -100,7 +84,7 @@ process GenerateMultiQCconfig { echo "custom_logo_url: http://opensource.scilifelab.se/projects/sarek" >> multiqc_config.yaml echo "custom_logo_title: 'Sarek'" >> multiqc_config.yaml echo "report_header_info:" >> multiqc_config.yaml - echo "- Sarek version: ${version}" >> multiqc_config.yaml + echo "- Sarek version: ${params.version}" >> multiqc_config.yaml echo "- Contact Name: ${params.callName}" >> multiqc_config.yaml echo "- Contact E-mail: ${params.contactMail}" >> multiqc_config.yaml echo "- Directory: ${workflow.launchDir}" >> multiqc_config.yaml @@ -115,7 +99,7 @@ process GenerateMultiQCconfig { """ } -if (verbose && reports) multiQCconfig = multiQCconfig.view { +if (params.verbose && reports) multiQCconfig = multiQCconfig.view { "MultiQC config:\n\ File : [${it.fileName}]" } @@ -147,7 +131,7 @@ process RunMultiQC { """ } -if (verbose) multiQCReport = multiQCReport.view { +if (params.verbose) multiQCReport = multiQCReport.view { "MultiQC report:\n\ File : [${it[0].fileName}]\n\ Dir : [${it[1].fileName}]" @@ -161,7 +145,7 @@ if (verbose) multiQCReport = multiQCReport.view { def sarekMessage() { // Display Sarek message - log.info "Sarek ~ ${version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "") + log.info "Sarek ~ ${params.version} - " + this.grabRevision() + (workflow.commitId ? 
" [${workflow.commitId}]" : "") } def checkParameterExistence(it, list) { @@ -173,83 +157,6 @@ def checkParameterExistence(it, list) { return true } -def checkParams(it) { - // Check if params is in this given list - return it in [ - 'ac-loci', - 'acLoci', - 'annotate-tools', - 'annotate-VCF', - 'annotateTools', - 'annotateVCF', - 'build', - 'bwa-index', - 'bwaIndex', - 'call-name', - 'callName', - 'contact-mail', - 'contactMail', - 'container-path', - 'containerPath', - 'containers', - 'cosmic-index', - 'cosmic', - 'cosmicIndex', - 'dbsnp-index', - 'dbsnp', - 'docker', - 'explicitBqsrNeeded', - 'genome_base', - 'genome-dict', - 'genome-file', - 'genome-index', - 'genome', - 'genomeDict', - 'genomeFile', - 'genomeIndex', - 'genomes', - 'help', - 'intervals', - 'known-indels-index', - 'known-indels', - 'knownIndels', - 'knownIndelsIndex', - 'max_cpus', - 'max_memory', - 'max_time', - 'no-BAMQC', - 'no-GVCF', - 'no-reports', - 'noBAMQC', - 'noGVCF', - 'noReports', - 'only-QC', - 'onlyQC', - 'out-dir', - 'outDir', - 'params', - 'project', - 'push', - 'repository', - 'run-time', - 'runTime', - 'sample-dir', - 'sample', - 'sampleDir', - 'single-CPUMem', - 'singleCPUMem', - 'singularity', - 'step', - 'tag', - 'test', - 'tools', - 'total-memory', - 'totalMemory', - 'vcflist', - 'verbose', - 'version'] -} - def checkUppmaxProject() { // check if UPPMAX project number is specified return !(workflow.profile == 'slurm' && !params.project) @@ -303,18 +210,6 @@ def helpMessage() { log.info " displays version number" } -def isAllowedParams(params) { - // Compare params to list of verified params - final test = true - params.each{ - if (!checkParams(it.toString().split('=')[0])) { - println "params ${it.toString().split('=')[0]} is unknown" - test = false - } - } - return test -} - def minimalInformationMessage() { // Minimal information message log.info "Command Line: " + workflow.commandLine @@ -342,10 +237,10 @@ def startMessage() { this.minimalInformationMessage() } -def 
versionMessage() { +def moreMessage() { // Display version message log.info "Sarek" - log.info " version : " + version + log.info " version : " + params.version log.info workflow.commitId ? "Git info : ${workflow.repository} - ${workflow.revision} [${workflow.commitId}]" : " revision : " + this.grabRevision() } diff --git a/somaticVC.nf b/somaticVC.nf index 573112e51b..7dfcd7d13d 100644 --- a/somaticVC.nf +++ b/somaticVC.nf @@ -48,68 +48,35 @@ kate: syntax groovy; space-indent on; indent-width 2; ================================================================================ */ -version = '2.0.0' - // Check that Nextflow version is up to date enough // try / throw / catch works for NF versions < 0.25 when this was implemented -nf_required_version = '0.25.0' try { - if( ! nextflow.version.matches(">= ${nf_required_version}") ){ + if( ! nextflow.version.matches(">= ${params.nfRequiredVersion}") ){ throw GroovyException('Nextflow version too old') } } catch (all) { log.error "====================================================\n" + - " Nextflow version ${nf_required_version} required! You are running v${workflow.nextflow.version}.\n" + + " Nextflow version ${params.nfRequiredVersion} required! You are running v${workflow.nextflow.version}.\n" + " Pipeline execution will continue, but things may break.\n" + " Please update Nextflow.\n" + "============================================================" } if (params.help) exit 0, helpMessage() -if (params.version) exit 0, versionMessage() -if (!isAllowedParams(params)) exit 1, "params unknown, see --help for more information" +if (params.more) exit 0, moreMessage() +if (!MyUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! 
Use --project " -// Default params: -// Such params are overridden by command line or configuration definitions - -// GVCF are generated -params.noGVCF = false -// Reports are generated -params.noReports = false -// BAMQC is used -params.noBAMQC = false -// Run Sarek in onlyQC mode -params.onlyQC = false -// outDir is current directory -params.outDir = '.' -// No sample is defined -params.sample = '' -// Step is variantcalling -step = 'variantcalling' -// Not testing -params.test = '' -// No tools to be used -params.tools = '' -// Params are defined in config files -params.containerPath = '' -params.repository = '' -params.tag = '' - tools = params.tools ? params.tools.split(',').collect{it.trim().toLowerCase()} : [] directoryMap = defineDirectoryMap() referenceMap = defineReferenceMap() -stepList = defineStepList() toolList = defineToolList() nucleotidesPerSecond = 1000.0 // used to estimate variant calling runtime gvcf = !params.noGVCF reports = !params.noReports onlyQC = params.onlyQC -verbose = params.verbose -if (!checkParameterExistence(step, stepList)) exit 1, 'Unknown step, see --help for more information' -if (step.contains(',')) exit 1, 'You can choose only one step, see --help for more information' if (!checkReferenceMap(referenceMap)) exit 1, 'Missing Reference file(s), see --help for more information' if (!checkParameterList(tools,toolList)) exit 1, 'Unknown tool(s), see --help for more information' @@ -146,7 +113,7 @@ if (tsvPath) { startMessage() -if (verbose) bamFiles = bamFiles.view { +if (params.verbose) bamFiles = bamFiles.view { "BAMs to process:\n\ ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\ Files : [${it[3].fileName}, ${it[4].fileName}]" @@ -159,7 +126,7 @@ recalTables = recalTables.map{ it + [null] } // null recalibration table means: recalTables = recalTables.map { [it[0]] + it[2..-1] } // remove status -if (verbose) recalibratedBam = recalibratedBam.view { +if (params.verbose) recalibratedBam = recalibratedBam.view { "Recalibrated 
BAM for variant Calling:\n\ ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\ Files : [${it[3].fileName}, ${it[4].fileName}]" @@ -184,7 +151,7 @@ process RunSamtoolsStats { """ } -if (verbose) samtoolsStatsReport = samtoolsStatsReport.view { +if (params.verbose) samtoolsStatsReport = samtoolsStatsReport.view { "SAMTools stats report:\n\ File : [${it.fileName}]" } @@ -212,7 +179,7 @@ process RunBamQC { """ } -if (verbose) bamQCreport = bamQCreport.view { +if (params.verbose) bamQCreport = bamQCreport.view { "BamQC report:\n\ Dir : [${it.fileName}]" } @@ -274,7 +241,7 @@ process CreateIntervalBeds { t = \$5 # runtime estimate if (t == "") { # no runtime estimate in this row, assume default value - t = (\$3 - \$2) / ${nucleotidesPerSecond} + t = (\$3 - \$2) / ${params.nucleotidesPerSecond} } if (name == "" || (chunk > 600 && (chunk + t) > longest * 1.05)) { # start a new chunk @@ -306,7 +273,7 @@ bedIntervals = bedIntervals else { start = fields[1].toInteger() end = fields[2].toInteger() - duration += (end - start) / nucleotidesPerSecond + duration += (end - start) / params.nucleotidesPerSecond } } [duration, intervalFile] @@ -314,7 +281,7 @@ bedIntervals = bedIntervals .flatten().collate(2) .map{duration, intervalFile -> intervalFile} -if (verbose) bedIntervals = bedIntervals.view { +if (params.verbose) bedIntervals = bedIntervals.view { " Interv: ${it.baseName}" } @@ -452,7 +419,7 @@ freebayesOutput = freebayesOutput.groupTuple(by:[0,1,2,3]) // so we can have a single sorted VCF containing all the calls for a given caller vcfsToMerge = mutect1Output.mix(mutect2Output, freebayesOutput) -if (verbose) vcfsToMerge = vcfsToMerge.view { +if (params.verbose) vcfsToMerge = vcfsToMerge.view { "VCFs To be merged:\n\ Tool : ${it[0]}\tID : ${it[1]}\tSample: [${it[3]}, ${it[2]}]\n\ Files : ${it[4].fileName}" @@ -518,7 +485,7 @@ process ConcatVCF { """ } -if (verbose) vcfConcatenated = vcfConcatenated.view { +if (params.verbose) vcfConcatenated = vcfConcatenated.view { 
"Variant Calling output:\n\ Tool : ${it[0]}\tID : ${it[1]}\tSample: [${it[3]}, ${it[2]}]\n\ File : ${it[4].fileName}" @@ -563,7 +530,7 @@ process RunStrelka { """ } -if (verbose) strelkaOutput = strelkaOutput.view { +if (params.verbose) strelkaOutput = strelkaOutput.view { "Variant Calling output:\n\ Tool : ${it[0]}\tID : ${it[1]}\tSample: [${it[3]}, ${it[2]}]\n\ Files : ${it[4].fileName}\n\ @@ -616,7 +583,7 @@ process RunManta { """ } -if (verbose) mantaOutput = mantaOutput.view { +if (params.verbose) mantaOutput = mantaOutput.view { "Variant Calling output:\n\ Tool : ${it[0]}\tID : ${it[1]}\tSample: [${it[3]}, ${it[2]}]\n\ Files : ${it[4].fileName}\n\ @@ -664,7 +631,7 @@ process RunSingleManta { """ } -if (verbose) singleMantaOutput = singleMantaOutput.view { +if (params.verbose) singleMantaOutput = singleMantaOutput.view { "Variant Calling output:\n\ Tool : ${it[0]}\tID : ${it[1]}\tSample: ${it[2]}\n\ Files : ${it[3].fileName}\n\ @@ -759,7 +726,7 @@ process RunAscat { """ } -if (verbose) ascatOutput = ascatOutput.view { +if (params.verbose) ascatOutput = ascatOutput.view { "Variant Calling output:\n\ Tool : ${it[0]}\tID : ${it[1]}\tSample: [${it[3]}, ${it[2]}]\n\ Files : [${it[4].fileName}]" @@ -809,7 +776,7 @@ process RunBcftoolsStats { """ } -if (verbose) bcfReport = bcfReport.view { +if (params.verbose) bcfReport = bcfReport.view { "BCFTools stats report:\n\ File : [${it.fileName}]" } @@ -824,7 +791,7 @@ bcfReport.close() def sarekMessage() { // Display Sarek message - log.info "Sarek ~ ${version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "") + log.info "Sarek ~ ${params.version} - " + this.grabRevision() + (workflow.commitId ? 
" [${workflow.commitId}]" : "") } def checkFileExtension(it, extension) { @@ -851,82 +818,6 @@ def checkParamReturnFile(item) { return file(params."${item}") } -def checkParams(it) { - // Check if params is in this given list - return it in [ - 'ac-loci', - 'acLoci', - 'annotate-tools', - 'annotate-VCF', - 'annotateTools', - 'annotateVCF', - 'build', - 'bwa-index', - 'bwaIndex', - 'call-name', - 'callName', - 'contact-mail', - 'contactMail', - 'container-path', - 'containerPath', - 'containers', - 'cosmic-index', - 'cosmic', - 'cosmicIndex', - 'dbsnp-index', - 'dbsnp', - 'docker', - 'genome_base', - 'genome-dict', - 'genome-file', - 'genome-index', - 'genome', - 'genomeDict', - 'genomeFile', - 'genomeIndex', - 'genomes', - 'help', - 'intervals', - 'known-indels-index', - 'known-indels', - 'knownIndels', - 'knownIndelsIndex', - 'max_cpus', - 'max_memory', - 'max_time', - 'no-BAMQC', - 'no-GVCF', - 'no-reports', - 'noBAMQC', - 'noGVCF', - 'noReports', - 'only-QC', - 'onlyQC', - 'out-dir', - 'outDir', - 'params', - 'project', - 'push', - 'repository', - 'run-time', - 'runTime', - 'sample-dir', - 'sample', - 'sampleDir', - 'single-CPUMem', - 'singleCPUMem', - 'singularity', - 'step', - 'tag', - 'test', - 'tools', - 'total-memory', - 'totalMemory', - 'vcflist', - 'verbose', - 'version'] -} - def checkReferenceMap(referenceMap) { // Loop through all the references files to check their existence referenceMap.every { @@ -947,17 +838,17 @@ def checkRefExistence(referenceFile, fileToCheck) { return true } -def checkUppmaxProject() { - // check if UPPMAX project number is specified - return !(workflow.profile == 'slurm' && !params.project) -} - def checkExactlyOne(list) { final n = 0 list.each{n += it ? 
1 : 0} return n == 1 } +def checkUppmaxProject() { + // check if UPPMAX project number is specified + return !(workflow.profile == 'slurm' && !params.project) +} + def defineDirectoryMap() { return [ 'recalibrated' : 'Preprocessing/Recalibrated', @@ -996,12 +887,6 @@ def defineReferenceMap() { ] } -def defineStepList() { - return [ - 'variantcalling' - ] -} - def defineToolList() { return [ 'ascat', @@ -1122,22 +1007,10 @@ def helpMessage() { log.info " you're reading it" log.info " --verbose" log.info " Adds more verbosity to workflow" - log.info " --version" + log.info " --more" log.info " displays version number" } -def isAllowedParams(params) { - // Compare params to list of verified params - final test = true - params.each{ - if (!checkParams(it.toString().split('=')[0])) { - println "params ${it.toString().split('=')[0]} is unknown" - test = false - } - } - return test -} - def minimalInformationMessage() { // Minimal information message log.info "Command Line: " + workflow.commandLine @@ -1149,7 +1022,6 @@ def minimalInformationMessage() { log.info "TSV file : ${tsvFile}" log.info "Genome : " + params.genome log.info "Genome_base : " + params.genome_base - log.info "Step : " + step log.info "Tools : " + tools.join(', ') log.info "Containers :" if (params.repository) log.info " Repository : ${params.repository}" @@ -1199,10 +1071,10 @@ def startMessage() { this.minimalInformationMessage() } -def versionMessage() { +def moreMessage() { // Display version message log.info "Sarek" - log.info " version : " + version + log.info " version : " + params.version log.info workflow.commitId ? 
"Git info : ${workflow.repository} - ${workflow.revision} [${workflow.commitId}]" : " revision : " + this.grabRevision() } From c208ef6fe358661bb78db8a281c3b6570487f7a6 Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Tue, 20 Mar 2018 13:51:57 +0100 Subject: [PATCH 07/36] clean up code + use convention --- buildReferences.nf | 231 ++++++++++++++------------------------------- 1 file changed, 72 insertions(+), 159 deletions(-) diff --git a/buildReferences.nf b/buildReferences.nf index d21a8e3302..4ebc57f19f 100644 --- a/buildReferences.nf +++ b/buildReferences.nf @@ -40,46 +40,30 @@ New Germline (+ Somatic) Analysis Workflow. Started March 2016. ================================================================================ */ -version = '2.0.0' - // Check that Nextflow version is up to date enough // try / throw / catch works for NF versions < 0.25 when this was implemented -nf_required_version = '0.25.0' try { - if( ! nextflow.version.matches(">= ${nf_required_version}") ){ + if( ! nextflow.version.matches(">= ${params.nfRequiredVersion}") ){ throw GroovyException('Nextflow version too old') } } catch (all) { log.error "====================================================\n" + - " Nextflow version ${nf_required_version} required! You are running v${workflow.nextflow.version}.\n" + + " Nextflow version ${params.nfRequiredVersion} required! You are running v${workflow.nextflow.version}.\n" + " Pipeline execution will continue, but things may break.\n" + " Please update Nextflow.\n" + "============================================================" } if (params.help) exit 0, helpMessage() -if (params.version) exit 0, versionMessage() -if (!isAllowedParams(params)) exit 1, "params unknown, see --help for more information" +if (params.more) exit 0, moreMessage() +if (!MyUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! 
Use --project " -// Default params: -// Such params are overridden by command line or configuration definitions - -// No download of reference source files -params.download = false -// outDir is References/${params.genome} -params.outDir = "./References/${params.genome}" -// refDir is empty -params.refDir = '' - -verbose = params.verbose -download = params.download ? true : false - -if (!download && params.refDir == "" ) exit 1, "No --refDir specified" -if (download && params.refDir != "" ) exit 1, "No need to specify --refDir" +if (!params.download && params.refDir == "" ) exit 1, "No --refDir specified" +if (params.download && params.refDir != "" ) exit 1, "No need to specify --refDir" if (params.genome == "smallGRCh37") { - referencesFiles = + ch_referencesFiles = [ '1000G_phase1.indels.b37.small.vcf.gz', '1000G_phase3_20130502_SNP_maf0.3.small.loci', @@ -90,7 +74,7 @@ if (params.genome == "smallGRCh37") { 'small.intervals' ] } else if (params.genome == "GRCh37") { - referencesFiles = + ch_referencesFiles = [ '1000G_phase1.indels.b37.vcf.gz', '1000G_phase3_20130502_SNP_maf0.3.loci.tar.bz2', @@ -102,9 +86,9 @@ if (params.genome == "smallGRCh37") { ] } else exit 1, "Can't build this reference genome" -if (download && params.genome != "smallGRCh37") exit 1, "Not possible to download ${params.genome} references files" +if (params.download && params.genome != "smallGRCh37") exit 1, "Not possible to download ${params.genome} references files" -if (!download) referencesFiles.each{checkFile(params.refDir + "/" + it)} +if (!params.download) ch_referencesFiles.each{checkFile(params.refDir + "/" + it)} /* ================================================================================ @@ -115,177 +99,175 @@ if (!download) referencesFiles.each{checkFile(params.refDir + "/" + it)} startMessage() process ProcessReference { - tag download ? {"Download: " + reference} : {"Link: " + reference} + tag params.download ? 
{"Download: " + f_reference} : {"Link: " + f_reference} input: - val(reference) from referencesFiles + val(f_reference) from ch_referencesFiles output: - file(reference) into processedFiles + file(f_reference) into ch_processedFiles script: - if (download) + if (params.download) """ - wget https://github.com/szilvajuhos/smallRef/raw/master/${reference} + wget https://github.com/szilvajuhos/smallRef/raw/master/${f_reference} """ else """ - ln -s ${params.refDir}/${reference} . + ln -s ${params.refDir}/${f_reference} . """ } -if (verbose) processedFiles = processedFiles.view { +if (params.verbose) ch_processedFiles = ch_processedFiles.view { "Files preprocessed : ${it.fileName}" } -compressedfiles = Channel.create() -notCompressedfiles = Channel.create() +ch_compressedfiles = Channel.create() +ch_notCompressedfiles = Channel.create() -processedFiles - .choice(compressedfiles, notCompressedfiles) {it =~ ".(gz|tar.bz2)" ? 0 : 1} +ch_processedFiles + .choice(ch_compressedfiles, ch_notCompressedfiles) {it =~ ".(gz|tar.bz2)" ? 
0 : 1} process DecompressFile { - tag {reference} + tag {f_reference} input: - file(reference) from compressedfiles + file(f_reference) from ch_compressedfiles output: - file("*.{vcf,fasta,loci}") into decompressedFiles + file("*.{vcf,fasta,loci}") into ch_decompressedFiles script: - realReference="readlink ${reference}" - if (reference =~ ".gz") + realReferenceFile="readlink ${f_reference}" + if (f_reference =~ ".gz") """ - gzip -d -c \$(${realReference}) > ${reference.baseName} + gzip -d -c \$(${realReferenceFile}) > ${f_reference.baseName} """ - else if (reference =~ ".tar.bz2") + else if (f_reference =~ ".tar.bz2") """ - tar xvjf \$(${realReference}) + tar xvjf \$(${realReferenceFile}) """ } -if (verbose) decompressedFiles = decompressedFiles.view { +if (params.verbose) ch_decompressedFiles = ch_decompressedFiles.view { "Files decomprecessed: ${it.fileName}" } -fastaFile = Channel.create() -otherFiles = Channel.create() -vcfFiles = Channel.create() +ch_fastaFile = Channel.create() +ch_otherFiles = Channel.create() +ch_vcfFiles = Channel.create() -decompressedFiles - .choice(fastaFile, vcfFiles, otherFiles) { +ch_decompressedFiles + .choice(ch_fastaFile, ch_vcfFiles, ch_otherFiles) { it =~ ".fasta" ? 0 : it =~ ".vcf" ? 
1 : 2} -notCompressedfiles - .mix(otherFiles) +ch_notCompressedfiles + .mix(ch_otherFiles) .collectFile(storeDir: params.outDir) -fastaForBWA = Channel.create() -fastaForPicard = Channel.create() -fastaForSAMTools = Channel.create() +ch_fastaForBWA = Channel.create() +ch_fastaForPicard = Channel.create() +ch_fastaForSAMTools = Channel.create() -fastaFile.into(fastaForBWA,fastaForPicard,fastaForSAMTools) +ch_fastaFile.into(ch_fastaForBWA,ch_fastaForPicard,ch_fastaForSAMTools) process BuildBWAindexes { - tag {reference} + tag {f_reference} publishDir params.outDir, mode: 'copy' input: - file(reference) from fastaForBWA + file(f_reference) from ch_fastaForBWA output: - file(reference) into fastaFileToKeep + file(f_reference) into ch_fastaFileToKeep file("*.{amb,ann,bwt,pac,sa}") into bwaIndexes script: """ - bwa index ${reference} + bwa index ${f_reference} """ } -if (verbose) fastaFileToKeep.view { +if (params.verbose) ch_fastaFileToKeep.view { "Fasta File : ${it.fileName}" } -if (verbose) bwaIndexes.flatten().view { +if (params.verbose) bwaIndexes.flatten().view { "BWA index : ${it.fileName}" } process BuildPicardIndex { - tag {reference} + tag {f_reference} publishDir params.outDir, mode: 'copy' input: - file(reference) from fastaForPicard + file(f_reference) from ch_fastaForPicard output: - file("*.dict") into picardIndex + file("*.dict") into ch_picardIndex script: """ java -Xmx${task.memory.toGiga()}g \ -jar \$PICARD_HOME/picard.jar \ CreateSequenceDictionary \ - REFERENCE=${reference} \ - OUTPUT=${reference.baseName}.dict + REFERENCE=${f_reference} \ + OUTPUT=${f_reference.baseName}.dict """ } -if (verbose) picardIndex.view { +if (params.verbose) ch_picardIndex.view { "Picard index : ${it.fileName}" } process BuildSAMToolsIndex { - tag {reference} + tag {f_reference} publishDir params.outDir, mode: 'copy' input: - file(reference) from fastaForSAMTools + file(f_reference) from ch_fastaForSAMTools output: - file("*.fai") into samtoolsIndex + file("*.fai") into 
ch_samtoolsIndex script: """ - samtools faidx ${reference} + samtools faidx ${f_reference} """ } -if (verbose) samtoolsIndex.view { +if (params.verbose) ch_samtoolsIndex.view { "SAMTools index : ${it.fileName}" } process BuildVCFIndex { - tag {reference} + tag {f_reference} publishDir params.outDir, mode: 'copy' input: - file(reference) from vcfFiles + file(f_reference) from ch_vcfFiles output: - file(reference) into vcfIndexed - file("*.idx") into vcfIndex + set file(f_reference), file("${f_reference}.idx") into ch_vcfIndex script: """ - \$IGVTOOLS_HOME/igvtools index ${reference} + \$IGVTOOLS_HOME/igvtools index ${f_reference} """ } -if (verbose) vcfIndexed.view { - "VCF indexed : ${it.fileName}" -} -if (verbose) vcfIndex.view { - "VCF index : ${it.fileName}" +if (params.verbose) ch_vcfIndex.view { + "VCF indexed:\n\ + VCF File : ${it[0].fileName}\n\ + VCF index : ${it[1].fileName}" } /* @@ -296,7 +278,7 @@ if (verbose) vcfIndex.view { def sarekMessage() { // Display Sarek message - log.info "Sarek - Workflow To Find Somatic And Germline Variations ~ ${version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "") + log.info "Sarek - Workflow To Find Somatic And Germline Variations ~ ${params.version} - " + this.grabRevision() + (workflow.commitId ? 
" [${workflow.commitId}]" : "") } def checkFile(it) { @@ -306,63 +288,6 @@ def checkFile(it) { return true } -def checkParams(it) { - // Check if params is in this given list - return it in [ - 'annotate-tools', - 'annotate-VCF', - 'annotateTools', - 'annotateVCF', - 'build', - 'call-name', - 'callName', - 'contact-mail', - 'contactMail', - 'container-path', - 'containerPath', - 'containers', - 'docker', - 'download', - 'genome_base', - 'genome', - 'genomes', - 'help', - 'max_cpus', - 'max_memory', - 'max_time', - 'no-GVCF', - 'no-reports', - 'noGVCF', - 'noReports', - 'only-QC', - 'onlyQC', - 'out-dir', - 'outDir', - 'params', - 'project', - 'push', - 'ref-dir', - 'refDir', - 'repository', - 'run-time', - 'runTime', - 'sample-dir', - 'sample', - 'sampleDir', - 'single-CPUMem', - 'singleCPUMem', - 'singularity', - 'step', - 'tag', - 'test', - 'tools', - 'total-memory', - 'totalMemory', - 'vcflist', - 'verbose', - 'version'] -} - def checkUppmaxProject() { // check if UPPMAX project number is specified return !(workflow.profile == 'slurm' && !params.project) @@ -394,22 +319,10 @@ def helpMessage() { log.info " smallGRCh37" log.info " --help" log.info " you're reading it" - log.info " --version" + log.info " --more" log.info " displays version number" } -def isAllowedParams(params) { - // Compare params to list of verified params - final test = true - params.each{ - if (!checkParams(it.toString().split('=')[0])) { - println "params ${it.toString().split('=')[0]} is unknown" - test = false - } - } - return test -} - def minimalInformationMessage() { // Minimal information message log.info "Command Line: " + workflow.commandLine @@ -418,8 +331,8 @@ def minimalInformationMessage() { log.info "Work Dir : " + workflow.workDir log.info "Out Dir : " + params.outDir log.info "Genome : " + params.genome - log.info "Containers :" - if (params.repository) log.info " Repository : ${params.repository}" + log.info "Containers" + if (params.repository) log.info " Repository :" + 
params.repository else log.info " ContainerPath: " + params.containerPath log.info " Tag : " + params.tag } @@ -435,10 +348,10 @@ def startMessage() { this.minimalInformationMessage() } -def versionMessage() { +def moreMessage() { // Display version message log.info "Sarek - Workflow For Somatic And Germline Variations" - log.info " version : " + version + log.info " version : " + params.version log.info workflow.commitId ? "Git info : ${workflow.repository} - ${workflow.revision} [${workflow.commitId}]" : " revision : " + this.grabRevision() } From 22d6c393f7bb52534968f4ab80463b1705d52812 Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Tue, 20 Mar 2018 14:08:49 +0100 Subject: [PATCH 08/36] fix scripts --- buildContainers.nf | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/buildContainers.nf b/buildContainers.nf index 590fe4ea63..88dc50e4f8 100644 --- a/buildContainers.nf +++ b/buildContainers.nf @@ -80,7 +80,7 @@ dockerContainers = containers singularityContainers = containers process BuildDockerContainers { - tag {"${params.repository}/${container}:${tag}"} + tag {"${params.repository}/${container}:${params.tag}"} input: val container from dockerContainers @@ -92,16 +92,16 @@ process BuildDockerContainers { script: """ - docker build -t ${params.repository}/${container}:${tag} ${baseDir}/containers/${container}/. + docker build -t ${params.repository}/${container}:${params.tag} ${baseDir}/containers/${container}/. """ } if (params.verbose) containersBuilt = containersBuilt.view { - "Docker container: ${params.repository}/${it}:${tag} built." + "Docker container: ${params.repository}/${it}:${params.tag} built." 
} process PullSingularityContainers { - tag {"${params.repository}/${container}:${tag}"} + tag {"${params.repository}/${container}:${params.tag}"} publishDir "${params.containerPath}", mode: 'move' @@ -109,13 +109,13 @@ process PullSingularityContainers { val container from singularityContainers output: - file("${container}-${tag}.img") into imagePulled + file("${container}-${params.tag}.img") into imagePulled when: params.singularity script: """ - singularity pull --name ${container}-${tag}.img docker://${params.repository}/${container}:${tag} + singularity pull --name ${container}-${params.tag}.img docker://${params.repository}/${container}:${params.tag} """ } @@ -136,12 +136,12 @@ process PushDockerContainers { script: """ - docker push ${params.repository}/${container}:${tag} + docker push ${params.repository}/${container}:${params.tag} """ } if (params.verbose) containersPushed = containersPushed.view { - "Docker container: ${params.repository}/${it}:${tag} pushed." + "Docker container: ${params.repository}/${it}:${params.tag} pushed." } /* @@ -152,7 +152,7 @@ if (params.verbose) containersPushed = containersPushed.view { def sarekMessage() { // Display Sarek message - log.info "Sarek ~ ${params.version} - " + MyUtils.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "") + log.info "Sarek ~ ${params.version} - " + this.grabRevision() + (workflow.commitId ? 
" [${workflow.commitId}]" : "") } def checkContainerExistence(container, list) { @@ -178,6 +178,11 @@ def checkUppmaxProject() { return !(workflow.profile == 'slurm' && !params.project) } +def grabRevision() { + // Return the same string executed from github or not + return workflow.revision ?: workflow.commitId ?: workflow.scriptId.substring(0,10) +} + def defineContainersList(){ // Return list of authorized containers return [ @@ -258,7 +263,7 @@ def moreMessage() { // Display version message log.info "Sarek - Workflow For Somatic And Germline Variations" log.info " version : " + params.version - log.info workflow.commitId ? "Git info : ${workflow.repository} - ${workflow.revision} [${workflow.commitId}]" : " revision : " + MyUtils.grabRevision() + log.info workflow.commitId ? "Git info : ${workflow.repository} - ${workflow.revision} [${workflow.commitId}]" : " revision : " + this.grabRevision() } workflow.onComplete { From 0dae5de8b4535410640516c98899e2ac596b4d72 Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Tue, 20 Mar 2018 14:09:02 +0100 Subject: [PATCH 09/36] update tools --- containers/multiqc/Dockerfile | 2 +- containers/vepgrch37/Dockerfile | 4 ++-- containers/vepgrch38/Dockerfile | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/containers/multiqc/Dockerfile b/containers/multiqc/Dockerfile index 2b85974906..1e7325cf42 100644 --- a/containers/multiqc/Dockerfile +++ b/containers/multiqc/Dockerfile @@ -1,4 +1,4 @@ -FROM ewels/multiqc:v1.4 +FROM ewels/multiqc:v1.5 LABEL \ author="Maxime Garcia" \ diff --git a/containers/vepgrch37/Dockerfile b/containers/vepgrch37/Dockerfile index 472b1ab96f..42e4cdacb9 100644 --- a/containers/vepgrch37/Dockerfile +++ b/containers/vepgrch37/Dockerfile @@ -1,4 +1,4 @@ -FROM willmclaren/ensembl-vep:release_90.6 +FROM willmclaren/ensembl-vep:release_92 LABEL \ author="Maxime Garcia" \ @@ -8,7 +8,7 @@ LABEL \ # Setup ENV variables ENV \ GENOME=GRCh37 \ - VEP_VERSION=90 + VEP_VERSION=91 # Download Genome 
RUN \ diff --git a/containers/vepgrch38/Dockerfile b/containers/vepgrch38/Dockerfile index a7cfc79dc0..04185c8f13 100644 --- a/containers/vepgrch38/Dockerfile +++ b/containers/vepgrch38/Dockerfile @@ -1,4 +1,4 @@ -FROM willmclaren/ensembl-vep:release_90.6 +FROM willmclaren/ensembl-vep:release_92 LABEL \ author="Maxime Garcia" \ @@ -8,7 +8,7 @@ LABEL \ # Setup ENV variables ENV \ GENOME=GRCh38 \ - VEP_VERSION=90 + VEP_VERSION=91 # Download Genome RUN \ From 492e0120bdd4daf10af9455e9e8ba50e453ec9ec Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Tue, 20 Mar 2018 14:58:17 +0100 Subject: [PATCH 10/36] typo --- buildContainers.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/buildContainers.nf b/buildContainers.nf index 88dc50e4f8..b99b62a0b6 100644 --- a/buildContainers.nf +++ b/buildContainers.nf @@ -124,7 +124,7 @@ if (params.verbose) imagePulled = imagePulled.view { } process PushDockerContainers { - tag {params.repository + "/" + container + ":" + tag} + tag {params.repository + "/" + container + ":" + params.tag} input: val container from containersBuilt From c488095314a044f97fd4e2dcc7af2b06a5cf327e Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Tue, 20 Mar 2018 14:58:40 +0100 Subject: [PATCH 11/36] switch to root user --- containers/vepgrch37/Dockerfile | 2 ++ containers/vepgrch38/Dockerfile | 2 ++ 2 files changed, 4 insertions(+) diff --git a/containers/vepgrch37/Dockerfile b/containers/vepgrch37/Dockerfile index 42e4cdacb9..c13e108aa2 100644 --- a/containers/vepgrch37/Dockerfile +++ b/containers/vepgrch37/Dockerfile @@ -18,3 +18,5 @@ RUN \ ftp://ftp.ensembl.org/pub/release-${VEP_VERSION}/variation/VEP/homo_sapiens_vep_${VEP_VERSION}_${GENOME}.tar.gz \ && tar xzf homo_sapiens_vep_${VEP_VERSION}_${GENOME}.tar.gz \ && rm homo_sapiens_vep_${VEP_VERSION}_${GENOME}.tar.gz + +USER root diff --git a/containers/vepgrch38/Dockerfile b/containers/vepgrch38/Dockerfile index 04185c8f13..6a1ec794cc 100644 --- a/containers/vepgrch38/Dockerfile +++ 
b/containers/vepgrch38/Dockerfile @@ -18,3 +18,5 @@ RUN \ ftp://ftp.ensembl.org/pub/release-${VEP_VERSION}/variation/VEP/homo_sapiens_vep_${VEP_VERSION}_${GENOME}.tar.gz \ && tar xzf homo_sapiens_vep_${VEP_VERSION}_${GENOME}.tar.gz \ && rm homo_sapiens_vep_${VEP_VERSION}_${GENOME}.tar.gz + +USER root From deb63719408baadc4cf2f3d4757d67ad09004a68 Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Tue, 20 Mar 2018 15:05:46 +0100 Subject: [PATCH 12/36] fix vep user bug in vep container without changing to user root --- configuration/docker.config | 1 + containers/vepgrch37/Dockerfile | 2 -- containers/vepgrch38/Dockerfile | 2 -- 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/configuration/docker.config b/configuration/docker.config index fc1de91f7e..98a8b79578 100644 --- a/configuration/docker.config +++ b/configuration/docker.config @@ -12,4 +12,5 @@ vim: syntax=groovy docker { enabled = true fixOwnership = true + runOptions = "-u \$(id -u):\$(id -g)" } diff --git a/containers/vepgrch37/Dockerfile b/containers/vepgrch37/Dockerfile index c13e108aa2..42e4cdacb9 100644 --- a/containers/vepgrch37/Dockerfile +++ b/containers/vepgrch37/Dockerfile @@ -18,5 +18,3 @@ RUN \ ftp://ftp.ensembl.org/pub/release-${VEP_VERSION}/variation/VEP/homo_sapiens_vep_${VEP_VERSION}_${GENOME}.tar.gz \ && tar xzf homo_sapiens_vep_${VEP_VERSION}_${GENOME}.tar.gz \ && rm homo_sapiens_vep_${VEP_VERSION}_${GENOME}.tar.gz - -USER root diff --git a/containers/vepgrch38/Dockerfile b/containers/vepgrch38/Dockerfile index 6a1ec794cc..04185c8f13 100644 --- a/containers/vepgrch38/Dockerfile +++ b/containers/vepgrch38/Dockerfile @@ -18,5 +18,3 @@ RUN \ ftp://ftp.ensembl.org/pub/release-${VEP_VERSION}/variation/VEP/homo_sapiens_vep_${VEP_VERSION}_${GENOME}.tar.gz \ && tar xzf homo_sapiens_vep_${VEP_VERSION}_${GENOME}.tar.gz \ && rm homo_sapiens_vep_${VEP_VERSION}_${GENOME}.tar.gz - -USER root From 57b924267ea4711b59c0b94a1d5a9dc5b616e162 Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Wed, 
21 Mar 2018 09:52:35 +0100 Subject: [PATCH 13/36] update comments --- configuration/base.config | 63 ++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/configuration/base.config b/configuration/base.config index de9d17deb7..739823829d 100644 --- a/configuration/base.config +++ b/configuration/base.config @@ -10,39 +10,40 @@ vim: syntax=groovy includeConfig 'genomes.config' wf_repository = 'maxulysse' -wf_tag = 'latest' params { - annotateTools = '' - annotateVCF = '' - containerPath = '.' - containers = '.' - docker = false - explicitBqsrNeeded = true - genome = 'GRCh38' - genome_base = '' - help = false - more = false - nfRequiredVersion = '0.25.0' - noBAMQC = false - noGVCF = false - noReports = false - nucleotidesPerSecond = 1000.0 - onlyQC = false - outDir = '.' - project = '' - push = false - refDir = '' - repository = wf_repository - sample = '' - sampleDir = '' - singularity = false - step = 'mapping' - tag = wf_tag - test = '' - tools = '' - verbose = false - version = '2.0.0' + // set up default params + annotateTools = '' // Tools to annotate by annotate.nf + annotateVCF = '' // Files to annotate by annotate.nf + containerPath = '.' // Path to containers + containers = '.' 
// List of containers to build in buildContainers.nf + docker = false // Don't use docker to build buildContainers.nf + download = false // Don't download reference in buildReferences.nf + explicitBqsrNeeded = true // Enable recalibration in main.nf + genome = 'GRCh38' // Default reference genome is GRCh38 + genome_base = '' // location of the reference files + help = false // help flag + more = false // more flag -> will give version information + nfRequiredVersion = '0.25.0' // Minimum version of nextflow required + noBAMQC = false // Use BAMQC + noGVCF = false // HaplotypeCaller will output gVCF as well + noReports = false // Reports are made by default + nucleotidesPerSecond = 1000.0 // To estimate interval size by default + onlyQC = false // All process will be run and not only the QC tools + outDir = '.' // Output directory + project = '' // UPPMAX project number + push = false // Don't push container to DockerHub + refDir = '' // Directory where are stored the references to build + repository = wf_repository // overwritten in singularity-path.config + sample = '' // sample files in tsv format + sampleDir = '' // samples directory + singularity = false // Don't use singularity to build buildContainers.nf + step = 'mapping' // Default step is mapping + tag = 'latest' // Default tag is latest, to be overwritten by --tag + test = false // Not testing by default + tools = '' // List of tools to use + verbose = false // Enable for more verbose information + version = '2.0.0' // Workflow version } process { From b41eb1380483c82e0fd7c8919dc0b8cfac602853 Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Wed, 21 Mar 2018 13:59:09 +0100 Subject: [PATCH 14/36] enhance comments --- configuration/base.config | 23 +++++++++++++---------- configuration/singularity-path.config | 4 ---- nextflow.config | 26 +++++++++++++++++--------- 3 files changed, 30 insertions(+), 23 deletions(-) diff --git a/configuration/base.config b/configuration/base.config index
739823829d..3f2ff56131 100644 --- a/configuration/base.config +++ b/configuration/base.config @@ -15,28 +15,31 @@ params { // set up default params annotateTools = '' // Tools to annotate by annotate.nf annotateVCF = '' // Files to annotate by annotate.nf - containerPath = '.' // Path to containers + containerPath = '.' // Path to Singularity images containers = '.' // List of containers to build in buildContainers.nf - docker = false // Don't use docker to build buildContainers.nf - download = false // Don't download reference in buildReferences.nf + docker = false // Don't use Docker to build buildContainers.nf + download = false // Don't download reference files in buildReferences.nf explicitBqsrNeeded = true // Enable recalibration in main.nf genome = 'GRCh38' // Default reference genome is GRCh38 - genome_base = '' // location of the reference files - help = false // help flag - more = false // more flag -> will give version information + genome_base = '' // Path to the reference files + help = false // Don't give help information + max_cpus = 16 // Base specifications + max_memory = 128.GB // Base specifications + max_time = 240.h // Base specifications + more = false // Don't give version information nfRequiredVersion = '0.25.0' // Minimum version of nextflow required noBAMQC = false // Use BAMQC noGVCF = false // HaplotypeCaller will output gVCF as well noReports = false // Reports are made by default nucleotidesPerSecond = 1000.0 // To estimate interval size by default onlyQC = false // All process will be run and not only the QC tools - outDir = '.' // Output directory + outDir = '.' 
// Path to output directory project = '' // UPPMAX project number push = false // Don't push container to DockerHub - refDir = '' // Directory where are stored the references to build - repository = wf_repository // overwritten in singularity-path.config + refDir = '' // Path to the references to build + repository = wf_repository // DockerHub containers repository sample = '' // sample files in tsv format - sampleDir = '' // samples directory + sampleDir = '' // samples directory (for Germline only) singularity = false // Don't use singularity to build buildContainers.nf step = 'mapping' // Default step is mapping tag = 'latest' // Default tag is latest, to be overwritten by --tag diff --git a/configuration/singularity-path.config b/configuration/singularity-path.config index f31faeb501..289da406d0 100644 --- a/configuration/singularity-path.config +++ b/configuration/singularity-path.config @@ -14,10 +14,6 @@ singularity { enabled = true } -params { - containerPath='containers' -} - process { $BuildBWAindexes.container = "${params.containerPath}/sarek-${params.tag}.img" $BuildPicardIndex.container = "${params.containerPath}/picard-${params.tag}.img" diff --git a/nextflow.config b/nextflow.config index 81f34b3caf..f4b927c441 100644 --- a/nextflow.config +++ b/nextflow.config @@ -20,57 +20,65 @@ env { } profiles { + // Default profile for UPPMAX secure clusters // Runs the pipeline locally on a single 16-core node // Singularity images need to be set up - standard { // Default profile for UPPMAX secure clusters + standard { includeConfig 'configuration/base.config' includeConfig 'configuration/uppmax-localhost.config' includeConfig 'configuration/singularity-path.config' } + // slurm profile for UPPMAX secure clusters // Runs the pipeline using the job scheduler // Singularity images need to be set up - slurm { // slurm profile for UPPMAX secure clusters + slurm { includeConfig 'configuration/base.config' includeConfig 'configuration/uppmax-slurm.config' 
includeConfig 'configuration/singularity-path.config' } + // profile for UPPMAX clusters // Runs the pipeline locally on a single 16-core node // Singularity images will be pulled automatically - download { // profile for UPPMAX clusters + download { includeConfig 'configuration/base.config' includeConfig 'configuration/uppmax-localhost.config' includeConfig 'configuration/singularity.config' } + // slurm profile for UPPMAX clusters // Runs the pipeline using the job scheduler // Singularity images will be pulled automatically - slurmDownload { // slurm profile for UPPMAX clusters + slurmDownload { includeConfig 'configuration/base.config' includeConfig 'configuration/uppmax-slurm.config' includeConfig 'configuration/singularity.config' } + // Small testing with Docker profile // Docker images will be pulled automatically - docker { // For small testing testing with Docker + docker { includeConfig 'configuration/base.config' includeConfig 'configuration/travis.config' includeConfig 'configuration/docker.config' includeConfig 'configuration/containers.config' } + // AWS Batch with Docker profile // Docker images will be pulled automatically - awsbatch { // For running on AWS Batch with Docker + awsbatch { includeConfig 'configuration/base.config' includeConfig 'configuration/aws-batch.config' includeConfig 'configuration/docker.config' includeConfig 'configuration/containers.config' - } + } + // Small testing with Singularity profile // Singularity images will be pulled automatically - singularity { // For small testing + singularity { includeConfig 'configuration/base.config' includeConfig 'configuration/travis.config' includeConfig 'configuration/singularity.config' includeConfig 'configuration/containers.config' } + // Small testing with Singularity profile // Singularity images need to be set up - singularityPath { // For small testing + singularityPath { includeConfig 'configuration/base.config' includeConfig 'configuration/travis.config' includeConfig 
'configuration/singularity-path.config' From db6dbc800b8c3948e90bb1e3507baebfb8b67882 Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Wed, 21 Mar 2018 13:59:24 +0100 Subject: [PATCH 15/36] simplify configuration --- configuration/travis.config | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/configuration/travis.config b/configuration/travis.config index ef7666037f..dded8464e1 100644 --- a/configuration/travis.config +++ b/configuration/travis.config @@ -11,10 +11,13 @@ vim: syntax=groovy params { genome = 'smallGRCh37' - genome_base = params.genome == 'GRCh37' ? '/sw/data/uppnex/ToolBox/ReferenceAssemblies/hg38make/bundle/2.8/b37' : params.genome == 'GRCh38' ? '/sw/data/uppnex/ToolBox/hg38bundle' : 'References/smallGRCh37' + genome_base = 'References/smallGRCh37' + max_cpus = 2 + max_memory = 7.GB + max_time = 1.h } process { - cpus = 2 - memory = 7.GB + cpus = params.max_cpus + memory = params.max_memory } From 2ec96fd79dc8810e5fb8d25f6ab279978fcddc12 Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Thu, 22 Mar 2018 10:03:40 +0100 Subject: [PATCH 16/36] use /home/max/workspace/github/Sarek instead of . to fix #530 --- configuration/base.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configuration/base.config b/configuration/base.config index 3f2ff56131..af94a4545d 100644 --- a/configuration/base.config +++ b/configuration/base.config @@ -33,7 +33,7 @@ params { noReports = false // Reports are made by default nucleotidesPerSecond = 1000.0 // To estimate interval size by default onlyQC = false // All process will be run and not only the QC tools - outDir = '.' 
// Path to output directory + outDir = "${PWD}" // Path to output directory project = '' // UPPMAX project number push = false // Don't push container to DockerHub refDir = '' // Path to the references to build From a5a176f2c9e5eab778daa7887f9c871fbefc08fb Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Thu, 22 Mar 2018 10:51:15 +0100 Subject: [PATCH 17/36] use hardlink instead of copy for PublishDir --- annotate.nf | 6 +++--- buildReferences.nf | 20 +++++++++----------- germlineVC.nf | 12 ++++++------ main.nf | 12 ++++++------ runMultiQC.nf | 4 ++-- somaticVC.nf | 18 +++++++++--------- 6 files changed, 35 insertions(+), 37 deletions(-) diff --git a/annotate.nf b/annotate.nf index ffe312fd2d..a2ac8fec44 100644 --- a/annotate.nf +++ b/annotate.nf @@ -108,7 +108,7 @@ vcfNotToAnnotate.close() process RunBcftoolsStats { tag {vcf} - publishDir "${params.outDir}/${directoryMap.bcftoolsStats}", mode: 'copy' + publishDir "${params.outDir}/${directoryMap.bcftoolsStats}", mode: 'link' input: set variantCaller, file(vcf) from vcfForBCFtools @@ -132,7 +132,7 @@ if (params.verbose) bcfReport = bcfReport.view { process RunSnpeff { tag {vcf} - publishDir "${params.outDir}/${directoryMap.snpeff}", mode: 'copy' + publishDir "${params.outDir}/${directoryMap.snpeff}", mode: 'link' input: set variantCaller, file(vcf) from vcfForSnpeff @@ -167,7 +167,7 @@ if (params.verbose) snpeffReport = snpeffReport.view { process RunVEP { tag {vcf} - publishDir "${params.outDir}/${directoryMap.vep}", mode: 'copy' + publishDir "${params.outDir}/${directoryMap.vep}", mode: 'link' input: set variantCaller, file(vcf) from vcfForVep diff --git a/buildReferences.nf b/buildReferences.nf index 4ebc57f19f..f64f35acf6 100644 --- a/buildReferences.nf +++ b/buildReferences.nf @@ -165,8 +165,11 @@ ch_decompressedFiles it =~ ".fasta" ? 0 : it =~ ".vcf" ? 
1 : 2} +(ch_fastaFile, ch_fastaFileToKeep) = ch_fastaFile.into(2) +(ch_vcfFiles, ch_vcfFilesToKeep) = ch_vcfFiles.into(2) + ch_notCompressedfiles - .mix(ch_otherFiles) + .mix(ch_otherFiles, ch_fastaFileToKeep, ch_vcfFilesToKeep) .collectFile(storeDir: params.outDir) ch_fastaForBWA = Channel.create() @@ -178,13 +181,12 @@ ch_fastaFile.into(ch_fastaForBWA,ch_fastaForPicard,ch_fastaForSAMTools) process BuildBWAindexes { tag {f_reference} - publishDir params.outDir, mode: 'copy' + publishDir params.outDir, mode: 'link' input: file(f_reference) from ch_fastaForBWA output: - file(f_reference) into ch_fastaFileToKeep file("*.{amb,ann,bwt,pac,sa}") into bwaIndexes script: @@ -194,9 +196,6 @@ process BuildBWAindexes { """ } -if (params.verbose) ch_fastaFileToKeep.view { - "Fasta File : ${it.fileName}" -} if (params.verbose) bwaIndexes.flatten().view { "BWA index : ${it.fileName}" } @@ -204,7 +203,7 @@ if (params.verbose) bwaIndexes.flatten().view { process BuildPicardIndex { tag {f_reference} - publishDir params.outDir, mode: 'copy' + publishDir params.outDir, mode: 'link' input: file(f_reference) from ch_fastaForPicard @@ -229,7 +228,7 @@ if (params.verbose) ch_picardIndex.view { process BuildSAMToolsIndex { tag {f_reference} - publishDir params.outDir, mode: 'copy' + publishDir params.outDir, mode: 'link' input: file(f_reference) from ch_fastaForSAMTools @@ -250,13 +249,13 @@ if (params.verbose) ch_samtoolsIndex.view { process BuildVCFIndex { tag {f_reference} - publishDir params.outDir, mode: 'copy' + publishDir params.outDir, mode: 'link' input: file(f_reference) from ch_vcfFiles output: - set file(f_reference), file("${f_reference}.idx") into ch_vcfIndex + set file("${f_reference}.idx") into ch_vcfIndex script: """ @@ -266,7 +265,6 @@ process BuildVCFIndex { if (params.verbose) ch_vcfIndex.view { "VCF indexed:\n\ - VCF File : ${it[0].fileName}\n\ VCF index : ${it[1].fileName}" } diff --git a/germlineVC.nf b/germlineVC.nf index aab5763357..f50148aece 100644 --- 
a/germlineVC.nf +++ b/germlineVC.nf @@ -130,7 +130,7 @@ if (params.verbose) recalibratedBam = recalibratedBam.view { process RunSamtoolsStats { tag {idPatient + "-" + idSample} - publishDir "${params.outDir}/${directoryMap.samtoolsStats}", mode: 'copy' + publishDir "${params.outDir}/${directoryMap.samtoolsStats}", mode: 'link' input: set idPatient, status, idSample, file(bam), file(bai) from bamForSamToolsStats @@ -154,7 +154,7 @@ if (params.verbose) samtoolsStatsReport = samtoolsStatsReport.view { process RunBamQC { tag {idPatient + "-" + idSample} - publishDir "${params.outDir}/${directoryMap.bamQC}", mode: 'copy' + publishDir "${params.outDir}/${directoryMap.bamQC}", mode: 'link' input: set idPatient, status, idSample, file(bam), file(bai) from bamForBamQC @@ -399,7 +399,7 @@ if (params.verbose) vcfsToMerge = vcfsToMerge.view { process ConcatVCF { tag {variantCaller + "-" + idSampleNormal} - publishDir "${params.outDir}/${directoryMap."$variantCaller"}", mode: 'copy' + publishDir "${params.outDir}/${directoryMap."$variantCaller"}", mode: 'link' input: set variantCaller, idPatient, idSampleNormal, idSampleTumor, file(vcFiles) from vcfsToMerge @@ -467,7 +467,7 @@ if (params.verbose) vcfConcatenated = vcfConcatenated.view { process RunSingleStrelka { tag {idSample} - publishDir "${params.outDir}/${directoryMap.strelka}", mode: 'copy' + publishDir "${params.outDir}/${directoryMap.strelka}", mode: 'link' input: set idPatient, status, idSample, file(bam), file(bai) from bamsForSingleStrelka @@ -511,7 +511,7 @@ if (params.verbose) singleStrelkaOutput = singleStrelkaOutput.view { process RunSingleManta { tag {idSample + " - Single Diploid"} - publishDir "${params.outDir}/${directoryMap.manta}", mode: 'copy' + publishDir "${params.outDir}/${directoryMap.manta}", mode: 'link' input: set idPatient, status, idSample, file(bam), file(bai) from bamsForSingleManta @@ -569,7 +569,7 @@ vcfForBCFtools = Channel.empty().mix( process RunBcftoolsStats { tag {vcf} - publishDir 
"${params.outDir}/${directoryMap.bcftoolsStats}", mode: 'copy' + publishDir "${params.outDir}/${directoryMap.bcftoolsStats}", mode: 'link' input: set variantCaller, file(vcf) from vcfForBCFtools diff --git a/main.nf b/main.nf index 8628539bf1..87d88de699 100644 --- a/main.nf +++ b/main.nf @@ -147,7 +147,7 @@ if (params.verbose) bamFiles = bamFiles.view { process RunFastQC { tag {idPatient + "-" + idRun} - publishDir "${params.outDir}/${directoryMap.fastQC}/${idRun}", mode: 'copy' + publishDir "${params.outDir}/${directoryMap.fastQC}/${idRun}", mode: 'link' input: set idPatient, status, idSample, idRun, file(fastqFile1), file(fastqFile2) from fastqFilesforFastQC @@ -249,7 +249,7 @@ if (params.verbose) mergedBam = mergedBam.view { process MarkDuplicates { tag {idPatient + "-" + idSample} - publishDir params.outDir, saveAs: { it == "${bam}.metrics" ? "${directoryMap.markDuplicatesQC}/${it}" : "${directoryMap.nonRealigned}/${it}" }, mode: 'copy' + publishDir params.outDir, saveAs: { it == "${bam}.metrics" ? 
"${directoryMap.markDuplicatesQC}/${it}" : "${directoryMap.nonRealigned}/${it}" }, mode: 'link' input: set idPatient, status, idSample, file(bam) from mergedBam @@ -429,7 +429,7 @@ if (params.verbose) realignedBam = realignedBam.view { process CreateRecalibrationTable { tag {idPatient + "-" + idSample} - publishDir "${params.outDir}/${directoryMap.nonRecalibrated}", mode: 'copy' + publishDir "${params.outDir}/${directoryMap.nonRecalibrated}", mode: 'link' input: set idPatient, status, idSample, file(bam), file(bai) from realignedBam @@ -496,7 +496,7 @@ recalTables = recalTables.map { [it[0]] + it[2..-1] } // remove status process RecalibrateBam { tag {idPatient + "-" + idSample} - publishDir "${params.outDir}/${directoryMap.recalibrated}", mode: 'copy' + publishDir "${params.outDir}/${directoryMap.recalibrated}", mode: 'link' input: set idPatient, status, idSample, file(bam), file(bai), file(recalibrationReport) from recalibrationTable @@ -544,7 +544,7 @@ if (params.verbose) recalibratedBam = recalibratedBam.view { process RunSamtoolsStats { tag {idPatient + "-" + idSample} - publishDir "${params.outDir}/${directoryMap.samtoolsStats}", mode: 'copy' + publishDir "${params.outDir}/${directoryMap.samtoolsStats}", mode: 'link' input: set idPatient, status, idSample, file(bam), file(bai) from bamForSamToolsStats @@ -568,7 +568,7 @@ if (params.verbose) samtoolsStatsReport = samtoolsStatsReport.view { process RunBamQC { tag {idPatient + "-" + idSample} - publishDir "${params.outDir}/${directoryMap.bamQC}", mode: 'copy' + publishDir "${params.outDir}/${directoryMap.bamQC}", mode: 'link' input: set idPatient, status, idSample, file(bam), file(bai) from bamForBamQC diff --git a/runMultiQC.nf b/runMultiQC.nf index eec09fdef9..ccf49e204a 100644 --- a/runMultiQC.nf +++ b/runMultiQC.nf @@ -68,7 +68,7 @@ reports = !params.noReports startMessage() process GenerateMultiQCconfig { - publishDir "${params.outDir}/${directoryMap.multiQC}", mode: 'copy' + publishDir 
"${params.outDir}/${directoryMap.multiQC}", mode: 'link' input: @@ -115,7 +115,7 @@ reportsForMultiQC = Channel.empty() ).collect() process RunMultiQC { - publishDir "${params.outDir}/${directoryMap.multiQC}", mode: 'copy' + publishDir "${params.outDir}/${directoryMap.multiQC}", mode: 'link' input: file ('*') from reportsForMultiQC diff --git a/somaticVC.nf b/somaticVC.nf index 7dfcd7d13d..d3969ca644 100644 --- a/somaticVC.nf +++ b/somaticVC.nf @@ -135,7 +135,7 @@ if (params.verbose) recalibratedBam = recalibratedBam.view { process RunSamtoolsStats { tag {idPatient + "-" + idSample} - publishDir "${params.outDir}/${directoryMap.samtoolsStats}", mode: 'copy' + publishDir "${params.outDir}/${directoryMap.samtoolsStats}", mode: 'link' input: set idPatient, status, idSample, file(bam), file(bai) from bamForSamToolsStats @@ -159,7 +159,7 @@ if (params.verbose) samtoolsStatsReport = samtoolsStatsReport.view { process RunBamQC { tag {idPatient + "-" + idSample} - publishDir "${params.outDir}/${directoryMap.bamQC}", mode: 'copy' + publishDir "${params.outDir}/${directoryMap.bamQC}", mode: 'link' input: set idPatient, status, idSample, file(bam), file(bai) from bamForBamQC @@ -428,7 +428,7 @@ if (params.verbose) vcfsToMerge = vcfsToMerge.view { process ConcatVCF { tag {variantCaller + "_" + idSampleTumor + "_vs_" + idSampleNormal} - publishDir "${params.outDir}/${directoryMap."$variantCaller"}", mode: 'copy' + publishDir "${params.outDir}/${directoryMap."$variantCaller"}", mode: 'link' input: set variantCaller, idPatient, idSampleNormal, idSampleTumor, file(vcFiles) from vcfsToMerge @@ -494,7 +494,7 @@ if (params.verbose) vcfConcatenated = vcfConcatenated.view { process RunStrelka { tag {idSampleTumor + "_vs_" + idSampleNormal} - publishDir "${params.outDir}/${directoryMap.strelka}", mode: 'copy' + publishDir "${params.outDir}/${directoryMap.strelka}", mode: 'link' input: set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), 
file(baiTumor) from bamsForStrelka @@ -540,7 +540,7 @@ if (params.verbose) strelkaOutput = strelkaOutput.view { process RunManta { tag {idSampleTumor + "_vs_" + idSampleNormal} - publishDir "${params.outDir}/${directoryMap.manta}", mode: 'copy' + publishDir "${params.outDir}/${directoryMap.manta}", mode: 'link' input: set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor) from bamsForManta @@ -593,7 +593,7 @@ if (params.verbose) mantaOutput = mantaOutput.view { process RunSingleManta { tag {idSample + " - Tumor-Only"} - publishDir "${params.outDir}/${directoryMap.manta}", mode: 'copy' + publishDir "${params.outDir}/${directoryMap.manta}", mode: 'link' input: set idPatient, status, idSample, file(bam), file(bai) from bamsForSingleManta @@ -686,7 +686,7 @@ alleleCountOutput = alleleCountOutput.map { process RunConvertAlleleCounts { tag {idSampleTumor + "_vs_" + idSampleNormal} - publishDir "${params.outDir}/${directoryMap.ascat}", mode: 'copy' + publishDir "${params.outDir}/${directoryMap.ascat}", mode: 'link' input: set idPatient, idSampleNormal, idSampleTumor, file(alleleCountNormal), file(alleleCountTumor) from alleleCountOutput @@ -708,7 +708,7 @@ process RunConvertAlleleCounts { process RunAscat { tag {idSampleTumor + "_vs_" + idSampleNormal} - publishDir "${params.outDir}/${directoryMap.ascat}", mode: 'copy' + publishDir "${params.outDir}/${directoryMap.ascat}", mode: 'link' input: set idPatient, idSampleNormal, idSampleTumor, file(bafNormal), file(logrNormal), file(bafTumor), file(logrTumor) from convertAlleleCountsOutput @@ -760,7 +760,7 @@ vcfForBCFtools = Channel.empty().mix( process RunBcftoolsStats { tag {vcf} - publishDir "${params.outDir}/${directoryMap.bcftoolsStats}", mode: 'copy' + publishDir "${params.outDir}/${directoryMap.bcftoolsStats}", mode: 'link' input: set variantCaller, file(vcf) from vcfForBCFtools From b9ab0bfb76850e68d2662b4dbc317673e97b199e Mon Sep 17 00:00:00 2001 From: Maxime 
Garcia Date: Thu, 22 Mar 2018 10:51:24 +0100 Subject: [PATCH 18/36] update Checklist --- .github/RELEASE_CHECKLIST.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/RELEASE_CHECKLIST.md b/.github/RELEASE_CHECKLIST.md index 772ca58d94..97e0e363f8 100644 --- a/.github/RELEASE_CHECKLIST.md +++ b/.github/RELEASE_CHECKLIST.md @@ -2,6 +2,8 @@ This checklist is for our own reference 1. Check that everything is up to date and ready to go + - Travis test is passing + - Manual testing on Bianca is passing 2. Increase version numbers. 3. Update version numbers in code: `configuration/base.config` 4. Build, and get the containers. From 0c8b5ed808fccae0f24143ec2a2936350567c132 Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Thu, 22 Mar 2018 12:07:33 +0100 Subject: [PATCH 19/36] order tests by time --- .travis.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/.travis.yml b/.travis.yml index 980702f158..08ad72172b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,19 +8,19 @@ services: - docker env: - - NXF_VER=0.27.0 SGT_VER=2.4.2 PROFILE=singularity TEST=DIR TOOL_INSTALL=all - - NXF_VER=0.27.0 PROFILE=docker TEST=DIR TOOL_INSTALL=nextflow - - NXF_VER=0.27.0 SGT_VER=2.4.2 PROFILE=singularity TEST=STEP TOOL_INSTALL=all - - NXF_VER=0.27.0 PROFILE=docker TEST=STEP TOOL_INSTALL=nextflow - - NXF_VER=0.27.0 SGT_VER=2.4.2 PROFILE=singularity TEST=ANNOTATESNPEFF TOOL_INSTALL=all - - NXF_VER=0.27.0 PROFILE=docker TEST=ANNOTATESNPEFF TOOL_INSTALL=nextflow - - NXF_VER=0.27.0 PROFILE=docker TEST=ANNOTATEVEP TOOL_INSTALL=nextflow - - NXF_VER=0.27.0 SGT_VER=2.4.2 PROFILE=singularity TEST=GERMLINE TOOL_INSTALL=all - - NXF_VER=0.27.0 PROFILE=docker TEST=GERMLINE TOOL_INSTALL=nextflow - NXF_VER=0.27.0 SGT_VER=2.4.2 PROFILE=singularity TEST=TOOLS TOOL_INSTALL=all - NXF_VER=0.27.0 PROFILE=docker TEST=TOOLS TOOL_INSTALL=nextflow + - NXF_VER=0.27.0 SGT_VER=2.4.2 PROFILE=singularity TEST=STEP TOOL_INSTALL=all - NXF_VER=0.27.0 SGT_VER=2.4.2 
PROFILE=singularity TEST=MANTA TOOL_INSTALL=all + - NXF_VER=0.27.0 SGT_VER=2.4.2 PROFILE=singularity TEST=ANNOTATESNPEFF TOOL_INSTALL=all + - NXF_VER=0.27.0 SGT_VER=2.4.2 PROFILE=singularity TEST=GERMLINE TOOL_INSTALL=all + - NXF_VER=0.27.0 PROFILE=docker TEST=ANNOTATEVEP TOOL_INSTALL=nextflow + - NXF_VER=0.27.0 PROFILE=docker TEST=ANNOTATESNPEFF TOOL_INSTALL=nextflow + - NXF_VER=0.27.0 SGT_VER=2.4.2 PROFILE=singularity TEST=DIR TOOL_INSTALL=all - NXF_VER=0.27.0 PROFILE=docker TEST=MANTA TOOL_INSTALL=nextflow + - NXF_VER=0.27.0 PROFILE=docker TEST=STEP TOOL_INSTALL=nextflow + - NXF_VER=0.27.0 PROFILE=docker TEST=GERMLINE TOOL_INSTALL=nextflow + - NXF_VER=0.27.0 PROFILE=docker TEST=DIR TOOL_INSTALL=nextflow install: # Install Nextflow (and Singularity if needed) - "./scripts/install.sh --tool $TOOL_INSTALL" From 174babe014ee2af4aaa6c1ee2e2f562de24f5563 Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Thu, 22 Mar 2018 12:46:25 +0100 Subject: [PATCH 20/36] remove set for single file output of process --- buildReferences.nf | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/buildReferences.nf b/buildReferences.nf index f64f35acf6..7b2e88ae86 100644 --- a/buildReferences.nf +++ b/buildReferences.nf @@ -255,7 +255,7 @@ process BuildVCFIndex { file(f_reference) from ch_vcfFiles output: - set file("${f_reference}.idx") into ch_vcfIndex + file("${f_reference}.idx") into ch_vcfIndex script: """ @@ -264,8 +264,7 @@ process BuildVCFIndex { } if (params.verbose) ch_vcfIndex.view { - "VCF indexed:\n\ - VCF index : ${it[1].fileName}" + "VCF index : ${it.fileName}" } /* From d0b96e62a083c230cc64470ed4629d0293f5ed1f Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Thu, 22 Mar 2018 14:03:47 +0100 Subject: [PATCH 21/36] directoryMap now contains params.outDir --- annotate.nf | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/annotate.nf b/annotate.nf index a2ac8fec44..faf1044b80 100644 --- a/annotate.nf +++ b/annotate.nf @@ -108,7 
+108,7 @@ vcfNotToAnnotate.close() process RunBcftoolsStats { tag {vcf} - publishDir "${params.outDir}/${directoryMap.bcftoolsStats}", mode: 'link' + publishDir directoryMap.bcftoolsStats, mode: 'link' input: set variantCaller, file(vcf) from vcfForBCFtools @@ -132,7 +132,7 @@ if (params.verbose) bcfReport = bcfReport.view { process RunSnpeff { tag {vcf} - publishDir "${params.outDir}/${directoryMap.snpeff}", mode: 'link' + publishDir directoryMap.snpeff, mode: 'link' input: set variantCaller, file(vcf) from vcfForSnpeff @@ -167,7 +167,7 @@ if (params.verbose) snpeffReport = snpeffReport.view { process RunVEP { tag {vcf} - publishDir "${params.outDir}/${directoryMap.vep}", mode: 'link' + publishDir directoryMap.vep, mode: 'link' input: set variantCaller, file(vcf) from vcfForVep @@ -219,9 +219,9 @@ def checkUppmaxProject() { def defineDirectoryMap() { return [ - 'bcftoolsStats' : 'Reports/BCFToolsStats', - 'snpeff' : 'Annotation/SnpEff', - 'vep' : 'Annotation/VEP' + 'bcftoolsStats' : "${params.outDir}/Reports/BCFToolsStats", + 'snpeff' : "${params.outDir}/Annotation/SnpEff", + 'vep' : "${params.outDir}/Annotation/VEP" ] } From eea89aa264f0e36ae09273c0b12c86c9983a495c Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Thu, 22 Mar 2018 14:04:14 +0100 Subject: [PATCH 22/36] fix space missing --- buildContainers.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/buildContainers.nf b/buildContainers.nf index b99b62a0b6..e328ab086e 100644 --- a/buildContainers.nf +++ b/buildContainers.nf @@ -7,7 +7,7 @@ kate: syntax groovy; space-indent on; indent-width 2; ================================================================================ = S A R E K = ================================================================================ -New Germline (+ Somatic) Analysis Workflow. Started March 2016. + New Germline (+ Somatic) Analysis Workflow. Started March 2016. 
-------------------------------------------------------------------------------- @Authors Sebastian DiLorenzo [@Sebastian-D] From 507caf9fc33bbe67cb53e4b655e1c3b42cbcc931 Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Thu, 22 Mar 2018 14:05:03 +0100 Subject: [PATCH 23/36] typo + create function to define reference file names --- buildReferences.nf | 50 ++++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/buildReferences.nf b/buildReferences.nf index 7b2e88ae86..becc27645c 100644 --- a/buildReferences.nf +++ b/buildReferences.nf @@ -7,7 +7,7 @@ kate: syntax groovy; space-indent on; indent-width 2; ================================================================================ = S A R E K = ================================================================================ -New Germline (+ Somatic) Analysis Workflow. Started March 2016. + New Germline (+ Somatic) Analysis Workflow. Started March 2016. -------------------------------------------------------------------------------- @Authors Sebastian DiLorenzo [@Sebastian-D] @@ -62,29 +62,7 @@ if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project Date: Thu, 22 Mar 2018 14:05:54 +0100 Subject: [PATCH 24/36] directoryMap now contains params.outDir + remove old variables replaced by parameters --- germlineVC.nf | 64 ++++++++++++++++++++++++--------------------------- 1 file changed, 30 insertions(+), 34 deletions(-) diff --git a/germlineVC.nf b/germlineVC.nf index f50148aece..af1d3775d4 100644 --- a/germlineVC.nf +++ b/germlineVC.nf @@ -67,10 +67,6 @@ tools = params.tools ? 
params.tools.split(',').collect{it.trim().toLowerCase()} directoryMap = defineDirectoryMap() referenceMap = defineReferenceMap() toolList = defineToolList() -nucleotidesPerSecond = 1000.0 // used to estimate variant calling runtime -gvcf = !params.noGVCF -reports = !params.noReports -onlyQC = params.onlyQC if (!checkReferenceMap(referenceMap)) exit 1, 'Missing Reference file(s), see --help for more information' if (!checkParameterList(tools,toolList)) exit 1, 'Unknown tool(s), see --help for more information' @@ -88,7 +84,7 @@ if (params.test && params.genome in ['GRCh37', 'GRCh38']) { tsvPath = '' if (params.sample) tsvPath = params.sample -else tsvPath = "${params.outDir}/${directoryMap.recalibrated}/recalibrated.tsv" +else tsvPath = "${directoryMap.recalibrated}/recalibrated.tsv" // Set up the bamFiles channel @@ -130,7 +126,7 @@ if (params.verbose) recalibratedBam = recalibratedBam.view { process RunSamtoolsStats { tag {idPatient + "-" + idSample} - publishDir "${params.outDir}/${directoryMap.samtoolsStats}", mode: 'link' + publishDir directoryMap.samtoolsStats, mode: 'link' input: set idPatient, status, idSample, file(bam), file(bai) from bamForSamToolsStats @@ -138,7 +134,7 @@ process RunSamtoolsStats { output: file ("${bam}.samtools.stats.out") into samtoolsStatsReport - when: reports + when: !params.noReports script: """ @@ -154,7 +150,7 @@ if (params.verbose) samtoolsStatsReport = samtoolsStatsReport.view { process RunBamQC { tag {idPatient + "-" + idSample} - publishDir "${params.outDir}/${directoryMap.bamQC}", mode: 'link' + publishDir directoryMap.bamQC, mode: 'link' input: set idPatient, status, idSample, file(bam), file(bai) from bamForBamQC @@ -162,7 +158,7 @@ process RunBamQC { output: file("${idSample}") into bamQCreport - when: reports && !params.noBAMQC + when: !params.noReports && !params.noBAMQC script: """ @@ -234,7 +230,7 @@ process CreateIntervalBeds { t = \$5 # runtime estimate if (t == "") { # no runtime estimate in this row, assume 
default value - t = (\$3 - \$2) / ${nucleotidesPerSecond} + t = (\$3 - \$2) / ${params.nucleotidesPerSecond} } if (name == "" || (chunk > 600 && (chunk + t) > longest * 1.05)) { # start a new chunk @@ -266,7 +262,7 @@ bedIntervals = bedIntervals else { start = fields[1].toInteger() end = fields[2].toInteger() - duration += (end - start) / nucleotidesPerSecond + duration += (end - start) / params.nucleotidesPerSecond } } [duration, intervalFile] @@ -329,7 +325,7 @@ process RunHaplotypecaller { set val("gvcf-hc"), idPatient, idSample, idSample, file("${intervalBed.baseName}_${idSample}.g.vcf") into hcGenomicVCF set idPatient, idSample, file(intervalBed), file("${intervalBed.baseName}_${idSample}.g.vcf") into vcfsToGenotype - when: 'haplotypecaller' in tools && !onlyQC + when: 'haplotypecaller' in tools && !params.onlyQC script: BQSR = (recalTable != null) ? "--BQSR $recalTable" : '' @@ -350,7 +346,7 @@ process RunHaplotypecaller { } hcGenomicVCF = hcGenomicVCF.groupTuple(by:[0,1,2,3]) -if (!gvcf) hcGenomicVCF.close() +if (params.noGVCF) hcGenomicVCF.close() process RunGenotypeGVCFs { tag {idSample + "-" + intervalBed.baseName} @@ -368,7 +364,7 @@ process RunGenotypeGVCFs { output: set val("haplotypecaller"), idPatient, idSample, idSample, file("${intervalBed.baseName}_${idSample}.vcf") into hcGenotypedVCF - when: 'haplotypecaller' in tools && !onlyQC + when: 'haplotypecaller' in tools && !params.onlyQC script: // Using -L is important for speed @@ -399,7 +395,7 @@ if (params.verbose) vcfsToMerge = vcfsToMerge.view { process ConcatVCF { tag {variantCaller + "-" + idSampleNormal} - publishDir "${params.outDir}/${directoryMap."$variantCaller"}", mode: 'link' + publishDir "${directoryMap."$variantCaller"}", mode: 'link' input: set variantCaller, idPatient, idSampleNormal, idSampleTumor, file(vcFiles) from vcfsToMerge @@ -409,7 +405,7 @@ process ConcatVCF { set variantCaller, idPatient, idSampleNormal, idSampleTumor, file("*.vcf.gz") into vcfConcatenated 
file("*.vcf.gz.tbi") into vcfConcatenatedTbi - when: ( 'haplotypecaller' in tools || 'mutect1' in tools || 'mutect2' in tools || 'freebayes' in tools ) && !onlyQC + when: ( 'haplotypecaller' in tools || 'mutect1' in tools || 'mutect2' in tools || 'freebayes' in tools ) && !params.onlyQC script: if (variantCaller == 'haplotypecaller') outputFile = "${variantCaller}_${idSampleNormal}.vcf" @@ -467,7 +463,7 @@ if (params.verbose) vcfConcatenated = vcfConcatenated.view { process RunSingleStrelka { tag {idSample} - publishDir "${params.outDir}/${directoryMap.strelka}", mode: 'link' + publishDir directoryMap.strelka, mode: 'link' input: set idPatient, status, idSample, file(bam), file(bai) from bamsForSingleStrelka @@ -479,7 +475,7 @@ process RunSingleStrelka { output: set val("singlestrelka"), idPatient, idSample, file("*.vcf.gz"), file("*.vcf.gz.tbi") into singleStrelkaOutput - when: 'strelka' in tools && !onlyQC + when: 'strelka' in tools && !params.onlyQC script: """ @@ -511,7 +507,7 @@ if (params.verbose) singleStrelkaOutput = singleStrelkaOutput.view { process RunSingleManta { tag {idSample + " - Single Diploid"} - publishDir "${params.outDir}/${directoryMap.manta}", mode: 'link' + publishDir directoryMap.manta, mode: 'link' input: set idPatient, status, idSample, file(bam), file(bai) from bamsForSingleManta @@ -523,7 +519,7 @@ process RunSingleManta { output: set val("singlemanta"), idPatient, idSample, file("*.vcf.gz"), file("*.vcf.gz.tbi") into singleMantaOutput - when: 'manta' in tools && status == 0 && !onlyQC + when: 'manta' in tools && status == 0 && !params.onlyQC script: """ @@ -569,7 +565,7 @@ vcfForBCFtools = Channel.empty().mix( process RunBcftoolsStats { tag {vcf} - publishDir "${params.outDir}/${directoryMap.bcftoolsStats}", mode: 'link' + publishDir directoryMap.bcftoolsStats, mode: 'link' input: set variantCaller, file(vcf) from vcfForBCFtools @@ -577,7 +573,7 @@ process RunBcftoolsStats { output: file ("${vcf.baseName}.bcf.tools.stats.out") into 
bcfReport - when: reports + when: !params.noReports script: """ @@ -660,18 +656,18 @@ def checkExactlyOne(list) { def defineDirectoryMap() { return [ - 'recalibrated' : 'Preprocessing/Recalibrated', - 'bamQC' : 'Reports/bamQC', - 'bcftoolsStats' : 'Reports/BCFToolsStats', - 'samtoolsStats' : 'Reports/SamToolsStats', - 'ascat' : 'VariantCalling/Ascat', - 'freebayes' : 'VariantCalling/FreeBayes', - 'haplotypecaller' : 'VariantCalling/HaplotypeCaller', - 'gvcf-hc' : 'VariantCalling/HaplotypeCallerGVCF', - 'manta' : 'VariantCalling/Manta', - 'mutect1' : 'VariantCalling/MuTect1', - 'mutect2' : 'VariantCalling/MuTect2', - 'strelka' : 'VariantCalling/Strelka' + 'recalibrated' : "${params.outDir}/Preprocessing/Recalibrated", + 'bamQC' : "${params.outDir}/Reports/bamQC", + 'bcftoolsStats' : "${params.outDir}/Reports/BCFToolsStats", + 'samtoolsStats' : "${params.outDir}/Reports/SamToolsStats", + 'ascat' : "${params.outDir}/VariantCalling/Ascat", + 'freebayes' : "${params.outDir}/VariantCalling/FreeBayes", + 'haplotypecaller' : "${params.outDir}/VariantCalling/HaplotypeCaller", + 'gvcf-hc' : "${params.outDir}/VariantCalling/HaplotypeCallerGVCF", + 'manta' : "${params.outDir}/VariantCalling/Manta", + 'mutect1' : "${params.outDir}/VariantCalling/MuTect1", + 'mutect2' : "${params.outDir}/VariantCalling/MuTect2", + 'strelka' : "${params.outDir}/VariantCalling/Strelka" ] } From 620701d8aa4e734dc6ee599df27232ca419e1f64 Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Thu, 22 Mar 2018 14:06:33 +0100 Subject: [PATCH 25/36] directoryMap now contains params.outDir + remove old variables replaced by parameters --- main.nf | 66 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/main.nf b/main.nf index 87d88de699..723161bd12 100644 --- a/main.nf +++ b/main.nf @@ -69,8 +69,6 @@ if (step == 'preprocessing') step = 'mapping' directoryMap = defineDirectoryMap() referenceMap = defineReferenceMap() stepList = 
defineStepList() -reports = !params.noReports -onlyQC = params.onlyQC if (!checkParameterExistence(step, stepList)) exit 1, 'Unknown step, see --help for more information' if (step.contains(',')) exit 1, 'You can choose only one step, see --help for more information' @@ -89,8 +87,8 @@ if (params.sample) tsvPath = params.sample if (!params.sample && !params.sampleDir) { tsvPaths = [ 'mapping': "${workflow.projectDir}/data/tsv/tiny.tsv", - 'realign': "${params.outDir}/${directoryMap.nonRealigned}/nonRealigned.tsv", - 'recalibrate': "${params.outDir}/${directoryMap.nonRecalibrated}/nonRecalibrated.tsv" + 'realign': "${directoryMap.nonRealigned}/nonRealigned.tsv", + 'recalibrate': "${directoryMap.nonRecalibrated}/nonRecalibrated.tsv" ] if (params.test || step != 'mapping') tsvPath = tsvPaths[step] } @@ -147,7 +145,7 @@ if (params.verbose) bamFiles = bamFiles.view { process RunFastQC { tag {idPatient + "-" + idRun} - publishDir "${params.outDir}/${directoryMap.fastQC}/${idRun}", mode: 'link' + publishDir "${directoryMap.fastQC}/${idRun}", mode: 'link' input: set idPatient, status, idSample, idRun, file(fastqFile1), file(fastqFile2) from fastqFilesforFastQC @@ -155,7 +153,7 @@ process RunFastQC { output: file "*_fastqc.{zip,html}" into fastQCreport - when: step == 'mapping' && reports + when: step == 'mapping' && !params.noReports script: """ @@ -178,7 +176,7 @@ process MapReads { output: set idPatient, status, idSample, idRun, file("${idRun}.bam") into mappedBam - when: step == 'mapping' && !onlyQC + when: step == 'mapping' && !params.onlyQC script: readGroup = "@RG\\tID:${idRun}\\tPU:${idRun}\\tSM:${idSample}\\tLB:${idSample}\\tPL:illumina" @@ -218,7 +216,7 @@ process MergeBams { output: set idPatient, status, idSample, file("${idSample}.bam") into mergedBam - when: step == 'mapping' && !onlyQC + when: step == 'mapping' && !params.onlyQC script: """ @@ -259,7 +257,7 @@ process MarkDuplicates { set idPatient, status, idSample, val("${idSample}_${status}.md.bam"), 
val("${idSample}_${status}.md.bai") into markDuplicatesTSV file ("${bam}.metrics") into markDuplicatesReport - when: step == 'mapping' && !onlyQC + when: step == 'mapping' && !params.onlyQC script: """ @@ -278,9 +276,9 @@ process MarkDuplicates { // Creating a TSV file to restart from this step markDuplicatesTSV.map { idPatient, status, idSample, bam, bai -> gender = patientGenders[idPatient] - "${idPatient}\t${gender}\t${status}\t${idSample}\t${params.outDir}/${directoryMap.nonRealigned}/${bam}\t${params.outDir}/${directoryMap.nonRealigned}/${bai}\n" + "${idPatient}\t${gender}\t${status}\t${idSample}\t${directoryMap.nonRealigned}/${bam}\t${directoryMap.nonRealigned}/${bai}\n" }.collectFile( - name: 'nonRealigned.tsv', sort: true, storeDir: "${params.outDir}/${directoryMap.nonRealigned}" + name: 'nonRealigned.tsv', sort: true, storeDir: "${directoryMap.nonRealigned}" ) // Create intervals for realignement using both tumor+normal as input @@ -337,7 +335,7 @@ process RealignerTargetCreator { output: set idPatient, file("${idPatient}.intervals") into intervals - when: ( step == 'mapping' || step == 'realign' ) && !onlyQC + when: ( step == 'mapping' || step == 'realign' ) && !params.onlyQC script: bams = bam.collect{"-I ${it}"}.join(' ') @@ -383,6 +381,8 @@ if (params.verbose) bamsAndIntervals = bamsAndIntervals.view { process IndelRealigner { tag {idPatient} + publishDir directoryMap.nonRecalibrated, mode: 'link' + input: set idPatient, file(bam), file(bai), file(intervals) from bamsAndIntervals set file(genomeFile), file(genomeIndex), file(genomeDict), file(knownIndels), file(knownIndelsIndex) from Channel.value([ @@ -395,7 +395,7 @@ process IndelRealigner { output: set idPatient, file("*.real.bam"), file("*.real.bai") into realignedBam mode flatten - when: ( step == 'mapping' || step == 'realign' ) && !onlyQC + when: ( step == 'mapping' || step == 'realign' ) && !params.onlyQC script: bams = bam.collect{"-I ${it}"}.join(' ') @@ -429,7 +429,7 @@ if (params.verbose) 
realignedBam = realignedBam.view { process CreateRecalibrationTable { tag {idPatient + "-" + idSample} - publishDir "${params.outDir}/${directoryMap.nonRecalibrated}", mode: 'link' + publishDir directoryMap.nonRecalibrated, mode: 'link', overwrite: false input: set idPatient, status, idSample, file(bam), file(bai) from realignedBam @@ -448,7 +448,7 @@ process CreateRecalibrationTable { set idPatient, status, idSample, file(bam), file(bai), file("${idSample}.recal.table") into recalibrationTable set idPatient, status, idSample, val("${idSample}_${status}.md.real.bam"), val("${idSample}_${status}.md.real.bai"), val("${idSample}.recal.table") into recalibrationTableTSV - when: ( step == 'mapping' || step == 'realign' ) && !onlyQC + when: ( step == 'mapping' || step == 'realign' ) && !params.onlyQC script: known = knownIndels.collect{ "-knownSites ${it}" }.join(' ') @@ -471,9 +471,9 @@ process CreateRecalibrationTable { // Create a TSV file to restart from this step recalibrationTableTSV.map { idPatient, status, idSample, bam, bai, recalTable -> gender = patientGenders[idPatient] - "${idPatient}\t${gender}\t${status}\t${idSample}\t${params.outDir}/${directoryMap.nonRecalibrated}/${bam}\t${params.outDir}/${directoryMap.nonRecalibrated}/${bai}\t${params.outDir}/${directoryMap.nonRecalibrated}/${recalTable}\n" + "${idPatient}\t${gender}\t${status}\t${idSample}\t${directoryMap.nonRecalibrated}/${bam}\t${directoryMap.nonRecalibrated}/${bai}\t${directoryMap.nonRecalibrated}/${recalTable}\n" }.collectFile( - name: 'nonRecalibrated.tsv', sort: true, storeDir: "${params.outDir}/${directoryMap.nonRecalibrated}" + name: 'nonRecalibrated.tsv', sort: true, storeDir: directoryMap.nonRecalibrated ) if (step == 'recalibrate') recalibrationTable = bamFiles @@ -496,7 +496,7 @@ recalTables = recalTables.map { [it[0]] + it[2..-1] } // remove status process RecalibrateBam { tag {idPatient + "-" + idSample} - publishDir "${params.outDir}/${directoryMap.recalibrated}", mode: 'link' + 
publishDir directoryMap.recalibrated, mode: 'link' input: set idPatient, status, idSample, file(bam), file(bai), file(recalibrationReport) from recalibrationTable @@ -513,7 +513,7 @@ process RecalibrateBam { // HaplotypeCaller can do BQSR on the fly, so do not create a // recalibrated BAM explicitly. - when: params.explicitBqsrNeeded && !onlyQC + when: params.explicitBqsrNeeded && !params.onlyQC script: """ @@ -530,9 +530,9 @@ process RecalibrateBam { // Creating a TSV file to restart from this step recalibratedBamTSV.map { idPatient, status, idSample, bam, bai -> gender = patientGenders[idPatient] - "${idPatient}\t${gender}\t${status}\t${idSample}\t${params.outDir}/${directoryMap.recalibrated}/${bam}\t${params.outDir}/${directoryMap.recalibrated}/${bai}\n" + "${idPatient}\t${gender}\t${status}\t${idSample}\t${directoryMap.recalibrated}/${bam}\t${directoryMap.recalibrated}/${bai}\n" }.collectFile( - name: 'recalibrated.tsv', sort: true, storeDir: "${params.outDir}/${directoryMap.recalibrated}" + name: 'recalibrated.tsv', sort: true, storeDir: directoryMap.recalibrated ) if (params.verbose) recalibratedBam = recalibratedBam.view { @@ -544,7 +544,7 @@ if (params.verbose) recalibratedBam = recalibratedBam.view { process RunSamtoolsStats { tag {idPatient + "-" + idSample} - publishDir "${params.outDir}/${directoryMap.samtoolsStats}", mode: 'link' + publishDir directoryMap.samtoolsStats, mode: 'link' input: set idPatient, status, idSample, file(bam), file(bai) from bamForSamToolsStats @@ -552,7 +552,7 @@ process RunSamtoolsStats { output: file ("${bam}.samtools.stats.out") into samtoolsStatsReport - when: reports + when: !params.noReports script: """ @@ -568,7 +568,7 @@ if (params.verbose) samtoolsStatsReport = samtoolsStatsReport.view { process RunBamQC { tag {idPatient + "-" + idSample} - publishDir "${params.outDir}/${directoryMap.bamQC}", mode: 'link' + publishDir directoryMap.bamQC, mode: 'link' input: set idPatient, status, idSample, file(bam), file(bai) from 
bamForBamQC @@ -576,7 +576,7 @@ process RunBamQC { output: file("${idSample}") into bamQCreport - when: reports && !params.noBAMQC + when: !params.noReports && !params.noBAMQC script: """ @@ -661,14 +661,14 @@ def checkExactlyOne(list) { def defineDirectoryMap() { return [ - 'nonRealigned' : 'Preprocessing/NonRealigned', - 'nonRecalibrated' : 'Preprocessing/NonRecalibrated', - 'recalibrated' : 'Preprocessing/Recalibrated', - 'bamQC' : 'Reports/bamQC', - 'bcftoolsStats' : 'Reports/BCFToolsStats', - 'fastQC' : 'Reports/FastQC', - 'markDuplicatesQC' : 'Reports/MarkDuplicates', - 'samtoolsStats' : 'Reports/SamToolsStats' + 'nonRealigned' : "${params.outDir}/Preprocessing/NonRealigned", + 'nonRecalibrated' : "${params.outDir}/Preprocessing/NonRecalibrated", + 'recalibrated' : "${params.outDir}/Preprocessing/Recalibrated", + 'bamQC' : "${params.outDir}/Reports/bamQC", + 'bcftoolsStats' : "${params.outDir}/Reports/BCFToolsStats", + 'fastQC' : "${params.outDir}/Reports/FastQC", + 'markDuplicatesQC' : "${params.outDir}/Reports/MarkDuplicates", + 'samtoolsStats' : "${params.outDir}/Reports/SamToolsStats" ] } From 0d3fe47795869a09540b2ef51206ab8e8f61ef58 Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Thu, 22 Mar 2018 14:07:04 +0100 Subject: [PATCH 26/36] directoryMap now contains params.outDir + remove old variables replaced by parameters --- runMultiQC.nf | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/runMultiQC.nf b/runMultiQC.nf index ccf49e204a..efa92e7526 100644 --- a/runMultiQC.nf +++ b/runMultiQC.nf @@ -56,9 +56,6 @@ if (!MyUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for mo if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! 
Use --project " directoryMap = defineDirectoryMap() - -reports = !params.noReports - /* ================================================================================ = P R O C E S S E S = @@ -68,14 +65,14 @@ reports = !params.noReports startMessage() process GenerateMultiQCconfig { - publishDir "${params.outDir}/${directoryMap.multiQC}", mode: 'link' + publishDir directoryMap.multiQC, mode: 'link' input: output: file("multiqc_config.yaml") into multiQCconfig - when: reports + when: !params.noReports script: """ @@ -99,23 +96,23 @@ process GenerateMultiQCconfig { """ } -if (params.verbose && reports) multiQCconfig = multiQCconfig.view { +if (params.verbose && !params.noReports) multiQCconfig = multiQCconfig.view { "MultiQC config:\n\ File : [${it.fileName}]" } reportsForMultiQC = Channel.empty() .mix( - Channel.fromPath("${params.outDir}/Reports/bamQC/*", type: 'dir'), - Channel.fromPath("${params.outDir}/Reports/BCFToolsStats/*"), - Channel.fromPath("${params.outDir}/Reports/FastQC/*/*"), - Channel.fromPath("${params.outDir}/Reports/MarkDuplicates/*"), - Channel.fromPath("${params.outDir}/Reports/SamToolsStats/*"), + Channel.fromPath("${directoryMap.bamQC}/*", type: 'dir'), + Channel.fromPath("${directoryMap.bcftoolsStats}/*"), + Channel.fromPath("${directoryMap.fastQC}/*/*"), + Channel.fromPath("${directoryMap.markDuplicatesQC}/*"), + Channel.fromPath("${directoryMap.samtoolsStats}/*"), multiQCconfig ).collect() process RunMultiQC { - publishDir "${params.outDir}/${directoryMap.multiQC}", mode: 'link' + publishDir directoryMap.multiQC, mode: 'link' input: file ('*') from reportsForMultiQC @@ -123,7 +120,7 @@ process RunMultiQC { output: set file("*multiqc_report.html"), file("*multiqc_data") into multiQCReport - when: reports + when: !params.noReports script: """ @@ -164,7 +161,12 @@ def checkUppmaxProject() { def defineDirectoryMap() { return [ - 'multiQC' : 'Reports/MultiQC' + 'bamQC' : "${params.outDir}/Reports/bamQC", + 'bcftoolsStats' : 
"${params.outDir}/Reports/BCFToolsStats", + 'fastQC' : "${params.outDir}/Reports/FastQC", + 'markDuplicatesQC' : "${params.outDir}/Reports/MarkDuplicates", + 'multiQC' : "${params.outDir}/Reports/MultiQC", + 'samtoolsStats' : "${params.outDir}/Reports/SamToolsStats" ] } From a084e0a47ffa30a85a652b1e2c9fc82bc5749276 Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Thu, 22 Mar 2018 14:07:45 +0100 Subject: [PATCH 27/36] directoryMap now contains params.outDir + remove old variables replaced by parameters --- somaticVC.nf | 72 ++++++++++++++++++++++++---------------------------- 1 file changed, 33 insertions(+), 39 deletions(-) diff --git a/somaticVC.nf b/somaticVC.nf index d3969ca644..53a180daed 100644 --- a/somaticVC.nf +++ b/somaticVC.nf @@ -72,10 +72,6 @@ tools = params.tools ? params.tools.split(',').collect{it.trim().toLowerCase()} directoryMap = defineDirectoryMap() referenceMap = defineReferenceMap() toolList = defineToolList() -nucleotidesPerSecond = 1000.0 // used to estimate variant calling runtime -gvcf = !params.noGVCF -reports = !params.noReports -onlyQC = params.onlyQC if (!checkReferenceMap(referenceMap)) exit 1, 'Missing Reference file(s), see --help for more information' if (!checkParameterList(tools,toolList)) exit 1, 'Unknown tool(s), see --help for more information' @@ -93,7 +89,7 @@ if (params.test && params.genome in ['GRCh37', 'GRCh38']) { tsvPath = '' if (params.sample) tsvPath = params.sample -else tsvPath = "${params.outDir}/${directoryMap.recalibrated}/recalibrated.tsv" +else tsvPath = "${directoryMap.recalibrated}/recalibrated.tsv" // Set up the bamFiles channel @@ -135,7 +131,7 @@ if (params.verbose) recalibratedBam = recalibratedBam.view { process RunSamtoolsStats { tag {idPatient + "-" + idSample} - publishDir "${params.outDir}/${directoryMap.samtoolsStats}", mode: 'link' + publishDir directoryMap.samtoolsStats, mode: 'link' input: set idPatient, status, idSample, file(bam), file(bai) from bamForSamToolsStats @@ -143,7 +139,7 @@ 
process RunSamtoolsStats { output: file ("${bam}.samtools.stats.out") into samtoolsStatsReport - when: reports + when: !params.noReports script: """ @@ -159,7 +155,7 @@ if (params.verbose) samtoolsStatsReport = samtoolsStatsReport.view { process RunBamQC { tag {idPatient + "-" + idSample} - publishDir "${params.outDir}/${directoryMap.bamQC}", mode: 'link' + publishDir directoryMap.bamQC, mode: 'link' input: set idPatient, status, idSample, file(bam), file(bai) from bamForBamQC @@ -167,7 +163,7 @@ process RunBamQC { output: file("${idSample}") into bamQCreport - when: reports && !params.noBAMQC + when: !params.noReports && !params.noBAMQC script: """ @@ -324,7 +320,7 @@ process RunMutect1 { output: set val("mutect1"), idPatient, idSampleNormal, idSampleTumor, file("${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf") into mutect1Output - when: 'mutect1' in tools && !onlyQC + when: 'mutect1' in tools && !params.onlyQC script: """ @@ -363,7 +359,7 @@ process RunMutect2 { output: set val("mutect2"), idPatient, idSampleNormal, idSampleTumor, file("${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf") into mutect2Output - when: 'mutect2' in tools && !onlyQC + when: 'mutect2' in tools && !params.onlyQC script: """ @@ -393,7 +389,7 @@ process RunFreeBayes { output: set val("freebayes"), idPatient, idSampleNormal, idSampleTumor, file("${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf") into freebayesOutput - when: 'freebayes' in tools && !onlyQC + when: 'freebayes' in tools && !params.onlyQC script: """ @@ -428,7 +424,7 @@ if (params.verbose) vcfsToMerge = vcfsToMerge.view { process ConcatVCF { tag {variantCaller + "_" + idSampleTumor + "_vs_" + idSampleNormal} - publishDir "${params.outDir}/${directoryMap."$variantCaller"}", mode: 'link' + publishDir "${directoryMap."$variantCaller"}", mode: 'link' input: set variantCaller, idPatient, idSampleNormal, idSampleTumor, file(vcFiles) from vcfsToMerge @@ -438,7 +434,7 @@ process 
ConcatVCF { set variantCaller, idPatient, idSampleNormal, idSampleTumor, file("*.vcf.gz") into vcfConcatenated file("*.vcf.gz.tbi") into vcfConcatenatedTbi - when: ('mutect1' in tools || 'mutect2' in tools || 'freebayes' in tools ) && !onlyQC + when: ('mutect1' in tools || 'mutect2' in tools || 'freebayes' in tools ) && !params.onlyQC script: outputFile = "${variantCaller}_${idSampleTumor}_vs_${idSampleNormal}.vcf" @@ -494,7 +490,7 @@ if (params.verbose) vcfConcatenated = vcfConcatenated.view { process RunStrelka { tag {idSampleTumor + "_vs_" + idSampleNormal} - publishDir "${params.outDir}/${directoryMap.strelka}", mode: 'link' + publishDir directoryMap.strelka, mode: 'link' input: set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor) from bamsForStrelka @@ -507,7 +503,7 @@ process RunStrelka { output: set val("strelka"), idPatient, idSampleNormal, idSampleTumor, file("*.vcf.gz"), file("*.vcf.gz.tbi") into strelkaOutput - when: 'strelka' in tools && !onlyQC + when: 'strelka' in tools && !params.onlyQC script: """ @@ -540,7 +536,7 @@ if (params.verbose) strelkaOutput = strelkaOutput.view { process RunManta { tag {idSampleTumor + "_vs_" + idSampleNormal} - publishDir "${params.outDir}/${directoryMap.manta}", mode: 'link' + publishDir directoryMap.manta, mode: 'link' input: set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor) from bamsForManta @@ -552,7 +548,7 @@ process RunManta { output: set val("manta"), idPatient, idSampleNormal, idSampleTumor, file("*.vcf.gz"), file("*.vcf.gz.tbi") into mantaOutput - when: 'manta' in tools && !onlyQC + when: 'manta' in tools && !params.onlyQC script: """ @@ -593,7 +589,7 @@ if (params.verbose) mantaOutput = mantaOutput.view { process RunSingleManta { tag {idSample + " - Tumor-Only"} - publishDir "${params.outDir}/${directoryMap.manta}", mode: 'link' + publishDir directoryMap.manta, mode: 'link' input: set idPatient, 
status, idSample, file(bam), file(bai) from bamsForSingleManta @@ -605,7 +601,7 @@ process RunSingleManta { output: set val("singlemanta"), idPatient, idSample, file("*.vcf.gz"), file("*.vcf.gz.tbi") into singleMantaOutput - when: 'manta' in tools && status == 1 && !onlyQC + when: 'manta' in tools && status == 1 && !params.onlyQC script: """ @@ -655,7 +651,7 @@ process RunAlleleCount { output: set idPatient, status, idSample, file("${idSample}.alleleCount") into alleleCountOutput - when: 'ascat' in tools && !onlyQC + when: 'ascat' in tools && !params.onlyQC script: """ @@ -686,7 +682,7 @@ alleleCountOutput = alleleCountOutput.map { process RunConvertAlleleCounts { tag {idSampleTumor + "_vs_" + idSampleNormal} - publishDir "${params.outDir}/${directoryMap.ascat}", mode: 'link' + publishDir directoryMap.ascat, mode: 'link' input: set idPatient, idSampleNormal, idSampleTumor, file(alleleCountNormal), file(alleleCountTumor) from alleleCountOutput @@ -694,7 +690,7 @@ process RunConvertAlleleCounts { output: set idPatient, idSampleNormal, idSampleTumor, file("${idSampleNormal}.BAF"), file("${idSampleNormal}.LogR"), file("${idSampleTumor}.BAF"), file("${idSampleTumor}.LogR") into convertAlleleCountsOutput - when: 'ascat' in tools && !onlyQC + when: 'ascat' in tools && !params.onlyQC script: gender = patientGenders[idPatient] @@ -708,7 +704,7 @@ process RunConvertAlleleCounts { process RunAscat { tag {idSampleTumor + "_vs_" + idSampleNormal} - publishDir "${params.outDir}/${directoryMap.ascat}", mode: 'link' + publishDir directoryMap.ascat, mode: 'link' input: set idPatient, idSampleNormal, idSampleTumor, file(bafNormal), file(logrNormal), file(bafTumor), file(logrTumor) from convertAlleleCountsOutput @@ -716,7 +712,7 @@ process RunAscat { output: set val("ascat"), idPatient, idSampleNormal, idSampleTumor, file("${idSampleTumor}.*.{png,txt}") into ascatOutput - when: 'ascat' in tools && !onlyQC + when: 'ascat' in tools && !params.onlyQC script: """ @@ -760,7 +756,7 @@ 
vcfForBCFtools = Channel.empty().mix( process RunBcftoolsStats { tag {vcf} - publishDir "${params.outDir}/${directoryMap.bcftoolsStats}", mode: 'link' + publishDir directoryMap.bcftoolsStats, mode: 'link' input: set variantCaller, file(vcf) from vcfForBCFtools @@ -768,7 +764,7 @@ process RunBcftoolsStats { output: file ("${vcf.baseName}.bcf.tools.stats.out") into bcfReport - when: reports + when: !params.noReports script: """ @@ -851,18 +847,16 @@ def checkUppmaxProject() { def defineDirectoryMap() { return [ - 'recalibrated' : 'Preprocessing/Recalibrated', - 'bamQC' : 'Reports/bamQC', - 'bcftoolsStats' : 'Reports/BCFToolsStats', - 'samtoolsStats' : 'Reports/SamToolsStats', - 'ascat' : 'VariantCalling/Ascat', - 'freebayes' : 'VariantCalling/FreeBayes', - 'haplotypecaller' : 'VariantCalling/HaplotypeCaller', - 'gvcf-hc' : 'VariantCalling/HaplotypeCallerGVCF', - 'manta' : 'VariantCalling/Manta', - 'mutect1' : 'VariantCalling/MuTect1', - 'mutect2' : 'VariantCalling/MuTect2', - 'strelka' : 'VariantCalling/Strelka' + 'recalibrated' : "${params.outDir}/Preprocessing/Recalibrated", + 'bamQC' : "${params.outDir}/Reports/bamQC", + 'bcftoolsStats' : "${params.outDir}/Reports/BCFToolsStats", + 'samtoolsStats' : "${params.outDir}/Reports/SamToolsStats", + 'ascat' : "${params.outDir}/VariantCalling/Ascat", + 'freebayes' : "${params.outDir}/VariantCalling/FreeBayes", + 'manta' : "${params.outDir}/VariantCalling/Manta", + 'mutect1' : "${params.outDir}/VariantCalling/MuTect1", + 'mutect2' : "${params.outDir}/VariantCalling/MuTect2", + 'strelka' : "${params.outDir}/VariantCalling/Strelka" ] } From f8dbb75288a4be65fc9df722a5b617dfd0d4b632 Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Thu, 22 Mar 2018 15:43:10 +0100 Subject: [PATCH 28/36] remove unused profile --- nextflow.config | 8 -------- 1 file changed, 8 deletions(-) diff --git a/nextflow.config b/nextflow.config index f4b927c441..14d0c2c474 100644 --- a/nextflow.config +++ b/nextflow.config @@ -36,14 +36,6 @@ profiles { 
includeConfig 'configuration/uppmax-slurm.config' includeConfig 'configuration/singularity-path.config' } - // profile for UPPMAX clusters - // Runs the pipeline locally on a single 16-core node - // Singularity images will be pulled automatically - download { - includeConfig 'configuration/base.config' - includeConfig 'configuration/uppmax-localhost.config' - includeConfig 'configuration/singularity.config' - } // slurm profile for UPPMAX clusters // Runs the pipeline using the job scheduler // Singularity images will be pulled automatically From 5add69d2e6879d88d4e5b044dae4a816d73b81db Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Thu, 22 Mar 2018 15:43:29 +0100 Subject: [PATCH 29/36] use NF scratch support --- configuration/uppmax-slurm.config | 1 + 1 file changed, 1 insertion(+) diff --git a/configuration/uppmax-slurm.config b/configuration/uppmax-slurm.config index 1ac1af0c31..d80d400abe 100644 --- a/configuration/uppmax-slurm.config +++ b/configuration/uppmax-slurm.config @@ -20,6 +20,7 @@ process { executor = 'slurm' memory = 110.GB queue = 'node' + scratch = true time = 48.h errorStrategy = {task.exitStatus == 143 ? 'retry' : 'terminate'} From 0fa995605071ece94c6f7decee472652ba1220bf Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Fri, 23 Mar 2018 09:57:14 +0100 Subject: [PATCH 30/36] remove unnecessary functions --- germlineVC.nf | 7 ------- main.nf | 5 ----- runMultiQC.nf | 9 --------- somaticVC.nf | 6 ------ 4 files changed, 27 deletions(-) diff --git a/germlineVC.nf b/germlineVC.nf index af1d3775d4..bf6b367161 100644 --- a/germlineVC.nf +++ b/germlineVC.nf @@ -648,12 +648,6 @@ def checkUppmaxProject() { return !(workflow.profile == 'slurm' && !params.project) } -def checkExactlyOne(list) { - final n = 0 - list.each{n += it ? 
1 : 0} - return n == 1 -} - def defineDirectoryMap() { return [ 'recalibrated' : "${params.outDir}/Preprocessing/Recalibrated", @@ -738,7 +732,6 @@ def extractGenders(channel) { } def generateIntervalsForVC(bams, intervals) { - def (bamsNew, bamsForVC) = bams.into(2) def (intervalsNew, vcIntervals) = intervals.into(2) def bamsForVCNew = bamsForVC.combine(vcIntervals) diff --git a/main.nf b/main.nf index 723161bd12..25796e2420 100644 --- a/main.nf +++ b/main.nf @@ -618,11 +618,6 @@ def checkParameterExistence(it, list) { return true } -def checkParameterList(list, realList) { - // Loop through all parameters to check their existence and spelling - return list.every{ checkParameterExistence(it, realList) } -} - def checkParamReturnFile(item) { params."${item}" = params.genomes[params.genome]."${item}" return file(params."${item}") diff --git a/runMultiQC.nf b/runMultiQC.nf index efa92e7526..63e8a1c7fa 100644 --- a/runMultiQC.nf +++ b/runMultiQC.nf @@ -145,15 +145,6 @@ def sarekMessage() { log.info "Sarek ~ ${params.version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "") } -def checkParameterExistence(it, list) { - // Check parameter existence - if (!list.contains(it)) { - println("Unknown parameter: ${it}") - return false - } - return true -} - def checkUppmaxProject() { // check if UPPMAX project number is specified return !(workflow.profile == 'slurm' && !params.project) diff --git a/somaticVC.nf b/somaticVC.nf index 53a180daed..b28e20d992 100644 --- a/somaticVC.nf +++ b/somaticVC.nf @@ -834,12 +834,6 @@ def checkRefExistence(referenceFile, fileToCheck) { return true } -def checkExactlyOne(list) { - final n = 0 - list.each{n += it ? 
1 : 0} - return n == 1 -} - def checkUppmaxProject() { // check if UPPMAX project number is specified return !(workflow.profile == 'slurm' && !params.project) From c1024dd1d18157a916ce9a62bec50f03746518c7 Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Fri, 23 Mar 2018 13:59:56 +0100 Subject: [PATCH 31/36] update documentation --- doc/CONFIG.md | 35 +++++++++----------- doc/INSTALL.md | 4 +-- doc/INSTALL_BIANCA.md | 6 ++-- doc/INSTALL_RACKHAM.md | 4 +-- doc/PROCESS.md | 4 +-- doc/REFERENCES.md | 19 ++++------- doc/TESTS.md | 23 +++++++------ doc/TSV.md | 7 ++-- doc/USAGE.md | 45 +++++++++++++++----------- doc/USE_CASES.md | 73 ++++++++++++++++++++++++++---------------- 10 files changed, 121 insertions(+), 99 deletions(-) diff --git a/doc/CONFIG.md b/doc/CONFIG.md index 5e570e1694..368324d1c4 100644 --- a/doc/CONFIG.md +++ b/doc/CONFIG.md @@ -10,46 +10,44 @@ We provides several configuration files and profiles for Sarek. The standard one Every configuration file can be modified for your own use. If you want you can specify the use of a config file using `-c ` -### [`containers.config`](../configuration/containers.config) +### [`containers.config`](https://github.com/SciLifeLab/Sarek/blob/master/configuration/containers.config) -Contain images for all process. +Define Containers for all process. Images will be pulled automatically. Use in your own profile if needed. -### [`docker.config`](../configuration/docker.config) +### [`docker.config`](https://github.com/SciLifeLab/Sarek/blob/master/configuration/docker.config) -Contain Docker images for all process. +Define Docker Containers for all process. Images will be pulled automatically. Use in your own profile if needed. -### [`genomes.config`](../configuration/genomes.config) +### [`genomes.config`](https://github.com/SciLifeLab/Sarek/blob/master/configuration/genomes.config) Contain path to all references. Modify it if you want to change genome version, or the path to your references files. 
-### [`singularity-path.config`](../configuration/singularity-path.config) +### [`singularity-path.config`](https://github.com/SciLifeLab/Sarek/blob/master/configuration/singularity-path.config) -To be used when downloading singularity containers, like on a secure UPPMAX cluster. +Define paths to Singularity Containers for all processes. +To be used when downloading Singularity Containers, like on a secure UPPMAX cluster. Images will not be pulled automatically. You need to set them up before. -### [`singularity.config`](../configuration/singularity.config) +### [`singularity.config`](https://github.com/SciLifeLab/Sarek/blob/master/configuration/singularity.config) -Contain Singularity images for all process. +Define Singularity Containers for all processes. Images will be pulled automatically. Use in your own profile if needed. -### [`travis.config`](../configuration/travis.config) +### [`travis.config`](https://github.com/SciLifeLab/Sarek/blob/master/configuration/travis.config) To be used for Travis (2 cpus) or on small computer for testing purpose -### [`uppmax-localhost.config`](../configuration/uppmax-localhost.config) - -To be used on a typical localhost on a UPPMAX cluster (16 cpus) - -### [`uppmax-slurm.config`](../configuration/uppmax-slurm.config) +### [`uppmax-slurm.config`](https://github.com/SciLifeLab/Sarek/blob/master/configuration/uppmax-slurm.config) Slurm configuration for a UPPMAX cluster +Will run the workflow on `/scratch` using the Nextflow [`scratch`](https://www.nextflow.io/docs/latest/process.html#scratch) directive ## profiles @@ -65,19 +63,16 @@ Docker images will be pulled automatically. This is the default profile for use on a localhost on a UPPMAX cluster with Singularity. Singularity images need to be set up. -### `download` - -This is the default profile for use on a localhost on a UPPMAX cluster with Singularity. -Singularity images will be pulled automatically. 
- ### `slurm` This is another profile for use on a UPPMAX cluster using the job scheduler slurm with Singularity. +Will run the workflow on `/scratch`. Singularity images need to be set up. ### `slurmDownload` This is another profile for use on a UPPMAX cluster using the job scheduler slurm with Singularity. +Will run the workflow on `/scratch`. Singularity images will be pulled automatically. ### `singularity` diff --git a/doc/INSTALL.md b/doc/INSTALL.md index ed0954438b..06fca77f50 100644 --- a/doc/INSTALL.md +++ b/doc/INSTALL.md @@ -55,10 +55,10 @@ The following tutorial explain how to run Sarek on a small dataset using a small > nextflow run SciLifeLab/Sarek/buildReferences.nf --download --genome smallGRCh37 -profile docker # Test Sarek on a test tiny set using Singularity -> nextflow run SciLifeLab/Sarek --test --genome smallGRCh37 --noReports -profile singularity +> nextflow run SciLifeLab/Sarek/main.nf --test --genome smallGRCh37 --noReports -profile singularity # Or test Sarek on a test tiny set using Docker -> nextflow run SciLifeLab/Sarek --test --genome smallGRCh37 --noReports -profile docker +> nextflow run SciLifeLab/Sarek/main.nf --test --genome smallGRCh37 --noReports -profile docker ``` ## Update diff --git a/doc/INSTALL_BIANCA.md b/doc/INSTALL_BIANCA.md index 5e67c384e1..c69e44f953 100644 --- a/doc/INSTALL_BIANCA.md +++ b/doc/INSTALL_BIANCA.md @@ -9,7 +9,7 @@ Sarek use Singularity containers to package all the different tools. As `bianca` is secure, no direct download is available, so Sarek and the Singularity containers will have to be installed and updated manually. -You can either download Sarek and the containers on your computer or on `rackham`, make an archive, and send it to `bianca` using `FileZilla` or `sftp` given your preferences. 
+You can either download Sarek and the containers on your computer (you will need Nextflow and Singularity for that) or on `rackham`, make an archive, and send it to `bianca` using `FileZilla` or `sftp` given your preferences. All Reference files are already stored in `bianca`. @@ -38,7 +38,7 @@ Wrote Sarek-[snapID].tar.gz # To get the containers # This script will need Singularity and Nextflow installed -> ./scripts/do_all.sh --pull +> ./scripts/do_all.sh --pull --tag # Send the containers to bianca using the same method # They will be in the containers/ directory as .img files @@ -81,7 +81,7 @@ The principle is to have every member of your project to be able to use the same And then Sarek can be used with: ```bash -> nextflow run ~/Sarek/main.nf ... +> nextflow run ~/Sarek/main.nf -profile slurm --project [PROJECT] ... ``` ## Update Sarek diff --git a/doc/INSTALL_RACKHAM.md b/doc/INSTALL_RACKHAM.md index 9d73715db6..360fe8e6fe 100644 --- a/doc/INSTALL_RACKHAM.md +++ b/doc/INSTALL_RACKHAM.md @@ -66,14 +66,14 @@ For more information, follow the [reference files documentation](REFERENCES.md). ## Use Sarek with slurm -To use Sarek on rackham you will need to use the `slurm` profile. +To use Sarek on rackham you will need to use the `slurmDownload` profile. ```bash # Connect to rackham > ssh -AX [USER]@rackham.uppmax.uu.se # Run the workflow directly on the login node -> nextflow run SciLifeLab/Sarek --sample [FILE.TSV] --genome [GENOME] --project [PROJECT] -profile slurm-download +> nextflow run SciLifeLab/Sarek/main.nf --project [PROJECT] -profile slurmDownload ``` -------------------------------------------------------------------------------- diff --git a/doc/PROCESS.md b/doc/PROCESS.md index e46b9d6808..2a20c0562d 100644 --- a/doc/PROCESS.md +++ b/doc/PROCESS.md @@ -1,6 +1,7 @@ # Workflow processes -Several processes are run within the workflow. We divide them for the moment into 5 main steps: +Several processes are run within the workflow. 
+We divide them for the moment into 5 main steps: ## Preprocessing: @@ -40,7 +41,6 @@ Several processes are run within the workflow. We divide them for the moment int - RunFastQC - Run FastQC for QC on fastq files - RunSamtoolsStats - Run Samtools stats on recalibrated BAM files - RunBamQC - Run qualimap BamQC on recalibrated BAM files -- RunBcftoolsStats - Run BCFTools stats on vcf before annotation - RunBcftoolsStats - Run BCFTools stats on vcf files ## Annotation: diff --git a/doc/REFERENCES.md b/doc/REFERENCES.md index 007d30dd48..9ebe6281e6 100644 --- a/doc/REFERENCES.md +++ b/doc/REFERENCES.md @@ -1,6 +1,6 @@ # Genomes and reference files -CAW currently uses GRCh38 by default. The settings are in `genomes.config`, they can be tailored to your needs. The [`buildReferences.nf`](#buildreferencesnf) script can be use to build the indexes based on the reference files. +Sarek currently uses GRCh38 by default. The settings are in `genomes.config`, they can be tailored to your needs. The [`buildReferences.nf`](#buildreferencesnf) script can be used to build the indexes based on the reference files. ## GRCh37 @@ -21,18 +21,13 @@ The following files need to be downloaded: From our repo, get the [`intervals` list file](https://raw.githubusercontent.com/SciLifeLab/Sarek/master/repeats/wgs_calling_regions.grch37.list). More information about this file in the [intervals documentation](INTERVALS.md) -The rest of the references files are stored in in [export.uppmax.uu.se](https://export.uppmax.uu.se/b2015110/caw-references/b37/) and also on the repository [CAW-References](https://github.com/MaxUlysse/CAW-References) using [GIT-LFS](https://git-lfs.github.com/): - -- '1000G\_phase3\_20130502\_SNP\_maf0.3.loci' -- 'b37\_cosmic\_v74.noCHR.sort.4.1.vcf' - You can create your own cosmic reference for any human reference as specified below. ### COSMIC files -To annotate with COSMIC variants during MuTect1/2 Variant Calling you need to create a compatible VCF file. 
-Download the coding and non-coding VCF files from [COSMIC](http://cancer.sanger.ac.uk/cosmic/download) and -process them with the [Create\_Cosmic.sh](https://github.com/SciLifeLab/Sarek/tree/master/scripts/Create_Cosmic.sh) +To annotate with COSMIC variants during MuTect1/2 Variant Calling you need to create a compatible VCF file. +Download the coding and non-coding VCF files from [COSMIC](http://cancer.sanger.ac.uk/cosmic/download) and +process them with the [Create\_Cosmic.sh](https://github.com/SciLifeLab/Sarek/tree/master/scripts/Create_Cosmic.sh) script. The script requires a fasta index `.fai`, of the reference file you are using. Example: @@ -54,7 +49,7 @@ igvtools index Use `--genome GRCh38` to map against GRCh38. Before doing so and if you are not on UPPMAX, you need to adjust the settings in `genomes.config` to your needs. -To get the needed files, download the GATK bundle for GRCh38 from [ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg38/](mailto:ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg38/). +To get the needed files, download the GATK bundle for GRCh38 from [ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg38/](ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg38/). The MD5SUM of `Homo_sapiens_assembly38.fasta` included in that file is 7ff134953dcca8c8997453bbb80b6b5e. @@ -93,10 +88,10 @@ nextflow run buildReferences.nf --refDir --genome ### `--genome` -Same parameter used for `main.nf` +Same parameter used for other scripts. 
- GRCh37 -- GRCh38 +- GRCh38 (not yet available) - smallGRCh37 -------------------------------------------------------------------------------- diff --git a/doc/TESTS.md b/doc/TESTS.md index fa8aaddb8a..4d030b603b 100644 --- a/doc/TESTS.md +++ b/doc/TESTS.md @@ -13,12 +13,15 @@ Four optional arguments are supported: - `-s` || `--sample`: Use to change the test sample (default=`data/tsv/tiny.tsv`) - `-t` || `--test`: - - `MAPPING`: will try preprocessing - - `REALIGN`: will try realignment - - `RECALIBRATE`: will try recalibration - - `ANNOTATESNPEFF`: will try variant calling and annotation using snpEff - - `ANNOTATEVEP`: will try variant calling and annotation using VEP - - `ALL`: will try all the previous tests (default) + - `DIR`: test `mapping` with an input directory, all other tests use a TSV file + - `STEP`: test `mapping`, `realign` and `recalibrate` + - `GERMLINE`: test `mapping` and Variant Calling with `HaplotypeCaller` + - `TOOLS`: test `mapping` and Variant Calling with `FreeBayes`, `HaplotypeCaller`, `MuTect1`, `MuTect2`, `Strelka` + - `MANTA`: test `mapping` and Variant Calling with `Manta` + - `ANNOTATESNPEFF`: test annotation using `snpEFF` + - `ANNOTATEVEP`: test annotation using `VEP` + - `BUILDCONTAINERS`: test building all containers except `snpeffgrch37`, `snpeffgrch38`, `vepgrch37` and `vepgrch38` + - `ALL`: test all the previous tests (default) ## Usage @@ -27,10 +30,10 @@ Four optional arguments are supported: ./scripts/test.sh # Will try all tests using Docker ./scripts/test.sh -p docker -# Will try MAPPING tests using Singularity -./scripts/test.sh -t MAPPING -# Will try MAPPING tests using Singularity with GRCh37 genome -./scripts/test.sh -t MAPPING -g GRCh37 +# Will try STEP tests using Singularity +./scripts/test.sh -t STEP +# Will try STEP tests using Singularity with GRCh37 genome +./scripts/test.sh -t STEP -g GRCh37 # Will try all tests using Singularity on manta test data ./scripts/test.sh -s data/tsv/tiny-manta.tsv ``` 
diff --git a/doc/TSV.md b/doc/TSV.md index 1d3f68c3c0..1140706248 100644 --- a/doc/TSV.md +++ b/doc/TSV.md @@ -12,6 +12,8 @@ It's a Tab Separated Value file, based on: `subject gender status sample lane fa - `bam` is the bam file - `bai` is the index +All examples are given for a normal/tumor pair. If no tumors are listed in the TSV file, then the workflow will proceed as if it was a single normal sample instead of a normal/tumor pair. + # Example TSV file for a normal/tumor pair with FASTQ files In this sample for the normal case there are 3 read groups, and 2 for the tumor. It is recommended to add the absolute path of the paired FASTQ files, but relative path should work also. Note, the delimiter is the tab (\t) character: @@ -36,7 +38,8 @@ G15511 XX 1 D0ENMT pathToFiles/G15511.D0ENMT.md.real.bam pathToFiles All the files will be created in the Preprocessing/NonRealigned/ directory, and by default a corresponding TSV file will also be deposited there. Generally, getting MuTect1 and Strelka calls on the preprocessed files should be done by: ```bash -nextflow run SciLifeLab/Sarek --sample Preprocessing/NonRealigned/mysample.tsv --step realign --tools Mutect2,Strelka +nextflow run SciLifeLab/Sarek/main.nf --sample Preprocessing/NonRealigned/mysample.tsv --step realign +nextflow run SciLifeLab/Sarek/somaticVC.nf --sample Preprocessing/Recalibrated/mysample.tsv --tools Mutect2,Strelka ``` # Example TSV file for a normal/tumor pair with recalibrated BAM files @@ -51,7 +54,7 @@ G15511 XX 1 D0ENMT pathToFiles/G15511.D0ENMT.md.real.bam pathToFi All the files will be in he Preprocessing/Recalibrated/ directory, and by default a corresponding TSV file will also be deposited there. 
Generally, getting MuTect1 and Strelka calls on the recalibrated files should be done by: ```bash -nextflow run SciLifeLab/Sarek --sample Preprocessing/Recalibrated/mysample.tsv --step variantcalling --tool Mutect2,Strelka +nextflow run SciLifeLab/Sarek/somaticVC.nf --sample Preprocessing/Recalibrated/mysample.tsv --tools Mutect2,Strelka ``` -------------------------------------------------------------------------------- diff --git a/doc/USAGE.md b/doc/USAGE.md index 5fd9120ef7..a58df2e8ff 100644 --- a/doc/USAGE.md +++ b/doc/USAGE.md @@ -1,9 +1,16 @@ # Usage -I would recommand to run Nextflow within a [screen](https://www.gnu.org/software/screen/) or [tmux](https://tmux.github.io/) session. It is recommanded to run only one instance of Sarek for one patient in the same directory. The typical reduced command line is: +I would recommend running Nextflow within a [screen](https://www.gnu.org/software/screen/) or [tmux](https://tmux.github.io/) session. +It is recommended to run only one instance of Sarek for one patient in the same directory. +Sarek uses several scripts, a wrapper is currently being made to simplify the command lines. +Currently the typical reduced command lines are: ```bash -nextflow run SciLifeLab/Sarek --sample --step --tools +nextflow run SciLifeLab/Sarek/main.nf --sample --step +nextflow run SciLifeLab/Sarek/germlineVC.nf --sample --tools +nextflow run SciLifeLab/Sarek/somaticVC.nf --sample --tools +nextflow run SciLifeLab/Sarek/annotate.nf --tools (--annotateTools ||--annotateVCF ) +nextflow run SciLifeLab/Sarek/runMultiQC.nf ``` All parameters, options and variables can be specified with configuration files and profile (cf [configuration documentation](#profiles)). @@ -59,35 +66,35 @@ Test run Sarek on a smaller dataset, that way you don't have to specify `--sampl Choose which tools will be used in the workflow. Different tools to be separated by commas. 
Possible values are: -- ascat (use ascat for CNV) -- haplotypecaller (use HaplotypeCaller for VC) -- manta (use Manta for SV) -- mutect1 (use MuTect1 for VC) -- mutect2 (use MuTect2 for VC) -- strelka (use Strelka for VC) -- snpeff (use snpEff for Annotation) -- vep (use VEP for Annotation) +- haplotypecaller (use `HaplotypeCaller` for VC) (germlineVC) +- manta (use `Manta` for SV) (germlineVC,somaticVC) +- strelka (use `Strelka` for VC) (germlineVC,somaticVC) +- ascat (use `ASCAT` for CNV) (somaticVC) +- mutect1 (use `MuTect1` for VC) (somaticVC) +- mutect2 (use `MuTect2` for VC) (somaticVC) +- snpeff (use `snpEff` for Annotation) (annotate) +- vep (use `VEP` for Annotation) (annotate) -`--tools` option is case insensitive to avoid easy introduction of errors when choosing tools. So you can write `--tools mutect2,snpEff` or `--tools MuTect2,snpeff` without worrying about case sensitivity. +`--tools` option is case insensitive to avoid easy introduction of errors when choosing tools. So you can write `--tools mutect2,ascat` or `--tools MuTect2,ASCAT` without worrying about case sensitivity. ### --annotateTools `tool1[,tool2,tool3...]` Choose which tools to annotate. Different tools to be separated by commas. Possible values are: -- haplotypecaller (Annotate HaplotypeCaller output) -- manta (Annotate Manta output) -- mutect1 (Annotate MuTect1 output) -- mutect2 (Annotate MuTect2 output) -- strelka (Annotate Strelka output) +- haplotypecaller (Annotate `HaplotypeCaller` output) +- manta (Annotate `Manta` output) +- mutect1 (Annotate `MuTect1` output) +- mutect2 (Annotate `MuTect2` output) +- strelka (Annotate `Strelka` output) ### --annotateVCF `file1[,file2,file3...]` -Choose which vcf to annotate. Different vcf to be separated by commas. +Choose vcf to annotate. Different vcfs to be separated by commas. ### --verbose Display more information about files being processed. -### --version +### --more Display version number and information. 
@@ -170,7 +177,7 @@ nextflow pull SciLifeLab/Sarek If there is a feature or bugfix you want to use in a resumed or re-analyzed run, you have to update the workflow to the latest version. By default it is not updated automatically, so use something like: ```bash -nextflow run -latest SciLifeLab/Sarek --sample mysample.tsv -resume +nextflow run -latest SciLifeLab/Sarek/main.nf ... -resume ``` -------------------------------------------------------------------------------- diff --git a/doc/USE_CASES.md b/doc/USE_CASES.md index 271bd7e5eb..8dabca0fbc 100644 --- a/doc/USE_CASES.md +++ b/doc/USE_CASES.md @@ -3,21 +3,27 @@ The workflow has three pre-processing options: `mapping`, `realign` and `recalibrate`. Using the `mapping` directive one will have a pair of mapped, deduplicated and recalibrated BAM files in the `Preprocessing/Recalibrated/` directory. Furthermore, during this process a deduplicated BAM file is created in the `Preprocessing/NonRealigned/` directory. This is the usual option you have to give when you are starting from raw FASTQ data: ```bash -nextflow run SciLifeLab/Sarek --sample mysample.tsv +nextflow run SciLifeLab/Sarek/main.nf --sample mysample.tsv +nextflow run SciLifeLab/Sarek/germlineVC.nf --tools +nextflow run SciLifeLab/Sarek/somaticVC.nf --tools # For somatic only +nextflow run SciLifeLab/Sarek/annotate.nf --tool --annotateVCF myfile.vcf # For somatic only +nextflow run SciLifeLab/Sarek/runMultiQC.nf ``` `mapping` will start by default, you do not have to give any additional parameters, only the TSV file describing the sample (see below). In the [genomes.config](https://raw.githubusercontent.com/SciLifeLab/Sarek/master/configuration/genomes.config) configuration file we are defining the intervals file as well, this is used to define regions for variant call and realignment (in a scatter and gather fashion when possible). 
The intervals are chromosomes cut at their centromeres (so each chromosome arm processed separately) also additional unassigned contigs. We are ignoring the hs37d5 contig that contains concatenated decoy sequences. -During the execution of the workflow a `trace.txt`, a `timeline.html` and a `report.html` files are generated automatically. These files contain statistics about resources used and processes finished. If you start a new flow or restart/resume a sample, the previous version will be renamed as `trace.txt.1`, `timeline.html.1` and `report.html.1` respectively. Also, older version are renamed with incremented numbers. +During the execution of the workflow a `Sarek-trace.txt`, a `Sarek-timeline.html` and a `Sarek-report.html` file are generated automatically. These files contain statistics about resources used and processes finished. If you start a new workflow or restart/resume a sample, the previous version will be renamed as `Sarek-trace.txt.1`, `Sarek-timeline.html.1` and `Sarek-report.html.1` respectively. Also, older versions are renamed with incremented numbers. ## Starting from raw FASTQ - pair of FASTQ files The workflow should be started in this case with the smallest set of options as written above: ```bash -nextflow run SciLifeLab/Sarek --sample mysample.tsv +nextflow run SciLifeLab/Sarek/main.nf --sample mysample.tsv +nextflow run SciLifeLab/Sarek/germlineVC.nf --tools +nextflow run SciLifeLab/Sarek/runMultiQC.nf ``` The TSV file should have at least one tab-separated lines: @@ -35,6 +41,23 @@ The columns are: 5. first set of reads 6. 
second set of reads +## Starting from raw FASTQ on a normal sample only (with `--sampleDir`) + +The `--sampleDir` option can be used to point Sarek to a directory with FASTQ files: +```bash +nextflow run SciLifeLab/Sarek/main.nf --sampleDir path/to/FASTQ/files +nextflow run SciLifeLab/Sarek/germlineVC.nf --tools +nextflow run SciLifeLab/Sarek/runMultiQC.nf +``` +The given directory is searched recursively for FASTQ files that are named `*_R1_*.fastq.gz`, and a matching pair with the same name except `_R2_` instead of `_R1_` is expected to exist alongside. All of the found FASTQ files are considered to belong to the sample. Each FASTQ file pair gets its own read group (`@RG`) in the resulting BAM file. + +### Metadata when using `--sampleDir` + +When using `--sampleDir`, the metadata about the sample that are written to the BAM header in the `@RG` tag are determined in the following way. + +- The sample name (`SM`) is derived from the last component of the path given to `--sampleDir`. That is, you should make sure that that directory has a meaningful name! For example, with `--sampleDir=/my/fastqs/sample123`, the sample name will be `sample123`. +- The read group id is set to *flowcell.samplename.lane*. The flowcell id and lane number are auto-detected from the name of the first read in the FASTQ file. + ## Starting from raw FASTQ - having pair of FASTQ files for tumor/normal samples (one lane for each sample) The workflow command line is just the same as before, but the TSV contains extra lines. You can see the second column is used to distinguish normal and tumor samples. You can add as many relapse samples as many you have, providing their name in the third column is different. Each will be compared to the normal one-by-one. Obviously, if you do not have relapse samples, you can leave out this last line. 
@@ -73,7 +96,9 @@ SUBJECT_ID XX 1 SAMPLEIDR 9 /samples/relapse9_1.fastq.gz /sample NGI Production in the previous years delivered many preprocessed samples; these BAM files are not recalibrated. To have BAMs suitable for variant calling, realignement of pairs is necessary: ```bash -nextflow run SciLifeLab/Sarek --sample mysample.tsv --step realign +nextflow run SciLifeLab/Sarek/main.nf --sample mysample.tsv --step realign +nextflow run SciLifeLab/Sarek/germlineVC.nf --tools +nextflow run SciLifeLab/Sarek/runMultiQC.nf ``` And the corresponding TSV file should be like: @@ -89,7 +114,12 @@ At the end of this step you should have recalibrated BAM files in the `Preproces NGI Production in the previous years delivered many preprocessed samples; these BAM files are not recalibrated. To have BAMs suitable for variant calling, realignement of pairs is necessary: ```bash -nextflow run SciLifeLab/Sarek --sample mysample.tsv --step realign +nextflow run SciLifeLab/Sarek/main.nf --sample mysample.tsv --step realign +nextflow run SciLifeLab/Sarek/germlineVC.nf --tools +nextflow run SciLifeLab/Sarek/somaticVC.nf --tools +nextflow run SciLifeLab/Sarek/annotate.nf --tool --annotateVCF myfile.vcf +nextflow run SciLifeLab/Sarek/runMultiQC.nf + ``` And the corresponding TSV file should be like (obviously, if you do not have relapse samples, you can leave out this last line): @@ -107,7 +137,11 @@ At the end of this step you should have recalibrated BAM files in the `Preproces If the BAM files were realigned together, you can start from recalibration: ```bash -nextflow run SciLifeLab/Sarek --sample mysample.tsv --step recalibrate +nextflow run SciLifeLab/Sarek/main.nf --sample mysample.tsv --step recalibrate +nextflow run SciLifeLab/Sarek/germlineVC.nf --tools +nextflow run SciLifeLab/Sarek/somaticVC.nf --tools +nextflow run SciLifeLab/Sarek/annotate.nf --tool --annotateVCF myfile.vcf +nextflow run SciLifeLab/Sarek/runMultiQC.nf ``` And the corresponding TSV file should be like 
(obviously, if you do not have relapse samples, you can leave out this last line): @@ -123,7 +157,8 @@ SUBJECT_ID XX 1 SAMPLEIDR /samples/SAMPLEIDR.bam /samples/SAMPLEIDR At this step we are assuming that all the required preprocessing is over, we only want to run variant callers or other tools using recalibrated BAMs. ```bash -nextflow run SciLifeLab/Sarek/ --sample mysample.tsv --step variantcalling --tools +nextflow run SciLifeLab/Sarek/germlineVC.nf --tools +nextflow run SciLifeLab/Sarek/runMultiQC.nf ``` And the corresponding TSV file should be like: @@ -139,7 +174,10 @@ If you want to restart a previous run of the pipeline, you may not have a recali At this step we are assuming that all the required preprocessing is over, we only want to run variant callers or other tools using recalibrated BAMs. ```bash -nextflow run SciLifeLab/Sarek --sample mysample.tsv --step variantcalling --tools +nextflow run SciLifeLab/Sarek/germlineVC.nf --tools +nextflow run SciLifeLab/Sarek/somaticVC.nf --tools +nextflow run SciLifeLab/Sarek/annotate.nf --tool --annotateVCF myfile.vcf +nextflow run SciLifeLab/Sarek/runMultiQC.nf ``` And the corresponding TSV file should be like (obviously, if you do not have relapse samples, you can leave out this last line): @@ -152,25 +190,6 @@ SUBJECT_ID XX 1 SAMPLEIDR /samples/SAMPLEIDR.bam /samples/SAMPLEIDR If you want to restart a previous run of the pipeline, you may not have a recalibrated BAM file. This is the case if HaplotypeCaller was the only tool (recalibration is done on-the-fly with HaplotypeCaller to improve performance and save space). In this case, you need to start with `--step=recalibrate` (see previous section). - -## Running the pipeline on a normal sample only (with `--sampleDir`) - -Sarek can also be used to process a single normal sample. The tools that require tumor/normal pairs should not be run in this case. - -When running a normal-only sample, it is not necessary to create a TSV file describing the input. 
Instead, the `--sampleDir` option can be used to point Sarek to a directory with FASTQ files: -```bash -nextflow run SciLifeLab/Sarek --tools=HaplotypeCaller --sampleDir path/to/FASTQ/files -``` -The given directory is searched recursively for FASTQ files that are named `*_R1_*.fastq.gz`, and a matching pair with the same name except `_R2_` instead of `_R1_` is expected to exist alongside. All of the found FASTQ files are considered to belong to the sample. Each FASTQ file pair gets its own read group (`@RG`) in the resulting BAM file. - -### Metadata when using `--sampleDir` - -When using `--sampleDir`, the metadata about the sample that are written to the BAM header in the `@RG` tag are determined in the following way. - -- The sample name (`SM`) is derived from the the last component of the path given to `--sampleDir`. That is, you should make sure that that directory has a meaningful name! For example, with `--sampleDir=/my/fastqs/sample123`, the sample name will be `sample123`. -- The read group id is set to *flowcell.samplename.lane*. The flowcell id and lane number are auto-detected from the name of the first read in the FASTQ file. - - -------------------------------------------------------------------------------- [![](images/SciLifeLab_logo.png "SciLifeLab")][scilifelab-link] From 4f637ee534f44f379c1df0df834d626210026347 Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Fri, 23 Mar 2018 15:08:35 +0100 Subject: [PATCH 32/36] add CHANGELOG --- CHANGELOG.md | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++ README.md | 6 +++++- 2 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000000..806ca2a6f7 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,59 @@ +# Changelog +All notable changes to this project will be documented in this file. 
+ +The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) +and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). + +## [Unreleased] +- Python wrapper script + +## [2.0.0] - 2018-03-23 +### `Added` +- basic wrapper script +- Abstract, posters and figures +- ROI selector and FreeBayes sanitizer scripts +- New logo and icon for the project +- check for existing tumor/normal channel +- `--more` to get the current version of the workflow +- `lib/MyUtils.groovy` with `checkParams`, `checkParameterList`, `checkParameterExistence` and `isAllowedParams` functions +- some `runOptions` for `docker` (prevent some user right problem) +- This `CHANGELOG` + +### `Changed` +- `CAW` is now `Sarek` +- Dissect Workflow in 5 new scripts: `annotate.nf`, `main.nf`, `germlineVC.nf`, `runMultiQC.nf` and `somaticVC.nf` +- `report.html`, `timeline.html` and `trace.html` are generated in `Reports/` +- `--version` is now used to define the workflow version +- most params are now defined in the base.config file instead of in the scripts +- update RELEASE_CHECKLIST.md +- `checkParams`, `checkParameterList`, `checkParameterExistence` and `isAllowedParams` in script functions are now called within `MyUtils` +- `nf_required_version` is now `params.nfRequiredVersion` +- in `buildReferences.nf` script, channels now begin by `ch_`, and files by `f_` +- use `PublishDir mode: 'link'`` instead of `copy` +- `directoryMap` now contains `params.outDir` +- use Nextflow support of scratch (close #539) +- reordered Travis CI tests +- update documentation +- `MultiQC` version in container from v`1.4` to v`1.5` +- `vepgrch37` container base image from `release_90.6` to `release_92` +- `vepgrch38` container base image from `release_90.6` to `release_92` +- `VEP` version in containers from v`90` to v`91` +- `nucleotidesPerSecond` is now `params.nucleotidesPerSecond` +- default `params.tag` is now `latest` instead of current version, so --tag needs to be 
specified with the right version to be sure of using the `containers` corresponding + +### `Deprecated` +- `standard` profile +- `uppmax-localhost.config` file + +### `Removed` +- `scripts/skeleton_batch.sh` +- old data and tsv files +- UPPMAX directories from containers +- `--step` in `annotate.nf`, `germlineVC.nf` and `somaticVC.nf` +- some `runOptions` for Singularity (binding not needed anymore on UPPMAX) +- `download` profile + + +### `Fixed` +- Replace `VEP` `--pick` option by `--per_gene` (fix #533) +- use `$PWD` for default `outDir` (fix #530) diff --git a/README.md b/README.md index b701d39551..dfecb613a0 100644 --- a/README.md +++ b/README.md @@ -78,10 +78,14 @@ The Sarek pipeline comes with documentation in the `doc/` directory: ## Contributions & Support -If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md). +If you would like to contribute to this pipeline, please see the [contributing guidelines](https://github.com/SciLifeLab/Sarek/blob/master/.github/CONTRIBUTING.md). 
For further information or help, don't hesitate to get in touch on [Gitter][gitter-link] or contact us: maxime.garcia@scilifelab.se, szilveszter.juhos@scilifelab.se +## CHANGELOG + +- [CHANGELOG](https://github.com/SciLifeLab/Sarek/blob/master/CHANGELOG.md) + ## Authors * [Sebastian DiLorenzo](https://github.com/Sebastian-D) From 698537a759c30dd7b9b024aae0bb8998b36a2c9c Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Fri, 23 Mar 2018 15:55:37 +0100 Subject: [PATCH 33/36] change name of library --- CHANGELOG.md | 4 ++-- annotate.nf | 4 ++-- buildContainers.nf | 2 +- buildReferences.nf | 2 +- germlineVC.nf | 2 +- lib/{MyUtils.groovy => SarekUtils.groovy} | 0 main.nf | 2 +- runMultiQC.nf | 2 +- somaticVC.nf | 2 +- 9 files changed, 10 insertions(+), 10 deletions(-) rename lib/{MyUtils.groovy => SarekUtils.groovy} (100%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 806ca2a6f7..103cd2faec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - New logo and icon for the project - check for existing tumor/normal channel - `--more` to get the current version of the workflow -- `lib/MyUtils.groovy` with `checkParams`, `checkParameterList`, `checkParameterExistence` and `isAllowedParams` functions +- `lib/SarekUtils.groovy` with `checkParams`, `checkParameterList`, `checkParameterExistence` and `isAllowedParams` functions - some `runOptions` for `docker` (prevent some user right problem) - This `CHANGELOG` @@ -26,7 +26,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
- `--version` is now used to define the workflow version - most params are now defined in the base.config file instead of in the scripts - update RELEASE_CHECKLIST.md -- `checkParams`, `checkParameterList`, `checkParameterExistence` and `isAllowedParams` in script functions are now called within `MyUtils` +- `checkParams`, `checkParameterList`, `checkParameterExistence` and `isAllowedParams` in script functions are now called within `SarekUtils` - `nf_required_version` is now `params.nfRequiredVersion` - in `buildReferences.nf` script, channels now begin by `ch_`, and files by `f_` - use `PublishDir mode: 'link'`` instead of `copy` diff --git a/annotate.nf b/annotate.nf index faf1044b80..2c7a8084bc 100644 --- a/annotate.nf +++ b/annotate.nf @@ -53,7 +53,7 @@ try { if (params.help) exit 0, helpMessage() if (params.more) exit 0, moreMessage() -if (!MyUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" +if (!SarekUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project " tools = params.tools ? params.tools.split(',').collect{it.trim().toLowerCase()} : [] @@ -63,7 +63,7 @@ annotateVCF = params.annotateVCF ? 
params.annotateVCF.split(',').collect{it.trim directoryMap = defineDirectoryMap() toolList = defineToolList() -if (!MyUtils.checkParameterList(tools,toolList)) exit 1, 'Unknown tool(s), see --help for more information' +if (!SarekUtils.checkParameterList(tools,toolList)) exit 1, 'Unknown tool(s), see --help for more information' /* ================================================================================ diff --git a/buildContainers.nf b/buildContainers.nf index e328ab086e..916be1599b 100644 --- a/buildContainers.nf +++ b/buildContainers.nf @@ -53,7 +53,7 @@ try { if (params.help) exit 0, helpMessage() if (params.more) exit 0, moreMessage() -if (!MyUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" +if (!SarekUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project " // Define containers to handle (build/push or pull) diff --git a/buildReferences.nf b/buildReferences.nf index becc27645c..e866d219e3 100644 --- a/buildReferences.nf +++ b/buildReferences.nf @@ -56,7 +56,7 @@ try { if (params.help) exit 0, helpMessage() if (params.more) exit 0, moreMessage() -if (!MyUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" +if (!SarekUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! 
Use --project " if (!params.download && params.refDir == "" ) exit 1, "No --refDir specified" diff --git a/germlineVC.nf b/germlineVC.nf index bf6b367161..4a608c4a42 100644 --- a/germlineVC.nf +++ b/germlineVC.nf @@ -59,7 +59,7 @@ try { if (params.help) exit 0, helpMessage() if (params.more) exit 0, moreMessage() -if (!MyUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" +if (!SarekUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project " tools = params.tools ? params.tools.split(',').collect{it.trim().toLowerCase()} : [] diff --git a/lib/MyUtils.groovy b/lib/SarekUtils.groovy similarity index 100% rename from lib/MyUtils.groovy rename to lib/SarekUtils.groovy diff --git a/main.nf b/main.nf index 25796e2420..f93a003278 100644 --- a/main.nf +++ b/main.nf @@ -60,7 +60,7 @@ try { if (params.help) exit 0, helpMessage() if (params.more) exit 0, moreMessage() -if (!MyUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" +if (!SarekUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project " step = params.step.toLowerCase() diff --git a/runMultiQC.nf b/runMultiQC.nf index 63e8a1c7fa..a5dac9ef00 100644 --- a/runMultiQC.nf +++ b/runMultiQC.nf @@ -52,7 +52,7 @@ try { if (params.help) exit 0, helpMessage() if (params.more) exit 0, moreMessage() -if (!MyUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" +if (!SarekUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! 
Use --project " directoryMap = defineDirectoryMap() diff --git a/somaticVC.nf b/somaticVC.nf index b28e20d992..e93d4f9a4b 100644 --- a/somaticVC.nf +++ b/somaticVC.nf @@ -64,7 +64,7 @@ try { if (params.help) exit 0, helpMessage() if (params.more) exit 0, moreMessage() -if (!MyUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" +if (!SarekUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project " tools = params.tools ? params.tools.split(',').collect{it.trim().toLowerCase()} : [] From 6842d375b8c6f3e8e7f843b017a5d534a9517cd2 Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Fri, 23 Mar 2018 15:55:50 +0100 Subject: [PATCH 34/36] update CHECKLIST --- .github/RELEASE_CHECKLIST.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/RELEASE_CHECKLIST.md b/.github/RELEASE_CHECKLIST.md index 97e0e363f8..748a4c5a33 100644 --- a/.github/RELEASE_CHECKLIST.md +++ b/.github/RELEASE_CHECKLIST.md @@ -16,7 +16,7 @@ This checklist is for our own reference - `./scripts/test.sh -p singularity --tag ` - `./scripts/test.sh -p singularityPath --tag ` 6. Commit and push version updates -7. Make a [release](https://github.com/SciLifeLab/Sarek/releases) on GitHub - list PRs as changelog. +7. Make a [release](https://github.com/SciLifeLab/Sarek/releases) on GitHub 8. Tweet that new version is released 9. Commit and push. Continue making more awesome :metal: 10. 
Have fika :cake: From df9f3012c5c728eb7f649ee98fcd373c97e224fe Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Fri, 23 Mar 2018 16:25:38 +0100 Subject: [PATCH 35/36] remove --more --- CHANGELOG.md | 1 - annotate.nf | 20 +++++--------------- buildContainers.nf | 20 +++++--------------- buildReferences.nf | 20 +++++--------------- doc/USAGE.md | 4 ---- germlineVC.nf | 20 +++++--------------- main.nf | 20 +++++--------------- runMultiQC.nf | 18 +++++------------- somaticVC.nf | 20 +++++--------------- 9 files changed, 35 insertions(+), 108 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 103cd2faec..0a75a6e821 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,6 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - ROI selector and FreeBayes sanitizer scripts - New logo and icon for the project - check for existing tumor/normal channel -- `--more` to get the current version of the workflow - `lib/SarekUtils.groovy` with `checkParams`, `checkParameterList`, `checkParameterExistence` and `isAllowedParams` functions - some `runOptions` for `docker` (prevent some user right problem) - This `CHANGELOG` diff --git a/annotate.nf b/annotate.nf index 2c7a8084bc..389c647eba 100644 --- a/annotate.nf +++ b/annotate.nf @@ -52,7 +52,6 @@ try { } if (params.help) exit 0, helpMessage() -if (params.more) exit 0, moreMessage() if (!SarekUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project " @@ -207,11 +206,6 @@ if (params.verbose) vepReport = vepReport.view { ================================================================================ */ -def sarekMessage() { - // Display Sarek message - log.info "Sarek ~ ${params.version} - " + this.grabRevision() + (workflow.commitId ? 
" [${workflow.commitId}]" : "") -} - def checkUppmaxProject() { // check if UPPMAX project number is specified return !(workflow.profile == 'slurm' && !params.project) @@ -282,8 +276,6 @@ def helpMessage() { log.info " you're reading it" log.info " --verbose" log.info " Adds more verbosity to workflow" - log.info " --more" - log.info " displays version number" } def minimalInformationMessage() { @@ -312,19 +304,17 @@ def nextflowMessage() { log.info "N E X T F L O W ~ version ${workflow.nextflow.version} ${workflow.nextflow.build}" } +def sarekMessage() { + // Display Sarek message + log.info "Sarek - Workflow For Somatic And Germline Variations ~ ${params.version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "") +} + def startMessage() { // Display start message this.sarekMessage() this.minimalInformationMessage() } -def moreMessage() { - // Display version message - log.info "Sarek" - log.info " version : " + params.version - log.info workflow.commitId ? "Git info : ${workflow.repository} - ${workflow.revision} [${workflow.commitId}]" : " revision : " + this.grabRevision() -} - workflow.onComplete { // Display complete message this.nextflowMessage() diff --git a/buildContainers.nf b/buildContainers.nf index 916be1599b..380e6025e8 100644 --- a/buildContainers.nf +++ b/buildContainers.nf @@ -52,7 +52,6 @@ try { } if (params.help) exit 0, helpMessage() -if (params.more) exit 0, moreMessage() if (!SarekUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project " @@ -150,11 +149,6 @@ if (params.verbose) containersPushed = containersPushed.view { ================================================================================ */ -def sarekMessage() { - // Display Sarek message - log.info "Sarek ~ ${params.version} - " + this.grabRevision() + (workflow.commitId ? 
" [${workflow.commitId}]" : "") -} - def checkContainerExistence(container, list) { try {assert list.contains(container)} catch (AssertionError ae) { @@ -232,8 +226,6 @@ def helpMessage() { log.info " Default: \$PWD" log.info " --tag`: Choose the tag for the containers" log.info " Default (version number): " + params.version - log.info " --more" - log.info " displays version number and more informations" } def minimalInformationMessage() { @@ -253,19 +245,17 @@ def nextflowMessage() { log.info "N E X T F L O W ~ version ${workflow.nextflow.version} ${workflow.nextflow.build}" } +def sarekMessage() { + // Display Sarek message + log.info "Sarek ~ ${params.version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "") +} + def startMessage() { // Display start message this.sarekMessage() this.minimalInformationMessage() } -def moreMessage() { - // Display version message - log.info "Sarek - Workflow For Somatic And Germline Variations" - log.info " version : " + params.version - log.info workflow.commitId ? "Git info : ${workflow.repository} - ${workflow.revision} [${workflow.commitId}]" : " revision : " + this.grabRevision() -} - workflow.onComplete { // Display complete message this.nextflowMessage() diff --git a/buildReferences.nf b/buildReferences.nf index e866d219e3..4bb4b40465 100644 --- a/buildReferences.nf +++ b/buildReferences.nf @@ -55,7 +55,6 @@ try { } if (params.help) exit 0, helpMessage() -if (params.more) exit 0, moreMessage() if (!SarekUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! 
Use --project " @@ -251,11 +250,6 @@ if (params.verbose) ch_vcfIndex.view { ================================================================================ */ -def sarekMessage() { - // Display Sarek message - log.info "Sarek - Workflow To Find Somatic And Germline Variations ~ ${params.version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "") -} - def checkFile(it) { // Check file existence final f = file(it) @@ -318,8 +312,6 @@ def helpMessage() { log.info " smallGRCh37" log.info " --help" log.info " you're reading it" - log.info " --more" - log.info " displays version number" } def minimalInformationMessage() { @@ -341,19 +333,17 @@ def nextflowMessage() { log.info "N E X T F L O W ~ version ${workflow.nextflow.version} ${workflow.nextflow.build}" } +def sarekMessage() { + // Display Sarek message + log.info "Sarek - Workflow For Somatic And Germline Variations ~ ${params.version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "") +} + def startMessage() { // Display start message this.sarekMessage() this.minimalInformationMessage() } -def moreMessage() { - // Display version message - log.info "Sarek - Workflow For Somatic And Germline Variations" - log.info " version : " + params.version - log.info workflow.commitId ? "Git info : ${workflow.repository} - ${workflow.revision} [${workflow.commitId}]" : " revision : " + this.grabRevision() -} - workflow.onComplete { // Display complete message this.nextflowMessage() diff --git a/doc/USAGE.md b/doc/USAGE.md index a58df2e8ff..5dea8d6dc8 100644 --- a/doc/USAGE.md +++ b/doc/USAGE.md @@ -94,10 +94,6 @@ Choose vcf to annotate. Different vcfs to be separated by commas. Display more information about files being processed. -### --more - -Display version number and information. 
- ## Containers ### --containerPath `Path to the singularity containers (default=containers/)` diff --git a/germlineVC.nf b/germlineVC.nf index 4a608c4a42..567c65969f 100644 --- a/germlineVC.nf +++ b/germlineVC.nf @@ -58,7 +58,6 @@ try { } if (params.help) exit 0, helpMessage() -if (params.more) exit 0, moreMessage() if (!SarekUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project " @@ -594,11 +593,6 @@ bcfReport.close() ================================================================================ */ -def sarekMessage() { - // Display Sarek message - log.info "Sarek ~ ${params.version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "") -} - def checkFileExtension(it, extension) { // Check file extension if (!it.toString().toLowerCase().endsWith(extension.toLowerCase())) exit 1, "File: ${it} has the wrong extension: ${extension} see --help for more information" @@ -805,8 +799,6 @@ def helpMessage() { log.info " you're reading it" log.info " --verbose" log.info " Adds more verbosity to workflow" - log.info " --more" - log.info " displays version number" } def minimalInformationMessage() { @@ -863,19 +855,17 @@ def returnTSV(it, number) { return it } +def sarekMessage() { + // Display Sarek message + log.info "Sarek - Workflow For Somatic And Germline Variations ~ ${params.version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "") +} + def startMessage() { // Display start message this.sarekMessage() this.minimalInformationMessage() } -def moreMessage() { - // Display version message - log.info "Sarek" - log.info " version : " + params.version - log.info workflow.commitId ? 
"Git info : ${workflow.repository} - ${workflow.revision} [${workflow.commitId}]" : " revision : " + this.grabRevision() -} - workflow.onComplete { // Display complete message this.nextflowMessage() diff --git a/main.nf b/main.nf index f93a003278..38e5c880c1 100644 --- a/main.nf +++ b/main.nf @@ -59,7 +59,6 @@ try { } if (params.help) exit 0, helpMessage() -if (params.more) exit 0, moreMessage() if (!SarekUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project " @@ -599,11 +598,6 @@ if (params.verbose) bamQCreport = bamQCreport.view { ================================================================================ */ -def sarekMessage() { - // Display Sarek message - log.info "Sarek ~ ${params.version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "") -} - def checkFileExtension(it, extension) { // Check file extension if (!it.toString().toLowerCase().endsWith(extension.toLowerCase())) exit 1, "File: ${it} has the wrong extension: ${extension} see --help for more information" @@ -877,8 +871,6 @@ def helpMessage() { log.info " you're reading it" log.info " --verbose" log.info " Adds more verbosity to workflow" - log.info " --more" - log.info " displays version number" } def minimalInformationMessage() { @@ -935,19 +927,17 @@ def returnTSV(it, number) { return it } +def sarekMessage() { + // Display Sarek message + log.info "Sarek - Workflow For Somatic And Germline Variations ~ ${params.version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "") +} + def startMessage() { // Display start message this.sarekMessage() this.minimalInformationMessage() } -def moreMessage() { - // Display version message - log.info "Sarek" - log.info " version : " + params.version - log.info workflow.commitId ? 
"Git info : ${workflow.repository} - ${workflow.revision} [${workflow.commitId}]" : " revision : " + this.grabRevision() -} - workflow.onComplete { // Display complete message this.nextflowMessage() diff --git a/runMultiQC.nf b/runMultiQC.nf index a5dac9ef00..51cd8fb1fe 100644 --- a/runMultiQC.nf +++ b/runMultiQC.nf @@ -51,7 +51,6 @@ try { } if (params.help) exit 0, helpMessage() -if (params.more) exit 0, moreMessage() if (!SarekUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project " @@ -140,11 +139,6 @@ if (params.verbose) multiQCReport = multiQCReport.view { ================================================================================ */ -def sarekMessage() { - // Display Sarek message - log.info "Sarek ~ ${params.version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "") -} - def checkUppmaxProject() { // check if UPPMAX project number is specified return !(workflow.profile == 'slurm' && !params.project) @@ -224,19 +218,17 @@ def nextflowMessage() { log.info "N E X T F L O W ~ version ${workflow.nextflow.version} ${workflow.nextflow.build}" } +def sarekMessage() { + // Display Sarek message + log.info "Sarek - Workflow For Somatic And Germline Variations ~ ${params.version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "") +} + def startMessage() { // Display start message this.sarekMessage() this.minimalInformationMessage() } -def moreMessage() { - // Display version message - log.info "Sarek" - log.info " version : " + params.version - log.info workflow.commitId ? 
"Git info : ${workflow.repository} - ${workflow.revision} [${workflow.commitId}]" : " revision : " + this.grabRevision() -} - workflow.onComplete { // Display complete message this.nextflowMessage() diff --git a/somaticVC.nf b/somaticVC.nf index e93d4f9a4b..231b70eb4b 100644 --- a/somaticVC.nf +++ b/somaticVC.nf @@ -63,7 +63,6 @@ try { } if (params.help) exit 0, helpMessage() -if (params.more) exit 0, moreMessage() if (!SarekUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project " @@ -785,11 +784,6 @@ bcfReport.close() ================================================================================ */ -def sarekMessage() { - // Display Sarek message - log.info "Sarek ~ ${params.version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "") -} - def checkFileExtension(it, extension) { // Check file extension if (!it.toString().toLowerCase().endsWith(extension.toLowerCase())) exit 1, "File: ${it} has the wrong extension: ${extension} see --help for more information" @@ -995,8 +989,6 @@ def helpMessage() { log.info " you're reading it" log.info " --verbose" log.info " Adds more verbosity to workflow" - log.info " --more" - log.info " displays version number" } def minimalInformationMessage() { @@ -1053,19 +1045,17 @@ def returnTSV(it, number) { return it } +def sarekMessage() { + // Display Sarek message + log.info "Sarek - Workflow For Somatic And Germline Variations ~ ${params.version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "") +} + def startMessage() { // Display start message this.sarekMessage() this.minimalInformationMessage() } -def moreMessage() { - // Display version message - log.info "Sarek" - log.info " version : " + params.version - log.info workflow.commitId ? 
"Git info : ${workflow.repository} - ${workflow.revision} [${workflow.commitId}]" : " revision : " + this.grabRevision() -} - workflow.onComplete { // Display complete message this.nextflowMessage() From d87d4b8aaebba695b9899b1152ad98829e973cad Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Fri, 23 Mar 2018 16:52:52 +0100 Subject: [PATCH 36/36] more complete CHANGELOG --- CHANGELOG.md | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 92 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a75a6e821..d333cfe672 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -52,7 +52,98 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - some `runOptions` for Singularity (binding not needed anymore on UPPMAX) - `download` profile - ### `Fixed` - Replace `VEP` `--pick` option by `--per_gene` (fix #533) - use `$PWD` for default `outDir` (fix #530) + +## [1.2.5] - 2018-01-18 + +### `Added` +- Zenodo for DOI +- Delivery README +- Document use of the `--sampleDir` option +- Contributing Guidelines +- Issue Templates +- Release Checklist +- `--outDir` +- `awsbatch` profile +- `aws-batch.config` config file +- `--noBAMQC` params (failing sometimes on Bianca) + +### `Changed` +- Update `Nextflow` to `0.26.0` (new fancy report + AWS Batch) +- Extra time on Travis CI testing +- Replace `bundleDir` by `params.genome_base` +- Update `MultiQC` to `1.3` (MEGAQC FTW) +- Move and rename some test files + +### `Fixed` +- Version of COSMIC GRCh37 v83 +- Write an error message when `--sampleDir` does not find any FASTQ files +- `base.config` for ConcatVCF process +- File specification for recalibrationReport in RecalibrateBam process (got error on AWS Batch) + +## [1.2.4] - 2017-10-27 + +### `Fixed` +- Better CPU requirements for `ConcatVCF` (fix #488) +- Exception handling for `ASCAT` (close #489) +- CPU requirements for `runSingleStrelka` and `runSingleManta` (fix #490) + +## [1.2.3] - 2017-10-18 + +### `Fixed` +- 
16 cpus for local executor (fix #475) +- `ASCAT` works for GRCh38 (fix #357) +- Running `Singularity` on /scratch (fix #471) +- No tsv for step `annotate` (fix #480) + +## [1.2.2] - 2017-10-06 + +### `Fixed` +- Typo in `uppmax-localhost.config` (fix #479) + +## [1.2.1] - 2017-10-06 + +### `Changed` +- `runascat` and `runconvertallelecounts` containers are now replaced by `r-base` +- `willmclaren/ensembl-vep:release_90.5` is now base for `vepgrch37` and `vepgrch38` + +### `Removed` +- `vep` container +- `strelka_config.ini` file + +### `Fixed` +- Running `Singularity` on /scratch (fix #471) +- Update function to check Nextflow version (fix #472) +- Remove `returnMin()` function (fix #473) + +## [1.2.0] - 2017-10-02 + +### `Changed` +- Fix version for Manuscript + +## [1.1] - 2017-09-15 + +### `Added` +- Singularity possibilities + +### `Changed` +- Reports made by default +- Intervals file can be a bed file +- Normal sample preprocessing + HaplotypeCaller is possible +- Better Travis CI tests + +### `Fixed` +- Memory requirements + +## [1.0] - 2017-02-16 + +### `Added` +- Docker possibilities + +## [0.9] - 2016-11-16 + +## [0.8] - 2016-11-16 + +## [0.1] - 2016-04-05