diff --git a/.github/RELEASE_CHECKLIST.md b/.github/RELEASE_CHECKLIST.md
index 21378aebba..748a4c5a33 100644
--- a/.github/RELEASE_CHECKLIST.md
+++ b/.github/RELEASE_CHECKLIST.md
@@ -2,20 +2,21 @@ This checklist is for our own reference
 1. Check that everything is up to date and ready to go
+   - Travis tests are passing
+   - Manual testing on Bianca is passing
 2. Increase version numbers.
-3. Update version numbers in code: `main.nf`, `buildContainers.nf`, `buildReferences.nf`
-4. If any changes on any containers, match the tag to current version `docker.config`, `singularity.config`, `singularity-path.config`.
-5. Build, and get the containers.
-   - `./scripts/do_all.sh --push`
-   - `./scripts/do_all.sh --pull`
-6. Test against sample data.
+3. Update version numbers in code: `configuration/base.config`
+4. Build, and get the containers.
+   - `./scripts/do_all.sh --push --tag `
+   - `./scripts/do_all.sh --pull --tag `
+5. Test against sample data.
    - Check for any command line errors
    - Check version numbers are printed correctly
-   - `./scripts/test.sh -p docker`
-   - `./scripts/test.sh -p singularity`
-   - `./scripts/test.sh -p singularityPath`
-7. Commit and push version updates
-8. Make a [release](https://github.com/SciLifeLab/CAW/releases) on GitHub - list PRs as changelog.
-9. Tweet that new version is released
-10. Commit and push. Continue making more awesome :metal:
-11. Have fika :cake:
+   - `./scripts/test.sh -p docker --tag `
+   - `./scripts/test.sh -p singularity --tag `
+   - `./scripts/test.sh -p singularityPath --tag `
+6. Commit and push version updates
+7. Make a [release](https://github.com/SciLifeLab/Sarek/releases) on GitHub
+8. Tweet that new version is released
+9. Commit and push. Continue making more awesome :metal:
+10. Have fika :cake:
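For concreteness, a sketch of the container and test steps above, assuming the release being cut is tagged `2.0.0` (the tag value is illustrative; substitute the actual version):

```bash
# Build and push the Docker containers, then pull them back as Singularity images
./scripts/do_all.sh --push --tag 2.0.0
./scripts/do_all.sh --pull --tag 2.0.0
# Run the tests against each container engine with the same tag
./scripts/test.sh -p docker --tag 2.0.0
./scripts/test.sh -p singularity --tag 2.0.0
./scripts/test.sh -p singularityPath --tag 2.0.0
```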
diff --git a/.travis.yml b/.travis.yml
index 980702f158..08ad72172b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -8,19 +8,19 @@ services:
   - docker

 env:
-  - NXF_VER=0.27.0 SGT_VER=2.4.2 PROFILE=singularity TEST=DIR TOOL_INSTALL=all
-  - NXF_VER=0.27.0 PROFILE=docker TEST=DIR TOOL_INSTALL=nextflow
-  - NXF_VER=0.27.0 SGT_VER=2.4.2 PROFILE=singularity TEST=STEP TOOL_INSTALL=all
-  - NXF_VER=0.27.0 PROFILE=docker TEST=STEP TOOL_INSTALL=nextflow
-  - NXF_VER=0.27.0 SGT_VER=2.4.2 PROFILE=singularity TEST=ANNOTATESNPEFF TOOL_INSTALL=all
-  - NXF_VER=0.27.0 PROFILE=docker TEST=ANNOTATESNPEFF TOOL_INSTALL=nextflow
-  - NXF_VER=0.27.0 PROFILE=docker TEST=ANNOTATEVEP TOOL_INSTALL=nextflow
-  - NXF_VER=0.27.0 SGT_VER=2.4.2 PROFILE=singularity TEST=GERMLINE TOOL_INSTALL=all
-  - NXF_VER=0.27.0 PROFILE=docker TEST=GERMLINE TOOL_INSTALL=nextflow
   - NXF_VER=0.27.0 SGT_VER=2.4.2 PROFILE=singularity TEST=TOOLS TOOL_INSTALL=all
   - NXF_VER=0.27.0 PROFILE=docker TEST=TOOLS TOOL_INSTALL=nextflow
+  - NXF_VER=0.27.0 SGT_VER=2.4.2 PROFILE=singularity TEST=STEP TOOL_INSTALL=all
   - NXF_VER=0.27.0 SGT_VER=2.4.2 PROFILE=singularity TEST=MANTA TOOL_INSTALL=all
+  - NXF_VER=0.27.0 SGT_VER=2.4.2 PROFILE=singularity TEST=ANNOTATESNPEFF TOOL_INSTALL=all
+  - NXF_VER=0.27.0 SGT_VER=2.4.2 PROFILE=singularity TEST=GERMLINE TOOL_INSTALL=all
+  - NXF_VER=0.27.0 PROFILE=docker TEST=ANNOTATEVEP TOOL_INSTALL=nextflow
+  - NXF_VER=0.27.0 PROFILE=docker TEST=ANNOTATESNPEFF TOOL_INSTALL=nextflow
+  - NXF_VER=0.27.0 SGT_VER=2.4.2 PROFILE=singularity TEST=DIR TOOL_INSTALL=all
   - NXF_VER=0.27.0 PROFILE=docker TEST=MANTA TOOL_INSTALL=nextflow
+  - NXF_VER=0.27.0 PROFILE=docker TEST=STEP TOOL_INSTALL=nextflow
+  - NXF_VER=0.27.0 PROFILE=docker TEST=GERMLINE TOOL_INSTALL=nextflow
+  - NXF_VER=0.27.0 PROFILE=docker TEST=DIR TOOL_INSTALL=nextflow

 install:
   # Install Nextflow (and Singularity if needed)
   - "./scripts/install.sh --tool $TOOL_INSTALL"
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000000..d333cfe672
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,149 @@
+# Changelog
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
+and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
+
+## [Unreleased]
+- Python wrapper script
+
+## [2.0.0] - 2018-03-23
+### `Added`
+- basic wrapper script
+- Abstract, posters and figures
+- ROI selector and FreeBayes sanitizer scripts
+- New logo and icon for the project
+- check for existing tumor/normal channel
+- `lib/SarekUtils.groovy` with `checkParams`, `checkParameterList`, `checkParameterExistence` and `isAllowedParams` functions
+- some `runOptions` for `docker` (prevents some user rights problems)
+- This `CHANGELOG`
+
+### `Changed`
+- `CAW` is now `Sarek`
+- Dissect Workflow in 5 new scripts: `annotate.nf`, `main.nf`, `germlineVC.nf`, `runMultiQC.nf` and `somaticVC.nf`
+- `report.html`, `timeline.html` and `trace.html` are generated in `Reports/`
+- `--version` is now used to define the workflow version
+- most params are now defined in the `base.config` file instead of in the scripts
+- update `RELEASE_CHECKLIST.md`
+- the `checkParams`, `checkParameterList`, `checkParameterExistence` and `isAllowedParams` script functions are now called within `SarekUtils`
+- `nf_required_version` is now `params.nfRequiredVersion`
+- in the `buildReferences.nf` script, channel names now begin with `ch_`, and file names with `f_`
+- use `publishDir mode: 'link'` instead of `copy`
+- `directoryMap` now contains `params.outDir`
+- use Nextflow support for scratch (close #539)
+- reordered Travis CI tests
+- update documentation
+- `MultiQC` version in container from v`1.4` to v`1.5`
+- `vepgrch37` container base image from `release_90.6` to `release_92`
+- `vepgrch38` container base image from `release_90.6` to `release_92`
+- `VEP` version in containers from v`90` to v`91`
+- `nucleotidesPerSecond` is now `params.nucleotidesPerSecond`
+- default `params.tag` is now `latest` instead of the current version, so `--tag` needs to be set to the right version to be sure of using the corresponding containers
+
+### `Deprecated`
+- `standard` profile
+- `uppmax-localhost.config` file
+
+### `Removed`
+- `scripts/skeleton_batch.sh`
+- old data and tsv files
+- UPPMAX directories from containers
+- `--step` in `annotate.nf`, `germlineVC.nf` and `somaticVC.nf`
+- some `runOptions` for Singularity (binding not needed anymore on UPPMAX)
+- `download` profile
+
+### `Fixed`
+- Replace `VEP` `--pick` option with `--per_gene` (fix #533)
+- use `$PWD` for default `outDir` (fix #530)
+
+## [1.2.5] - 2018-01-18
+
+### `Added`
+- Zenodo for DOI
+- Delivery README
+- Document use of the `--sampleDir` option
+- Contributing Guidelines
+- Issue Templates
+- Release Checklist
+- `--outDir`
+- `awsbatch` profile
+- `aws-batch.config` config file
+- `--noBAMQC` params (failing sometimes on Bianca)
+
+### `Changed`
+- Update `Nextflow` to `0.26.0` (new fancy report + AWS Batch)
+- Extra time on Travis CI testing
+- Replace `bundleDir` by `params.genome_base`
+- Update `MultiQC` to `1.3` (MEGAQC FTW)
+- Move and rename some test files
+
+### `Fixed`
+- Version of COSMIC GRCh37 v83
+- Write an error message when `--sampleDir` does not find any FASTQ files
+- `base.config` for
ConcatVCF process +- File specification for recalibrationReport in RecalibrateBam process (got error on AWS Batch) + +## [1.2.4] - 2017-10-27 + +### `Fixed` +- Better CPU requirements for `ConcatVCF` (fix #488) +- Exception handling for `ASCAT` (close #489) +- CPU requirements for `runSingleStrelka` and `runSingleManta` (fix #490) + +## [1.2.3] - 2017-10-18 + +### `Fixed` +- 16 cpus for local executor (fix #475) +- `ASCAT` works for GRCh38 (fix #357) +- Running `Singularity` on /scratch (fix #471) +- No tsv for step `annotate` (fix #480) + +## [1.2.2] - 2017-10-06 + +### `Fixed` + - Typo in `uppmax-localhost.config` (fix #479) + +## [1.2.1] - 2017-10-06 + +### `Changed` +- `runascat` and `runconvertallelecounts` containers are now replaced by `r-base` +- `willmclaren/ensembl-vep:release_90.5` is now base for `vepgrch37` and `vepgrch38` + +### `Removed` +- `vep` container +- `strelka_config.ini` file + +### `Fixed` +- Running `Singularity` on /scratch (fix #471) +- Update function to check Nextflow version (fix #472) +- Remove `returnMin()` function (fix #473) + +## [1.2.0] - 2017-10-02 + +### `Changed` +- Fix version for Manuscript + +## [1.1] - 2017-09-15 + +### `Added` +- Singularity possibilities + +### `Changed` +- Reports made by default +- Intervals file can be a bed file +- Normal sample preprocessing + HaplotypeCaller is possible +- Better Travis CI tests + +### `Fixed` +- Memory requirements + +## [1.0] - 2017-02-16 + +### `Added` +- Docker possibilities + +## [0.9] - 2016-11-16 + +## [0.8] - 2016-11-16 + +## [0.1] - 2016-04-05 diff --git a/README.md b/README.md index b701d39551..dfecb613a0 100644 --- a/README.md +++ b/README.md @@ -78,10 +78,14 @@ The Sarek pipeline comes with documentation in the `doc/` directory: ## Contributions & Support -If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md). +If you would like to contribute to this pipeline, please see the [contributing guidelines](https://github.com/SciLifeLab/Sarek/blob/master/.github/CONTRIBUTING.md). For further information or help, don't hesitate to get in touch on [Gitter][gitter-link] or contact us: maxime.garcia@scilifelab.se, szilveszter.juhos@scilifelab.se +## CHANGELOG + +- [CHANGELOG](https://github.com/SciLifeLab/Sarek/blob/master/CHANGELOG.md) + ## Authors * [Sebastian DiLorenzo](https://github.com/Sebastian-D) diff --git a/annotate.nf b/annotate.nf index 6f33461dcc..389c647eba 100644 --- a/annotate.nf +++ b/annotate.nf @@ -37,63 +37,32 @@ kate: syntax groovy; space-indent on; indent-width 2; ================================================================================ */ -version = '2.0.0' - // Check that Nextflow version is up to date enough // try / throw / catch works for NF versions < 0.25 when this was implemented -nf_required_version = '0.25.0' try { - if( ! nextflow.version.matches(">= ${nf_required_version}") ){ + if( ! nextflow.version.matches(">= ${params.nfRequiredVersion}") ){ throw GroovyException('Nextflow version too old') } } catch (all) { log.error "====================================================\n" + - " Nextflow version ${nf_required_version} required! You are running v${workflow.nextflow.version}.\n" + + " Nextflow version ${params.nfRequiredVersion} required! 
You are running v${workflow.nextflow.version}.\n" + " Pipeline execution will continue, but things may break.\n" + " Please update Nextflow.\n" + "============================================================" } if (params.help) exit 0, helpMessage() -if (params.version) exit 0, versionMessage() -if (!isAllowedParams(params)) exit 1, "params unknown, see --help for more information" +if (!SarekUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project " -// Default params: -// Such params are overridden by command line or configuration definitions - -// No tools to annotate -params.annotateTools = '' -// No vcf to annotare -params.annotateVCF = '' -// Reports are generated -params.noReports = false -// Run Sarek in onlyQC mode -params.onlyQC = false -// outDir is current directory -params.outDir = baseDir -// Step is annotate -step = 'annotate' -// Not testing -params.test = '' -// No tools to be used -params.tools = '' -// Params are defined in config files -params.containerPath = '' -params.repository = '' -params.tag = '' - tools = params.tools ? params.tools.split(',').collect{it.trim().toLowerCase()} : [] annotateTools = params.annotateTools ? params.annotateTools.split(',').collect{it.trim().toLowerCase()} : [] annotateVCF = params.annotateVCF ? params.annotateVCF.split(',').collect{it.trim()} : [] directoryMap = defineDirectoryMap() toolList = defineToolList() -reports = !params.noReports -onlyQC = params.onlyQC -verbose = params.verbose -if (!checkParameterList(tools,toolList)) exit 1, 'Unknown tool(s), see --help for more information' +if (!SarekUtils.checkParameterList(tools,toolList)) exit 1, 'Unknown tool(s), see --help for more information' /* ================================================================================ @@ -106,7 +75,7 @@ startMessage() vcfToAnnotate = Channel.create() vcfNotToAnnotate = Channel.create() -if (step == 'annotate' && annotateVCF == []) { +if (annotateVCF == []) { Channel.empty().mix( Channel.fromPath("${params.outDir}/VariantCalling/HaplotypeCaller/*.vcf.gz") .flatten().map{vcf -> ['haplotypecaller',vcf]}, @@ -118,9 +87,10 @@ if (step == 'annotate' && annotateVCF == []) { .flatten().map{vcf -> ['mutect2',vcf]}, Channel.fromPath("${params.outDir}/VariantCalling/Strelka/*{somatic,variants}*.vcf.gz") .flatten().map{vcf -> ['strelka',vcf]} - ).choice(vcfToAnnotate, vcfNotToAnnotate) { annotateTools == [] || (annotateTools != [] && it[0] in annotateTools) ? 0 : 1 } - -} else if (step == 'annotate' && annotateTools == [] && annotateVCF != []) { + ).choice(vcfToAnnotate, vcfNotToAnnotate) { + annotateTools == [] || (annotateTools != [] && it[0] in annotateTools) ? 
0 : 1 + } +} else if (annotateTools == []) { list = "" annotateVCF.each{ list += ",${it}" } list = list.substring(1) @@ -128,8 +98,7 @@ if (step == 'annotate' && annotateVCF == []) { .map{vcf -> ['userspecified',vcf]} else vcfToAnnotate = Channel.fromPath("{$list}") .map{vcf -> ['userspecified',vcf]} - -}else exit 1, "specify only tools or files to annotate, bot both" +} else exit 1, "specify only tools or files to annotate, not both" vcfNotToAnnotate.close() @@ -138,7 +107,7 @@ vcfNotToAnnotate.close() process RunBcftoolsStats { tag {vcf} - publishDir "${params.outDir}/${directoryMap.bcftoolsStats}", mode: 'copy' + publishDir directoryMap.bcftoolsStats, mode: 'link' input: set variantCaller, file(vcf) from vcfForBCFtools @@ -146,7 +115,7 @@ process RunBcftoolsStats { output: file ("${vcf.baseName}.bcf.tools.stats.out") into bcfReport - when: reports + when: !params.noReports script: """ @@ -154,7 +123,7 @@ process RunBcftoolsStats { """ } -if (verbose) bcfReport = bcfReport.view { +if (params.verbose) bcfReport = bcfReport.view { "BCFTools stats report:\n\ File : [${it.fileName}]" } @@ -162,7 +131,7 @@ if (verbose) bcfReport = bcfReport.view { process RunSnpeff { tag {vcf} - publishDir "${params.outDir}/${directoryMap.snpeff}", mode: 'copy' + publishDir directoryMap.snpeff, mode: 'link' input: set variantCaller, file(vcf) from vcfForSnpeff @@ -189,7 +158,7 @@ process RunSnpeff { """ } -if (verbose) snpeffReport = snpeffReport.view { +if (params.verbose) snpeffReport = snpeffReport.view { "snpEff report:\n\ File : ${it.fileName}" } @@ -197,7 +166,7 @@ if (verbose) snpeffReport = snpeffReport.view { process RunVEP { tag {vcf} - publishDir "${params.outDir}/${directoryMap.vep}", mode: 'copy' + publishDir directoryMap.vep, mode: 'link' input: set variantCaller, file(vcf) from vcfForVep @@ -226,7 +195,7 @@ process RunVEP { """ } -if (verbose) vepReport = vepReport.view { +if (params.verbose) vepReport = vepReport.view { "VEP report:\n\ Files : ${it.fileName}" } @@ -237,126 +206,6 @@ if (verbose) vepReport = vepReport.view { ================================================================================ */ -def sarekMessage() { - // Display Sarek message - log.info "Sarek ~ ${version} - " + this.grabRevision() + (workflow.commitId ? 
" [${workflow.commitId}]" : "") -} - -def checkParameterExistence(it, list) { - // Check parameter existence - if (!list.contains(it)) { - println("Unknown parameter: ${it}") - return false - } - return true -} - -def checkParameterList(list, realList) { - // Loop through all parameters to check their existence and spelling - return list.every{ checkParameterExistence(it, realList) } -} - -def checkParamReturnFile(item) { - params."${item}" = params.genomes[params.genome]."${item}" - return file(params."${item}") -} - -def checkParams(it) { - // Check if params is in this given list - return it in [ - 'ac-loci', - 'acLoci', - 'annotate-tools', - 'annotate-VCF', - 'annotateTools', - 'annotateVCF', - 'build', - 'bwa-index', - 'bwaIndex', - 'call-name', - 'callName', - 'contact-mail', - 'contactMail', - 'container-path', - 'containerPath', - 'containers', - 'cosmic-index', - 'cosmic', - 'cosmicIndex', - 'dbsnp-index', - 'dbsnp', - 'docker', - 'genome_base', - 'genome-dict', - 'genome-file', - 'genome-index', - 'genome', - 'genomeDict', - 'genomeFile', - 'genomeIndex', - 'genomes', - 'help', - 'intervals', - 'known-indels-index', - 'known-indels', - 'knownIndels', - 'knownIndelsIndex', - 'max_cpus', - 'max_memory', - 'max_time', - 'no-BAMQC', - 'no-GVCF', - 'no-reports', - 'noBAMQC', - 'noGVCF', - 'noReports', - 'only-QC', - 'onlyQC', - 'out-dir', - 'outDir', - 'params', - 'project', - 'push', - 'repository', - 'run-time', - 'runTime', - 'sample-dir', - 'sample', - 'sampleDir', - 'single-CPUMem', - 'singleCPUMem', - 'singularity', - 'step', - 'tag', - 'test', - 'tools', - 'total-memory', - 'totalMemory', - 'vcflist', - 'verbose', - 'version'] -} - -def checkReferenceMap(referenceMap) { - // Loop through all the references files to check their existence - referenceMap.every { - referenceFile, fileToCheck -> - checkRefExistence(referenceFile, fileToCheck) - } -} - -def checkRefExistence(referenceFile, fileToCheck) { - if (fileToCheck instanceof List) return fileToCheck.every{ checkRefExistence(referenceFile, it) } - def f = file(fileToCheck) - // this is an expanded wildcard: we can assume all files exist - if (f instanceof List && f.size() > 0) return true - else if (!f.exists()) { - log.info "Missing references: ${referenceFile} ${fileToCheck}" - return false - } - return true -} - def checkUppmaxProject() { // check if UPPMAX project number is specified return !(workflow.profile == 'slurm' && !params.project) @@ -364,15 +213,9 @@ def checkUppmaxProject() { def defineDirectoryMap() { return [ - 'bcftoolsStats' : 'Reports/BCFToolsStats', - 'snpeff' : 'Annotation/SnpEff', - 'vep' : 'Annotation/VEP' - ] -} - -def defineStepList() { - return [ - 'annotate' + 'bcftoolsStats' : "${params.outDir}/Reports/BCFToolsStats", + 'snpeff' : "${params.outDir}/Annotation/SnpEff", + 'vep' : "${params.outDir}/Annotation/VEP" ] } @@ -433,20 +276,6 @@ def helpMessage() { log.info " you're reading it" log.info " --verbose" log.info " Adds more verbosity to workflow" - log.info " --version" - log.info " displays version number" -} - -def isAllowedParams(params) { - // Compare params to list of verified params - final test = true - params.each{ - if (!checkParams(it.toString().split('=')[0])) { - println "params ${it.toString().split('=')[0]} is unknown" - test = false - } - } - return test } def minimalInformationMessage() { @@ -457,10 +286,8 @@ def minimalInformationMessage() { log.info "Launch Dir : " + workflow.launchDir log.info "Work Dir : " + workflow.workDir log.info "Out Dir : " + params.outDir - if (step != 
'annotate') log.info "TSV file : ${tsvFile}" log.info "Genome : " + params.genome log.info "Genome_base : " + params.genome_base - log.info "Step : " + step if (tools) log.info "Tools : " + tools.join(', ') if (annotateTools) log.info "Annotate on : " + annotateTools.join(', ') if (annotateVCF) log.info "VCF files : " +annotateVCF.join(',\n ') @@ -477,19 +304,17 @@ def nextflowMessage() { log.info "N E X T F L O W ~ version ${workflow.nextflow.version} ${workflow.nextflow.build}" } +def sarekMessage() { + // Display Sarek message + log.info "Sarek - Workflow For Somatic And Germline Variations ~ ${params.version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "") +} + def startMessage() { // Display start message this.sarekMessage() this.minimalInformationMessage() } -def versionMessage() { - // Display version message - log.info "Sarek" - log.info " version : " + version - log.info workflow.commitId ? "Git info : ${workflow.repository} - ${workflow.revision} [${workflow.commitId}]" : " revision : " + this.grabRevision() -} - workflow.onComplete { // Display complete message this.nextflowMessage() diff --git a/buildContainers.nf b/buildContainers.nf index 23d0bc17c4..380e6025e8 100644 --- a/buildContainers.nf +++ b/buildContainers.nf @@ -7,7 +7,7 @@ kate: syntax groovy; space-indent on; indent-width 2; ================================================================================ = S A R E K = ================================================================================ -New Germline (+ Somatic) Analysis Workflow. Started March 2016. + New Germline (+ Somatic) Analysis Workflow. Started March 2016. -------------------------------------------------------------------------------- @Authors Sebastian DiLorenzo [@Sebastian-D] @@ -37,45 +37,24 @@ New Germline (+ Somatic) Analysis Workflow. Started March 2016. ================================================================================ */ -version = '2.0.0' - // Check that Nextflow version is up to date enough // try / throw / catch works for NF versions < 0.25 when this was implemented -nf_required_version = '0.25.0' try { - if( ! nextflow.version.matches(">= ${nf_required_version}") ){ + if( ! nextflow.version.matches(">= ${params.nfRequiredVersion}") ){ throw GroovyException('Nextflow version too old') } } catch (all) { log.error "====================================================\n" + - " Nextflow version ${nf_required_version} required! You are running v${workflow.nextflow.version}.\n" + + " Nextflow version ${params.nfRequiredVersion} required! You are running v${workflow.nextflow.version}.\n" + " Pipeline execution will continue, but things may break.\n" + " Please update Nextflow.\n" + "============================================================" } if (params.help) exit 0, helpMessage() -if (params.version) exit 0, versionMessage() -if (!isAllowedParams(params)) exit 1, "params unknown, see --help for more information" +if (!SarekUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! 
Use --project " -// Default params: -// Such params are overridden by command line or configuration definitions - -// containerPath is current Directory -params.containerPath = "${baseDir}" -// all containers to be build -params.containers = 'all' -// Docker will not be used -params.docker = false -// Containers will not be pushed on DockerHub -params.push = false -// DockerHub repository is maxulysse -// TODO Change to a SciLifeLab repository -params.repository = 'maxulysse' -// Singularity will not be used -params.singularity = false - // Define containers to handle (build/push or pull) containersList = defineContainersList() containers = params.containers.split(',').collect {it.trim()} @@ -84,12 +63,6 @@ containers = containers == ['all'] ? containersList : containers // push only to DockerHub, so only when using Docker push = params.docker && params.push ? true : false -// by default the tag will be the current version -tag = params.tag ? params.tag : version - -// to simplify verbose mode -verbose = params.verbose - if (!params.docker && !params.singularity) exit 1, 'No container technology choosed, specify --docker or --singularity, see --help for more information' if (!checkContainers(containers,containersList)) exit 1, 'Unknown container(s), see --help for more information' @@ -106,7 +79,7 @@ dockerContainers = containers singularityContainers = containers process BuildDockerContainers { - tag {"${params.repository}/${container}:${tag}"} + tag {"${params.repository}/${container}:${params.tag}"} input: val container from dockerContainers @@ -118,16 +91,16 @@ process BuildDockerContainers { script: """ - docker build -t ${params.repository}/${container}:${tag} ${baseDir}/containers/${container}/. + docker build -t ${params.repository}/${container}:${params.tag} ${baseDir}/containers/${container}/. """ } -if (verbose) containersBuilt = containersBuilt.view { - "Docker container: ${params.repository}/${it}:${tag} built." +if (params.verbose) containersBuilt = containersBuilt.view { + "Docker container: ${params.repository}/${it}:${params.tag} built." } process PullSingularityContainers { - tag {"${params.repository}/${container}:${tag}"} + tag {"${params.repository}/${container}:${params.tag}"} publishDir "${params.containerPath}", mode: 'move' @@ -135,22 +108,22 @@ process PullSingularityContainers { val container from singularityContainers output: - file("${container}-${tag}.img") into imagePulled + file("${container}-${params.tag}.img") into imagePulled when: params.singularity script: """ - singularity pull --name ${container}-${tag}.img docker://${params.repository}/${container}:${tag} + singularity pull --name ${container}-${params.tag}.img docker://${params.repository}/${container}:${params.tag} """ } -if (verbose) imagePulled = imagePulled.view { +if (params.verbose) imagePulled = imagePulled.view { "Singularity image: ${it.fileName} pulled." } process PushDockerContainers { - tag {params.repository + "/" + container + ":" + tag} + tag {params.repository + "/" + container + ":" + params.tag} input: val container from containersBuilt @@ -162,12 +135,12 @@ process PushDockerContainers { script: """ - docker push ${params.repository}/${container}:${tag} + docker push ${params.repository}/${container}:${params.tag} """ } -if (verbose) containersPushed = containersPushed.view { - "Docker container: ${params.repository}/${it}:${tag} pushed." +if (params.verbose) containersPushed = containersPushed.view { + "Docker container: ${params.repository}/${it}:${params.tag} pushed." 
} /* @@ -176,11 +149,6 @@ if (verbose) containersPushed = containersPushed.view { ================================================================================ */ -def sarekMessage() { - // Display Sarek message - log.info "Sarek ~ ${version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "") -} - def checkContainerExistence(container, list) { try {assert list.contains(container)} catch (AssertionError ae) { @@ -199,63 +167,16 @@ def checkContainers(containers, containersList) { return containerExists ? true : false } -def checkParams(it) { - // Check if params is in this given list - return it in [ - 'annotate-tools', - 'annotate-VCF', - 'annotateTools', - 'annotateVCF', - 'build', - 'call-name', - 'callName', - 'contact-mail', - 'contactMail', - 'container-path', - 'containerPath', - 'containers', - 'docker', - 'genome_base', - 'genome', - 'genomes', - 'help', - 'max_cpus', - 'max_memory', - 'max_time', - 'no-GVCF', - 'no-reports', - 'noGVCF', - 'noReports', - 'only-QC', - 'onlyQC', - 'out-dir', - 'outDir', - 'params', - 'project', - 'push', - 'repository', - 'sample-dir', - 'sample', - 'sampleDir', - 'single-CPUMem', - 'singleCPUMem', - 'singularity', - 'step', - 'tag', - 'test', - 'tools', - 'total-memory', - 'totalMemory', - 'vcflist', - 'verbose', - 'version'] -} - def checkUppmaxProject() { // check if UPPMAX project number is specified return !(workflow.profile == 'slurm' && !params.project) } +def grabRevision() { + // Return the same string executed from github or not + return workflow.revision ?: workflow.commitId ?: workflow.scriptId.substring(0,10) +} + def defineContainersList(){ // Return list of authorized containers return [ @@ -278,11 +199,6 @@ def defineContainersList(){ ] } -def grabRevision() { - // Return the same string executed from github or not - return workflow.revision ?: workflow.commitId ?: workflow.scriptId.substring(0,10) -} - def helpMessage() { // Display help message this.sarekMessage() @@ -309,21 +225,7 @@ def helpMessage() { log.info " --containerPath: Select where to download images" log.info " Default: \$PWD" log.info " --tag`: Choose the tag for the containers" - log.info " Default (version number): " + version - log.info " --version" - log.info " displays version number and more informations" -} - -def isAllowedParams(params) { - // Compare params to list of verified params - final test = true - params.each{ - if (!checkParams(it.toString().split('=')[0])) { - println "params ${it.toString().split('=')[0]} is unknown" - test = false - } - } - return test + log.info " Default (version number): " + params.version } def minimalInformationMessage() { @@ -343,19 +245,17 @@ def nextflowMessage() { log.info "N E X T F L O W ~ version ${workflow.nextflow.version} ${workflow.nextflow.build}" } +def sarekMessage() { + // Display Sarek message + log.info "Sarek ~ ${params.version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "") +} + def startMessage() { // Display start message this.sarekMessage() this.minimalInformationMessage() } -def versionMessage() { - // Display version message - log.info "Sarek - Workflow For Somatic And Germline Variations" - log.info " version : " + version - log.info workflow.commitId ? 
"Git info : ${workflow.repository} - ${workflow.revision} [${workflow.commitId}]" : " revision : " + this.grabRevision() -} - workflow.onComplete { // Display complete message this.nextflowMessage() diff --git a/buildReferences.nf b/buildReferences.nf index f525bb9a8a..4bb4b40465 100644 --- a/buildReferences.nf +++ b/buildReferences.nf @@ -7,7 +7,7 @@ kate: syntax groovy; space-indent on; indent-width 2; ================================================================================ = S A R E K = ================================================================================ -New Germline (+ Somatic) Analysis Workflow. Started March 2016. + New Germline (+ Somatic) Analysis Workflow. Started March 2016. -------------------------------------------------------------------------------- @Authors Sebastian DiLorenzo [@Sebastian-D] @@ -40,71 +40,32 @@ New Germline (+ Somatic) Analysis Workflow. Started March 2016. ================================================================================ */ -version = '2.0.0' - // Check that Nextflow version is up to date enough // try / throw / catch works for NF versions < 0.25 when this was implemented -nf_required_version = '0.25.0' try { - if( ! nextflow.version.matches(">= ${nf_required_version}") ){ + if( ! nextflow.version.matches(">= ${params.nfRequiredVersion}") ){ throw GroovyException('Nextflow version too old') } } catch (all) { log.error "====================================================\n" + - " Nextflow version ${nf_required_version} required! You are running v${workflow.nextflow.version}.\n" + + " Nextflow version ${params.nfRequiredVersion} required! You are running v${workflow.nextflow.version}.\n" + " Pipeline execution will continue, but things may break.\n" + " Please update Nextflow.\n" + "============================================================" } if (params.help) exit 0, helpMessage() -if (params.version) exit 0, versionMessage() -if (!isAllowedParams(params)) exit 1, "params unknown, see --help for more information" +if (!SarekUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project " -// Default params: -// Such params are overridden by command line or configuration definitions - -// No download of reference source files -params.download = false -// outDir is References/${params.genome} -params.outDir = "${baseDir}/References/${params.genome}" -// refDir is empty -params.refDir = '' - -verbose = params.verbose -download = params.download ? 
true : false - -if (!download && params.refDir == "" ) exit 1, "No --refDir specified" -if (download && params.refDir != "" ) exit 1, "No need to specify --refDir" - -if (params.genome == "smallGRCh37") { - referencesFiles = - [ - '1000G_phase1.indels.b37.small.vcf.gz', - '1000G_phase3_20130502_SNP_maf0.3.small.loci', - 'b37_cosmic_v74.noCHR.sort.4.1.small.vcf.gz', - 'dbsnp_138.b37.small.vcf.gz', - 'human_g1k_v37_decoy.small.fasta.gz', - 'Mills_and_1000G_gold_standard.indels.b37.small.vcf.gz', - 'small.intervals' - ] -} else if (params.genome == "GRCh37") { - referencesFiles = - [ - '1000G_phase1.indels.b37.vcf.gz', - '1000G_phase3_20130502_SNP_maf0.3.loci.tar.bz2', - 'GRCh37_Cosmic_v83.vcf.tar.bz2', - 'dbsnp_138.b37.vcf.gz', - 'human_g1k_v37_decoy.fasta.gz', - 'Mills_and_1000G_gold_standard.indels.b37.vcf.gz', - 'wgs_calling_regions.grch37.list' - ] -} else exit 1, "Can't build this reference genome" +if (!params.download && params.refDir == "" ) exit 1, "No --refDir specified" +if (params.download && params.refDir != "" ) exit 1, "No need to specify --refDir" + +ch_referencesFiles = defReferencesFiles(params.genome) -if (download && params.genome != "smallGRCh37") exit 1, "Not possible to download ${params.genome} references files" +if (params.download && params.genome != "smallGRCh37") exit 1, "Not possible to download ${params.genome} references files" -if (!download) referencesFiles.each{checkFile(params.refDir + "/" + it)} +if (!params.download) ch_referencesFiles.each{checkFile(params.refDir + "/" + it)} /* ================================================================================ @@ -115,176 +76,171 @@ if (!download) referencesFiles.each{checkFile(params.refDir + "/" + it)} startMessage() process ProcessReference { - tag download ? {"Download: " + reference} : {"Link: " + reference} + tag params.download ? {"Download: " + f_reference} : {"Link: " + f_reference} input: - val(reference) from referencesFiles + val(f_reference) from ch_referencesFiles output: - file(reference) into processedFiles + file(f_reference) into ch_processedFiles script: - if (download) + if (params.download) """ - wget https://github.com/szilvajuhos/smallRef/raw/master/${reference} + wget https://github.com/szilvajuhos/smallRef/raw/master/${f_reference} """ else """ - ln -s ${params.refDir}/${reference} . + ln -s ${params.refDir}/${f_reference} . """ } -if (verbose) processedFiles = processedFiles.view { +if (params.verbose) ch_processedFiles = ch_processedFiles.view { "Files preprocessed : ${it.fileName}" } -compressedfiles = Channel.create() -notCompressedfiles = Channel.create() +ch_compressedfiles = Channel.create() +ch_notCompressedfiles = Channel.create() -processedFiles - .choice(compressedfiles, notCompressedfiles) {it =~ ".(gz|tar.bz2)" ? 0 : 1} +ch_processedFiles + .choice(ch_compressedfiles, ch_notCompressedfiles) {it =~ ".(gz|tar.bz2)" ? 
0 : 1} process DecompressFile { - tag {reference} + tag {f_reference} input: - file(reference) from compressedfiles + file(f_reference) from ch_compressedfiles output: - file("*.{vcf,fasta,loci}") into decompressedFiles + file("*.{vcf,fasta,loci}") into ch_decompressedFiles script: - realReference="readlink ${reference}" - if (reference =~ ".gz") + realReferenceFile="readlink ${f_reference}" + if (f_reference =~ ".gz") """ - gzip -d -c \$(${realReference}) > ${reference.baseName} + gzip -d -c \$(${realReferenceFile}) > ${f_reference.baseName} """ - else if (reference =~ ".tar.bz2") + else if (f_reference =~ ".tar.bz2") """ - tar xvjf \$(${realReference}) + tar xvjf \$(${realReferenceFile}) """ } -if (verbose) decompressedFiles = decompressedFiles.view { +if (params.verbose) ch_decompressedFiles = ch_decompressedFiles.view { "Files decomprecessed: ${it.fileName}" } -fastaFile = Channel.create() -otherFiles = Channel.create() -vcfFiles = Channel.create() +ch_fastaFile = Channel.create() +ch_otherFiles = Channel.create() +ch_vcfFiles = Channel.create() -decompressedFiles - .choice(fastaFile, vcfFiles, otherFiles) { +ch_decompressedFiles + .choice(ch_fastaFile, ch_vcfFiles, ch_otherFiles) { it =~ ".fasta" ? 0 : it =~ ".vcf" ? 1 : 2} -notCompressedfiles - .mix(otherFiles) +(ch_fastaFile, ch_fastaFileToKeep) = ch_fastaFile.into(2) +(ch_vcfFiles, ch_vcfFilesToKeep) = ch_vcfFiles.into(2) + +ch_notCompressedfiles + .mix(ch_otherFiles, ch_fastaFileToKeep, ch_vcfFilesToKeep) .collectFile(storeDir: params.outDir) -fastaForBWA = Channel.create() -fastaForPicard = Channel.create() -fastaForSAMTools = Channel.create() +ch_fastaForBWA = Channel.create() +ch_fastaForPicard = Channel.create() +ch_fastaForSAMTools = Channel.create() -fastaFile.into(fastaForBWA,fastaForPicard,fastaForSAMTools) +ch_fastaFile.into(ch_fastaForBWA,ch_fastaForPicard,ch_fastaForSAMTools) process BuildBWAindexes { - tag {reference} + tag {f_reference} - publishDir params.outDir, mode: 'copy' + publishDir params.outDir, mode: 'link' input: - file(reference) from fastaForBWA + file(f_reference) from ch_fastaForBWA output: - file(reference) into fastaFileToKeep file("*.{amb,ann,bwt,pac,sa}") into bwaIndexes script: """ - bwa index ${reference} + bwa index ${f_reference} """ } -if (verbose) fastaFileToKeep.view { - "Fasta File : ${it.fileName}" -} -if (verbose) bwaIndexes.flatten().view { +if (params.verbose) bwaIndexes.flatten().view { "BWA index : ${it.fileName}" } process BuildPicardIndex { - tag {reference} + tag {f_reference} - publishDir params.outDir, mode: 'copy' + publishDir params.outDir, mode: 'link' input: - file(reference) from fastaForPicard + file(f_reference) from ch_fastaForPicard output: - file("*.dict") into picardIndex + file("*.dict") into ch_picardIndex script: """ java -Xmx${task.memory.toGiga()}g \ -jar \$PICARD_HOME/picard.jar \ CreateSequenceDictionary \ - REFERENCE=${reference} \ - OUTPUT=${reference.baseName}.dict + REFERENCE=${f_reference} \ + OUTPUT=${f_reference.baseName}.dict """ } -if (verbose) picardIndex.view { +if (params.verbose) ch_picardIndex.view { "Picard index : ${it.fileName}" } process BuildSAMToolsIndex { - tag {reference} + tag {f_reference} - publishDir params.outDir, mode: 'copy' + publishDir params.outDir, mode: 'link' input: - file(reference) from fastaForSAMTools + file(f_reference) from ch_fastaForSAMTools output: - file("*.fai") into samtoolsIndex + file("*.fai") into ch_samtoolsIndex script: """ - samtools faidx ${reference} + samtools faidx ${f_reference} """ } -if (verbose) 
samtoolsIndex.view { +if (params.verbose) ch_samtoolsIndex.view { "SAMTools index : ${it.fileName}" } process BuildVCFIndex { - tag {reference} + tag {f_reference} - publishDir params.outDir, mode: 'copy' + publishDir params.outDir, mode: 'link' input: - file(reference) from vcfFiles + file(f_reference) from ch_vcfFiles output: - file(reference) into vcfIndexed - file("*.idx") into vcfIndex + file("${f_reference}.idx") into ch_vcfIndex script: """ - \$IGVTOOLS_HOME/igvtools index ${reference} + \$IGVTOOLS_HOME/igvtools index ${f_reference} """ } -if (verbose) vcfIndexed.view { - "VCF indexed : ${it.fileName}" -} -if (verbose) vcfIndex.view { +if (params.verbose) ch_vcfIndex.view { "VCF index : ${it.fileName}" } @@ -294,11 +250,6 @@ if (verbose) vcfIndex.view { ================================================================================ */ -def sarekMessage() { - // Display Sarek message - log.info "Sarek - Workflow To Find Somatic And Germline Variations ~ ${version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "") -} - def checkFile(it) { // Check file existence final f = file(it) @@ -306,68 +257,35 @@ def checkFile(it) { return true } -def checkParams(it) { - // Check if params is in this given list - return it in [ - 'annotate-tools', - 'annotate-VCF', - 'annotateTools', - 'annotateVCF', - 'build', - 'call-name', - 'callName', - 'contact-mail', - 'contactMail', - 'container-path', - 'containerPath', - 'containers', - 'docker', - 'download', - 'genome_base', - 'genome', - 'genomes', - 'help', - 'max_cpus', - 'max_memory', - 'max_time', - 'no-GVCF', - 'no-reports', - 'noGVCF', - 'noReports', - 'only-QC', - 'onlyQC', - 'out-dir', - 'outDir', - 'params', - 'project', - 'push', - 'ref-dir', - 'refDir', - 'repository', - 'run-time', - 'runTime', - 'sample-dir', - 'sample', - 'sampleDir', - 'single-CPUMem', - 'singleCPUMem', - 'singularity', - 'step', - 'tag', - 'test', - 'tools', - 'total-memory', - 'totalMemory', - 'vcflist', - 'verbose', - 'version'] -} - def checkUppmaxProject() { // check if UPPMAX project number is specified return !(workflow.profile == 'slurm' && !params.project) } +def defReferencesFiles(genome) { + if (genome == "smallGRCh37") { + return [ + '1000G_phase1.indels.b37.small.vcf.gz', + '1000G_phase3_20130502_SNP_maf0.3.small.loci', + 'b37_cosmic_v74.noCHR.sort.4.1.small.vcf.gz', + 'dbsnp_138.b37.small.vcf.gz', + 'human_g1k_v37_decoy.small.fasta.gz', + 'Mills_and_1000G_gold_standard.indels.b37.small.vcf.gz', + 'small.intervals' + ] + } else if (genome == "GRCh37") { + return [ + '1000G_phase1.indels.b37.vcf.gz', + '1000G_phase3_20130502_SNP_maf0.3.loci.tar.bz2', + 'GRCh37_Cosmic_v83.vcf.tar.bz2', + 'dbsnp_138.b37.vcf.gz', + 'human_g1k_v37_decoy.fasta.gz', + 'Mills_and_1000G_gold_standard.indels.b37.vcf.gz', + 'wgs_calling_regions.grch37.list' + ] + } else exit 1, "Can't build this reference genome" +} + def grabRevision() { // Return the same string executed from github or not return workflow.revision ?: workflow.commitId ?: workflow.scriptId.substring(0,10) @@ -394,20 +312,6 @@ def helpMessage() { log.info " smallGRCh37" log.info " --help" log.info " you're reading it" - log.info " --version" - log.info " displays version number" -} - -def isAllowedParams(params) { - // Compare params to list of verified params - final test = true - params.each{ - if (!checkParams(it.toString().split('=')[0])) { - println "params ${it.toString().split('=')[0]} is unknown" - test = false - } - } - return test } def minimalInformationMessage() { @@ 
-418,8 +322,8 @@ def minimalInformationMessage() {
   log.info "Work Dir : " + workflow.workDir
   log.info "Out Dir : " + params.outDir
   log.info "Genome : " + params.genome
-  log.info "Containers :"
-  if (params.repository) log.info " Repository : ${params.repository}"
+  log.info "Containers"
+  if (params.repository) log.info " Repository : " + params.repository
   else log.info " ContainerPath: " + params.containerPath
   log.info " Tag : " + params.tag
 }
@@ -429,19 +333,17 @@ def nextflowMessage() {
   // Display Nextflow message
   log.info "N E X T F L O W ~ version ${workflow.nextflow.version} ${workflow.nextflow.build}"
 }

+def sarekMessage() {
+  // Display Sarek message
+  log.info "Sarek - Workflow For Somatic And Germline Variations ~ ${params.version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "")
+}
+
 def startMessage() {
   // Display start message
   this.sarekMessage()
   this.minimalInformationMessage()
 }

-def versionMessage() {
-  // Display version message
-  log.info "Sarek - Workflow For Somatic And Germline Variations"
-  log.info " version : " + version
-  log.info workflow.commitId ? "Git info : ${workflow.repository} - ${workflow.revision} [${workflow.commitId}]" : " revision : " + this.grabRevision()
-}
-
 workflow.onComplete {
   // Display complete message
   this.nextflowMessage()
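The new `configuration/base.config` below consolidates the defaults that were previously scattered across the scripts; any of them can be overridden per run with the matching command-line flag. A sketch with illustrative values (the TSV name, output path and tag are examples only):

```bash
# Override base.config defaults at launch time
nextflow run SciLifeLab/Sarek/main.nf --sample mysamples.tsv \
  --genome GRCh37 --tag 2.0.0 --outDir /path/to/results -profile docker
```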
diff --git a/configuration/base.config b/configuration/base.config
index 9ec7de20b0..af94a4545d 100644
--- a/configuration/base.config
+++ b/configuration/base.config
@@ -10,17 +10,43 @@ vim: syntax=groovy
 includeConfig 'genomes.config'

 wf_repository = 'maxulysse'
-wf_tag = 'latest'

 params {
-  genome = 'GRCh38'
-  help = false
-  outDir = '.'
-  project = ''
-  repository = wf_repository
-  tag = wf_tag
-  verbose = false
-  version = false
+  // set up default params
+  annotateTools = '' // Tools to annotate by annotate.nf
+  annotateVCF = '' // Files to annotate by annotate.nf
+  containerPath = '.' // Path to Singularity images
+  containers = '.' // List of containers to build in buildContainers.nf
+  docker = false // Don't use Docker to build containers in buildContainers.nf
+  download = false // Don't download reference files in buildReferences.nf
+  explicitBqsrNeeded = true // Enable recalibration in main.nf
+  genome = 'GRCh38' // Default reference genome is GRCh38
+  genome_base = '' // Path to the reference files
+  help = false // Don't give help information
+  max_cpus = 16 // Base specifications
+  max_memory = 128.GB // Base specifications
+  max_time = 240.h // Base specifications
+  more = false // Don't give version information
+  nfRequiredVersion = '0.25.0' // Minimum version of Nextflow required
+  noBAMQC = false // Use BAMQC
+  noGVCF = false // HaplotypeCaller will output gVCF as well
+  noReports = false // Reports are made by default
+  nucleotidesPerSecond = 1000.0 // Used to estimate interval size by default
+  onlyQC = false // All processes will be run, not only the QC tools
+  outDir = "${PWD}" // Path to output directory
+  project = '' // UPPMAX project number
+  push = false // Don't push containers to DockerHub
+  refDir = '' // Path to the references to build
+  repository = wf_repository // DockerHub containers repository
+  sample = '' // Sample file(s) in TSV format
+  sampleDir = '' // Samples directory (for Germline only)
+  singularity = false // Don't use Singularity to build containers in buildContainers.nf
+  step = 'mapping' // Default step is mapping
+  tag = 'latest' // Default tag is latest, to be overridden by --tag
+  test = false // Not testing by default
+  tools = '' // List of tools to use
+  verbose = false // Enable for more verbose information
+  version = '2.0.0' // Workflow version
 }

 process {
diff --git a/configuration/docker.config b/configuration/docker.config
index fc1de91f7e..98a8b79578 100644
--- a/configuration/docker.config
+++ b/configuration/docker.config
@@ -12,4 +12,5 @@ vim: syntax=groovy
 docker {
   enabled = true
   fixOwnership = true
+  runOptions = "-u \$(id -u):\$(id -g)"
 }
diff --git a/configuration/singularity-path.config b/configuration/singularity-path.config
index 2ec1fba63d..289da406d0 100644
--- a/configuration/singularity-path.config
+++ b/configuration/singularity-path.config
@@ -12,11 +12,6 @@ vim: syntax=groovy
 singularity {
   enabled = true
-  runOptions = "--bind /scratch"
-}
-
-params {
-  containerPath='containers'
 }

 process {
diff --git a/configuration/travis.config b/configuration/travis.config
index ef7666037f..dded8464e1 100644
--- a/configuration/travis.config
+++ b/configuration/travis.config
@@ -11,10 +11,13 @@ vim: syntax=groovy
 params {
   genome = 'smallGRCh37'
-  genome_base = params.genome == 'GRCh37' ? '/sw/data/uppnex/ToolBox/ReferenceAssemblies/hg38make/bundle/2.8/b37' : params.genome == 'GRCh38' ? '/sw/data/uppnex/ToolBox/hg38bundle' : 'References/smallGRCh37'
+  genome_base = 'References/smallGRCh37'
+  max_cpus = 2
+  max_memory = 7.GB
+  max_time = 1.h
 }

 process {
-  cpus = 2
-  memory = 7.GB
+  cpus = params.max_cpus
+  memory = params.max_memory
 }
diff --git a/configuration/uppmax-slurm.config b/configuration/uppmax-slurm.config
index 1ac1af0c31..d80d400abe 100644
--- a/configuration/uppmax-slurm.config
+++ b/configuration/uppmax-slurm.config
@@ -20,6 +20,7 @@ process {
   executor = 'slurm'
   memory = 110.GB
   queue = 'node'
+  scratch = true
   time = 48.h
   errorStrategy = {task.exitStatus == 143 ?
'retry' : 'terminate'} diff --git a/containers/fastqc/Dockerfile b/containers/fastqc/Dockerfile index 567fba22ef..85237cb0d1 100644 --- a/containers/fastqc/Dockerfile +++ b/containers/fastqc/Dockerfile @@ -22,6 +22,3 @@ RUN \ && chmod 755 /opt/FastQC/fastqc \ && ln -s /opt/FastQC/fastqc /usr/local/bin/fastqc \ && rm fastqc_v${FASTQC_VERSION}.zip - -# Create UPPMAX directories -RUN mkdir /pica /proj /scratch /sw diff --git a/containers/freebayes/Dockerfile b/containers/freebayes/Dockerfile index f93335064d..efacdde8ee 100644 --- a/containers/freebayes/Dockerfile +++ b/containers/freebayes/Dockerfile @@ -28,6 +28,3 @@ RUN \ && make install \ && cd .. \ && rm -rf freebayes - -# Create UPPMAX directories -RUN mkdir /pica /proj /scratch /sw diff --git a/containers/gatk/Dockerfile b/containers/gatk/Dockerfile index a13300bf9a..ae8ad2ed0c 100644 --- a/containers/gatk/Dockerfile +++ b/containers/gatk/Dockerfile @@ -6,6 +6,3 @@ LABEL \ maintainer="maxime.garcia@scilifelab.se" ENV GATK_HOME=/usr - -# Create UPPMAX directories -RUN mkdir /pica /proj /scratch /sw diff --git a/containers/igvtools/Dockerfile b/containers/igvtools/Dockerfile index fb96d256fd..1629b64cf0 100644 --- a/containers/igvtools/Dockerfile +++ b/containers/igvtools/Dockerfile @@ -23,6 +23,3 @@ RUN \ && unzip igvtools_${IGVTOOLS_VERSION}.zip \ && rm igvtools_${IGVTOOLS_VERSION}.zip \ && mv IGVTools $IGVTOOLS_HOME - -# Create UPPMAX directories -RUN mkdir /pica /proj /scratch /sw diff --git a/containers/multiqc/Dockerfile b/containers/multiqc/Dockerfile index 0411767998..1e7325cf42 100644 --- a/containers/multiqc/Dockerfile +++ b/containers/multiqc/Dockerfile @@ -1,9 +1,6 @@ -FROM ewels/multiqc:v1.4 +FROM ewels/multiqc:v1.5 LABEL \ author="Maxime Garcia" \ description="MultiQC image used in Sarek" \ maintainer="maxime.garcia@scilifelab.se" - -# Create UPPMAX directories -RUN mkdir /pica /proj /scratch /sw diff --git a/containers/mutect1/Dockerfile b/containers/mutect1/Dockerfile index 88f38bbf9c..7068fb963c 100644 --- a/containers/mutect1/Dockerfile +++ b/containers/mutect1/Dockerfile @@ -23,6 +23,3 @@ RUN \ && unzip muTect-${MUTECT_VERSION}-bin.zip -d ${MUTECT_HOME} \ && rm muTect-${MUTECT_VERSION}-bin.zip \ && mv ${MUTECT_HOME}/muTect-${MUTECT_VERSION}.jar ${MUTECT_HOME}/muTect.jar - -# Create UPPMAX directories -RUN mkdir /pica /proj /scratch /sw diff --git a/containers/picard/Dockerfile b/containers/picard/Dockerfile index 5deafe3d6a..8a558102d5 100644 --- a/containers/picard/Dockerfile +++ b/containers/picard/Dockerfile @@ -23,6 +23,3 @@ RUN \ && unzip picard-tools-${PICARD_VERSION}.zip \ && mv picard-tools-${PICARD_VERSION} ${PICARD_HOME} \ && rm picard-tools-${PICARD_VERSION}.zip - -# Create UPPMAX directories -RUN mkdir /pica /proj /scratch /sw diff --git a/containers/qualimap/Dockerfile b/containers/qualimap/Dockerfile index c4be0f8249..4702b84b73 100644 --- a/containers/qualimap/Dockerfile +++ b/containers/qualimap/Dockerfile @@ -23,6 +23,3 @@ RUN \ && unzip qualimap_v${QUALIMAP_VERSION}.zip -d /opt/ \ && rm qualimap_v${QUALIMAP_VERSION}.zip \ && mv /opt/qualimap_v${QUALIMAP_VERSION} /opt/qualimap - -# Create UPPMAX directories -RUN mkdir /pica /proj /scratch /sw diff --git a/containers/r-base/Dockerfile b/containers/r-base/Dockerfile index a2263230c3..2f38953fa6 100644 --- a/containers/r-base/Dockerfile +++ b/containers/r-base/Dockerfile @@ -7,6 +7,3 @@ maintainer="maxime.garcia@scilifelab.se" # Install libraries RUN echo "r <- getOption('repos'); r['CRAN'] <- 'http://cran.us.r-project.org'; options(repos = r);" > 
~/.Rprofile \
  && Rscript -e "install.packages('RColorBrewer')"
-
-# Create UPPMAX directories
-RUN mkdir /pica /proj /scratch /sw
diff --git a/containers/runallelecount/Dockerfile b/containers/runallelecount/Dockerfile
index 5b7bf6c1b8..ac4b26494a 100644
--- a/containers/runallelecount/Dockerfile
+++ b/containers/runallelecount/Dockerfile
@@ -32,6 +32,3 @@ RUN \
   && cd /opt/alleleCount-${ALLELECOUNT_VERSION} \
   && ./setup.sh /opt/ \
   && rm /opt/v${ALLELECOUNT_VERSION}.tar.gz
-
-# Create UPPMAX directories
-RUN mkdir /pica /proj /scratch /sw
diff --git a/containers/sarek/build.sh b/containers/sarek/build.sh
index 67aa36275d..fbe57b5aee 100755
--- a/containers/sarek/build.sh
+++ b/containers/sarek/build.sh
@@ -101,6 +101,3 @@ apt-get remove -y \
   zlib1g-dev
 apt-get clean
 rm -rf /build /var/lib/apt/lists/* /opt/get-pip.py
-
-# Create UPPMAX directories
-mkdir /pica /proj /scratch /sw
diff --git a/containers/snpeff/Dockerfile b/containers/snpeff/Dockerfile
index 4181e4bf69..bcd7e8f5c5 100644
--- a/containers/snpeff/Dockerfile
+++ b/containers/snpeff/Dockerfile
@@ -27,6 +27,3 @@ RUN \
   http://downloads.sourceforge.net/project/snpeff/snpEff_v${SNPEFF_VERSION}_core.zip \
   && unzip snpEff_v${SNPEFF_VERSION}_core.zip -d /opt/ \
   && rm snpEff_v${SNPEFF_VERSION}_core.zip
-
-# Create UPPMAX directories
-RUN mkdir /pica /proj /scratch /sw
diff --git a/containers/vepgrch37/Dockerfile b/containers/vepgrch37/Dockerfile
index af8108b89c..42e4cdacb9 100644
--- a/containers/vepgrch37/Dockerfile
+++ b/containers/vepgrch37/Dockerfile
@@ -1,4 +1,4 @@
-FROM willmclaren/ensembl-vep:release_90.6
+FROM willmclaren/ensembl-vep:release_92

 LABEL \
   author="Maxime Garcia" \
@@ -8,7 +8,7 @@ LABEL \
 # Setup ENV variables
 ENV \
   GENOME=GRCh37 \
-  VEP_VERSION=90
+  VEP_VERSION=91

 # Download Genome
 RUN \
   ftp://ftp.ensembl.org/pub/release-${VEP_VERSION}/variation/VEP/homo_sapiens_vep_${VEP_VERSION}_${GENOME}.tar.gz \
   && tar xzf homo_sapiens_vep_${VEP_VERSION}_${GENOME}.tar.gz \
   && rm homo_sapiens_vep_${VEP_VERSION}_${GENOME}.tar.gz
-
-# Create UPPMAX directories
-USER root
-RUN mkdir /pica /proj /scratch /sw
diff --git a/containers/vepgrch38/Dockerfile b/containers/vepgrch38/Dockerfile
index 03334d11c8..04185c8f13 100644
--- a/containers/vepgrch38/Dockerfile
+++ b/containers/vepgrch38/Dockerfile
@@ -1,4 +1,4 @@
-FROM willmclaren/ensembl-vep:release_90.6
+FROM willmclaren/ensembl-vep:release_92

 LABEL \
   author="Maxime Garcia" \
@@ -8,7 +8,7 @@ LABEL \
 # Setup ENV variables
 ENV \
   GENOME=GRCh38 \
-  VEP_VERSION=90
+  VEP_VERSION=91

 # Download Genome
 RUN \
   ftp://ftp.ensembl.org/pub/release-${VEP_VERSION}/variation/VEP/homo_sapiens_vep_${VEP_VERSION}_${GENOME}.tar.gz \
   && tar xzf homo_sapiens_vep_${VEP_VERSION}_${GENOME}.tar.gz \
   && rm homo_sapiens_vep_${VEP_VERSION}_${GENOME}.tar.gz
-
-# Create UPPMAX directories
-USER root
-RUN mkdir /pica /proj /scratch /sw
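With the VEP containers rebuilt on `release_92` and `params.tag` now defaulting to `latest`, a manually pulled Singularity image follows the naming used by the `PullSingularityContainers` process shown earlier; a sketch using the default repository and tag from `base.config`:

```bash
# Mirrors: singularity pull --name <container>-<tag>.img docker://<repository>/<container>:<tag>
singularity pull --name vepgrch38-latest.img docker://maxulysse/vepgrch38:latest
```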
diff --git a/doc/CONFIG.md b/doc/CONFIG.md
index 5e570e1694..368324d1c4 100644
--- a/doc/CONFIG.md
+++ b/doc/CONFIG.md
@@ -10,46 +10,44 @@ We provides several configuration files and profiles for Sarek. The standard one
 Every configuration file can be modified for your own use. If you want you can specify the use of a config file using `-c `

-### [`containers.config`](../configuration/containers.config)
+### [`containers.config`](https://github.com/SciLifeLab/Sarek/blob/master/configuration/containers.config)

-Contain images for all process.
+Define Containers for all processes.
 Images will be pulled automatically.
 Use in your own profile if needed.

-### [`docker.config`](../configuration/docker.config)
+### [`docker.config`](https://github.com/SciLifeLab/Sarek/blob/master/configuration/docker.config)

-Contain Docker images for all process.
+Define Docker Containers for all processes.
 Images will be pulled automatically.
 Use in your own profile if needed.

-### [`genomes.config`](../configuration/genomes.config)
+### [`genomes.config`](https://github.com/SciLifeLab/Sarek/blob/master/configuration/genomes.config)

 Contain path to all references.
 Modify it if you want to change genome version, or the path to your references files.

-### [`singularity-path.config`](../configuration/singularity-path.config)
+### [`singularity-path.config`](https://github.com/SciLifeLab/Sarek/blob/master/configuration/singularity-path.config)

-To be used when downloading singularity containers, like on a secure UPPMAX cluster.
+Define path to Singularity Containers for all processes.
+To be used when downloading Singularity Containers, like on a secure UPPMAX cluster.
 Images will not be pulled automatically.
 You need to set them up before.

-### [`singularity.config`](../configuration/singularity.config)
+### [`singularity.config`](https://github.com/SciLifeLab/Sarek/blob/master/configuration/singularity.config)

-Contain Singularity images for all process.
+Define Singularity Containers for all processes.
 Images will be pulled automatically.
 Use in your own profile if needed.

-### [`travis.config`](../configuration/travis.config)
+### [`travis.config`](https://github.com/SciLifeLab/Sarek/blob/master/configuration/travis.config)

 To be used for Travis (2 cpus) or on small computer for testing purpose

-### [`uppmax-localhost.config`](../configuration/uppmax-localhost.config)
-
-To be used on a typical localhost on a UPPMAX cluster (16 cpus)
-
-### [`uppmax-slurm.config`](../configuration/uppmax-slurm.config)
+### [`uppmax-slurm.config`](https://github.com/SciLifeLab/Sarek/blob/master/configuration/uppmax-slurm.config)

 Slurm configuration for a UPPMAX cluster
+Will run the workflow on `/scratch` using the Nextflow [`scratch`](https://www.nextflow.io/docs/latest/process.html#scratch) directive

 ## profiles

@@ -65,19 +63,16 @@ Docker images will be pulled automatically.

 This is the default profile for use on a localhost on a UPPMAX cluster with Singularity.
 Singularity images need to be set up.

-### `download`
-
-This is the default profile for use on a localhost on a UPPMAX cluster with Singularity.
-Singularity images will be pulled automatically.
-
 ### `slurm`

 This is another profile for use on a UPPMAX cluster using the job scheduler slurm with Singularity.
+Will run the workflow on `/scratch`.
 Singularity images need to be set up.

 ### `slurmDownload`

 This is another profile for use on a UPPMAX cluster using the job scheduler slurm with Singularity.
+Will run the workflow on `/scratch`.
 Singularity images will be pulled automatically.
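To make the two slurm profiles concrete, a sketch of launching with each (the project ID and TSV path are placeholders):

```bash
# Singularity images already set up locally (e.g. on a secure cluster like bianca):
nextflow run SciLifeLab/Sarek/main.nf --sample mysample.tsv --project [PROJECT] -profile slurm
# Let Nextflow pull the Singularity images automatically (e.g. on rackham):
nextflow run SciLifeLab/Sarek/main.nf --sample mysample.tsv --project [PROJECT] -profile slurmDownload
```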
### `singularity`

diff --git a/doc/INSTALL.md b/doc/INSTALL.md
index ed0954438b..06fca77f50 100644
--- a/doc/INSTALL.md
+++ b/doc/INSTALL.md
@@ -55,10 +55,10 @@ The following tutorial explain how to run Sarek on a small dataset using a small
> nextflow run SciLifeLab/Sarek/buildReferences.nf --download --genome smallGRCh37 -profile docker

# Test Sarek on a test tiny set using Singularity
-> nextflow run SciLifeLab/Sarek --test --genome smallGRCh37 --noReports -profile singularity
+> nextflow run SciLifeLab/Sarek/main.nf --test --genome smallGRCh37 --noReports -profile singularity

# Or test Sarek on a test tiny set using Docker
-> nextflow run SciLifeLab/Sarek --test --genome smallGRCh37 --noReports -profile docker
+> nextflow run SciLifeLab/Sarek/main.nf --test --genome smallGRCh37 --noReports -profile docker
```

## Update

diff --git a/doc/INSTALL_BIANCA.md b/doc/INSTALL_BIANCA.md
index 5e67c384e1..c69e44f953 100644
--- a/doc/INSTALL_BIANCA.md
+++ b/doc/INSTALL_BIANCA.md
@@ -9,7 +9,7 @@ Sarek use Singularity containers to package all the different tools.

As `bianca` is secure, no direct download is available, so Sarek and the Singularity containers will have to be installed and updated manually.

-You can either download Sarek and the containers on your computer or on `rackham`, make an archive, and send it to `bianca` using `FileZilla` or `sftp` given your preferences.
+You can either download Sarek and the containers on your computer (you will need Nextflow and Singularity for that) or on `rackham`, make an archive, and send it to `bianca` using `FileZilla` or `sftp`, depending on your preferences.

All Reference files are already stored in `bianca`.

@@ -38,7 +38,7 @@ Wrote Sarek-[snapID].tar.gz

# To get the containers
# This script will need Singularity and Nextflow installed
-> ./scripts/do_all.sh --pull
+> ./scripts/do_all.sh --pull --tag 

# Send the containers to bianca using the same method
# They will be in the containers/ directory as .img files

@@ -81,7 +81,7 @@ The principle is to have every member of your project to be able to use the same

And then Sarek can be used with:

```bash
-> nextflow run ~/Sarek/main.nf ...
+> nextflow run ~/Sarek/main.nf -profile slurm --project [PROJECT] ...
```

## Update Sarek

diff --git a/doc/INSTALL_RACKHAM.md b/doc/INSTALL_RACKHAM.md
index 9d73715db6..360fe8e6fe 100644
--- a/doc/INSTALL_RACKHAM.md
+++ b/doc/INSTALL_RACKHAM.md
@@ -66,14 +66,14 @@ For more information, follow the [reference files documentation](REFERENCES.md).

## Use Sarek with slurm

-To use Sarek on rackham you will need to use the `slurm` profile.
+To use Sarek on rackham you will need to use the `slurmDownload` profile.

```bash
# Connect to rackham
> ssh -AX [USER]@rackham.uppmax.uu.se

# Run the workflow directly on the login node
-> nextflow run SciLifeLab/Sarek --sample [FILE.TSV] --genome [GENOME] --project [PROJECT] -profile slurm-download
+> nextflow run SciLifeLab/Sarek/main.nf --project [PROJECT] -profile slurmDownload
```

--------------------------------------------------------------------------------

diff --git a/doc/PROCESS.md b/doc/PROCESS.md
index e46b9d6808..2a20c0562d 100644
--- a/doc/PROCESS.md
+++ b/doc/PROCESS.md
@@ -1,6 +1,7 @@
# Workflow processes

-Several processes are run within the workflow. We divide them for the moment into 5 main steps:
+Several processes are run within the workflow.
+We divide them for the moment into 5 main steps:

## Preprocessing:

@@ -40,7 +41,6 @@ Several processes are run within the workflow. We divide them for the moment int
- RunFastQC - Run FastQC for QC on fastq files
- RunSamtoolsStats - Run Samtools stats on recalibrated BAM files
- RunBamQC - Run qualimap BamQC on recalibrated BAM files
-- RunBcftoolsStats - Run BCFTools stats on vcf before annotation
- RunBcftoolsStats - Run BCFTools stats on vcf files

## Annotation:

diff --git a/doc/REFERENCES.md b/doc/REFERENCES.md
index 007d30dd48..9ebe6281e6 100644
--- a/doc/REFERENCES.md
+++ b/doc/REFERENCES.md
@@ -1,6 +1,6 @@
# Genomes and reference files

-CAW currently uses GRCh38 by default. The settings are in `genomes.config`, they can be tailored to your needs. The [`buildReferences.nf`](#buildreferencesnf) script can be use to build the indexes based on the reference files.
+Sarek currently uses GRCh38 by default. The settings are in `genomes.config`; they can be tailored to your needs. The [`buildReferences.nf`](#buildreferencesnf) script can be used to build the indexes based on the reference files.

## GRCh37

@@ -21,18 +21,13 @@ The following files need to be downloaded:

From our repo, get the [`intervals` list file](https://raw.githubusercontent.com/SciLifeLab/Sarek/master/repeats/wgs_calling_regions.grch37.list).
More information about this file is in the [intervals documentation](INTERVALS.md)

-The rest of the references files are stored in in [export.uppmax.uu.se](https://export.uppmax.uu.se/b2015110/caw-references/b37/) and also on the repository [CAW-References](https://github.com/MaxUlysse/CAW-References) using [GIT-LFS](https://git-lfs.github.com/):
-
-- '1000G\_phase3\_20130502\_SNP\_maf0.3.loci'
-- 'b37\_cosmic\_v74.noCHR.sort.4.1.vcf'
-
You can create your own cosmic reference for any human reference as specified below.

### COSMIC files

-To annotate with COSMIC variants during MuTect1/2 Variant Calling you need to create a compatible VCF file.
-Download the coding and non-coding VCF files from [COSMIC](http://cancer.sanger.ac.uk/cosmic/download) and
-process them with the [Create\_Cosmic.sh](https://github.com/SciLifeLab/Sarek/tree/master/scripts/Create_Cosmic.sh)
+To annotate with COSMIC variants during MuTect1/2 Variant Calling you need to create a compatible VCF file.
+Download the coding and non-coding VCF files from [COSMIC](http://cancer.sanger.ac.uk/cosmic/download) and
+process them with the [Create\_Cosmic.sh](https://github.com/SciLifeLab/Sarek/tree/master/scripts/Create_Cosmic.sh)
script. The script requires a fasta index `.fai` of the reference file you are using.

Example:
@@ -54,7 +49,7 @@ igvtools index

Use `--genome GRCh38` to map against GRCh38. Before doing so and if you are not on UPPMAX, you need to adjust the settings in `genomes.config` to your needs.

-To get the needed files, download the GATK bundle for GRCh38 from [ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg38/](mailto:ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg38/).
+To get the needed files, download the GATK bundle for GRCh38 from [ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg38/](ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg38/).

The MD5SUM of `Homo_sapiens_assembly38.fasta` included in that file is 7ff134953dcca8c8997453bbb80b6b5e.

@@ -93,10 +88,10 @@ nextflow run buildReferences.nf --refDir --genome 

### `--genome`

-Same parameter used for `main.nf`
+Same parameter used for other scripts.
- GRCh37
-- GRCh38
+- GRCh38 (not yet available)
- smallGRCh37

--------------------------------------------------------------------------------

diff --git a/doc/TESTS.md b/doc/TESTS.md
index fa8aaddb8a..4d030b603b 100644
--- a/doc/TESTS.md
+++ b/doc/TESTS.md
@@ -13,12 +13,15 @@ Four optional arguments are supported:
- `-s` || `--sample`: Use to change the test sample (default=`data/tsv/tiny.tsv`)
- `-t` || `--test`:
-  - `MAPPING`: will try preprocessing
-  - `REALIGN`: will try realignment
-  - `RECALIBRATE`: will try recalibration
-  - `ANNOTATESNPEFF`: will try variant calling and annotation using snpEff
-  - `ANNOTATEVEP`: will try variant calling and annotation using VEP
-  - `ALL`: will try all the previous tests (default)
+  - `DIR`: test `mapping` with an input directory; all other tests use a TSV file
+  - `STEP`: test `mapping`, `realign` and `recalibrate`
+  - `GERMLINE`: test `mapping` and Variant Calling with `HaplotypeCaller`
+  - `TOOLS`: test `mapping` and Variant Calling with `FreeBayes`, `HaplotypeCaller`, `MuTect1`, `MuTect2`, `Strelka`
+  - `MANTA`: test `mapping` and Variant Calling with `Manta`
+  - `ANNOTATESNPEFF`: test annotation using `snpEff`
+  - `ANNOTATEVEP`: test annotation using `VEP`
+  - `BUILDCONTAINERS`: test building all containers except `snpeffgrch37`, `snpeffgrch38`, `vepgrch37` and `vepgrch38`
+  - `ALL`: test all the previous tests (default)

## Usage

@@ -27,10 +30,10 @@
```bash
# Will try all tests using Singularity
./scripts/test.sh
# Will try all tests using Docker
./scripts/test.sh -p docker
-# Will try MAPPING tests using Singularity
-./scripts/test.sh -t MAPPING
-# Will try MAPPING tests using Singularity with GRCh37 genome
-./scripts/test.sh -t MAPPING -g GRCh37
+# Will try STEP tests using Singularity
+./scripts/test.sh -t STEP
+# Will try STEP tests using Singularity with GRCh37 genome
+./scripts/test.sh -t STEP -g GRCh37
# Will try all tests using Singularity on manta test data
./scripts/test.sh -s data/tsv/tiny-manta.tsv
```

diff --git a/doc/TSV.md b/doc/TSV.md
index 1d3f68c3c0..1140706248 100644
--- a/doc/TSV.md
+++ b/doc/TSV.md
@@ -12,6 +12,8 @@ It's a Tab Separated Value file, based on: `subject gender status sample lane fa
- `bam` is the bam file
- `bai` is the index

+All examples are given for a normal/tumor pair. If no tumors are listed in the TSV file, then the workflow will proceed as if it were a single normal sample instead of a normal/tumor pair.
+
# Example TSV file for a normal/tumor pair with FASTQ files

In this sample for the normal case there are 3 read groups, and 2 for the tumor. It is recommended to add the absolute path of the paired FASTQ files, but relative paths should also work. Note, the delimiter is the tab (\t) character:

@@ -36,7 +38,8 @@ G15511 XX 1 D0ENMT pathToFiles/G15511.D0ENMT.md.real.bam pathToFiles

All the files will be created in the Preprocessing/NonRealigned/ directory, and by default a corresponding TSV file will also be deposited there.
Generally, getting MuTect1 and Strelka calls on the preprocessed files should be done by:

```bash
-nextflow run SciLifeLab/Sarek --sample Preprocessing/NonRealigned/mysample.tsv --step realign --tools Mutect2,Strelka
+nextflow run SciLifeLab/Sarek/main.nf --sample Preprocessing/NonRealigned/mysample.tsv --step realign
+nextflow run SciLifeLab/Sarek/somaticVC.nf --sample Preprocessing/Recalibrated/mysample.tsv --tools Mutect2,Strelka
```

# Example TSV file for a normal/tumor pair with recalibrated BAM files

@@ -51,7 +54,7 @@ G15511 XX 1 D0ENMT pathToFiles/G15511.D0ENMT.md.real.bam pathToFi

All the files will be in the Preprocessing/Recalibrated/ directory, and by default a corresponding TSV file will also be deposited there.
Generally, getting MuTect1 and Strelka calls on the recalibrated files should be done by:

```bash
-nextflow run SciLifeLab/Sarek --sample Preprocessing/Recalibrated/mysample.tsv --step variantcalling --tool Mutect2,Strelka
+nextflow run SciLifeLab/Sarek/somaticVC.nf --sample Preprocessing/Recalibrated/mysample.tsv --tools Mutect2,Strelka
```

--------------------------------------------------------------------------------

diff --git a/doc/USAGE.md b/doc/USAGE.md
index 5fd9120ef7..5dea8d6dc8 100644
--- a/doc/USAGE.md
+++ b/doc/USAGE.md
@@ -1,9 +1,16 @@
# Usage

-I would recommand to run Nextflow within a [screen](https://www.gnu.org/software/screen/) or [tmux](https://tmux.github.io/) session. It is recommanded to run only one instance of Sarek for one patient in the same directory. The typical reduced command line is:
+I would recommend running Nextflow within a [screen](https://www.gnu.org/software/screen/) or [tmux](https://tmux.github.io/) session.
+It is recommended to run only one instance of Sarek for one patient in the same directory.
+Sarek uses several scripts; a wrapper is currently being made to simplify the command lines.
+Currently the typical reduced command lines are:

```bash
-nextflow run SciLifeLab/Sarek --sample  --step  --tools 
+nextflow run SciLifeLab/Sarek/main.nf --sample  --step 
+nextflow run SciLifeLab/Sarek/germlineVC.nf --sample  --tools 
+nextflow run SciLifeLab/Sarek/somaticVC.nf --sample  --tools 
+nextflow run SciLifeLab/Sarek/annotate.nf --tools  (--annotateTools  || --annotateVCF )
+nextflow run SciLifeLab/Sarek/runMultiQC.nf
```

All parameters, options and variables can be specified with configuration files and profile (cf [configuration documentation](#profiles)).

@@ -59,38 +66,34 @@ Test run Sarek on a smaller dataset, that way you don't have to specify `--sampl

Choose which tools will be used in the workflow. Different tools to be separated by commas. Possible values are:

-- ascat (use ascat for CNV)
-- haplotypecaller (use HaplotypeCaller for VC)
-- manta (use Manta for SV)
-- mutect1 (use MuTect1 for VC)
-- mutect2 (use MuTect2 for VC)
-- strelka (use Strelka for VC)
-- snpeff (use snpEff for Annotation)
-- vep (use VEP for Annotation)
+- haplotypecaller (use `HaplotypeCaller` for VC) (germlineVC)
+- manta (use `Manta` for SV) (germlineVC,somaticVC)
+- strelka (use `Strelka` for VC) (germlineVC,somaticVC)
+- ascat (use `ASCAT` for CNV) (somaticVC)
+- mutect1 (use `MuTect1` for VC) (somaticVC)
+- mutect2 (use `MuTect2` for VC) (somaticVC)
+- snpeff (use `snpEff` for Annotation) (annotate)
+- vep (use `VEP` for Annotation) (annotate)

-`--tools` option is case insensitive to avoid easy introduction of errors when choosing tools. So you can write `--tools mutect2,snpEff` or `--tools MuTect2,snpeff` without worrying about case sensitivity.
+`--tools` option is case insensitive to avoid easy introduction of errors when choosing tools. So you can write `--tools mutect2,ascat` or `--tools MuTect2,ASCAT` without worrying about case sensitivity.

### --annotateTools `tool1[,tool2,tool3...]`

Choose which tools to annotate. Different tools to be separated by commas. Possible values are:

-- haplotypecaller (Annotate HaplotypeCaller output)
-- manta (Annotate Manta output)
-- mutect1 (Annotate MuTect1 output)
-- mutect2 (Annotate MuTect2 output)
-- strelka (Annotate Strelka output)
+- haplotypecaller (Annotate `HaplotypeCaller` output)
+- manta (Annotate `Manta` output)
+- mutect1 (Annotate `MuTect1` output)
+- mutect2 (Annotate `MuTect2` output)
+- strelka (Annotate `Strelka` output)

### --annotateVCF `file1[,file2,file3...]`

-Choose which vcf to annotate. Different vcf to be separated by commas.
+Choose which VCF files to annotate. Different VCFs to be separated by commas.

### --verbose

Display more information about files being processed.

-### --version
-
-Display version number and information.
-
## Containers

### --containerPath `Path to the singularity containers (default=containers/)`

@@ -170,7 +173,7 @@ nextflow pull SciLifeLab/Sarek

If there is a feature or bugfix you want to use in a resumed or re-analyzed run, you have to update the workflow to the latest version. By default it is not updated automatically, so use something like:

```bash
-nextflow run -latest SciLifeLab/Sarek --sample mysample.tsv -resume
+nextflow run -latest SciLifeLab/Sarek/main.nf ... -resume
```

--------------------------------------------------------------------------------

diff --git a/doc/USE_CASES.md b/doc/USE_CASES.md
index 271bd7e5eb..8dabca0fbc 100644
--- a/doc/USE_CASES.md
+++ b/doc/USE_CASES.md
@@ -3,21 +3,27 @@

The workflow has three pre-processing options: `mapping`, `realign` and `recalibrate`. Using the `mapping` directive one will have a pair of mapped, deduplicated and recalibrated BAM files in the `Preprocessing/Recalibrated/` directory. Furthermore, during this process a deduplicated BAM file is created in the `Preprocessing/NonRealigned/` directory. This is the usual option you have to give when you are starting from raw FASTQ data:

```bash
-nextflow run SciLifeLab/Sarek --sample mysample.tsv
+nextflow run SciLifeLab/Sarek/main.nf --sample mysample.tsv
+nextflow run SciLifeLab/Sarek/germlineVC.nf --tools 
+nextflow run SciLifeLab/Sarek/somaticVC.nf --tools  # For somatic only
+nextflow run SciLifeLab/Sarek/annotate.nf --tools  --annotateVCF myfile.vcf # For somatic only
+nextflow run SciLifeLab/Sarek/runMultiQC.nf
```

`mapping` will start by default; you do not have to give any additional parameters, only the TSV file describing the sample (see below).

In the [genomes.config](https://raw.githubusercontent.com/SciLifeLab/Sarek/master/configuration/genomes.config) configuration file we are defining the intervals file as well; this is used to define regions for variant calling and realignment (in a scatter and gather fashion when possible). The intervals are chromosomes cut at their centromeres (so each chromosome arm is processed separately), plus additional unassigned contigs. We are ignoring the hs37d5 contig that contains concatenated decoy sequences.

-During the execution of the workflow a `trace.txt`, a `timeline.html` and a `report.html` files are generated automatically. These files contain statistics about resources used and processes finished. If you start a new flow or restart/resume a sample, the previous version will be renamed as `trace.txt.1`, `timeline.html.1` and `report.html.1` respectively. Also, older version are renamed with incremented numbers.
+During the execution of the workflow, `Sarek-trace.txt`, `Sarek-timeline.html` and `Sarek-report.html` files are generated automatically. These files contain statistics about resources used and processes finished. If you start a new workflow or restart/resume a sample, the previous version will be renamed as `Sarek-trace.txt.1`, `Sarek-timeline.html.1` and `Sarek-report.html.1` respectively. Also, older versions are renamed with incremented numbers.

## Starting from raw FASTQ - pair of FASTQ files

The workflow should be started in this case with the smallest set of options as written above:

```bash
-nextflow run SciLifeLab/Sarek --sample mysample.tsv
+nextflow run SciLifeLab/Sarek/main.nf --sample mysample.tsv
+nextflow run SciLifeLab/Sarek/germlineVC.nf --tools 
+nextflow run SciLifeLab/Sarek/runMultiQC.nf
```

The TSV file should have at least one tab-separated line:

@@ -35,6 +41,23 @@ The columns are:
5. first set of reads
6. second set of reads

+## Starting from raw FASTQ on a normal sample only (with `--sampleDir`)
+
+The `--sampleDir` option can be used to point Sarek to a directory with FASTQ files:
+```bash
+nextflow run SciLifeLab/Sarek/main.nf --sampleDir path/to/FASTQ/files
+nextflow run SciLifeLab/Sarek/germlineVC.nf --tools 
+nextflow run SciLifeLab/Sarek/runMultiQC.nf
+```
+The given directory is searched recursively for FASTQ files that are named `*_R1_*.fastq.gz`, and a matching pair with the same name except `_R2_` instead of `_R1_` is expected to exist alongside. All of the found FASTQ files are considered to belong to the sample. Each FASTQ file pair gets its own read group (`@RG`) in the resulting BAM file.
+
+### Metadata when using `--sampleDir`
+
+When using `--sampleDir`, the metadata about the sample that are written to the BAM header in the `@RG` tag are determined in the following way.
+
+- The sample name (`SM`) is derived from the last component of the path given to `--sampleDir`. That is, you should make sure that the directory has a meaningful name! For example, with `--sampleDir=/my/fastqs/sample123`, the sample name will be `sample123`.
+- The read group id is set to *flowcell.samplename.lane*. The flowcell id and lane number are auto-detected from the name of the first read in the FASTQ file.
+
## Starting from raw FASTQ - having pair of FASTQ files for tumor/normal samples (one lane for each sample)

The workflow command line is just the same as before, but the TSV contains extra lines. You can see the second column is used to distinguish normal and tumor samples. You can add as many relapse samples as you have, providing their name in the third column is different. Each will be compared to the normal one-by-one. Obviously, if you do not have relapse samples, you can leave out this last line.

@@ -73,7 +96,9 @@ SUBJECT_ID XX 1 SAMPLEIDR 9 /samples/relapse9_1.fastq.gz /sample

NGI Production in the previous years delivered many preprocessed samples; these BAM files are not recalibrated.
To have BAMs suitable for variant calling, realignment of pairs is necessary:

```bash
-nextflow run SciLifeLab/Sarek --sample mysample.tsv --step realign
+nextflow run SciLifeLab/Sarek/main.nf --sample mysample.tsv --step realign
+nextflow run SciLifeLab/Sarek/germlineVC.nf --tools 
+nextflow run SciLifeLab/Sarek/runMultiQC.nf
```

And the corresponding TSV file should be like:

@@ -89,7 +114,12 @@ At the end of this step you should have recalibrated BAM files in the `Preproces

NGI Production in the previous years delivered many preprocessed samples; these BAM files are not recalibrated. To have BAMs suitable for variant calling, realignment of pairs is necessary:

```bash
-nextflow run SciLifeLab/Sarek --sample mysample.tsv --step realign
+nextflow run SciLifeLab/Sarek/main.nf --sample mysample.tsv --step realign
+nextflow run SciLifeLab/Sarek/germlineVC.nf --tools 
+nextflow run SciLifeLab/Sarek/somaticVC.nf --tools 
+nextflow run SciLifeLab/Sarek/annotate.nf --tools  --annotateVCF myfile.vcf
+nextflow run SciLifeLab/Sarek/runMultiQC.nf
+
```

And the corresponding TSV file should be like (obviously, if you do not have relapse samples, you can leave out this last line):

@@ -107,7 +137,11 @@ At the end of this step you should have recalibrated BAM files in the `Preproces

If the BAM files were realigned together, you can start from recalibration:

```bash
-nextflow run SciLifeLab/Sarek --sample mysample.tsv --step recalibrate
+nextflow run SciLifeLab/Sarek/main.nf --sample mysample.tsv --step recalibrate
+nextflow run SciLifeLab/Sarek/germlineVC.nf --tools 
+nextflow run SciLifeLab/Sarek/somaticVC.nf --tools 
+nextflow run SciLifeLab/Sarek/annotate.nf --tools  --annotateVCF myfile.vcf
+nextflow run SciLifeLab/Sarek/runMultiQC.nf
```

And the corresponding TSV file should be like (obviously, if you do not have relapse samples, you can leave out this last line):

@@ -123,7 +157,8 @@ SUBJECT_ID XX 1 SAMPLEIDR /samples/SAMPLEIDR.bam /samples/SAMPLEIDR

At this step we are assuming that all the required preprocessing is over; we only want to run variant callers or other tools using recalibrated BAMs.

```bash
-nextflow run SciLifeLab/Sarek/ --sample mysample.tsv --step variantcalling --tools 
+nextflow run SciLifeLab/Sarek/germlineVC.nf --tools 
+nextflow run SciLifeLab/Sarek/runMultiQC.nf
```

And the corresponding TSV file should be like:

@@ -139,7 +174,10 @@ If you want to restart a previous run of the pipeline, you may not have a recali

At this step we are assuming that all the required preprocessing is over; we only want to run variant callers or other tools using recalibrated BAMs.

```bash
-nextflow run SciLifeLab/Sarek --sample mysample.tsv --step variantcalling --tools 
+nextflow run SciLifeLab/Sarek/germlineVC.nf --tools 
+nextflow run SciLifeLab/Sarek/somaticVC.nf --tools 
+nextflow run SciLifeLab/Sarek/annotate.nf --tools  --annotateVCF myfile.vcf
+nextflow run SciLifeLab/Sarek/runMultiQC.nf
```

And the corresponding TSV file should be like (obviously, if you do not have relapse samples, you can leave out this last line):

@@ -152,25 +190,6 @@ SUBJECT_ID XX 1 SAMPLEIDR /samples/SAMPLEIDR.bam /samples/SAMPLEIDR

If you want to restart a previous run of the pipeline, you may not have a recalibrated BAM file. This is the case if HaplotypeCaller was the only tool (recalibration is done on-the-fly with HaplotypeCaller to improve performance and save space). In this case, you need to start with `--step=recalibrate` (see previous section).
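For orientation (not part of the diff above), such a restart might look like the following sketch, using the `nonRecalibrated.tsv` file that preprocessing writes to `Preprocessing/NonRecalibrated/`; the tool selection here is only an example:

```bash
# Sketch only: restart from the recalibration step using the TSV file
# written during preprocessing, then run germline variant calling.
nextflow run SciLifeLab/Sarek/main.nf --sample Preprocessing/NonRecalibrated/nonRecalibrated.tsv --step recalibrate
nextflow run SciLifeLab/Sarek/germlineVC.nf --tools HaplotypeCaller
```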
- -## Running the pipeline on a normal sample only (with `--sampleDir`) - -Sarek can also be used to process a single normal sample. The tools that require tumor/normal pairs should not be run in this case. - -When running a normal-only sample, it is not necessary to create a TSV file describing the input. Instead, the `--sampleDir` option can be used to point Sarek to a directory with FASTQ files: -```bash -nextflow run SciLifeLab/Sarek --tools=HaplotypeCaller --sampleDir path/to/FASTQ/files -``` -The given directory is searched recursively for FASTQ files that are named `*_R1_*.fastq.gz`, and a matching pair with the same name except `_R2_` instead of `_R1_` is expected to exist alongside. All of the found FASTQ files are considered to belong to the sample. Each FASTQ file pair gets its own read group (`@RG`) in the resulting BAM file. - -### Metadata when using `--sampleDir` - -When using `--sampleDir`, the metadata about the sample that are written to the BAM header in the `@RG` tag are determined in the following way. - -- The sample name (`SM`) is derived from the the last component of the path given to `--sampleDir`. That is, you should make sure that that directory has a meaningful name! For example, with `--sampleDir=/my/fastqs/sample123`, the sample name will be `sample123`. -- The read group id is set to *flowcell.samplename.lane*. The flowcell id and lane number are auto-detected from the name of the first read in the FASTQ file. - - -------------------------------------------------------------------------------- [![](images/SciLifeLab_logo.png "SciLifeLab")][scilifelab-link] diff --git a/germlineVC.nf b/germlineVC.nf index c3db5859fa..567c65969f 100644 --- a/germlineVC.nf +++ b/germlineVC.nf @@ -43,68 +43,30 @@ kate: syntax groovy; space-indent on; indent-width 2; ================================================================================ */ -version = '2.0.0' - // Check that Nextflow version is up to date enough // try / throw / catch works for NF versions < 0.25 when this was implemented -nf_required_version = '0.25.0' try { - if( ! nextflow.version.matches(">= ${nf_required_version}") ){ + if( ! nextflow.version.matches(">= ${params.nfRequiredVersion}") ){ throw GroovyException('Nextflow version too old') } } catch (all) { log.error "====================================================\n" + - " Nextflow version ${nf_required_version} required! You are running v${workflow.nextflow.version}.\n" + + " Nextflow version ${params.nfRequiredVersion} required! You are running v${workflow.nextflow.version}.\n" + " Pipeline execution will continue, but things may break.\n" + " Please update Nextflow.\n" + "============================================================" } if (params.help) exit 0, helpMessage() -if (params.version) exit 0, versionMessage() -if (!isAllowedParams(params)) exit 1, "params unknown, see --help for more information" +if (!SarekUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! 
Use --project " -// Default params: -// Such params are overridden by command line or configuration definitions - -// GVCF are generated -params.noGVCF = false -// Reports are generated -params.noReports = false -// BAMQC is used -params.noBAMQC = false -// Run Sarek in onlyQC mode -params.onlyQC = false -// outDir is current directory -params.outDir = baseDir -// No sample is defined -params.sample = '' -// Step is variantcalling -step = 'variantcalling' -// Not testing -params.test = '' -// No tools to be used -params.tools = '' -// Params are defined in config files -params.containerPath = '' -params.repository = '' -params.tag = '' - tools = params.tools ? params.tools.split(',').collect{it.trim().toLowerCase()} : [] directoryMap = defineDirectoryMap() referenceMap = defineReferenceMap() -stepList = defineStepList() toolList = defineToolList() -nucleotidesPerSecond = 1000.0 // used to estimate variant calling runtime -gvcf = !params.noGVCF -reports = !params.noReports -onlyQC = params.onlyQC -verbose = params.verbose - -if (!checkParameterExistence(step, stepList)) exit 1, 'Unknown step, see --help for more information' -if (step.contains(',')) exit 1, 'You can choose only one step, see --help for more information' + if (!checkReferenceMap(referenceMap)) exit 1, 'Missing Reference file(s), see --help for more information' if (!checkParameterList(tools,toolList)) exit 1, 'Unknown tool(s), see --help for more information' @@ -121,7 +83,7 @@ if (params.test && params.genome in ['GRCh37', 'GRCh38']) { tsvPath = '' if (params.sample) tsvPath = params.sample -else tsvPath = "${params.outDir}/${directoryMap.recalibrated}/recalibrated.tsv" +else tsvPath = "${directoryMap.recalibrated}/recalibrated.tsv" // Set up the bamFiles channel @@ -141,7 +103,7 @@ if (tsvPath) { startMessage() -if (verbose) bamFiles = bamFiles.view { +if (params.verbose) bamFiles = bamFiles.view { "BAMs to process:\n\ ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\ Files : [${it[3].fileName}, ${it[4].fileName}]" @@ -154,7 +116,7 @@ recalTables = recalTables.map{ it + [null] } // null recalibration table means: recalTables = recalTables.map { [it[0]] + it[2..-1] } // remove status -if (verbose) recalibratedBam = recalibratedBam.view { +if (params.verbose) recalibratedBam = recalibratedBam.view { "Recalibrated BAM for variant Calling:\n\ ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\ Files : [${it[3].fileName}, ${it[4].fileName}]" @@ -163,7 +125,7 @@ if (verbose) recalibratedBam = recalibratedBam.view { process RunSamtoolsStats { tag {idPatient + "-" + idSample} - publishDir "${params.outDir}/${directoryMap.samtoolsStats}", mode: 'copy' + publishDir directoryMap.samtoolsStats, mode: 'link' input: set idPatient, status, idSample, file(bam), file(bai) from bamForSamToolsStats @@ -171,7 +133,7 @@ process RunSamtoolsStats { output: file ("${bam}.samtools.stats.out") into samtoolsStatsReport - when: reports + when: !params.noReports script: """ @@ -179,7 +141,7 @@ process RunSamtoolsStats { """ } -if (verbose) samtoolsStatsReport = samtoolsStatsReport.view { +if (params.verbose) samtoolsStatsReport = samtoolsStatsReport.view { "SAMTools stats report:\n\ File : [${it.fileName}]" } @@ -187,7 +149,7 @@ if (verbose) samtoolsStatsReport = samtoolsStatsReport.view { process RunBamQC { tag {idPatient + "-" + idSample} - publishDir "${params.outDir}/${directoryMap.bamQC}", mode: 'copy' + publishDir directoryMap.bamQC, mode: 'link' input: set idPatient, status, idSample, file(bam), file(bai) from bamForBamQC @@ -195,7 +157,7 
@@ process RunBamQC { output: file("${idSample}") into bamQCreport - when: reports && !params.noBAMQC + when: !params.noReports && !params.noBAMQC script: """ @@ -207,7 +169,7 @@ process RunBamQC { """ } -if (verbose) bamQCreport = bamQCreport.view { +if (params.verbose) bamQCreport = bamQCreport.view { "BamQC report:\n\ Dir : [${it.fileName}]" } @@ -267,7 +229,7 @@ process CreateIntervalBeds { t = \$5 # runtime estimate if (t == "") { # no runtime estimate in this row, assume default value - t = (\$3 - \$2) / ${nucleotidesPerSecond} + t = (\$3 - \$2) / ${params.nucleotidesPerSecond} } if (name == "" || (chunk > 600 && (chunk + t) > longest * 1.05)) { # start a new chunk @@ -299,7 +261,7 @@ bedIntervals = bedIntervals else { start = fields[1].toInteger() end = fields[2].toInteger() - duration += (end - start) / nucleotidesPerSecond + duration += (end - start) / params.nucleotidesPerSecond } } [duration, intervalFile] @@ -307,7 +269,7 @@ bedIntervals = bedIntervals .flatten().collate(2) .map{duration, intervalFile -> intervalFile} -if (verbose) bedIntervals = bedIntervals.view { +if (params.verbose) bedIntervals = bedIntervals.view { " Interv: ${it.baseName}" } @@ -362,7 +324,7 @@ process RunHaplotypecaller { set val("gvcf-hc"), idPatient, idSample, idSample, file("${intervalBed.baseName}_${idSample}.g.vcf") into hcGenomicVCF set idPatient, idSample, file(intervalBed), file("${intervalBed.baseName}_${idSample}.g.vcf") into vcfsToGenotype - when: 'haplotypecaller' in tools && !onlyQC + when: 'haplotypecaller' in tools && !params.onlyQC script: BQSR = (recalTable != null) ? "--BQSR $recalTable" : '' @@ -383,7 +345,7 @@ process RunHaplotypecaller { } hcGenomicVCF = hcGenomicVCF.groupTuple(by:[0,1,2,3]) -if (!gvcf) hcGenomicVCF.close() +if (params.noGVCF) hcGenomicVCF.close() process RunGenotypeGVCFs { tag {idSample + "-" + intervalBed.baseName} @@ -401,7 +363,7 @@ process RunGenotypeGVCFs { output: set val("haplotypecaller"), idPatient, idSample, idSample, file("${intervalBed.baseName}_${idSample}.vcf") into hcGenotypedVCF - when: 'haplotypecaller' in tools && !onlyQC + when: 'haplotypecaller' in tools && !params.onlyQC script: // Using -L is important for speed @@ -423,7 +385,7 @@ hcGenotypedVCF = hcGenotypedVCF.groupTuple(by:[0,1,2,3]) // so we can have a single sorted VCF containing all the calls for a given caller vcfsToMerge = hcGenomicVCF.mix(hcGenotypedVCF) -if (verbose) vcfsToMerge = vcfsToMerge.view { +if (params.verbose) vcfsToMerge = vcfsToMerge.view { "VCFs To be merged:\n\ Tool : ${it[0]}\tID : ${it[1]}\tSample: [${it[3]}, ${it[2]}]\n\ Files : ${it[4].fileName}" @@ -432,7 +394,7 @@ if (verbose) vcfsToMerge = vcfsToMerge.view { process ConcatVCF { tag {variantCaller + "-" + idSampleNormal} - publishDir "${params.outDir}/${directoryMap."$variantCaller"}", mode: 'copy' + publishDir "${directoryMap."$variantCaller"}", mode: 'link' input: set variantCaller, idPatient, idSampleNormal, idSampleTumor, file(vcFiles) from vcfsToMerge @@ -442,7 +404,7 @@ process ConcatVCF { set variantCaller, idPatient, idSampleNormal, idSampleTumor, file("*.vcf.gz") into vcfConcatenated file("*.vcf.gz.tbi") into vcfConcatenatedTbi - when: ( 'haplotypecaller' in tools || 'mutect1' in tools || 'mutect2' in tools || 'freebayes' in tools ) && !onlyQC + when: ( 'haplotypecaller' in tools || 'mutect1' in tools || 'mutect2' in tools || 'freebayes' in tools ) && !params.onlyQC script: if (variantCaller == 'haplotypecaller') outputFile = "${variantCaller}_${idSampleNormal}.vcf" @@ -491,7 +453,7 @@ process ConcatVCF 
{ """ } -if (verbose) vcfConcatenated = vcfConcatenated.view { +if (params.verbose) vcfConcatenated = vcfConcatenated.view { "Variant Calling output:\n\ Tool : ${it[0]}\tID : ${it[1]}\tSample: [${it[3]}, ${it[2]}]\n\ File : ${it[4].fileName}" @@ -500,7 +462,7 @@ if (verbose) vcfConcatenated = vcfConcatenated.view { process RunSingleStrelka { tag {idSample} - publishDir "${params.outDir}/${directoryMap.strelka}", mode: 'copy' + publishDir directoryMap.strelka, mode: 'link' input: set idPatient, status, idSample, file(bam), file(bai) from bamsForSingleStrelka @@ -512,7 +474,7 @@ process RunSingleStrelka { output: set val("singlestrelka"), idPatient, idSample, file("*.vcf.gz"), file("*.vcf.gz.tbi") into singleStrelkaOutput - when: 'strelka' in tools && !onlyQC + when: 'strelka' in tools && !params.onlyQC script: """ @@ -534,7 +496,7 @@ process RunSingleStrelka { """ } -if (verbose) singleStrelkaOutput = singleStrelkaOutput.view { +if (params.verbose) singleStrelkaOutput = singleStrelkaOutput.view { "Variant Calling output:\n\ Tool : ${it[0]}\tID : ${it[1]}\tSample: ${it[2]}\n\ Files : ${it[3].fileName}\n\ @@ -544,7 +506,7 @@ if (verbose) singleStrelkaOutput = singleStrelkaOutput.view { process RunSingleManta { tag {idSample + " - Single Diploid"} - publishDir "${params.outDir}/${directoryMap.manta}", mode: 'copy' + publishDir directoryMap.manta, mode: 'link' input: set idPatient, status, idSample, file(bam), file(bai) from bamsForSingleManta @@ -556,7 +518,7 @@ process RunSingleManta { output: set val("singlemanta"), idPatient, idSample, file("*.vcf.gz"), file("*.vcf.gz.tbi") into singleMantaOutput - when: 'manta' in tools && status == 0 && !onlyQC + when: 'manta' in tools && status == 0 && !params.onlyQC script: """ @@ -582,7 +544,7 @@ process RunSingleManta { """ } -if (verbose) singleMantaOutput = singleMantaOutput.view { +if (params.verbose) singleMantaOutput = singleMantaOutput.view { "Variant Calling output:\n\ Tool : ${it[0]}\tID : ${it[1]}\tSample: ${it[2]}\n\ Files : ${it[3].fileName}\n\ @@ -602,7 +564,7 @@ vcfForBCFtools = Channel.empty().mix( process RunBcftoolsStats { tag {vcf} - publishDir "${params.outDir}/${directoryMap.bcftoolsStats}", mode: 'copy' + publishDir directoryMap.bcftoolsStats, mode: 'link' input: set variantCaller, file(vcf) from vcfForBCFtools @@ -610,7 +572,7 @@ process RunBcftoolsStats { output: file ("${vcf.baseName}.bcf.tools.stats.out") into bcfReport - when: reports + when: !params.noReports script: """ @@ -618,7 +580,7 @@ process RunBcftoolsStats { """ } -if (verbose) bcfReport = bcfReport.view { +if (params.verbose) bcfReport = bcfReport.view { "BCFTools stats report:\n\ File : [${it.fileName}]" } @@ -631,11 +593,6 @@ bcfReport.close() ================================================================================ */ -def sarekMessage() { - // Display Sarek message - log.info "Sarek ~ ${version} - " + this.grabRevision() + (workflow.commitId ? 
" [${workflow.commitId}]" : "") -} - def checkFileExtension(it, extension) { // Check file extension if (!it.toString().toLowerCase().endsWith(extension.toLowerCase())) exit 1, "File: ${it} has the wrong extension: ${extension} see --help for more information" @@ -660,82 +617,6 @@ def checkParamReturnFile(item) { return file(params."${item}") } -def checkParams(it) { - // Check if params is in this given list - return it in [ - 'ac-loci', - 'acLoci', - 'annotate-tools', - 'annotate-VCF', - 'annotateTools', - 'annotateVCF', - 'build', - 'bwa-index', - 'bwaIndex', - 'call-name', - 'callName', - 'contact-mail', - 'contactMail', - 'container-path', - 'containerPath', - 'containers', - 'cosmic-index', - 'cosmic', - 'cosmicIndex', - 'dbsnp-index', - 'dbsnp', - 'docker', - 'genome_base', - 'genome-dict', - 'genome-file', - 'genome-index', - 'genome', - 'genomeDict', - 'genomeFile', - 'genomeIndex', - 'genomes', - 'help', - 'intervals', - 'known-indels-index', - 'known-indels', - 'knownIndels', - 'knownIndelsIndex', - 'max_cpus', - 'max_memory', - 'max_time', - 'no-BAMQC', - 'no-GVCF', - 'no-reports', - 'noBAMQC', - 'noGVCF', - 'noReports', - 'only-QC', - 'onlyQC', - 'out-dir', - 'outDir', - 'params', - 'project', - 'push', - 'repository', - 'run-time', - 'runTime', - 'sample-dir', - 'sample', - 'sampleDir', - 'single-CPUMem', - 'singleCPUMem', - 'singularity', - 'step', - 'tag', - 'test', - 'tools', - 'total-memory', - 'totalMemory', - 'vcflist', - 'verbose', - 'version'] -} - def checkReferenceMap(referenceMap) { // Loop through all the references files to check their existence referenceMap.every { @@ -761,26 +642,20 @@ def checkUppmaxProject() { return !(workflow.profile == 'slurm' && !params.project) } -def checkExactlyOne(list) { - final n = 0 - list.each{n += it ? 
1 : 0} - return n == 1 -} - def defineDirectoryMap() { return [ - 'recalibrated' : 'Preprocessing/Recalibrated', - 'bamQC' : 'Reports/bamQC', - 'bcftoolsStats' : 'Reports/BCFToolsStats', - 'samtoolsStats' : 'Reports/SamToolsStats', - 'ascat' : 'VariantCalling/Ascat', - 'freebayes' : 'VariantCalling/FreeBayes', - 'haplotypecaller' : 'VariantCalling/HaplotypeCaller', - 'gvcf-hc' : 'VariantCalling/HaplotypeCallerGVCF', - 'manta' : 'VariantCalling/Manta', - 'mutect1' : 'VariantCalling/MuTect1', - 'mutect2' : 'VariantCalling/MuTect2', - 'strelka' : 'VariantCalling/Strelka' + 'recalibrated' : "${params.outDir}/Preprocessing/Recalibrated", + 'bamQC' : "${params.outDir}/Reports/bamQC", + 'bcftoolsStats' : "${params.outDir}/Reports/BCFToolsStats", + 'samtoolsStats' : "${params.outDir}/Reports/SamToolsStats", + 'ascat' : "${params.outDir}/VariantCalling/Ascat", + 'freebayes' : "${params.outDir}/VariantCalling/FreeBayes", + 'haplotypecaller' : "${params.outDir}/VariantCalling/HaplotypeCaller", + 'gvcf-hc' : "${params.outDir}/VariantCalling/HaplotypeCallerGVCF", + 'manta' : "${params.outDir}/VariantCalling/Manta", + 'mutect1' : "${params.outDir}/VariantCalling/MuTect1", + 'mutect2' : "${params.outDir}/VariantCalling/MuTect2", + 'strelka' : "${params.outDir}/VariantCalling/Strelka" ] } @@ -805,12 +680,6 @@ def defineReferenceMap() { ] } -def defineStepList() { - return [ - 'variantcalling' - ] -} - def defineToolList() { return [ 'ascat', @@ -857,7 +726,6 @@ def extractGenders(channel) { } def generateIntervalsForVC(bams, intervals) { - def (bamsNew, bamsForVC) = bams.into(2) def (intervalsNew, vcIntervals) = intervals.into(2) def bamsForVCNew = bamsForVC.combine(vcIntervals) @@ -931,20 +799,6 @@ def helpMessage() { log.info " you're reading it" log.info " --verbose" log.info " Adds more verbosity to workflow" - log.info " --version" - log.info " displays version number" -} - -def isAllowedParams(params) { - // Compare params to list of verified params - final test = true - params.each{ - if (!checkParams(it.toString().split('=')[0])) { - println "params ${it.toString().split('=')[0]} is unknown" - test = false - } - } - return test } def minimalInformationMessage() { @@ -958,7 +812,6 @@ def minimalInformationMessage() { log.info "TSV file : ${tsvFile}" log.info "Genome : " + params.genome log.info "Genome_base : " + params.genome_base - log.info "Step : " + step log.info "Tools : " + tools.join(', ') log.info "Containers :" if (params.repository) log.info " Repository : ${params.repository}" @@ -1002,19 +855,17 @@ def returnTSV(it, number) { return it } +def sarekMessage() { + // Display Sarek message + log.info "Sarek - Workflow For Somatic And Germline Variations ~ ${params.version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "") +} + def startMessage() { // Display start message this.sarekMessage() this.minimalInformationMessage() } -def versionMessage() { - // Display version message - log.info "Sarek" - log.info " version : " + version - log.info workflow.commitId ? 
"Git info : ${workflow.repository} - ${workflow.revision} [${workflow.commitId}]" : " revision : " + this.grabRevision() -} - workflow.onComplete { // Display complete message this.nextflowMessage() diff --git a/lib/SarekUtils.groovy b/lib/SarekUtils.groovy new file mode 100644 index 0000000000..51bf1dbb9e --- /dev/null +++ b/lib/SarekUtils.groovy @@ -0,0 +1,93 @@ +class MyUtils { + static def checkParams(it) { + // Check if params is in this given list + return it in [ + 'annotate-tools', + 'annotate-VCF', + 'annotateTools', + 'annotateVCF', + 'build', + 'call-name', + 'callName', + 'contact-mail', + 'contactMail', + 'container-path', + 'containerPath', + 'containers', + 'docker', + 'download', + 'explicit-bqsr-needed', + 'explicitBqsrNeeded', + 'genome_base', + 'genome', + 'genomes', + 'help', + 'max_cpus', + 'max_memory', + 'max_time', + 'more', + 'nf-required-version', + 'nfRequiredVersion', + 'no-BAMQC', + 'no-GVCF', + 'no-reports', + 'noBAMQC', + 'noGVCF', + 'noReports', + 'nucleotides-per-second', + 'nucleotidesPerSecond', + 'only-QC', + 'onlyQC', + 'out-dir', + 'outDir', + 'params', + 'project', + 'push', + 'ref-dir', + 'refDir', + 'repository', + 'run-time', + 'runTime', + 'sample-dir', + 'sample', + 'sampleDir', + 'single-CPUMem', + 'singleCPUMem', + 'singularity', + 'step', + 'tag', + 'test', + 'tools', + 'total-memory', + 'totalMemory', + 'vcflist', + 'verbose', + 'version'] + } + + static def checkParameterList(list, realList) { + // Loop through all parameters to check their existence and spelling + return list.every{ checkParameterExistence(it, realList) } + } + + static def checkParameterExistence(it, list) { + // Check parameter existence + if (!list.contains(it)) { + println("Unknown parameter: ${it}") + return false + } + return true + } + + static def isAllowedParams(params) { + // Compare params to list of verified params + final test = true + params.each{ + if (!checkParams(it.toString().split('=')[0])) { + println "params ${it.toString().split('=')[0]} is unknown" + test = false + } + } + return test + } +} diff --git a/main.nf b/main.nf index 3211de4bd7..38e5c880c1 100644 --- a/main.nf +++ b/main.nf @@ -44,63 +44,30 @@ kate: syntax groovy; space-indent on; indent-width 2; ================================================================================ */ -version = '2.0.0' - // Check that Nextflow version is up to date enough // try / throw / catch works for NF versions < 0.25 when this was implemented -nf_required_version = '0.25.0' try { - if( ! nextflow.version.matches(">= ${nf_required_version}") ){ + if( ! nextflow.version.matches(">= ${params.nfRequiredVersion}") ){ throw GroovyException('Nextflow version too old') } } catch (all) { log.error "====================================================\n" + - " Nextflow version ${nf_required_version} required! You are running v${workflow.nextflow.version}.\n" + + " Nextflow version ${params.nfRequiredVersion} required! You are running v${workflow.nextflow.version}.\n" + " Pipeline execution will continue, but things may break.\n" + " Please update Nextflow.\n" + "============================================================" } if (params.help) exit 0, helpMessage() -if (params.version) exit 0, versionMessage() -if (!isAllowedParams(params)) exit 1, "params unknown, see --help for more information" +if (!SarekUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! 
Use --project " -// Default params: -// Such params are overridden by command line or configuration definitions - -// Reports are generated -params.noReports = false -// BQSR are explicitly asked -params.explicitBqsrNeeded = true -// BAMQC is used -params.noBAMQC = false -// Run Sarek in onlyQC mode -params.onlyQC = false -// outDir is current directory -params.outDir = baseDir -// No sample is defined -params.sample = '' -// No sampleDir is defined -params.sampleDir = '' -// Step is mapping -params.step = 'mapping' -// No testing -params.test = '' -// Params are defined in config files -params.containerPath = '' -params.repository = '' -params.tag = '' - step = params.step.toLowerCase() if (step == 'preprocessing') step = 'mapping' directoryMap = defineDirectoryMap() referenceMap = defineReferenceMap() stepList = defineStepList() -reports = !params.noReports -onlyQC = params.onlyQC -verbose = params.verbose if (!checkParameterExistence(step, stepList)) exit 1, 'Unknown step, see --help for more information' if (step.contains(',')) exit 1, 'You can choose only one step, see --help for more information' @@ -119,8 +86,8 @@ if (params.sample) tsvPath = params.sample if (!params.sample && !params.sampleDir) { tsvPaths = [ 'mapping': "${workflow.projectDir}/data/tsv/tiny.tsv", - 'realign': "${params.outDir}/${directoryMap.nonRealigned}/nonRealigned.tsv", - 'recalibrate': "${params.outDir}/${directoryMap.nonRecalibrated}/nonRecalibrated.tsv" + 'realign': "${directoryMap.nonRealigned}/nonRealigned.tsv", + 'recalibrate': "${directoryMap.nonRecalibrated}/nonRecalibrated.tsv" ] if (params.test || step != 'mapping') tsvPath = tsvPaths[step] } @@ -162,13 +129,13 @@ startMessage() (fastqFiles, fastqFilesforFastQC) = fastqFiles.into(2) -if (verbose) fastqFiles = fastqFiles.view { +if (params.verbose) fastqFiles = fastqFiles.view { "FASTQs to preprocess:\n\ ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\tRun : ${it[3]}\n\ Files : [${it[4].fileName}, ${it[5].fileName}]" } -if (verbose) bamFiles = bamFiles.view { +if (params.verbose) bamFiles = bamFiles.view { "BAMs to process:\n\ ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\ Files : [${it[3].fileName}, ${it[4].fileName}]" @@ -177,7 +144,7 @@ if (verbose) bamFiles = bamFiles.view { process RunFastQC { tag {idPatient + "-" + idRun} - publishDir "${params.outDir}/${directoryMap.fastQC}/${idRun}", mode: 'copy' + publishDir "${directoryMap.fastQC}/${idRun}", mode: 'link' input: set idPatient, status, idSample, idRun, file(fastqFile1), file(fastqFile2) from fastqFilesforFastQC @@ -185,7 +152,7 @@ process RunFastQC { output: file "*_fastqc.{zip,html}" into fastQCreport - when: step == 'mapping' && reports + when: step == 'mapping' && !params.noReports script: """ @@ -193,7 +160,7 @@ process RunFastQC { """ } -if (verbose) fastQCreport = fastQCreport.view { +if (params.verbose) fastQCreport = fastQCreport.view { "FastQC report:\n\ Files : [${it[0].fileName}, ${it[1].fileName}]" } @@ -208,7 +175,7 @@ process MapReads { output: set idPatient, status, idSample, idRun, file("${idRun}.bam") into mappedBam - when: step == 'mapping' && !onlyQC + when: step == 'mapping' && !params.onlyQC script: readGroup = "@RG\\tID:${idRun}\\tPU:${idRun}\\tSM:${idSample}\\tLB:${idSample}\\tPL:illumina" @@ -221,7 +188,7 @@ process MapReads { """ } -if (verbose) mappedBam = mappedBam.view { +if (params.verbose) mappedBam = mappedBam.view { "Mapped BAM (single or to be merged):\n\ ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\tRun : ${it[3]}\n\ File : [${it[4].fileName}]" @@ 
-248,7 +215,7 @@ process MergeBams { output: set idPatient, status, idSample, file("${idSample}.bam") into mergedBam - when: step == 'mapping' && !onlyQC + when: step == 'mapping' && !params.onlyQC script: """ @@ -256,13 +223,13 @@ process MergeBams { """ } -if (verbose) singleBam = singleBam.view { +if (params.verbose) singleBam = singleBam.view { "Single BAM:\n\ ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\ File : [${it[3].fileName}]" } -if (verbose) mergedBam = mergedBam.view { +if (params.verbose) mergedBam = mergedBam.view { "Merged BAM:\n\ ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\ File : [${it[3].fileName}]" @@ -270,7 +237,7 @@ if (verbose) mergedBam = mergedBam.view { mergedBam = mergedBam.mix(singleBam) -if (verbose) mergedBam = mergedBam.view { +if (params.verbose) mergedBam = mergedBam.view { "BAM for MarkDuplicates:\n\ ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\ File : [${it[3].fileName}]" @@ -279,7 +246,7 @@ if (verbose) mergedBam = mergedBam.view { process MarkDuplicates { tag {idPatient + "-" + idSample} - publishDir params.outDir, saveAs: { it == "${bam}.metrics" ? "${directoryMap.markDuplicatesQC}/${it}" : "${directoryMap.nonRealigned}/${it}" }, mode: 'copy' + publishDir params.outDir, saveAs: { it == "${bam}.metrics" ? "${directoryMap.markDuplicatesQC}/${it}" : "${directoryMap.nonRealigned}/${it}" }, mode: 'link' input: set idPatient, status, idSample, file(bam) from mergedBam @@ -289,7 +256,7 @@ process MarkDuplicates { set idPatient, status, idSample, val("${idSample}_${status}.md.bam"), val("${idSample}_${status}.md.bai") into markDuplicatesTSV file ("${bam}.metrics") into markDuplicatesReport - when: step == 'mapping' && !onlyQC + when: step == 'mapping' && !params.onlyQC script: """ @@ -308,9 +275,9 @@ process MarkDuplicates { // Creating a TSV file to restart from this step markDuplicatesTSV.map { idPatient, status, idSample, bam, bai -> gender = patientGenders[idPatient] - "${idPatient}\t${gender}\t${status}\t${idSample}\t${params.outDir}/${directoryMap.nonRealigned}/${bam}\t${params.outDir}/${directoryMap.nonRealigned}/${bai}\n" + "${idPatient}\t${gender}\t${status}\t${idSample}\t${directoryMap.nonRealigned}/${bam}\t${directoryMap.nonRealigned}/${bai}\n" }.collectFile( - name: 'nonRealigned.tsv', sort: true, storeDir: "${params.outDir}/${directoryMap.nonRealigned}" + name: 'nonRealigned.tsv', sort: true, storeDir: "${directoryMap.nonRealigned}" ) // Create intervals for realignement using both tumor+normal as input @@ -328,21 +295,21 @@ else if (step == 'realign') duplicatesGrouped = bamFiles.map{ // and the other to the IndelRealigner process (duplicatesInterval, duplicatesRealign) = duplicatesGrouped.into(2) -if (verbose) duplicatesInterval = duplicatesInterval.view { +if (params.verbose) duplicatesInterval = duplicatesInterval.view { "BAMs for RealignerTargetCreator:\n\ ID : ${it[0]}\n\ Files : ${it[1].fileName}\n\ Files : ${it[2].fileName}" } -if (verbose) duplicatesRealign = duplicatesRealign.view { +if (params.verbose) duplicatesRealign = duplicatesRealign.view { "BAMs to phase:\n\ ID : ${it[0]}\n\ Files : ${it[1].fileName}\n\ Files : ${it[2].fileName}" } -if (verbose) markDuplicatesReport = markDuplicatesReport.view { +if (params.verbose) markDuplicatesReport = markDuplicatesReport.view { "MarkDuplicates report:\n\ File : [${it.fileName}]" } @@ -367,7 +334,7 @@ process RealignerTargetCreator { output: set idPatient, file("${idPatient}.intervals") into intervals - when: ( step == 'mapping' || step == 'realign' ) && !onlyQC + when: ( 
step == 'mapping' || step == 'realign' ) && !params.onlyQC script: bams = bam.collect{"-I ${it}"}.join(' ') @@ -385,7 +352,7 @@ process RealignerTargetCreator { """ } -if (verbose) intervals = intervals.view { +if (params.verbose) intervals = intervals.view { "Intervals to phase:\n\ ID : ${it[0]}\n\ File : [${it[1].fileName}]" @@ -401,7 +368,7 @@ bamsAndIntervals = duplicatesRealign intervals[1] )} -if (verbose) bamsAndIntervals = bamsAndIntervals.view { +if (params.verbose) bamsAndIntervals = bamsAndIntervals.view { "BAMs and Intervals phased for IndelRealigner:\n\ ID : ${it[0]}\n\ Files : ${it[1].fileName}\n\ @@ -413,6 +380,8 @@ if (verbose) bamsAndIntervals = bamsAndIntervals.view { process IndelRealigner { tag {idPatient} + publishDir directoryMap.nonRecalibrated, mode: 'link' + input: set idPatient, file(bam), file(bai), file(intervals) from bamsAndIntervals set file(genomeFile), file(genomeIndex), file(genomeDict), file(knownIndels), file(knownIndelsIndex) from Channel.value([ @@ -425,7 +394,7 @@ process IndelRealigner { output: set idPatient, file("*.real.bam"), file("*.real.bai") into realignedBam mode flatten - when: ( step == 'mapping' || step == 'realign' ) && !onlyQC + when: ( step == 'mapping' || step == 'realign' ) && !params.onlyQC script: bams = bam.collect{"-I ${it}"}.join(' ') @@ -450,7 +419,7 @@ realignedBam = realignedBam.map { [idPatient, status, idSample, bam, bai] } -if (verbose) realignedBam = realignedBam.view { +if (params.verbose) realignedBam = realignedBam.view { "Realigned BAM to CreateRecalibrationTable:\n\ ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\ Files : [${it[3].fileName}, ${it[4].fileName}]" @@ -459,7 +428,7 @@ if (verbose) realignedBam = realignedBam.view { process CreateRecalibrationTable { tag {idPatient + "-" + idSample} - publishDir "${params.outDir}/${directoryMap.nonRecalibrated}", mode: 'copy' + publishDir directoryMap.nonRecalibrated, mode: 'link', overwrite: false input: set idPatient, status, idSample, file(bam), file(bai) from realignedBam @@ -478,7 +447,7 @@ process CreateRecalibrationTable { set idPatient, status, idSample, file(bam), file(bai), file("${idSample}.recal.table") into recalibrationTable set idPatient, status, idSample, val("${idSample}_${status}.md.real.bam"), val("${idSample}_${status}.md.real.bai"), val("${idSample}.recal.table") into recalibrationTableTSV - when: ( step == 'mapping' || step == 'realign' ) && !onlyQC + when: ( step == 'mapping' || step == 'realign' ) && !params.onlyQC script: known = knownIndels.collect{ "-knownSites ${it}" }.join(' ') @@ -501,14 +470,14 @@ process CreateRecalibrationTable { // Create a TSV file to restart from this step recalibrationTableTSV.map { idPatient, status, idSample, bam, bai, recalTable -> gender = patientGenders[idPatient] - "${idPatient}\t${gender}\t${status}\t${idSample}\t${params.outDir}/${directoryMap.nonRecalibrated}/${bam}\t${params.outDir}/${directoryMap.nonRecalibrated}/${bai}\t${params.outDir}/${directoryMap.nonRecalibrated}/${recalTable}\n" + "${idPatient}\t${gender}\t${status}\t${idSample}\t${directoryMap.nonRecalibrated}/${bam}\t${directoryMap.nonRecalibrated}/${bai}\t${directoryMap.nonRecalibrated}/${recalTable}\n" }.collectFile( - name: 'nonRecalibrated.tsv', sort: true, storeDir: "${params.outDir}/${directoryMap.nonRecalibrated}" + name: 'nonRecalibrated.tsv', sort: true, storeDir: directoryMap.nonRecalibrated ) if (step == 'recalibrate') recalibrationTable = bamFiles -if (verbose) recalibrationTable = recalibrationTable.view { +if (params.verbose) 
recalibrationTable = recalibrationTable.view {
   "Base recalibrated table for RecalibrateBam:\n\
   ID    : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\
   Files : [${it[3].fileName}, ${it[4].fileName}, ${it[5].fileName}]"
@@ -526,7 +495,7 @@ recalTables = recalTables.map { [it[0]] + it[2..-1] } // remove status
 process RecalibrateBam {
   tag {idPatient + "-" + idSample}

-  publishDir "${params.outDir}/${directoryMap.recalibrated}", mode: 'copy'
+  publishDir directoryMap.recalibrated, mode: 'link'

   input:
     set idPatient, status, idSample, file(bam), file(bai), file(recalibrationReport) from recalibrationTable
@@ -543,7 +512,7 @@ process RecalibrateBam {
   // HaplotypeCaller can do BQSR on the fly, so do not create a
   // recalibrated BAM explicitly.
-  when: params.explicitBqsrNeeded && !onlyQC
+  when: params.explicitBqsrNeeded && !params.onlyQC

   script:
   """
@@ -560,12 +529,12 @@ process RecalibrateBam {
 // Creating a TSV file to restart from this step
 recalibratedBamTSV.map { idPatient, status, idSample, bam, bai ->
   gender = patientGenders[idPatient]
-  "${idPatient}\t${gender}\t${status}\t${idSample}\t${params.outDir}/${directoryMap.recalibrated}/${bam}\t${params.outDir}/${directoryMap.recalibrated}/${bai}\n"
+  "${idPatient}\t${gender}\t${status}\t${idSample}\t${directoryMap.recalibrated}/${bam}\t${directoryMap.recalibrated}/${bai}\n"
 }.collectFile(
-  name: 'recalibrated.tsv', sort: true, storeDir: "${params.outDir}/${directoryMap.recalibrated}"
+  name: 'recalibrated.tsv', sort: true, storeDir: directoryMap.recalibrated
 )

-if (verbose) recalibratedBam = recalibratedBam.view {
+if (params.verbose) recalibratedBam = recalibratedBam.view {
   "Recalibrated BAM for variant Calling:\n\
   ID    : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\
   Files : [${it[3].fileName}, ${it[4].fileName}]"
@@ -574,7 +543,7 @@ if (verbose) recalibratedBam = recalibratedBam.view {
 process RunSamtoolsStats {
   tag {idPatient + "-" + idSample}

-  publishDir "${params.outDir}/${directoryMap.samtoolsStats}", mode: 'copy'
+  publishDir directoryMap.samtoolsStats, mode: 'link'

   input:
     set idPatient, status, idSample, file(bam), file(bai) from bamForSamToolsStats
@@ -582,7 +551,7 @@ process RunSamtoolsStats {
   output:
     file ("${bam}.samtools.stats.out") into samtoolsStatsReport

-  when: reports
+  when: !params.noReports

   script:
   """
@@ -590,7 +559,7 @@ process RunSamtoolsStats {
   """
 }

-if (verbose) samtoolsStatsReport = samtoolsStatsReport.view {
+if (params.verbose) samtoolsStatsReport = samtoolsStatsReport.view {
   "SAMTools stats report:\n\
   File  : [${it.fileName}]"
 }
@@ -598,7 +567,7 @@ if (verbose) samtoolsStatsReport = samtoolsStatsReport.view {
 process RunBamQC {
   tag {idPatient + "-" + idSample}

-  publishDir "${params.outDir}/${directoryMap.bamQC}", mode: 'copy'
+  publishDir directoryMap.bamQC, mode: 'link'

   input:
     set idPatient, status, idSample, file(bam), file(bai) from bamForBamQC
@@ -606,7 +575,7 @@ process RunBamQC {
   output:
     file("${idSample}") into bamQCreport

-  when: reports && !params.noBAMQC
+  when: !params.noReports && !params.noBAMQC

   script:
   """
@@ -618,7 +587,7 @@ process RunBamQC {
   """
 }

-if (verbose) bamQCreport = bamQCreport.view {
+if (params.verbose) bamQCreport = bamQCreport.view {
   "BamQC report:\n\
   Dir   : [${it.fileName}]"
 }
@@ -629,11 +598,6 @@ if (verbose) bamQCreport = bamQCreport.view {
 ================================================================================
 */

-def sarekMessage() {
-  // Display Sarek message
-  log.info "Sarek ~ ${version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "")
-}
-
 def checkFileExtension(it, extension) {
   // Check file extension
   if (!it.toString().toLowerCase().endsWith(extension.toLowerCase())) exit 1, "File: ${it} has the wrong extension: ${extension} see --help for more information"
 }
@@ -648,93 +612,11 @@ def checkParameterExistence(it, list) {
   return true
 }

-def checkParameterList(list, realList) {
-  // Loop through all parameters to check their existence and spelling
-  return list.every{ checkParameterExistence(it, realList) }
-}
-
 def checkParamReturnFile(item) {
   params."${item}" = params.genomes[params.genome]."${item}"
   return file(params."${item}")
 }

-def checkParams(it) {
-  // Check if params is in this given list
-  return it in [
-    'ac-loci',
-    'acLoci',
-    'annotate-tools',
-    'annotate-VCF',
-    'annotateTools',
-    'annotateVCF',
-    'build',
-    'bwa-index',
-    'bwaIndex',
-    'call-name',
-    'callName',
-    'contact-mail',
-    'contactMail',
-    'container-path',
-    'containerPath',
-    'containers',
-    'cosmic-index',
-    'cosmic',
-    'cosmicIndex',
-    'dbsnp-index',
-    'dbsnp',
-    'docker',
-    'explicitBqsrNeeded',
-    'genome_base',
-    'genome-dict',
-    'genome-file',
-    'genome-index',
-    'genome',
-    'genomeDict',
-    'genomeFile',
-    'genomeIndex',
-    'genomes',
-    'help',
-    'intervals',
-    'known-indels-index',
-    'known-indels',
-    'knownIndels',
-    'knownIndelsIndex',
-    'max_cpus',
-    'max_memory',
-    'max_time',
-    'no-BAMQC',
-    'no-GVCF',
-    'no-reports',
-    'noBAMQC',
-    'noGVCF',
-    'noReports',
-    'only-QC',
-    'onlyQC',
-    'out-dir',
-    'outDir',
-    'params',
-    'project',
-    'push',
-    'repository',
-    'run-time',
-    'runTime',
-    'sample-dir',
-    'sample',
-    'sampleDir',
-    'single-CPUMem',
-    'singleCPUMem',
-    'singularity',
-    'step',
-    'tag',
-    'test',
-    'tools',
-    'total-memory',
-    'totalMemory',
-    'vcflist',
-    'verbose',
-    'version']
-}
-
 def checkReferenceMap(referenceMap) {
   // Loop through all the references files to check their existence
   referenceMap.every {
@@ -768,14 +650,14 @@ def checkExactlyOne(list) {
 def defineDirectoryMap() {
   return [
-    'nonRealigned'     : 'Preprocessing/NonRealigned',
-    'nonRecalibrated'  : 'Preprocessing/NonRecalibrated',
-    'recalibrated'     : 'Preprocessing/Recalibrated',
-    'bamQC'            : 'Reports/bamQC',
-    'bcftoolsStats'    : 'Reports/BCFToolsStats',
-    'fastQC'           : 'Reports/FastQC',
-    'markDuplicatesQC' : 'Reports/MarkDuplicates',
-    'samtoolsStats'    : 'Reports/SamToolsStats'
+    'nonRealigned'     : "${params.outDir}/Preprocessing/NonRealigned",
+    'nonRecalibrated'  : "${params.outDir}/Preprocessing/NonRecalibrated",
+    'recalibrated'     : "${params.outDir}/Preprocessing/Recalibrated",
+    'bamQC'            : "${params.outDir}/Reports/bamQC",
+    'bcftoolsStats'    : "${params.outDir}/Reports/BCFToolsStats",
+    'fastQC'           : "${params.outDir}/Reports/FastQC",
+    'markDuplicatesQC' : "${params.outDir}/Reports/MarkDuplicates",
+    'samtoolsStats'    : "${params.outDir}/Reports/SamToolsStats"
   ]
 }
@@ -989,20 +871,6 @@ def helpMessage() {
   log.info "         you're reading it"
   log.info "    --verbose"
   log.info "         Adds more verbosity to workflow"
-  log.info "    --version"
-  log.info "         displays version number"
-}
-
-def isAllowedParams(params) {
-  // Compare params to list of verified params
-  final test = true
-  params.each{
-    if (!checkParams(it.toString().split('=')[0])) {
-      println "params ${it.toString().split('=')[0]} is unknown"
-      test = false
-    }
-  }
-  return test
 }

 def minimalInformationMessage() {
@@ -1059,19 +927,17 @@ def returnTSV(it, number) {
   return it
 }

+def sarekMessage() {
+  // Display Sarek message
+  log.info "Sarek - Workflow For Somatic And Germline Variations ~ ${params.version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "")
+}
+
 def startMessage() {
   // Display start message
   this.sarekMessage()
   this.minimalInformationMessage()
 }

-def versionMessage() {
-  // Display version message
-  log.info "Sarek"
-  log.info "  version   : " + version
-  log.info workflow.commitId ? "Git info    : ${workflow.repository} - ${workflow.revision} [${workflow.commitId}]" : "  revision  : " + this.grabRevision()
-}
-
 workflow.onComplete {
   // Display complete message
   this.nextflowMessage()
diff --git a/nextflow.config b/nextflow.config
index 81f34b3caf..14d0c2c474 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -20,57 +20,57 @@ env {
 }

 profiles {
+  // Default profile for UPPMAX secure clusters
   // Runs the pipeline locally on a single 16-core node
   // Singularity images need to be set up
-  standard { // Default profile for UPPMAX secure clusters
+  standard {
     includeConfig 'configuration/base.config'
     includeConfig 'configuration/uppmax-localhost.config'
     includeConfig 'configuration/singularity-path.config'
   }
+  // slurm profile for UPPMAX secure clusters
   // Runs the pipeline using the job scheduler
   // Singularity images need to be set up
-  slurm { // slurm profile for UPPMAX secure clusters
+  slurm {
     includeConfig 'configuration/base.config'
     includeConfig 'configuration/uppmax-slurm.config'
     includeConfig 'configuration/singularity-path.config'
   }
-  // Runs the pipeline locally on a single 16-core node
-  // Singularity images will be pulled automatically
-  download { // profile for UPPMAX clusters
-    includeConfig 'configuration/base.config'
-    includeConfig 'configuration/uppmax-localhost.config'
-    includeConfig 'configuration/singularity.config'
-  }
+  // slurm profile for UPPMAX clusters
   // Runs the pipeline using the job scheduler
   // Singularity images will be pulled automatically
-  slurmDownload { // slurm profile for UPPMAX clusters
+  slurmDownload {
     includeConfig 'configuration/base.config'
     includeConfig 'configuration/uppmax-slurm.config'
     includeConfig 'configuration/singularity.config'
   }
+  // Small testing with Docker profile
   // Docker images will be pulled automatically
-  docker { // For small testing testing with Docker
+  docker {
     includeConfig 'configuration/base.config'
     includeConfig 'configuration/travis.config'
     includeConfig 'configuration/docker.config'
     includeConfig 'configuration/containers.config'
   }
+  // AWS Batch with Docker profile
   // Docker images will be pulled automatically
-  awsbatch { // For running on AWS Batch with Docker
+  awsbatch {
     includeConfig 'configuration/base.config'
     includeConfig 'configuration/aws-batch.config'
     includeConfig 'configuration/docker.config'
     includeConfig 'configuration/containers.config'
-  }
+  }
+  // Small testing with Singularity profile
   // Singularity images will be pulled automatically
-  singularity { // For small testing
+  singularity {
     includeConfig 'configuration/base.config'
     includeConfig 'configuration/travis.config'
     includeConfig 'configuration/singularity.config'
     includeConfig 'configuration/containers.config'
   }
+  // Small testing with Singularity profile
   // Singularity images need to be set up
-  singularityPath { // For small testing
+  singularityPath {
     includeConfig 'configuration/base.config'
     includeConfig 'configuration/travis.config'
     includeConfig 'configuration/singularity-path.config'
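Note: every profile above composes the same three layers — pipeline defaults, an execution environment, and a container engine. A minimal sketch of the pattern; the profile name and scheduler config path are illustrative, not from this patch:

// Sketch only: how a Sarek profile layers includeConfig files.
// 'myCluster' and 'my-scheduler.config' are hypothetical names.
profiles {
  myCluster {
    includeConfig 'configuration/base.config'          // pipeline defaults (params.*)
    includeConfig 'configuration/my-scheduler.config'  // executor / queue settings
    includeConfig 'configuration/singularity.config'   // container engine settings
  }
}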
diff --git a/runMultiQC.nf b/runMultiQC.nf
index b54b0c3378..51cd8fb1fe 100644
--- a/runMultiQC.nf
+++ b/runMultiQC.nf
@@ -36,45 +36,25 @@ kate: syntax groovy; space-indent on; indent-width 2;
 ================================================================================
 */

-version = '2.0.0'
-
 // Check that Nextflow version is up to date enough
 // try / throw / catch works for NF versions < 0.25 when this was implemented
-nf_required_version = '0.25.0'
 try {
-  if( ! nextflow.version.matches(">= ${nf_required_version}") ){
+  if( ! nextflow.version.matches(">= ${params.nfRequiredVersion}") ){
     throw GroovyException('Nextflow version too old')
   }
 } catch (all) {
   log.error "====================================================\n" +
-            "  Nextflow version ${nf_required_version} required! You are running v${workflow.nextflow.version}.\n" +
+            "  Nextflow version ${params.nfRequiredVersion} required! You are running v${workflow.nextflow.version}.\n" +
             "  Pipeline execution will continue, but things may break.\n" +
             "  Please update Nextflow.\n" +
             "============================================================"
 }

 if (params.help) exit 0, helpMessage()
-if (params.version) exit 0, versionMessage()
-if (!isAllowedParams(params)) exit 1, "params unknown, see --help for more information"
+if (!SarekUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information"
 if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project <UPPMAX Project ID>"

-// Default params:
-// Such params are overridden by command line or configuration definitions
-
-// Reports are generated
-params.noReports = false
-// outDir is current directory
-params.outDir = baseDir
-// Params are defined in config files
-params.containerPath = ''
-params.repository = ''
-params.tag = ''
-
 directoryMap = defineDirectoryMap()
-
-reports = !params.noReports
-verbose = params.verbose
-
 /*
 ================================================================================
 =                               P R O C E S S E S                              =
 ================================================================================
 */

 startMessage()

 process GenerateMultiQCconfig {
-  publishDir "${params.outDir}/${directoryMap.multiQC}", mode: 'copy'
+  publishDir directoryMap.multiQC, mode: 'link'

   input:

   output:
   file("multiqc_config.yaml") into multiQCconfig

-  when: reports
+  when: !params.noReports

   script:
   """
@@ -100,7 +80,7 @@ process GenerateMultiQCconfig {
   echo "custom_logo_url: http://opensource.scilifelab.se/projects/sarek" >> multiqc_config.yaml
   echo "custom_logo_title: 'Sarek'" >> multiqc_config.yaml
   echo "report_header_info:" >> multiqc_config.yaml
-  echo "- Sarek version: ${version}" >> multiqc_config.yaml
+  echo "- Sarek version: ${params.version}" >> multiqc_config.yaml
   echo "- Contact Name: ${params.callName}" >> multiqc_config.yaml
   echo "- Contact E-mail: ${params.contactMail}" >> multiqc_config.yaml
   echo "- Directory: ${workflow.launchDir}" >> multiqc_config.yaml
@@ -115,23 +95,23 @@ process GenerateMultiQCconfig {
   """
 }

-if (verbose && reports) multiQCconfig = multiQCconfig.view {
+if (params.verbose && !params.noReports) multiQCconfig = multiQCconfig.view {
   "MultiQC config:\n\
   File  : [${it.fileName}]"
 }
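The validation now delegates to SarekUtils.isAllowedParams, whose class is not shown in this patch; presumably it centralizes the per-script checkParams/isAllowedParams pair deleted further down. A minimal sketch, assuming the same behaviour as the removed code (the allowed-params list is abridged):

// Sketch only: assumed shape of the shared SarekUtils helper.
// Mirrors the isAllowedParams/checkParams logic removed from each script.
class SarekUtils {
  static allowedParams = ['genome', 'help', 'noReports', 'outDir', 'project', 'verbose'] // abridged

  static boolean isAllowedParams(params) {
    def test = true
    params.each {
      def name = it.toString().split('=')[0]
      if (!(name in allowedParams)) {
        println "params ${name} is unknown"
        test = false
      }
    }
    return test
  }
}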
Channel.fromPath("${directoryMap.samtoolsStats}/*"), multiQCconfig ).collect() process RunMultiQC { - publishDir "${params.outDir}/${directoryMap.multiQC}", mode: 'copy' + publishDir directoryMap.multiQC, mode: 'link' input: file ('*') from reportsForMultiQC @@ -139,7 +119,7 @@ process RunMultiQC { output: set file("*multiqc_report.html"), file("*multiqc_data") into multiQCReport - when: reports + when: !params.noReports script: """ @@ -147,7 +127,7 @@ process RunMultiQC { """ } -if (verbose) multiQCReport = multiQCReport.view { +if (params.verbose) multiQCReport = multiQCReport.view { "MultiQC report:\n\ File : [${it[0].fileName}]\n\ Dir : [${it[1].fileName}]" @@ -159,97 +139,6 @@ if (verbose) multiQCReport = multiQCReport.view { ================================================================================ */ -def sarekMessage() { - // Display Sarek message - log.info "Sarek ~ ${version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "") -} - -def checkParameterExistence(it, list) { - // Check parameter existence - if (!list.contains(it)) { - println("Unknown parameter: ${it}") - return false - } - return true -} - -def checkParams(it) { - // Check if params is in this given list - return it in [ - 'ac-loci', - 'acLoci', - 'annotate-tools', - 'annotate-VCF', - 'annotateTools', - 'annotateVCF', - 'build', - 'bwa-index', - 'bwaIndex', - 'call-name', - 'callName', - 'contact-mail', - 'contactMail', - 'container-path', - 'containerPath', - 'containers', - 'cosmic-index', - 'cosmic', - 'cosmicIndex', - 'dbsnp-index', - 'dbsnp', - 'docker', - 'explicitBqsrNeeded', - 'genome_base', - 'genome-dict', - 'genome-file', - 'genome-index', - 'genome', - 'genomeDict', - 'genomeFile', - 'genomeIndex', - 'genomes', - 'help', - 'intervals', - 'known-indels-index', - 'known-indels', - 'knownIndels', - 'knownIndelsIndex', - 'max_cpus', - 'max_memory', - 'max_time', - 'no-BAMQC', - 'no-GVCF', - 'no-reports', - 'noBAMQC', - 'noGVCF', - 'noReports', - 'only-QC', - 'onlyQC', - 'out-dir', - 'outDir', - 'params', - 'project', - 'push', - 'repository', - 'run-time', - 'runTime', - 'sample-dir', - 'sample', - 'sampleDir', - 'single-CPUMem', - 'singleCPUMem', - 'singularity', - 'step', - 'tag', - 'test', - 'tools', - 'total-memory', - 'totalMemory', - 'vcflist', - 'verbose', - 'version'] -} - def checkUppmaxProject() { // check if UPPMAX project number is specified return !(workflow.profile == 'slurm' && !params.project) @@ -257,7 +146,12 @@ def checkUppmaxProject() { def defineDirectoryMap() { return [ - 'multiQC' : 'Reports/MultiQC' + 'bamQC' : "${params.outDir}/Reports/bamQC", + 'bcftoolsStats' : "${params.outDir}/Reports/BCFToolsStats", + 'fastQC' : "${params.outDir}/Reports/FastQC", + 'markDuplicatesQC' : "${params.outDir}/Reports/MarkDuplicates", + 'multiQC' : "${params.outDir}/Reports/MultiQC", + 'samtoolsStats' : "${params.outDir}/Reports/SamToolsStats" ] } @@ -303,18 +197,6 @@ def helpMessage() { log.info " displays version number" } -def isAllowedParams(params) { - // Compare params to list of verified params - final test = true - params.each{ - if (!checkParams(it.toString().split('=')[0])) { - println "params ${it.toString().split('=')[0]} is unknown" - test = false - } - } - return test -} - def minimalInformationMessage() { // Minimal information message log.info "Command Line: " + workflow.commandLine @@ -336,19 +218,17 @@ def nextflowMessage() { log.info "N E X T F L O W ~ version ${workflow.nextflow.version} ${workflow.nextflow.build}" } +def sarekMessage() { + // 
@@ -303,18 +197,6 @@ def helpMessage() {
   log.info "         displays version number"
 }

-def isAllowedParams(params) {
-  // Compare params to list of verified params
-  final test = true
-  params.each{
-    if (!checkParams(it.toString().split('=')[0])) {
-      println "params ${it.toString().split('=')[0]} is unknown"
-      test = false
-    }
-  }
-  return test
-}
-
 def minimalInformationMessage() {
   // Minimal information message
   log.info "Command Line: " + workflow.commandLine
@@ -336,19 +218,17 @@ def nextflowMessage() {
   log.info "N E X T F L O W  ~  version ${workflow.nextflow.version} ${workflow.nextflow.build}"
 }

+def sarekMessage() {
+  // Display Sarek message
+  log.info "Sarek - Workflow For Somatic And Germline Variations ~ ${params.version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "")
+}
+
 def startMessage() {
   // Display start message
   this.sarekMessage()
   this.minimalInformationMessage()
 }

-def versionMessage() {
-  // Display version message
-  log.info "Sarek"
-  log.info "  version   : " + version
-  log.info workflow.commitId ? "Git info    : ${workflow.repository} - ${workflow.revision} [${workflow.commitId}]" : "  revision  : " + this.grabRevision()
-}
-
 workflow.onComplete {
   // Display complete message
   this.nextflowMessage()
diff --git a/scripts/do_all.sh b/scripts/do_all.sh
index 72f5d98bcb..56f42821c3 100755
--- a/scripts/do_all.sh
+++ b/scripts/do_all.sh
@@ -4,7 +4,7 @@ set -xeuo pipefail
 PROFILE=singularity
 PUSH=''
 REPOSITORY=maxulysse
-TAG=1.3
+TAG=latest
 TOOL=docker

 while [[ $# -gt 0 ]]
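A note on the mode change that recurs through this patch: publishDir mode: 'link' publishes results as hard links into the output directory instead of copying them, which is faster and avoids duplicating large BAMs/VCFs, but requires the work directory and the output directory to share a filesystem. A minimal sketch; the process and file names are illustrative:

// Sketch only: the two publishDir modes in play here.
process Example {
  publishDir '/results', mode: 'link'   // hard link: fast, no extra space, same-FS only
  // mode: 'copy' (the old behaviour) duplicates data but works across filesystems

  output:
  file 'example.txt'

  script:
  """
  touch example.txt
  """
}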
diff --git a/somaticVC.nf b/somaticVC.nf
index f996409bb6..231b70eb4b 100644
--- a/somaticVC.nf
+++ b/somaticVC.nf
@@ -48,68 +48,30 @@ kate: syntax groovy; space-indent on; indent-width 2;
 ================================================================================
 */

-version = '2.0.0'
-
 // Check that Nextflow version is up to date enough
 // try / throw / catch works for NF versions < 0.25 when this was implemented
-nf_required_version = '0.25.0'
 try {
-  if( ! nextflow.version.matches(">= ${nf_required_version}") ){
+  if( ! nextflow.version.matches(">= ${params.nfRequiredVersion}") ){
     throw GroovyException('Nextflow version too old')
   }
 } catch (all) {
   log.error "====================================================\n" +
-            "  Nextflow version ${nf_required_version} required! You are running v${workflow.nextflow.version}.\n" +
+            "  Nextflow version ${params.nfRequiredVersion} required! You are running v${workflow.nextflow.version}.\n" +
             "  Pipeline execution will continue, but things may break.\n" +
             "  Please update Nextflow.\n" +
             "============================================================"
 }

 if (params.help) exit 0, helpMessage()
-if (params.version) exit 0, versionMessage()
-if (!isAllowedParams(params)) exit 1, "params unknown, see --help for more information"
+if (!SarekUtils.isAllowedParams(params)) exit 1, "params unknown, see --help for more information"
 if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project <UPPMAX Project ID>"

-// Default params:
-// Such params are overridden by command line or configuration definitions
-
-// GVCF are generated
-params.noGVCF = false
-// Reports are generated
-params.noReports = false
-// BAMQC is used
-params.noBAMQC = false
-// Run Sarek in onlyQC mode
-params.onlyQC = false
-// outDir is current directory
-params.outDir = baseDir
-// No sample is defined
-params.sample = ''
-// Step is variantcalling
-step = 'variantcalling'
-// Not testing
-params.test = ''
-// No tools to be used
-params.tools = ''
-// Params are defined in config files
-params.containerPath = ''
-params.repository = ''
-params.tag = ''
-
 tools = params.tools ? params.tools.split(',').collect{it.trim().toLowerCase()} : []

 directoryMap = defineDirectoryMap()
 referenceMap = defineReferenceMap()
-stepList = defineStepList()
 toolList = defineToolList()
-nucleotidesPerSecond = 1000.0 // used to estimate variant calling runtime
-gvcf = !params.noGVCF
-reports = !params.noReports
-onlyQC = params.onlyQC
-verbose = params.verbose

-if (!checkParameterExistence(step, stepList)) exit 1, 'Unknown step, see --help for more information'
-if (step.contains(',')) exit 1, 'You can choose only one step, see --help for more information'
+
 if (!checkReferenceMap(referenceMap)) exit 1, 'Missing Reference file(s), see --help for more information'
 if (!checkParameterList(tools,toolList)) exit 1, 'Unknown tool(s), see --help for more information'
@@ -126,7 +88,7 @@ if (params.test && params.genome in ['GRCh37', 'GRCh38']) {

 tsvPath = ''
 if (params.sample) tsvPath = params.sample
-else tsvPath = "${params.outDir}/${directoryMap.recalibrated}/recalibrated.tsv"
+else tsvPath = "${directoryMap.recalibrated}/recalibrated.tsv"

 // Set up the bamFiles channel

@@ -146,7 +108,7 @@ if (tsvPath) {

 startMessage()

-if (verbose) bamFiles = bamFiles.view {
+if (params.verbose) bamFiles = bamFiles.view {
   "BAMs to process:\n\
   ID    : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\
   Files : [${it[3].fileName}, ${it[4].fileName}]"
@@ -159,7 +121,7 @@ recalTables = recalTables.map{ it + [null] } // null recalibration table means:

 recalTables = recalTables.map { [it[0]] + it[2..-1] } // remove status

-if (verbose) recalibratedBam = recalibratedBam.view {
+if (params.verbose) recalibratedBam = recalibratedBam.view {
   "Recalibrated BAM for variant Calling:\n\
   ID    : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\
   Files : [${it[3].fileName}, ${it[4].fileName}]"
@@ -168,7 +130,7 @@ if (verbose) recalibratedBam = recalibratedBam.view {
 process RunSamtoolsStats {
   tag {idPatient + "-" + idSample}

-  publishDir "${params.outDir}/${directoryMap.samtoolsStats}", mode: 'copy'
+  publishDir directoryMap.samtoolsStats, mode: 'link'

   input:
     set idPatient, status, idSample, file(bam), file(bai) from bamForSamToolsStats
@@ -176,7 +138,7 @@ process RunSamtoolsStats {
   output:
     file ("${bam}.samtools.stats.out") into samtoolsStatsReport

-  when: reports
+  when: !params.noReports

   script:
   """
@@ -184,7 +146,7 @@ process RunSamtoolsStats {
   """
 }

-if (verbose) samtoolsStatsReport = samtoolsStatsReport.view {
+if (params.verbose) samtoolsStatsReport = samtoolsStatsReport.view {
   "SAMTools stats report:\n\
   File  : [${it.fileName}]"
 }
@@ -192,7 +154,7 @@ if (verbose) samtoolsStatsReport = samtoolsStatsReport.view {
 process RunBamQC {
   tag {idPatient + "-" + idSample}

-  publishDir "${params.outDir}/${directoryMap.bamQC}", mode: 'copy'
+  publishDir directoryMap.bamQC, mode: 'link'

   input:
     set idPatient, status, idSample, file(bam), file(bai) from bamForBamQC
@@ -200,7 +162,7 @@ process RunBamQC {
   output:
     file("${idSample}") into bamQCreport

-  when: reports && !params.noBAMQC
+  when: !params.noReports && !params.noBAMQC

   script:
   """
@@ -212,7 +174,7 @@ process RunBamQC {
   """
 }

-if (verbose) bamQCreport = bamQCreport.view {
+if (params.verbose) bamQCreport = bamQCreport.view {
   "BamQC report:\n\
   Dir   : [${it.fileName}]"
 }
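For context, the retained `tools` expression above parses a comma-separated --tools string case-insensitively, and the `'manta' in tools`-style guards later in the file key off the resulting list. A standalone sketch with an illustrative CLI value:

// Sketch only: behaviour of the retained tool-selection expression.
def params = [tools: 'Manta, Strelka,ASCAT']   // illustrative --tools value
def tools = params.tools ? params.tools.split(',').collect{it.trim().toLowerCase()} : []
assert tools == ['manta', 'strelka', 'ascat']
assert 'manta' in tools                        // the form used by `when:` guards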
@@ -274,7 +236,7 @@ process CreateIntervalBeds {
       t = \$5  # runtime estimate
       if (t == "") {
         # no runtime estimate in this row, assume default value
-        t = (\$3 - \$2) / ${nucleotidesPerSecond}
+        t = (\$3 - \$2) / ${params.nucleotidesPerSecond}
       }
       if (name == "" || (chunk > 600 && (chunk + t) > longest * 1.05)) {
         # start a new chunk
@@ -306,7 +268,7 @@ bedIntervals = bedIntervals
       else {
         start = fields[1].toInteger()
         end = fields[2].toInteger()
-        duration += (end - start) / nucleotidesPerSecond
+        duration += (end - start) / params.nucleotidesPerSecond
       }
     }
     [duration, intervalFile]
@@ -314,7 +276,7 @@ bedIntervals = bedIntervals
   .flatten().collate(2)
   .map{duration, intervalFile -> intervalFile}

-if (verbose) bedIntervals = bedIntervals.view {
+if (params.verbose) bedIntervals = bedIntervals.view {
   "  Interv: ${it.baseName}"
 }
@@ -357,7 +319,7 @@ process RunMutect1 {
   output:
     set val("mutect1"), idPatient, idSampleNormal, idSampleTumor, file("${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf") into mutect1Output

-  when: 'mutect1' in tools && !onlyQC
+  when: 'mutect1' in tools && !params.onlyQC

   script:
   """
@@ -396,7 +358,7 @@ process RunMutect2 {
   output:
     set val("mutect2"), idPatient, idSampleNormal, idSampleTumor, file("${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf") into mutect2Output

-  when: 'mutect2' in tools && !onlyQC
+  when: 'mutect2' in tools && !params.onlyQC

   script:
   """
@@ -426,7 +388,7 @@ process RunFreeBayes {
   output:
     set val("freebayes"), idPatient, idSampleNormal, idSampleTumor, file("${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf") into freebayesOutput

-  when: 'freebayes' in tools && !onlyQC
+  when: 'freebayes' in tools && !params.onlyQC

   script:
   """
@@ -452,7 +414,7 @@ freebayesOutput = freebayesOutput.groupTuple(by:[0,1,2,3])
 // so we can have a single sorted VCF containing all the calls for a given caller
 vcfsToMerge = mutect1Output.mix(mutect2Output, freebayesOutput)

-if (verbose) vcfsToMerge = vcfsToMerge.view {
+if (params.verbose) vcfsToMerge = vcfsToMerge.view {
   "VCFs To be merged:\n\
   Tool  : ${it[0]}\tID    : ${it[1]}\tSample: [${it[3]}, ${it[2]}]\n\
   Files : ${it[4].fileName}"
@@ -461,7 +423,7 @@ if (verbose) vcfsToMerge = vcfsToMerge.view {
 process ConcatVCF {
   tag {variantCaller + "_" + idSampleTumor + "_vs_" + idSampleNormal}

-  publishDir "${params.outDir}/${directoryMap."$variantCaller"}", mode: 'copy'
+  publishDir "${directoryMap."$variantCaller"}", mode: 'link'

   input:
     set variantCaller, idPatient, idSampleNormal, idSampleTumor, file(vcFiles) from vcfsToMerge
@@ -471,7 +433,7 @@ process ConcatVCF {
     set variantCaller, idPatient, idSampleNormal, idSampleTumor, file("*.vcf.gz") into vcfConcatenated
     file("*.vcf.gz.tbi") into vcfConcatenatedTbi

-  when: ('mutect1' in tools || 'mutect2' in tools || 'freebayes' in tools ) && !onlyQC
+  when: ('mutect1' in tools || 'mutect2' in tools || 'freebayes' in tools ) && !params.onlyQC

   script:
   outputFile = "${variantCaller}_${idSampleTumor}_vs_${idSampleNormal}.vcf"
@@ -518,7 +480,7 @@ process ConcatVCF {
   """
 }

-if (verbose) vcfConcatenated = vcfConcatenated.view {
+if (params.verbose) vcfConcatenated = vcfConcatenated.view {
   "Variant Calling output:\n\
   Tool  : ${it[0]}\tID    : ${it[1]}\tSample: [${it[3]}, ${it[2]}]\n\
   File  : ${it[4].fileName}"
@@ -527,7 +489,7 @@ if (verbose) vcfConcatenated = vcfConcatenated.view {
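The interval-splitting logic above weights each BED interval by an estimated runtime so that chunks of roughly equal work can be scheduled, falling back to interval length divided by params.nucleotidesPerSecond when the BED carries no explicit estimate. A standalone sketch of that fallback; the coordinates are illustrative, and the 1000.0 default is assumed to match the hard-coded constant removed above:

// Sketch only: runtime estimate used to sort/balance interval chunks.
def params = [nucleotidesPerSecond: 1000.0]   // assumed default in base.config
def start = 1_000_000, end = 2_500_000        // illustrative BED coordinates
def duration = (end - start) / params.nucleotidesPerSecond
assert duration == 1500                       // seconds; longer intervals sort first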
 process RunStrelka {
   tag {idSampleTumor + "_vs_" + idSampleNormal}

-  publishDir "${params.outDir}/${directoryMap.strelka}", mode: 'copy'
+  publishDir directoryMap.strelka, mode: 'link'

   input:
     set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor) from bamsForStrelka
@@ -540,7 +502,7 @@ process RunStrelka {
   output:
     set val("strelka"), idPatient, idSampleNormal, idSampleTumor, file("*.vcf.gz"), file("*.vcf.gz.tbi") into strelkaOutput

-  when: 'strelka' in tools && !onlyQC
+  when: 'strelka' in tools && !params.onlyQC

   script:
   """
@@ -563,7 +525,7 @@ process RunStrelka {
   """
 }

-if (verbose) strelkaOutput = strelkaOutput.view {
+if (params.verbose) strelkaOutput = strelkaOutput.view {
   "Variant Calling output:\n\
   Tool  : ${it[0]}\tID    : ${it[1]}\tSample: [${it[3]}, ${it[2]}]\n\
   Files : ${it[4].fileName}\n\
@@ -573,7 +535,7 @@ if (verbose) strelkaOutput = strelkaOutput.view {
 process RunManta {
   tag {idSampleTumor + "_vs_" + idSampleNormal}

-  publishDir "${params.outDir}/${directoryMap.manta}", mode: 'copy'
+  publishDir directoryMap.manta, mode: 'link'

   input:
     set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor) from bamsForManta
@@ -585,7 +547,7 @@ process RunManta {
   output:
     set val("manta"), idPatient, idSampleNormal, idSampleTumor, file("*.vcf.gz"), file("*.vcf.gz.tbi") into mantaOutput

-  when: 'manta' in tools && !onlyQC
+  when: 'manta' in tools && !params.onlyQC

   script:
   """
@@ -616,7 +578,7 @@ process RunManta {
   """
 }

-if (verbose) mantaOutput = mantaOutput.view {
+if (params.verbose) mantaOutput = mantaOutput.view {
   "Variant Calling output:\n\
   Tool  : ${it[0]}\tID    : ${it[1]}\tSample: [${it[3]}, ${it[2]}]\n\
   Files : ${it[4].fileName}\n\
@@ -626,7 +588,7 @@ if (verbose) mantaOutput = mantaOutput.view {
 process RunSingleManta {
   tag {idSample + " - Tumor-Only"}

-  publishDir "${params.outDir}/${directoryMap.manta}", mode: 'copy'
+  publishDir directoryMap.manta, mode: 'link'

   input:
     set idPatient, status, idSample, file(bam), file(bai) from bamsForSingleManta
@@ -638,7 +600,7 @@ process RunSingleManta {
   output:
     set val("singlemanta"), idPatient, idSample, file("*.vcf.gz"), file("*.vcf.gz.tbi") into singleMantaOutput

-  when: 'manta' in tools && status == 1 && !onlyQC
+  when: 'manta' in tools && status == 1 && !params.onlyQC

   script:
   """
@@ -664,7 +626,7 @@ process RunSingleManta {
   """
 }

-if (verbose) singleMantaOutput = singleMantaOutput.view {
+if (params.verbose) singleMantaOutput = singleMantaOutput.view {
   "Variant Calling output:\n\
   Tool  : ${it[0]}\tID    : ${it[1]}\tSample: ${it[2]}\n\
   Files : ${it[3].fileName}\n\
@@ -688,7 +650,7 @@ process RunAlleleCount {
   output:
     set idPatient, status, idSample, file("${idSample}.alleleCount") into alleleCountOutput

-  when: 'ascat' in tools && !onlyQC
+  when: 'ascat' in tools && !params.onlyQC

   script:
   """
@@ -719,7 +681,7 @@ alleleCountOutput = alleleCountOutput.map {

 process RunConvertAlleleCounts {
   tag {idSampleTumor + "_vs_" + idSampleNormal}

-  publishDir "${params.outDir}/${directoryMap.ascat}", mode: 'copy'
+  publishDir directoryMap.ascat, mode: 'link'

   input:
     set idPatient, idSampleNormal, idSampleTumor, file(alleleCountNormal), file(alleleCountTumor) from alleleCountOutput
@@ -727,7 +689,7 @@ process RunConvertAlleleCounts {
   output:
     set idPatient, idSampleNormal, idSampleTumor, file("${idSampleNormal}.BAF"), file("${idSampleNormal}.LogR"), file("${idSampleTumor}.BAF"), file("${idSampleTumor}.LogR") into convertAlleleCountsOutput

-  when: 'ascat' in tools && !onlyQC
+  when: 'ascat' in tools && !params.onlyQC

   script:
   gender = patientGenders[idPatient]
@@ -741,7 +703,7 @@ process RunConvertAlleleCounts {

 process RunAscat {
   tag {idSampleTumor + "_vs_" + idSampleNormal}

-  publishDir "${params.outDir}/${directoryMap.ascat}", mode: 'copy'
+  publishDir directoryMap.ascat, mode: 'link'

   input:
     set idPatient, idSampleNormal, idSampleTumor, file(bafNormal), file(logrNormal), file(bafTumor), file(logrTumor) from convertAlleleCountsOutput
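RunSingleManta above is additionally gated on status == 1, so it only fires for tumour samples without a matched normal; the status column is assumed to follow the TSV convention of 0 for normal and 1 for tumour, consistent with the "Tumor-Only" tag. A standalone sketch of that guard with illustrative values:

// Sketch only: the single-sample (tumour-only) guard used above.
// Assumes status encoding 0 = normal, 1 = tumour from the input TSV.
def tools = ['manta']
def status = 1                      // illustrative tumour-only sample
def params = [onlyQC: false]
assert 'manta' in tools && status == 1 && !params.onlyQC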
@@ -749,7 +711,7 @@ process RunAscat {
   output:
     set val("ascat"), idPatient, idSampleNormal, idSampleTumor, file("${idSampleTumor}.*.{png,txt}") into ascatOutput

-  when: 'ascat' in tools && !onlyQC
+  when: 'ascat' in tools && !params.onlyQC

   script:
   """
@@ -759,7 +721,7 @@ process RunAscat {
   """
 }

-if (verbose) ascatOutput = ascatOutput.view {
+if (params.verbose) ascatOutput = ascatOutput.view {
   "Variant Calling output:\n\
   Tool  : ${it[0]}\tID    : ${it[1]}\tSample: [${it[3]}, ${it[2]}]\n\
   Files : [${it[4].fileName}]"
@@ -793,7 +755,7 @@ vcfForBCFtools = Channel.empty().mix(
 process RunBcftoolsStats {
   tag {vcf}

-  publishDir "${params.outDir}/${directoryMap.bcftoolsStats}", mode: 'copy'
+  publishDir directoryMap.bcftoolsStats, mode: 'link'

   input:
     set variantCaller, file(vcf) from vcfForBCFtools
@@ -801,7 +763,7 @@ process RunBcftoolsStats {
   output:
     file ("${vcf.baseName}.bcf.tools.stats.out") into bcfReport

-  when: reports
+  when: !params.noReports

   script:
   """
@@ -809,7 +771,7 @@ process RunBcftoolsStats {
   """
 }

-if (verbose) bcfReport = bcfReport.view {
+if (params.verbose) bcfReport = bcfReport.view {
   "BCFTools stats report:\n\
   File  : [${it.fileName}]"
 }
@@ -822,11 +784,6 @@ bcfReport.close()
 ================================================================================
 */

-def sarekMessage() {
-  // Display Sarek message
-  log.info "Sarek ~ ${version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "")
-}
-
 def checkFileExtension(it, extension) {
   // Check file extension
   if (!it.toString().toLowerCase().endsWith(extension.toLowerCase())) exit 1, "File: ${it} has the wrong extension: ${extension} see --help for more information"
 }
@@ -851,82 +808,6 @@ def checkParamReturnFile(item) {
   return file(params."${item}")
 }

-def checkParams(it) {
-  // Check if params is in this given list
-  return it in [
-    'ac-loci',
-    'acLoci',
-    'annotate-tools',
-    'annotate-VCF',
-    'annotateTools',
-    'annotateVCF',
-    'build',
-    'bwa-index',
-    'bwaIndex',
-    'call-name',
-    'callName',
-    'contact-mail',
-    'contactMail',
-    'container-path',
-    'containerPath',
-    'containers',
-    'cosmic-index',
-    'cosmic',
-    'cosmicIndex',
-    'dbsnp-index',
-    'dbsnp',
-    'docker',
-    'genome_base',
-    'genome-dict',
-    'genome-file',
-    'genome-index',
-    'genome',
-    'genomeDict',
-    'genomeFile',
-    'genomeIndex',
-    'genomes',
-    'help',
-    'intervals',
-    'known-indels-index',
-    'known-indels',
-    'knownIndels',
-    'knownIndelsIndex',
-    'max_cpus',
-    'max_memory',
-    'max_time',
-    'no-BAMQC',
-    'no-GVCF',
-    'no-reports',
-    'noBAMQC',
-    'noGVCF',
-    'noReports',
-    'only-QC',
-    'onlyQC',
-    'out-dir',
-    'outDir',
-    'params',
-    'project',
-    'push',
-    'repository',
-    'run-time',
-    'runTime',
-    'sample-dir',
-    'sample',
-    'sampleDir',
-    'single-CPUMem',
-    'singleCPUMem',
-    'singularity',
-    'step',
-    'tag',
-    'test',
-    'tools',
-    'total-memory',
-    'totalMemory',
-    'vcflist',
-    'verbose',
-    'version']
-}
-
 def checkReferenceMap(referenceMap) {
   // Loop through all the references files to check their existence
   referenceMap.every {
@@ -952,26 +833,18 @@ def checkUppmaxProject() {
   return !(workflow.profile == 'slurm' && !params.project)
 }

-def checkExactlyOne(list) {
-  final n = 0
-  list.each{n += it ? 1 : 0}
-  return n == 1
-}
-
 def defineDirectoryMap() {
   return [
-    'recalibrated'    : 'Preprocessing/Recalibrated',
-    'bamQC'           : 'Reports/bamQC',
-    'bcftoolsStats'   : 'Reports/BCFToolsStats',
-    'samtoolsStats'   : 'Reports/SamToolsStats',
-    'ascat'           : 'VariantCalling/Ascat',
-    'freebayes'       : 'VariantCalling/FreeBayes',
-    'haplotypecaller' : 'VariantCalling/HaplotypeCaller',
-    'gvcf-hc'         : 'VariantCalling/HaplotypeCallerGVCF',
-    'manta'           : 'VariantCalling/Manta',
-    'mutect1'         : 'VariantCalling/MuTect1',
-    'mutect2'         : 'VariantCalling/MuTect2',
-    'strelka'         : 'VariantCalling/Strelka'
+    'recalibrated'  : "${params.outDir}/Preprocessing/Recalibrated",
+    'bamQC'         : "${params.outDir}/Reports/bamQC",
+    'bcftoolsStats' : "${params.outDir}/Reports/BCFToolsStats",
+    'samtoolsStats' : "${params.outDir}/Reports/SamToolsStats",
+    'ascat'         : "${params.outDir}/VariantCalling/Ascat",
+    'freebayes'     : "${params.outDir}/VariantCalling/FreeBayes",
+    'manta'         : "${params.outDir}/VariantCalling/Manta",
+    'mutect1'       : "${params.outDir}/VariantCalling/MuTect1",
+    'mutect2'       : "${params.outDir}/VariantCalling/MuTect2",
+    'strelka'       : "${params.outDir}/VariantCalling/Strelka"
   ]
 }
@@ -996,12 +869,6 @@ def defineReferenceMap() {
   ]
 }

-def defineStepList() {
-  return [
-    'variantcalling'
-  ]
-}
-
 def defineToolList() {
   return [
     'ascat',
@@ -1122,20 +989,6 @@ def helpMessage() {
   log.info "         you're reading it"
   log.info "    --verbose"
   log.info "         Adds more verbosity to workflow"
-  log.info "    --version"
-  log.info "         displays version number"
-}
-
-def isAllowedParams(params) {
-  // Compare params to list of verified params
-  final test = true
-  params.each{
-    if (!checkParams(it.toString().split('=')[0])) {
-      println "params ${it.toString().split('=')[0]} is unknown"
-      test = false
-    }
-  }
-  return test
 }

 def minimalInformationMessage() {
@@ -1149,7 +1002,6 @@ def minimalInformationMessage() {
   log.info "TSV file    : ${tsvFile}"
   log.info "Genome      : " + params.genome
   log.info "Genome_base : " + params.genome_base
-  log.info "Step        : " + step
   log.info "Tools       : " + tools.join(', ')
   log.info "Containers  :"
   if (params.repository) log.info "  Repository : ${params.repository}"
@@ -1193,19 +1045,17 @@ def returnTSV(it, number) {
   return it
 }

+def sarekMessage() {
+  // Display Sarek message
+  log.info "Sarek - Workflow For Somatic And Germline Variations ~ ${params.version} - " + this.grabRevision() + (workflow.commitId ? " [${workflow.commitId}]" : "")
+}
+
 def startMessage() {
   // Display start message
   this.sarekMessage()
   this.minimalInformationMessage()
 }

-def versionMessage() {
-  // Display version message
-  log.info "Sarek"
-  log.info "  version   : " + version
-  log.info workflow.commitId ? "Git info    : ${workflow.repository} - ${workflow.revision} [${workflow.commitId}]" : "  revision  : " + this.grabRevision()
-}
-
 workflow.onComplete {
   // Display complete message
   this.nextflowMessage()
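Across all three scripts the patch drops the hard-coded `version = '2.0.0'` and `nf_required_version = '0.25.0'` in favour of params.version and params.nfRequiredVersion. Those values are presumably centralized in configuration/base.config, which is not shown in this excerpt; a minimal sketch of the assumed declaration, with the values taken from the removed lines:

// Sketch only: assumed home of the values referenced above.
// File and param names inferred from the includeConfig lines and
// the params.version / params.nfRequiredVersion references; not shown in this patch.
params {
  version = '2.0.0'             // previously a top-level `version` in each script
  nfRequiredVersion = '0.25.0'  // previously `nf_required_version` in each script
}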