Skip to content

Commit

Permalink
Merge pull request #97 from kpalin/dev
Browse files Browse the repository at this point in the history
Move AC INFO as AD FORMAT
  • Loading branch information
vibansal authored Jan 22, 2024
2 parents a8e6007 + c6f0141 commit d256e5b
Show file tree
Hide file tree
Showing 14 changed files with 597 additions and 463 deletions.
3 changes: 3 additions & 0 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
FROM mcr.microsoft.com/devcontainers/rust:latest
# Install the xz-utils package
RUN apt-get update && apt-get install -y cmake
6 changes: 6 additions & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"build": {
// Path is relative to the devcontainer.json file.
"dockerfile": "Dockerfile"
}
}
59 changes: 59 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"type": "lldb",
"request": "launch",
"name": "Debug executable 'longshot'",
"cargo": {
"args": [
"build",
"--bin=longshot",
"--package=longshot"
],
"filter": {
"name": "longshot",
"kind": "bin"
}
},
"args": [
"--bam",
"example_data/pacbio_reads_30x.bam",
"--ref",
"example_data/genome.fa",
"--out",
"example_data/longshot_output.vcf"
],
"cwd": "${workspaceFolder}"
},
{
"type": "lldb",
"request": "launch",
"name": "Debug unit tests in executable 'longshot'",
"cargo": {
"args": [
"test",
"--no-run",
"--bin=longshot",
"--package=longshot"
],
"filter": {
"name": "longshot",
"kind": "bin"
}
},
"args": [
"--bam",
"example_data/pacbio_reads_30x.bam",
"--ref",
"example_data/genome.fa",
"--out",
"example_data/longshot_output.vcf"
],
"cwd": "${workspaceFolder}"
}
]
}
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "longshot"
version = "0.4.3"
version = "1.0.0"
authors = ["Peter Edge <[email protected]>"]

[dependencies]
Expand Down
35 changes: 19 additions & 16 deletions src/call_genotypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
// use declarations
use bio::stats::{LogProb, PHREDProb, Prob};
use chrono::prelude::*;
use rand::{Rng, SeedableRng};
use rand::seq::SliceRandom;
use rand::rngs::StdRng;
use rand::seq::SliceRandom;
use rand::{Rng, SeedableRng};
use std::cmp::{max, min};

use errors::*;
Expand Down Expand Up @@ -200,7 +200,7 @@ pub fn call_genotypes_no_haplotypes(
&var.alleles,
max_p_miscall,
)
.chain_err(|| "Error calculating genotype posteriors for haplotype-free genotyping")?;
.chain_err(|| "Error calculating genotype posteriors for haplotype-free genotyping")?;

// get the genotype with maximum genotype posterior
let (max_g, max_post) = posts.max_genotype_post(false, false);
Expand Down Expand Up @@ -413,7 +413,8 @@ pub fn call_genotypes_with_haplotypes(
if var.alleles.len() >= 2
&& (var.genotype.0 != var.genotype.1)
&& var.alleles[0].len() == 1
&& var.alleles[1].len() == 1 {
&& var.alleles[1].len() == 1
{
var_phased[i] = true;

if var.genotype.1 > var.genotype.0 {
Expand Down Expand Up @@ -597,8 +598,10 @@ pub fn call_genotypes_with_haplotypes(
}

// get the value of the read likelihood given each haplotype
let (mut p_read_h0, mut p_read_h1) = (p_read_hap[0][call.frag_ix as usize],
p_read_hap[1][call.frag_ix as usize]);
let (mut p_read_h0, mut p_read_h1) = (
p_read_hap[0][call.frag_ix as usize],
p_read_hap[1][call.frag_ix as usize],
);

if var_phased[v as usize] {
// for each haplotype allele
Expand Down Expand Up @@ -677,10 +680,10 @@ pub fn call_genotypes_with_haplotypes(
if var_phased[v as usize] {
for &(frag_ix, p_read_h0, p_read_h1) in
&p_read_lst_genotype[max_g.0 as usize][max_g.1 as usize]
{
p_read_hap[0][frag_ix] = p_read_h0;
p_read_hap[1][frag_ix] = p_read_h1;
}
{
p_read_hap[0][frag_ix] = p_read_h0;
p_read_hap[1][frag_ix] = p_read_h1;
}
}
}

Expand All @@ -699,7 +702,7 @@ pub fn call_genotypes_with_haplotypes(
num_phased = 0;
for var in varlist.lst.iter() {
if var.alleles.len() >= 2
&& (var.genotype.0 != var.genotype.1 )
&& (var.genotype.0 != var.genotype.1)
&& var.alleles[0].len() == 1
&& var.alleles[1].len() == 1
{
Expand Down Expand Up @@ -790,7 +793,7 @@ pub fn call_genotypes_with_haplotypes(
"{}.{}.haplotype_genotype_iteration.vcf",
program_step, hapcut2_iter
)
.to_owned();
.to_owned();
print_variant_debug(
varlist,
&interval,
Expand Down Expand Up @@ -826,11 +829,11 @@ mod tests {
fn test_generate_fragcall_pileup() {
let fcall = |f_ix, v_ix, a| {
FragCall {
frag_ix: f_ix, // index into fragment list
var_ix: v_ix, // index into variant list
allele: a, // allele call
frag_ix: f_ix, // index into fragment list
var_ix: v_ix, // index into variant list
allele: a, // allele call
qual: LogProb::from(Prob(0.01)), // LogProb probability the call is an error
one_minus_qual: LogProb::from(Prob(0.99))
one_minus_qual: LogProb::from(Prob(0.99)),
}
};
let p50 = LogProb::from(Prob(0.5));
Expand Down
61 changes: 47 additions & 14 deletions src/call_potential_snvs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ pub fn call_potential_snvs(
'C' | 'c' => 1,
'G' | 'g' => 2,
'T' | 't' => 3,
_ => 4
_ => 4,
};

counts[b] += 1;
Expand Down Expand Up @@ -329,15 +329,17 @@ pub fn call_potential_snvs(

let (prior_00, prior_01, prior_11) =
genotype_priors_table[ref_allele_ix][var_allele1_ix];
let ((_, prior_02, prior_22), prior_12) =
if var_allele2 == 'N' {
((LogProb::ln_one(), LogProb::ln_zero(), LogProb::ln_zero()), LogProb::ln_zero())
} else {
(genotype_priors_table[ref_allele_ix][var_allele2_ix],
let ((_, prior_02, prior_22), prior_12) = if var_allele2 == 'N' {
(
(LogProb::ln_one(), LogProb::ln_zero(), LogProb::ln_zero()),
LogProb::ln_zero(),
)
} else {
(genotype_priors_table[ref_allele_ix][var_allele2_ix],
genotype_priors
.get_prior(&vec![ref_allele.to_string(), var_allele1.to_string(), var_allele2.to_string()], Genotype(1, 2))
.chain_err(|| "Error getting genotype priors for second allele when calculating genotypes.")?)
};
};

// we dereference these so that they are f64 but in natural log space
// we want to be able to multiply them by some integer (raise to power),
Expand All @@ -352,12 +354,39 @@ pub fn call_potential_snvs(
// raise the probability of observing allele to the power of number of times we observed that allele
// fastest way of multiplying probabilities for independent events, where the
// probabilities are all the same (either quality score or 1 - quality score)
let p00 = LogProb(*prior_00 + p_call * ref_count as f64 + p_miscall * (var_count1 + var_count2) as f64);
let p01 = LogProb(ln_two + *prior_01 + p_het * (ref_count + var_count1) as f64 + p_miscall * var_count2 as f64);
let p02 = LogProb(ln_two + *prior_02 + p_het * (ref_count + var_count2) as f64 + p_miscall * var_count1 as f64);
let p11 = LogProb(*prior_11 + p_call * var_count1 as f64 + p_miscall * (ref_count + var_count2) as f64);
let p12 = LogProb(ln_two + *prior_12 + p_het * (var_count1 + var_count2) as f64 + p_miscall * ref_count as f64);
let p22 = LogProb(*prior_22 + p_call * var_count2 as f64 + p_miscall * (ref_count + var_count1) as f64);
let p00 = LogProb(
*prior_00
+ p_call * ref_count as f64
+ p_miscall * (var_count1 + var_count2) as f64,
);
let p01 = LogProb(
ln_two
+ *prior_01
+ p_het * (ref_count + var_count1) as f64
+ p_miscall * var_count2 as f64,
);
let p02 = LogProb(
ln_two
+ *prior_02
+ p_het * (ref_count + var_count2) as f64
+ p_miscall * var_count1 as f64,
);
let p11 = LogProb(
*prior_11
+ p_call * var_count1 as f64
+ p_miscall * (ref_count + var_count2) as f64,
);
let p12 = LogProb(
ln_two
+ *prior_12
+ p_het * (var_count1 + var_count2) as f64
+ p_miscall * ref_count as f64,
);
let p22 = LogProb(
*prior_22
+ p_call * var_count2 as f64
+ p_miscall * (ref_count + var_count1) as f64,
);

// calculate the posterior probability of 0/0 genotype
let p_total = LogProb::ln_sum_exp(&[p00, p01, p02, p11, p12, p22]);
Expand All @@ -375,7 +404,11 @@ pub fn call_potential_snvs(
allele_vec = vec![ref_allele.to_string(), var_allele1.to_string()];
allele_num = 2;
} else {
allele_vec = vec![ref_allele.to_string(), var_allele1.to_string(), var_allele2.to_string()];
allele_vec = vec![
ref_allele.to_string(),
var_allele1.to_string(),
var_allele2.to_string(),
];
allele_num = 3;
}
let tid: usize = pileup.tid() as usize;
Expand Down
19 changes: 10 additions & 9 deletions src/estimate_alignment_parameters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -465,17 +465,18 @@ pub fn estimate_alignment_parameters(
emission_counts.add(read_emission_counts);

prev_tid = tid;
nreads +=1;
if nreads >= max_reads_estimation && max_reads_estimation > 0 { break; }

}
if nreads >= max_reads_estimation && max_reads_estimation > 0 {
eprintln!("using first {} reads for estimating HMM parameters",nreads);
break;
nreads += 1;
if nreads >= max_reads_estimation && max_reads_estimation > 0 {
break;
}
}
if nreads >= max_reads_estimation && max_reads_estimation > 0 {
eprintln!("using first {} reads for estimating HMM parameters", nreads);
break;
}
}
if nreads <= 1000 {
// eprintln!("low number of reads for estimating HMM parameters {}",nreads);
if nreads <= 1000 {
// eprintln!("low number of reads for estimating HMM parameters {}",nreads);
}

// place the transition and emission counts together in an AlignmentCounts struct
Expand Down
Loading

0 comments on commit d256e5b

Please sign in to comment.