Skip to content

Commit

Permalink
update for ashni 15 day mice (+ misc)
Browse files Browse the repository at this point in the history
  • Loading branch information
psathyrella committed Aug 25, 2024
1 parent e79381b commit 286e981
Show file tree
Hide file tree
Showing 6 changed files with 15 additions and 12 deletions.
2 changes: 1 addition & 1 deletion bin/parse-output.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ class MultiplyInheritedFormatter(argparse.RawTextHelpFormatter, argparse.Argumen
print(' no cluster path in input file, so just using all %d sequences (in %d clusters) in annotations' % (sum(len(c) for c in clusters_to_use), len(clusters_to_use)))
else:
ipartition = cpath.i_best if args.partition_index is None else args.partition_index
print(' found %d clusters in %s' % (len(cpath.partitions[ipartition]), 'best partition' if args.partition_index is None else 'partition at index %d (of %d)' % (ipartition, len(cpath.partitions))))
print(' found %d clusters with %d seqs in %s' % (len(cpath.partitions[ipartition]), sum(len(c) for c in cpath.partitions[ipartition]), 'best partition' if args.partition_index is None else 'partition at index %d (of %d)' % (ipartition, len(cpath.partitions))))
modified = False
if args.cluster_index is None:
clusters_to_use = cpath.partitions[ipartition]
Expand Down
4 changes: 2 additions & 2 deletions bin/run-paired-loci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ common="--n-sub-procs 15 --n-max-procs 5 --single-light-locus igk --base-outdir
# echo $bin --label pairfix --version v1 --n-replicates 3 --n-leaves-list hist --n-sim-events-list 3000 --scratch-mute-freq-list 0.07 --bulk-data-fraction-list 0:0.5:0.8:0.9:0.95 --simu-extra-args=\"--flat-mute-freq --same-mute-freq-for-all-seqs\" --inference-extra-args=\"--pair-unpaired-seqs-with-paired-family\" --final-plot-xvar bulk-data-fraction --perf-metrics all-pcfrac:f1:precision:sensitivity --make-hist-plots --use-val-cfgs --empty-bin-range 0:200 $common
# echo $bin --label test-antn --version imbal-v3 --n-replicates 2 --tree-imbalance-list None:0.04:0.07 --scratch-mute-freq-list 0.15 --n-leaves-list 50 --simu-extra-args=\"--flat-mute-freq --same-mute-freq-for-all-seqs\" --n-sim-events-list 50 --antn-perf --perf-metrics naive-hdist $common # NOTE also made :0.13:0.14:0.16
# echo $bin --label bcr-phylo-antn --version v0 --n-replicates 2 --obs-times-list 50:150:300 --n-sim-seqs-per-generation-list 15:45 --context-depend-list 1 --simu-type bcr-phylo --dont-observe-common-ancestors --antn-perf --perf-metrics naive-hdist $common
simu_extra="--simu-extra-args=\"--target-distance 10 --context-depend 1 --tdist-weights random-uniform --min-target-distance 2 --n-sim-seqs-per-generation 89 --parameter-variances n-sim-seqs-per-generation,23 --aa-paratope-positions N=60 --aa-struct-positions N=100 --leaf-sampling-scheme high-affinity\""
echo $bin --label gct-valid --version v5 --n-replicates 3 --obs-times-list 15:20:30:40:50 --n-sim-events-list 70 --carry-cap-list 1000 --simu-type bcr-phylo --perf-metrics coar:rf:mrca --calc-antns --inference-extra-args=\"--no-indels --simultaneous-true-clonal-seqs\" --plot-metrics tree-perf --final-plot-xvar obs-times --final-plot-xvar obs-times $simu_extra $common # NOTE also have sampling times 10, 100, 150 for most methods
simu_extra="--simu-extra-args=\"--target-distance 10 --context-depend 1 --tdist-weights random-uniform --min-target-distance 2 --n-sim-seqs-per-generation 89 --parameter-variances n-sim-seqs-per-generation,23 --aa-paratope-positions N=60 --aa-struct-positions N=100 --leaf-sampling-scheme high-affinity --n-naive-seq-copies 100\""
echo $bin --label gct-valid --version v6 --n-replicates 3 --obs-times-list 15:20:30:40:50 --n-sim-events-list 70 --carry-cap-list 1000 --simu-type bcr-phylo --perf-metrics coar:rf:mrca --calc-antns --inference-extra-args=\"--no-indels --simultaneous-true-clonal-seqs\" --plot-metrics tree-perf --final-plot-xvar obs-times --final-plot-xvar obs-times $simu_extra $common # NOTE also have sampling times 10, 100, 150 for most methods
# echo $bin --label gct-valid --version gcdyn-v1 --n-replicates 2 --simu-type gcdyn --n-sim-events-list 70 --obs-times-list 15:30 --perf-metrics coar:rf:mrca --calc-antns --inference-extra-args=\"--no-indels --simultaneous-true-clonal-seqs\" --plot-metrics tree-perf $common

# NOTE have to set --n-sub-procs to 1 for partition step, and re-set --n-sim-events-list for each --n-leaves value (500 leaves: 10 events, 100:50, 50:100):
Expand Down
9 changes: 6 additions & 3 deletions projects/cf-gcdyn.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
parser.add_argument('--n-trials-list')
parser.add_argument('--dl-bundle-size-list', help='size of bundles during dl inference (must be equal to or less than simulation bundle size)')
parser.add_argument('--epochs-list')
parser.add_argument('--batch-size-list')
parser.add_argument('--dropout-rate-list')
parser.add_argument('--learning-rate-list')
parser.add_argument('--ema-momentum-list')
Expand All @@ -66,11 +67,11 @@
# parser.add_argument('--gcreplay-data-dir', default='/fh/fast/matsen_e/%s/gcdyn/gcreplay-observed'%os.getenv('USER'))
parser.add_argument('--gcreplay-germline-dir', default='datascripts/meta/taraki-gctree-2021-10/germlines')
parser.add_argument('--dl-model-dir')
parser.add_argument('--data-dir', default='/fh/fast/matsen_e/data/taraki-gctree-2021-10/beast-processed-data/v0')
parser.add_argument('--data-dir', default='/fh/fast/matsen_e/data/taraki-gctree-2021-10/beast-processed-data/v4')
args = parser.parse_args()
args.scan_vars = {
'simu' : ['seed', 'birth-response', 'xscale-values', 'xshift-values', 'xscale-range', 'xshift-range', 'yscale-range', 'initial-birth-rate-range', 'carry-cap-range', 'init-population', 'time-to-sampling-range', 'n-seqs-range', 'n-trials', 'simu-bundle-size'],
'dl-infer' : ['dl-bundle-size', 'epochs', 'dropout-rate', 'learning-rate', 'ema-momentum', 'prebundle-layer-cfg', 'dont-scale-params', 'params-to-predict'],
'dl-infer' : ['dl-bundle-size', 'epochs', 'batch-size', 'dropout-rate', 'learning-rate', 'ema-momentum', 'prebundle-layer-cfg', 'dont-scale-params', 'params-to-predict'],
'data' : ['data-samples'],
}
args.scan_vars['group-expts'] = copy.deepcopy(args.scan_vars['dl-infer'])
Expand Down Expand Up @@ -161,7 +162,9 @@ def add_scan_args(cmd, skip_fcn=None): # using nargs='+' syntax for these rathe
if action in ['simu', 'check-dl', 'merge-simu']:
cmd = 'gcd-simulate' if action in ['simu', 'check-dl'] else 'python %s/scripts/%s.py' % (args.gcddir, 'combine-simu-files.py')
if action in ['simu', 'check-dl']:
cmd += ' --outdir %s --tree-inference-method iqtree --debug 1' % odr # --debug 1
cmd += ' --outdir %s --debug 1' % odr # --debug 1
# --make-plots
# --tree-inference-method iqtree
if args.test:
cmd += ' --test'
cmd = add_scan_args(cmd, skip_fcn=lambda v: v not in args.scan_vars[action] or action=='check-dl' and v not in check_dl_args)
Expand Down
8 changes: 4 additions & 4 deletions projects/replay-plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@

colors = {
'gct-data' : '#cc0000',
'gct-data-d15' : '#006600',
'gct-data-d20' : '#cc0000',
'gct-data-d15' : '#ea7979',
'gct-data-d20' : '#cc0000',
'gct-data-w10' : '#2b65ec',
'bst-data-d20' : '#006600',
'iqt-data' : '#a821c7',
Expand Down Expand Up @@ -441,7 +441,7 @@ def compare_plots(htype, plotdir, hists, labels, hname, diff_vals):
"""
parser = argparse.ArgumentParser(usage=ustr)
parser.add_argument('--gcreplay-dir', default='/fh/fast/matsen_e/data/taraki-gctree-2021-10/gcreplay', help='dir with gctree results on gcreplay data from which we read seqs, affinity, mutation info, and trees)')
parser.add_argument('--beast-dir', default='/fh/fast/matsen_e/data/taraki-gctree-2021-10/beast-processed-data/v3', help='dir with beast results on gcreplay data (same format as simulation)')
parser.add_argument('--beast-dir', default='/fh/fast/matsen_e/data/taraki-gctree-2021-10/beast-processed-data/v4', help='dir with beast results on gcreplay data (same format as simulation)')
parser.add_argument('--iqtree-data-dir', default='/fh/fast/matsen_e/data/taraki-gctree-2021-10/iqtree-processed-data/v1', help='dir with iqtree results on gcreplay data (from datascripts/taraki-gctree-2021-10/iqtree-run.py then projects/gcdyn/scripts/data-parse.py')
parser.add_argument('--simu-like-dir', help='Dir from which to read simulation results, either from gcdyn or bcr-phylo (if the latter, set --bcr-phylo)')
parser.add_argument('--outdir')
Expand All @@ -457,7 +457,7 @@ def compare_plots(htype, plotdir, hists, labels, hname, diff_vals):
parser.add_argument("--random-seed", type=int, default=1)
parser.add_argument("--default-naive-affinity", type=float, default=1./100, help="this is the default for bcr-phylo, so maybe be correct if we don\'t have an unmutated sequence")
args = parser.parse_args()
args.plot_labels = utils.get_arg_list(args.plot_labels, choices=['gct-data', 'gct-data-d15', 'gct-data-d20', 'gct-data-w10', 'bst-data-d20', 'iqt-data', 'iqt-data-d20', 'simu', 'simu-iqtree'])
args.plot_labels = utils.get_arg_list(args.plot_labels, choices=['gct-data', 'gct-data-d15', 'gct-data-d20', 'gct-data-w10', 'bst-data-d15', 'bst-data-d20', 'iqt-data', 'iqt-data-d20', 'simu', 'simu-iqtree'])
if len(args.plot_labels) > 3 and not args.write_legend_only_plots:
print(' note; setting --write-legend-only-plots since --plot-labels is longer than 3')
args.write_legend_only_plots = True
Expand Down
2 changes: 1 addition & 1 deletion python/datautils.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def reverse_gcid(gcid):

# ----------------------------------------------------------------------------------------
# Return <gcid> with its 'btt-PR-<a>-<b>' substring rewritten as 'PR<a>.<bb>' (both fields converted with int(), the second zero-padded to two digits).
# NOTE(review): this is a rendered diff hunk whose +/- markers and indentation were stripped, so both versions of the <mstr> line appear below;
#   presumably the first is the removed line and the second the added one (the hunk header reports 1 addition & 1 deletion) -- confirm against the repository.
def fix_btt_id(gcid):
# first version of the pattern: exactly one (arbitrary) character after the final dash
mstr = utils.get_single_entry(re.findall('btt-PR-.-.', gcid))
# second version of the pattern: one or more digits after the final dash (presumably utils.get_single_entry() requires exactly one match -- verify in utils)
mstr = utils.get_single_entry(re.findall('btt-PR-.-[0-9][0-9]*', gcid))
# split the matched substring on '-' into its four fields: the two literal tags and the two id fields
btstr, prstr, prn1, prn2 = mstr.split('-')
# sanity check that the literal tags are what the pattern says they are
assert btstr == 'btt' and prstr == 'PR'
# int() raises ValueError if either field is not numeric (the '.' in the pattern allows any character for the first field)
return gcid.replace(mstr, 'PR%d.%02d' % (int(prn1), int(prn2)))
Expand Down
2 changes: 1 addition & 1 deletion python/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from . import hutils
from .clusterpath import ClusterPath

# green dark red light blue light red sky blue pink/purple grey
# green dark red light blue light red sky blue pink/purple grey
default_colors = ['#006600', '#990012', '#2b65ec', '#cc0000', '#3399ff', '#a821c7', '#808080']
default_linewidths = ['5', '3', '2', '2', '2']
default_markersizes = ['20', '15', '8', '5', '5', '5']
Expand Down

0 comments on commit 286e981

Please sign in to comment.