Merge branch '2to3' into main

psathyrella · Jan 30, 2024 · 049240d · 049240d
2 parents ef0e797 + 2f3f1c8
commit 049240d
Show file tree

Hide file tree

Showing 3,923 changed files with 816,147 additions and 802,624 deletions.
diff --git a/.gitignore b/.gitignore
@@ -53,3 +53,4 @@ packages/RPANDA/lib/
 /packages/soNNia/
 /projects/gcdyn/
 /projects/gcreplay/
+/.coverage
diff --git a/bin/add-chimeras.py b/bin/add-chimeras.py
@@ -1,18 +1,21 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
+from __future__ import absolute_import, division, unicode_literals
+from __future__ import print_function
 import argparse
 import collections
 import numpy
 import random
 import sys
 import os
 import csv
+from io import open
 
 partis_dir = os.path.dirname(os.path.realpath(__file__)).replace('/bin', '')
 if not os.path.exists(partis_dir):
-    print 'WARNING current script dir %s doesn\'t exist, so python path may not be correctly set' % partis_dir
-sys.path.insert(1, partis_dir + '/python')
-import utils
-import seqfileopener
+    print('WARNING current script dir %s doesn\'t exist, so python path may not be correctly set' % partis_dir)
+sys.path.insert(1, partis_dir) # + '/python')
+import python.utils as utils
+import python.seqfileopener as seqfileopener
 
 parser = argparse.ArgumentParser()
 parser.add_argument('infile')
@@ -24,32 +27,32 @@
 
 input_info, _, _ = seqfileopener.read_sequence_file(args.infile, is_data=False)
 if len(input_info) < 50:
-    print '%s making chimeras with only %d sequences, and since we choose from among the existing sequence for templates this won\'t be very effective' % (utils.color('yellow', 'warning'), len(input_info))
+    print('%s making chimeras with only %d sequences, and since we choose from among the existing sequence for templates this won\'t be very effective' % (utils.color('yellow', 'warning'), len(input_info)))
 
 n_chimeric = 0
 outfo = collections.OrderedDict()
 for uid, seqfo in input_info.items():
     if args.debug:
-        print uid
+        print(uid)
 
     if numpy.random.uniform(0, 1) > args.chimera_freq:  # no chimeras for this sequence
         if args.debug:
-            print '        non-chimeric'
+            print('        non-chimeric')
         continue
 
     break_point = random.randint(args.min_chunk_len, len(seqfo['seqs'][0]) - args.min_chunk_len)
-    switch_uid = numpy.random.choice(input_info.keys())
+    switch_uid = numpy.random.choice(list(input_info))  # numpy.random.choice needs a 1-d sequence of uids; passing the dict itself raises ValueError
     switch_seq = input_info[switch_uid]['seqs'][0][ : break_point]
 
     if args.debug:
-        print '    switching to %s at %d:' % (switch_uid, break_point)
-        print '          %s' % switch_seq
-        print '          %s%s' % (' ' * len(switch_seq), seqfo['seqs'][0][break_point : ])
+        print('    switching to %s at %d:' % (switch_uid, break_point))
+        print('          %s' % switch_seq)
+        print('          %s%s' % (' ' * len(switch_seq), seqfo['seqs'][0][break_point : ]))
 
     outfo[uid] = switch_seq + seqfo['seqs'][0][break_point : ]
     n_chimeric += 1
 
-print 'writing %d / %d chimeric sequences to %s' % (n_chimeric, len(input_info), args.outfile)
+print('writing %d / %d chimeric sequences to %s' % (n_chimeric, len(input_info), args.outfile))
 with open(args.outfile, 'w') as outfile:
     for uid, seq in outfo.items():
         outfile.write('>%s\n%s\n' % (uid, seq))
diff --git a/bin/add-seqs-to-outputs.py b/bin/add-seqs-to-outputs.py
@@ -1,4 +1,6 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
+from __future__ import absolute_import, division, unicode_literals
+from __future__ import print_function
 import csv
 import os
 import sys
@@ -10,11 +12,11 @@
 
 # if you move this script, you'll need to change this method of getting the imports
 partis_dir = os.path.dirname(os.path.realpath(__file__)).replace('/bin', '')
-sys.path.insert(1, partis_dir + '/python')
+sys.path.insert(1, partis_dir) # + '/python')
 
-import utils
-import glutils
-from clusterpath import ClusterPath
+import python.utils as utils
+import python.glutils as glutils
+from python.clusterpath import ClusterPath
 
 dstr = """
 Add seqs from the fasta file --new-seq-file to an annotation from --partis-output-file.
@@ -34,18 +36,18 @@
 args = parser.parse_args()
 
 new_seqfos = utils.read_fastx(args.new_seq_file, sanitize_seqs=True)
-print '    read %d seqs from %s' % (len(new_seqfos), args.new_seq_file)
+print('    read %d seqs from %s' % (len(new_seqfos), args.new_seq_file))
 
 glfo = None
 if utils.getsuffix(args.partis_output_file) == '.csv':
-    print '    reading deprecated csv format, so need to read germline info from somewhere else, using --glfo-dir %s, hopefully it works' % args.glfo_dir
+    print('    reading deprecated csv format, so need to read germline info from somewhere else, using --glfo-dir %s, hopefully it works' % args.glfo_dir)
     glfo = glutils.read_glfo(args.glfo_dir, locus=args.locus)
 
 glfo, annotation_list, cpath = utils.read_output(args.partis_output_file, glfo=glfo, locus=args.locus)
 if args.partition_index is not None:
-    print '  using non-best partition index %d (best is %d)' % (args.partition_index, cpath.i_best)
+    print('  using non-best partition index %d (best is %d)' % (args.partition_index, cpath.i_best))
 partition = cpath.partitions[cpath.i_best if args.partition_index is None else args.partition_index]
-print '    read partition with %d clusters from %s' % (len(partition), args.partis_output_file)
+print('    read partition with %d clusters from %s' % (len(partition), args.partis_output_file))
 
 new_uids = set(sfo['name'] for sfo in new_seqfos)
 clusters_with_overlap = []
@@ -60,16 +62,16 @@
     # raise Exception('too many clusters %d in the partition overlaps with sequences from the fasta file' % len(clusters_with_overlap))
     clusters_with_overlap = sorted(clusters_with_overlap, key=lambda p: len(p[1]), reverse=True)
     ostrs = ['%d %d'%(len(c), len(o)) for c, o in clusters_with_overlap]
-    print '  %s more than one cluster overlaps with sequences from fasta file, just taking first one (size overlap): %s,  %s' % (utils.color('yellow', 'warning'), utils.color('red', ostrs[0]), ',  '.join(ostrs[1:]))
+    print('  %s more than one cluster overlaps with sequences from fasta file, just taking first one (size overlap): %s,  %s' % (utils.color('yellow', 'warning'), utils.color('red', ostrs[0]), ',  '.join(ostrs[1:])))
 old_cluster = clusters_with_overlap[0][0]
 
-print '    adding %d fasta sequences to cluster of size %d (%d fasta sequences were already in cluster)' % (len(new_uids - set(old_cluster)), len(old_cluster), len(new_uids & set(old_cluster)))
+print('    adding %d fasta sequences to cluster of size %d (%d fasta sequences were already in cluster)' % (len(new_uids - set(old_cluster)), len(old_cluster), len(new_uids & set(old_cluster))))
 sfos_to_add = [sfo for sfo in new_seqfos if sfo['name'] not in old_cluster]
 annotation_dict = utils.get_annotation_dict(annotation_list)
 annotation = annotation_dict[':'.join(old_cluster)]
 
 if args.n_test_subset_seqs is not None:
-    print '  taking only first %d seqs from fasta and annotation' % args.n_test_subset_seqs
+    print('  taking only first %d seqs from fasta and annotation' % args.n_test_subset_seqs)
     utils.restrict_to_iseqs(annotation, list(range(args.n_test_subset_seqs)), glfo)
     sfos_to_add = sfos_to_add[:args.n_test_subset_seqs]
 utils.add_seqs_to_line(annotation, sfos_to_add, glfo, debug=args.debug)