Skip to content

Commit

Permalink
Release 1.14
Browse files Browse the repository at this point in the history
  • Loading branch information
daviesrob committed Oct 22, 2021
2 parents 911cb8e + b136e5d commit c37e041
Show file tree
Hide file tree
Showing 50 changed files with 2,017 additions and 188 deletions.
4 changes: 3 additions & 1 deletion .cirrus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ test_template: &TEST
test_script: |
make test-shlib-exports
make test
if test "x$DO_UNTRACKED_FILE_CHECK" = "xyes"; then make check-untracked ; fi
#--------------------------------------------------
# Task: linux builds.
Expand All @@ -68,6 +68,7 @@ gcc_task:
matrix:
- environment:
DO_MAINTAINER_CHECKS: yes
DO_UNTRACKED_FILE_CHECK: yes
USE_CONFIG: no
- environment:
USE_CONFIG: yes
Expand Down Expand Up @@ -96,6 +97,7 @@ ubuntu_task:
matrix:
- environment:
USE_CONFIG: yes
DO_UNTRACKED_FILE_CHECK: yes
- environment:
USE_CONFIG: yes
CFLAGS: -g -Wall -O3 -fsanitize=address
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ shlib-exports-*.txt
/test/hts_endian
/test/longrefs/*.tmp.*
/test/pileup
/test/pileup_mod
/test/plugins-dlhts
/test/sam
/test/tabix/*.tmp.*
Expand All @@ -61,6 +62,7 @@ shlib-exports-*.txt
/test/test_introspection
/test/test_kfunc
/test/test_kstring
/test/test_mod
/test/test-parse-reg
/test/test_realn
/test/test-regidx
Expand Down
26 changes: 22 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,14 @@ BUILT_TEST_PROGRAMS = \
test/fieldarith \
test/hfile \
test/pileup \
test/pileup_mod \
test/plugins-dlhts \
test/sam \
test/test_bgzf \
test/test_expr \
test/test_kfunc \
test/test_kstring \
test/test_mod \
test/test_realn \
test/test-regidx \
test/test_str2int \
Expand Down Expand Up @@ -129,8 +131,8 @@ LIBHTS_SOVERSION = 3
# is not strictly necessary and should be removed the next time
# LIBHTS_SOVERSION is bumped (see #1144 and
# https://developer.apple.com/library/archive/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html#//apple_ref/doc/uid/TP40002013-SW23)
MACH_O_COMPATIBILITY_VERSION = 3.1.13
MACH_O_CURRENT_VERSION = 3.1.13
MACH_O_COMPATIBILITY_VERSION = 3.1.14
MACH_O_CURRENT_VERSION = 3.1.14

# $(NUMERIC_VERSION) is for items that must have a numeric X.Y.Z string
# even if this is a dirty or untagged Git working tree.
Expand Down Expand Up @@ -363,7 +365,7 @@ hfile_gcs.o hfile_gcs.pico: hfile_gcs.c config.h $(htslib_hts_h) $(htslib_kstrin
hfile_libcurl.o hfile_libcurl.pico: hfile_libcurl.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_h)
hfile_s3_write.o hfile_s3_write.pico: hfile_s3_write.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_h)
hfile_s3.o hfile_s3.pico: hfile_s3.c config.h $(hfile_internal_h) $(htslib_hts_h) $(htslib_kstring_h)
hts.o hts.pico: hts.c config.h $(htslib_hts_expr_h) $(htslib_hts_h) $(htslib_bgzf_h) $(cram_h) $(htslib_hfile_h) $(htslib_hts_endian_h) version.h config_vars.h $(hts_internal_h) $(hfile_internal_h) $(sam_internal_h) $(htslib_hts_os_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_ksort_h) $(htslib_tbx_h) $(htscodecs_htscodecs_h)
hts.o hts.pico: hts.c config.h os/lzma_stub.h $(htslib_hts_h) $(htslib_bgzf_h) $(cram_h) $(htslib_hfile_h) $(htslib_hts_endian_h) version.h config_vars.h $(hts_internal_h) $(hfile_internal_h) $(sam_internal_h) $(htslib_hts_expr_h) $(htslib_hts_os_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_ksort_h) $(htslib_tbx_h) $(htscodecs_htscodecs_h)
hts_expr.o hts_expr.pico: hts_expr.c config.h $(htslib_hts_expr_h) $(textutils_internal_h)
hts_os.o hts_os.pico: hts_os.c config.h $(htslib_hts_defs_h) os/rand.c
vcf.o vcf.pico: vcf.c config.h $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_hfile_h) $(hts_internal_h) $(htslib_khash_str2int_h) $(htslib_kstring_h) $(htslib_sam_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_hts_endian_h)
Expand Down Expand Up @@ -471,6 +473,13 @@ maintainer-check:
test/maintainer/check_copyright.pl .
test/maintainer/check_spaces.pl .

# Look for untracked files in the git repository.
# The check only runs when a .git entry exists in the current directory;
# otherwise the target succeeds without doing anything.  The entries of
# `git status --porcelain` selected by the grep (the untracked-file lines)
# are printed first, followed by the explanatory message, and the final
# `false` makes the target fail so that callers (e.g. CI) notice.
check-untracked:
@if test -e .git && git status --porcelain | grep '^\?'; then \
echo 'Untracked files detected (see above). Please either clean up, add to .gitignore, or for test output files consider naming them to match *.tmp or *.tmp.*' ; \
false ; \
fi

# Create a shorthand. We use $(SRC) or $(srcprefix) rather than $(srcdir)/
# for brevity in test and install rules, and so that build logs do not have
# ./ sprinkled throughout.
Expand All @@ -497,6 +506,7 @@ check test: $(BUILT_PROGRAMS) $(BUILT_TEST_PROGRAMS) $(BUILT_PLUGINS) $(HTSCODEC
cd test/tabix && ./test-tabix.sh tabix.tst
cd test/mpileup && ./test-pileup.sh mpileup.tst
cd test/fastq && ./test-fastq.sh
cd test/base_mods && ./base-mods.sh base-mods.tst
REF_PATH=: test/sam test/ce.fa test/faidx.fa test/fastqs.fq
test/test-regidx
cd test && REF_PATH=: ./test.pl $${TEST_OPTS:-}
Expand All @@ -516,6 +526,9 @@ test/hfile: test/hfile.o libhts.a
test/pileup: test/pileup.o libhts.a
$(CC) $(LDFLAGS) -o $@ test/pileup.o libhts.a $(LIBS) -lpthread

test/pileup_mod: test/pileup_mod.o libhts.a
$(CC) $(LDFLAGS) -o $@ test/pileup_mod.o libhts.a $(LIBS) -lpthread

test/plugins-dlhts: test/plugins-dlhts.o
$(CC) $(LDFLAGS) -o $@ test/plugins-dlhts.o $(LIBS)

Expand All @@ -534,6 +547,9 @@ test/test_kfunc: test/test_kfunc.o libhts.a
test/test_kstring: test/test_kstring.o libhts.a
$(CC) $(LDFLAGS) -o $@ test/test_kstring.o libhts.a -lz $(LIBS) -lpthread

test/test_mod: test/test_mod.o libhts.a
$(CC) $(LDFLAGS) -o $@ test/test_mod.o libhts.a $(LIBS) -lpthread

test/test_realn: test/test_realn.o libhts.a
$(CC) $(LDFLAGS) -o $@ test/test_realn.o libhts.a $(LIBS) -lpthread

Expand Down Expand Up @@ -622,12 +638,14 @@ test/fuzz/hts_open_fuzzer.o: test/fuzz/hts_open_fuzzer.c config.h $(htslib_hfile
test/fieldarith.o: test/fieldarith.c config.h $(htslib_sam_h)
test/hfile.o: test/hfile.c config.h $(htslib_hfile_h) $(htslib_hts_defs_h) $(htslib_kstring_h)
test/pileup.o: test/pileup.c config.h $(htslib_sam_h) $(htslib_kstring_h)
test/pileup_mod.o: test/pileup_mod.c config.h $(htslib_sam_h)
test/plugins-dlhts.o: test/plugins-dlhts.c config.h
test/sam.o: test/sam.c config.h $(htslib_hts_defs_h) $(htslib_sam_h) $(htslib_faidx_h) $(htslib_khash_h) $(htslib_hts_log_h)
test/test_bgzf.o: test/test_bgzf.c config.h $(htslib_bgzf_h) $(htslib_hfile_h) $(hfile_internal_h)
test/test_expr.o: test/test_expr.c config.h $(htslib_hts_expr_h)
test/test_kfunc.o: test/test_kfunc.c config.h $(htslib_kfunc_h)
test/test_kstring.o: test/test_kstring.c config.h $(htslib_kstring_h)
test/test_mod.o: test/test_mod.c config.h $(htslib_sam_h)
test/test-parse-reg.o: test/test-parse-reg.c config.h $(htslib_hts_h) $(htslib_sam_h)
test/test_realn.o: test/test_realn.c config.h $(htslib_hts_h) $(htslib_sam_h) $(htslib_faidx_h)
test/test-regidx.o: test/test-regidx.c config.h $(htslib_kstring_h) $(htslib_regidx_h) $(htslib_hts_defs_h) $(textutils_internal_h)
Expand Down Expand Up @@ -790,7 +808,7 @@ distdir:
force:


.PHONY: all check clean distclean distdir force
.PHONY: all check check-untracked clean distclean distdir force
.PHONY: install install-pkgconfig installdirs lib-shared lib-static
.PHONY: maintainer-check maintainer-clean mostlyclean plugins
.PHONY: print-config print-version show-version tags
Expand Down
95 changes: 94 additions & 1 deletion NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,96 @@
Noteworthy changes in release 1.14 (22nd October 2021)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Features and Updates
--------------------

* Added a keep option to bgzip to leave the original file untouched. This
brings bgzip into line with gzip. (PR #1331, thanks to Alex Petty)

* "endpos" has been added to the filter language, giving the position
of the rightmost mapped base as measured by the CIGAR string. For
unmapped reads it is the same as "pos". (PR #1307, thanks to John Marshall)

* Interfaces have been added to interpret the new base modification tags
added to the SAMtags document in samtools/hts-specs#418. (PR #1132)

* New API functions hts_flush()/sam_flush()/bcf_flush() for flushing output
htsFile/samFile/vcfFile streams. (PR #1326, thanks to John Marshall)

* The synced_bcf_reader now sorts lines with symbolic alleles by END tag as
well as POS. (PR #1321)

* Added synced_bcf_reader options BCF_SR_REGIONS_OVERLAP and
BCF_SR_TARGETS_OVERLAP for better control of how records that start outside
the desired region but overlap it are handled. Fixes samtools/bcftools#1420
and samtools/bcftools#1421 raised by John Marshall. (PR #1327)

* HTSlib will now accept long-cigar CG:B: tags made by htsjdk which don't
quite follow the specification properly (using signed values instead of
unsigned). Thanks to Colin Diesh for reporting an example file. (PR #1317)

* The warning printed when the BGZF reader finds a file with no EOF block
has been changed to be less alarming. Unfortunately some third-party
BGZF encoders don't write EOF blocks at the end of files. Thanks to
Keiran Raine for reporting an example file. (PR #1323)

* The FASTA and FASTQ readers get an option to skip over the first item on
the header line, and use the second as the read name. It allows the original
name to be restored on some of the fastq files served from the European
Nucleotide Archive (ENA). (PR #1325)

* HTSlib is now more strict when parsing the VCF samples line (beginning
#CHROM). It will only accept tabs between the mandatory field names, and
sample names must be separated with tabs. (PR #1328)

* HTSlib will now warn if it looks like the header has been corrupted
by diagnostic messages from the program that made it. This can happen when
using `nohup`, which by default mixes stdout and stderr into the same
stream. (PR #1339, thanks to John Marshall)

* File format detection will now recognise signatures for XZ, Zstd and D4
files (note that HTSlib will not read them yet). (PR #1340, thanks to
John Marshall)

Build changes
-------------

These are compiler, configuration and makefile based changes.

* Some redundant tests have been removed from the test harness, speeding it up.
(PR #1308)

* The version.sh script now works better on shallow checkouts. (PR #1324)

* A check-untracked Makefile target has been added to catch untracked files
(mostly) left by the test harness. (PR #1324)

Bug fixes
---------

* Fixed a case where flushing the thread pool could very occasionally cause
a deadlock. (PR #1309)

* Fixed a bug where some CRAM files could fail to decode if the required_fields
option was in use. Thanks to Matt Sexton for reporting the issue.
(PR #1314, fixes samtools/samtools#1475)

* Fixed a regression where the S3 plugin could not read public files unless
you supplied some Amazon credentials. Thanks to Chris Saunders for reporting.
(PR #1332, fixes samtools/samtools#1491)

* Fixed a possible CRAM thread deadlock discovered by @ryancaicse.
(PR #1330, fixes #1329)

* Some set-but-unused variables have been removed. (PR #1334)

* Fixed a bug which prevented "flag.read2" from working in the filter
language unless it was at the end of the expression. Thanks to Vamsi Kodali
for reporting the issue. (PR #1342)

* Fixed a memory leak that could happen if CRAM fails to inflate an LZMA
block. (PR #1340, thanks to John Marshall)

Noteworthy changes in release 1.13 (7th July 2021)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Expand Down Expand Up @@ -519,7 +612,7 @@ Bug fixes

* Fixed potential integer overflows in the VCF parser and ensured that
the total length of FORMAT fields cannot go over 2Gbytes. [fuzz] (#1044,
#1104)
#1104; latter is CVE-2020-36403 affecting HTSlib versions 1.10 to 1.10.2)

* Download index files atomically in idx_test_and_fetch(). This prevents
corruption when running parallel jobs on S3 files. Thanks to John Marshall.
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ HTSlib implements a generalized BAM index, with file extension `.csi`
(coordinate-sorted index). The HTSlib file reader first looks for the new index
and then for the old if the new index is absent.

This project also includes the popular tabix indexer, which indexes both `.tbi`
This project also includes the popular tabix indexer, which creates both `.tbi`
and `.csi` formats, and the bgzip compression utility.

[1]: http://samtools.github.io/hts-specs/
Expand Down
26 changes: 24 additions & 2 deletions bcf_sr_sort.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
Copyright (C) 2017-2019 Genome Research Ltd.
Copyright (C) 2017-2021 Genome Research Ltd.
Author: Petr Danecek <[email protected]>
Expand Down Expand Up @@ -259,6 +259,7 @@ static int cmpstringp(const void *p1, const void *p2)
return strcmp(* (char * const *) p1, * (char * const *) p2);
}

#define DEBUG_VSETS 0
#if DEBUG_VSETS
void debug_vsets(sr_sort_t *srt)
{
Expand All @@ -280,6 +281,7 @@ void debug_vsets(sr_sort_t *srt)
}
#endif

#define DEBUG_VBUF 0
#if DEBUG_VBUF
void debug_vbuf(sr_sort_t *srt)
{
Expand Down Expand Up @@ -380,13 +382,33 @@ static int bcf_sr_sort_set(bcf_srs_t *readers, sr_sort_t *srt, const char *chr,

if ( srt->str.l ) kputc(';',&srt->str);
srt->off[srt->noff++] = srt->str.l;
size_t beg = srt->str.l;
size_t beg = srt->str.l;
int end_pos = -1;
for (ivar=1; ivar<line->n_allele; ivar++)
{
if ( ivar>1 ) kputc(',',&srt->str);
kputs(line->d.allele[0],&srt->str);
kputc('>',&srt->str);
kputs(line->d.allele[ivar],&srt->str);

// If symbolic allele, check also the END tag in case there are multiple events,
// such as <DEL>s, starting at the same positions
if ( line->d.allele[ivar][0]=='<' )
{
if ( end_pos==-1 )
{
bcf_info_t *end_info = bcf_get_info(reader->header,line,"END");
if ( end_info )
end_pos = (int)end_info->v1.i; // this is only to create a unique id, we don't mind a potential int64 overflow
else
end_pos = 0;
}
if ( end_pos )
{
kputc('/',&srt->str);
kputw(end_pos, &srt->str);
}
}
}
if ( line->n_allele==1 )
{
Expand Down
8 changes: 3 additions & 5 deletions bgzf.c
Original file line number Diff line number Diff line change
Expand Up @@ -1022,7 +1022,7 @@ int bgzf_read_block(BGZF *fp)
if (j->hit_eof) {
if (!fp->last_block_eof && !fp->no_eof_block) {
fp->no_eof_block = 1;
hts_log_warning("EOF marker is absent. The input is probably truncated");
hts_log_warning("EOF marker is absent. The input may be truncated");
}
fp->mt->hit_eof = 1;
}
Expand Down Expand Up @@ -1124,7 +1124,7 @@ int bgzf_read_block(BGZF *fp)
if (count == 0) { // no data read
if (!fp->last_block_eof && !fp->no_eof_block && !fp->is_gzip) {
fp->no_eof_block = 1;
hts_log_warning("EOF marker is absent. The input is probably truncated");
hts_log_warning("EOF marker is absent. The input may be truncated");
}
fp->block_length = 0;
return 0;
Expand Down Expand Up @@ -1467,7 +1467,7 @@ static void *bgzf_mt_writer(void *vp) {
int bgzf_mt_read_block(BGZF *fp, bgzf_job *j)
{
uint8_t header[BLOCK_HEADER_LENGTH], *compressed_block;
int count, size = 0, block_length, remaining;
int count, block_length, remaining;

// NOTE: Guaranteed to be compressed as we block multi-threading in
// uncompressed mode. However it may be gzip compression instead
Expand Down Expand Up @@ -1496,7 +1496,6 @@ int bgzf_mt_read_block(BGZF *fp, bgzf_job *j)
if (count != sizeof(header)) // no data read
return -1;

size = count;
block_length = unpackInt16((uint8_t*)&header[16]) + 1; // +1 because when writing this number, we used "-1"
if (block_length < BLOCK_HEADER_LENGTH) {
j->errcode |= BGZF_ERR_HEADER;
Expand All @@ -1510,7 +1509,6 @@ int bgzf_mt_read_block(BGZF *fp, bgzf_job *j)
j->errcode |= BGZF_ERR_IO;
return -1;
}
size += count;
j->comp_len = block_length;
j->uncomp_len = BGZF_MAX_BLOCK_SIZE;
j->block_address = block_address;
Expand Down
Loading

0 comments on commit c37e041

Please sign in to comment.