Skip to content

Commit

Permalink
Merge pull request #7 from broadinstitute/vrr_make_index
Browse files Browse the repository at this point in the history
Adds a method to create the index files given a fasta file.
  • Loading branch information
Valentin Ruano Rubio authored Jul 31, 2017
2 parents 7e4b029 + 09aa976 commit 7c053c9
Show file tree
Hide file tree
Showing 14 changed files with 716 additions and 33 deletions.
5 changes: 4 additions & 1 deletion src/main/c/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,12 @@ JNI_BASE_NAME=org_broadinstitute_hellbender_utils_bwa_BwaMemIndex

all: libbwa.$(LIB_EXT)

libbwa.$(LIB_EXT): $(JNI_BASE_NAME).o jnibwa.o bwa/libbwa.a
libbwa.$(LIB_EXT): $(JNI_BASE_NAME).o jnibwa.o bwtidxbuild.o bwa/libbwa.a
$(CC) -ggdb -dynamiclib -shared -o $@ $^ -lm -lz -lpthread

bwa:
git clone https://github.com/lh3/bwa && cd bwa && git checkout $(BWA_MEM_COMMIT) && echo '#define BWA_COMMIT "'$(BWA_MEM_COMMIT)'"' > bwa_commit.h
sed -i .bak 's/\(LOBJS=.*\)/\1 $$(AOBJS)/g' bwa/Makefile

bwa/libbwa.a: bwa
$(MAKE) CFLAGS="$(CFLAGS)" -C bwa libbwa.a
Expand All @@ -32,6 +33,8 @@ $(JNI_BASE_NAME).o: $(JNI_BASE_NAME).c jnibwa.h bwa

jnibwa.o: jnibwa.c jnibwa.h bwa

bwtidxbuild.o: bwtidxbuild.c bwa

clean:
rm -rf bwa *.o *.$(LIB_EXT)

Expand Down
104 changes: 104 additions & 0 deletions src/main/c/bwtidxbuild.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/mman.h>
// Commented out as no needed unless we debug elapse time (which is itself commented out).
//#include <time.h>
#include <unistd.h>
#include <zlib.h>
#include <errno.h>

#include "bwa/kstring.h"
#include "bwa/bntseq.h"
#include "bwa/bwt.h"
#include "bwa/utils.h"
#include "bwa/rle.h"
#include "bwa/rope.h"

int bwt_idx_build(const char *fasta, const char *prefix, const char *algo_type_str);

// This code is copy&paste&skimming of bwtindex.c bwa_index(int,char**)
// We copied it here to avoid parameter parsing which is amonst other thing non-thread
// safe (uses global variables).
//
// In a more up-to-date version in bwa main branch the bwt_idx_build method is exported by libbwa
// and that would be the natural choice. However in the Apache2 branch that is not
// available yet, so we need to do this hack.
int bwt_idx_build(const char *fasta, const char *prefix, const char *algo_type_str)
{
extern void bwa_pac_rev_core(const char *fn, const char *fn_rev);
extern bwt_t *bwt_pac2bwt(const char *fn_pac, int use_is);

char *str, *str2, *str3;
int algo_type = 0;
// Commented out clocking and debug print outs, but kept them just in
// case they are needed at some point.
// clock_t t;
int64_t l_pac;

if (strcmp(algo_type_str, "rb2") == 0) algo_type = 1;
else if (strcmp(algo_type_str, "is") == 0) algo_type = 3;
else if (strcmp(algo_type_str, "auto") == 0) algo_type = 0;
else err_fatal(__func__, "unknown algorithm: '%s'.", algo_type_str);

str = (char*)calloc(strlen(prefix) + 10, 1);
str2 = (char*)calloc(strlen(prefix) + 10, 1);
str3 = (char*)calloc(strlen(prefix) + 10, 1);

{ // nucleotide indexing
gzFile fp = xzopen(fasta, "r");
// t = clock();
// fprintf(stderr, "[bwa_index] Pack FASTA... ");
l_pac = bns_fasta2bntseq(fp, prefix, 0);
// fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
err_gzclose(fp);
}
if (algo_type == 0) algo_type = l_pac > 50000000? 1 : 3; // set the algorithm for generating BWT
{
bwt_t *bwt;
strcpy(str, prefix); strcat(str, ".pac");
strcpy(str2, prefix); strcat(str2, ".bwt");
// t = clock();
// fprintf(stderr, "[bwa_index] Construct BWT for the packed sequence...\n");
bwt = bwt_pac2bwt(str, algo_type == 3);
bwt_dump_bwt(str2, bwt);
bwt_destroy(bwt);
// fprintf(stderr, "[bwa_index] %.2f seconds elapse.\n", (float)(clock() - t) / CLOCKS_PER_SEC);
}
{
bwt_t *bwt;
strcpy(str, prefix); strcat(str, ".bwt");
// t = clock();
// fprintf(stderr, "[bwa_index] Update BWT... ");
bwt = bwt_restore_bwt(str);
bwt_bwtupdate_core(bwt);
bwt_dump_bwt(str, bwt);
bwt_destroy(bwt);
// fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
}
{
gzFile fp = xzopen(fasta, "r");
// t = clock();
// fprintf(stderr, "[bwa_index] Pack forward-only FASTA... ");
l_pac = bns_fasta2bntseq(fp, prefix, 1);
// fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
err_gzclose(fp);
}
{
bwt_t *bwt;
strcpy(str, prefix); strcat(str, ".bwt");
strcpy(str3, prefix); strcat(str3, ".sa");
// t = clock();
// fprintf(stderr, "[bwa_index] Construct SA from BWT and Occ... ");
bwt = bwt_restore_bwt(str);
bwt_cal_sa(bwt, 32);
bwt_dump_sa(str3, bwt);
bwt_destroy(bwt);
// fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
}
free(str3); free(str2); free(str);
return 0;
}
11 changes: 11 additions & 0 deletions src/main/c/jnibwa.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,26 @@
* jnibwa.c
*/

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <time.h>
#include <unistd.h>
#include <zlib.h>
#include <errno.h>

#include "jnibwa.h"
#include "bwa/kstring.h"
#include "bwa/bntseq.h"
#include "bwa/bwt.h"
#include "bwa/utils.h"
#include "bwa/rle.h"
#include "bwa/rope.h"


static inline void kput32( int32_t val, kstring_t* str ) {
kputsn((char*)&val, sizeof(int32_t), str);
Expand Down
2 changes: 2 additions & 0 deletions src/main/c/jnibwa.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

#include "bwa/bwamem.h"


int jnibwa_createReferenceIndex( char const* refFileName, char const* indexPrefix, char const* algoName);
int jnibwa_createIndexFile( char const* refName, char const* imgSuffix );
bwaidx_t* jnibwa_openIndex( int fd );
int jnibwa_destroyIndex( bwaidx_t* pIdx );
Expand Down
38 changes: 29 additions & 9 deletions src/main/c/org_broadinstitute_hellbender_utils_bwa_BwaMemIndex.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,42 @@
#include "jnibwa.h"
#include "bwa/bwa_commit.h"


char * jstring_to_chars(JNIEnv* env, jstring in) {
const char* tmp = (*env)->GetStringUTFChars(env, in, 0);
char* res = strdup(tmp);
(*env)->ReleaseStringUTFChars(env, in, tmp);
return res;
}


JNIEXPORT jboolean JNICALL
Java_org_broadinstitute_hellbender_utils_bwa_BwaMemIndex_createReferenceIndex( JNIEnv* env, jclass cls, jstring jReferenceFileName, jstring jIndexPrefix, jstring jAlgoName) {

extern int bwt_idx_build(const char*, const char*, const char*);

char *referenceFileName = jstring_to_chars(env, jReferenceFileName);
char *indexPrefix = jstring_to_chars(env, jIndexPrefix);
char *algoName = jstring_to_chars(env, jAlgoName);
jboolean res = !bwt_idx_build( referenceFileName, indexPrefix, algoName);
free(referenceFileName); free(indexPrefix); free(algoName);
return res;
}

JNIEXPORT jboolean JNICALL
Java_org_broadinstitute_hellbender_utils_bwa_BwaMemIndex_createIndexImageFile( JNIEnv* env, jclass cls, jstring referencePrefix, jstring imageFileName ) {
char const* referencePrefixChars = (*env)->GetStringUTFChars(env, referencePrefix, 0);
char const* refName = strdup(referencePrefixChars);
(*env)->ReleaseStringUTFChars(env, referencePrefix, referencePrefixChars);
char const* imageFileNameChars = (*env)->GetStringUTFChars(env, imageFileName, 0);
char const* imgName = strdup(imageFileNameChars);
(*env)->ReleaseStringUTFChars(env, imageFileName, imageFileNameChars);
return !jnibwa_createIndexFile( refName, imgName );
char *refName = jstring_to_chars(env, referencePrefix);
char *imgName = jstring_to_chars(env, imageFileName);
jboolean res = !jnibwa_createIndexFile( refName, imgName );
free(refName); free(imgName);
return res;
}

JNIEXPORT jlong JNICALL
Java_org_broadinstitute_hellbender_utils_bwa_BwaMemIndex_openIndex( JNIEnv* env, jclass cls, jstring memImgFilename ) {
char const* fname = (*env)->GetStringUTFChars(env, memImgFilename, 0);
char *fname = jstring_to_chars(env, memImgFilename);
int fd = open(fname, O_RDONLY);
(*env)->ReleaseStringUTFChars(env, memImgFilename, fname);
free(fname);
if ( fd == -1 ) return 0;
return (jlong)jnibwa_openIndex(fd);
}
Expand Down
Loading

0 comments on commit 7c053c9

Please sign in to comment.