Skip to content

Commit

Permalink
multibam improvements up to 25 BAMs. List can also be comma separated…
Browse files Browse the repository at this point in the history
…. v0.70 ready to release. Happy Republic Day, Turkey.
  • Loading branch information
calkan committed Oct 29, 2018
1 parent e76664b commit 34b3fbd
Show file tree
Hide file tree
Showing 9 changed files with 61 additions and 82 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
TARDIS_VERSION := "0.7"
TARDIS_UPDATE := "October 24, 2018"
TARDIS_UPDATE := "October 29, 2018"
TARDIS_DEBUG := 0
BUILD_DATE := "$(shell date)"
CC=gcc
CFLAGS = -O3 -funroll-loops -g -I htslib -I vh -I sonic -DTARDIS_VERSION=\"$(TARDIS_VERSION)\" -DBUILD_DATE=\"$(BUILD_DATE)\" -DTARDIS_UPDATE=\"$(TARDIS_UPDATE)\" -DTARDIS_DEBUG=$(TARDIS_DEBUG)
LDFLAGS = htslib/libhts.a vh/libvh.a sonic/libsonic.a -lz -lm -lpthread -llzma -lbz2
LDFLAGS = htslib/libhts.a vh/libvh.a sonic/libsonic.a -lz -lm -lpthread -llzma -lbz2
SOURCES = tardis.c cmdline.c common.c processbam.c config.c processfq.c external.c variants.c splitread.c processrefgen.c bamonly.c free.c mappings.c
OBJECTS = $(SOURCES:.c=.o)
EXECUTABLE = tardis
Expand Down
22 changes: 21 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,12 +96,32 @@ This command first runs mrFAST and creates the DIVET file that contains all poss
DIVET files should be inside the TARDIS directory or under the divet/ folder


Running TARDIS - Multiple BAM/CRAM files
========================================

You can run TARDIS on multiple input files to generate calls from a collection of samples. Note that we tested TARDIS with up to 25 input files, although hard limit is set to 100.

There are three different ways of passing multiple input files to TARDIS:

* Use --bamlist with a text file. The text file should include paths to input BAM/CRAM files; one file per line:
tardis --bamlist myinputs.txt --ref human_g1k_v37.fasta --sonic human_g1k_v37.sonic
--out multiplesamples

* Use -i (or --input) multiple times:
tardis -i myinput1.bam -i myinput2.bam -i myinput3.bam --ref human_g1k_v37.fasta --sonic human_g1k_v37.sonic
--out multiplesamples

* Use -i (or --input) with comma-separated BAM/CRAM list:
tardis -i myinput1.bam,myinput2.bam,myinput3.bam --ref human_g1k_v37.fasta --sonic human_g1k_v37.sonic
--out multiplesamples


All parameters
==============

Basic Parameters:
--bamlist [bamlist file] : A text file that lists input BAM files one file per line.
--input [BAM files] : Input files in sorted and indexed BAM format. You can pass multiple BAMs using multiple --input parameters.
--input/-i [BAM files] : Input files in sorted and indexed BAM format. You can pass multiple BAMs using multiple --input parameters.
--out [output prefix] : Prefix for the output file names.
--ref [reference genome] : Reference genome in FASTA format.
--sonic [sonic file] : SONIC file that contains assembly annotations.
Expand Down
35 changes: 33 additions & 2 deletions cmdline.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ int parse_command_line( int argc, char** argv, parameters* params)
fprintf( stderr, "Number of input BAMs exceeded the maximum value (%d). Exiting.\n", MAX_BAMS);
exit( EXIT_MAXBAMS);
}
set_str( &( params->bam_file_list[( params->num_bams)++]), optarg);
params->num_bams = parse_bam_array(params, optarg);
// set_str( &( params->bam_file_list[( params->num_bams)++]), optarg);
break;

case 'f':
Expand Down Expand Up @@ -357,7 +358,7 @@ void print_help( void)
fprintf( stdout, "Version %s\n\tLast update: %s, build date: %s\n\n", TARDIS_VERSION, TARDIS_UPDATE, BUILD_DATE);
fprintf( stdout, "\tBasic Parameters:\n\n");
fprintf( stdout, "\t--bamlist [bamlist file] : A text file that lists input BAM files one file per line.\n");
fprintf( stdout, "\t--input [BAM files] : Input files in sorted and indexed BAM format. You can pass multiple BAMs using multiple --input parameters.\n");
fprintf( stdout, "\t--input/-i [BAM files] : Input files in sorted and indexed BAM format. You can pass multiple BAMs using multiple --input parameters.\n");
fprintf( stdout, "\t--out [output prefix] : Prefix for the output file names.\n");
fprintf( stdout, "\t--ref [reference genome] : Reference genome in FASTA format.\n");
fprintf( stdout, "\t--sonic [sonic file] : SONIC file that contains assembly annotations.\n\n");
Expand Down Expand Up @@ -409,11 +410,41 @@ int parse_bam_list( parameters** params)
i = 0;
while( fscanf( bam_list, "%s\n", next_path) == 1)
{
if ( i == MAX_BAMS){
fprintf( stderr, "Number of input BAMs exceeded the maximum value (%d). Exiting.\n", MAX_BAMS);
exit( EXIT_MAXBAMS);
}
set_str( &( ( *params)->bam_file_list)[i], next_path);
i = i + 1;
}

fclose( bam_list);
fprintf( stderr, " %d input files found.\n", i);
return i;

}

int parse_bam_array( parameters* params, char *optarg)
{
/* when the BAM/CRAM list is comma separated */
char *next_bam;
int i;

i = params->num_bams;

next_bam = strtok(optarg, ",");
while( next_bam != NULL)
{
printf ("Input file %s\n", next_bam);
if ( i == MAX_BAMS){
fprintf( stderr, "Number of input BAMs exceeded the maximum value (%d). Exiting.\n", MAX_BAMS);
exit( EXIT_MAXBAMS);
}
set_str( &( params->bam_file_list[i]), next_bam);
i = i + 1;
next_bam = strtok(NULL, ",");
}

fprintf( stderr, " %d input files found.\n", i);
return i;
}
1 change: 1 addition & 0 deletions cmdline.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

int parse_command_line( int, char**, parameters*);
int parse_bam_list( parameters** params);
int parse_bam_array( parameters* params, char *optarg);
void print_help( void);

#endif
4 changes: 2 additions & 2 deletions common.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,9 @@
#define RETURN_SUCCESS 1
#define RETURN_ERROR 0

#define MAX_BAMS 256
#define MAX_BAMS 100
#define MAXLISTBRKPOINTINTR 10000000
#define MAX_SAMPLES 256
#define MAX_SAMPLES 100

/* Maximum filename length */
#define MAX_LENGTH 1024
Expand Down
71 changes: 0 additions & 71 deletions splitread.c
Original file line number Diff line number Diff line change
Expand Up @@ -355,25 +355,6 @@ posMapSoftClip *almostPerfect_match_seq_ref( parameters *params, int chr_index,
}


/* deprecated
while( ptr != NULL)
{
if( abs( ptr->pos - pos) < 100000)
{
dist = hammingDistance( &( params->ref_seq[ptr->pos]), str, strlen( str));
if( dist <= ( 0.05 * strlen( str)))
{
posMap[posMapSize] = ptr->pos;
orient[posMapSize] = FORWARD;
hammingDisMap[posMapSize] = dist;
posMapSize = posMapSize + 1;
}
}
ptr = ptr->next;
if( posMapSize > 10)
ptr = NULL;
}*/

if( posMapSize < 11)
{
strRev = ( char *)getMem( (str_length + 1) * sizeof( char));
Expand Down Expand Up @@ -429,58 +410,6 @@ posMapSoftClip *almostPerfect_match_seq_ref( parameters *params, int chr_index,
}


/* deprecated
if( posMapSize < 11)
{
strRev = ( char *)getMem( str_length + 1) * sizeof( char));
for( i = 0; i < strlen( str); i++)
{
if( str[i] == 'A')
strRev[str_length - i - 1] = 'T';
else if( str[i] == 'T')
strRev[str_length - i - 1] = 'A';
else if( str[i] == 'G')
strRev[str_length - i - 1] = 'C';
else if( str[i] == 'C')
strRev[str_length - i - 1] = 'G';
else if( str[i] == 'N')
strRev[str_length - i - 1] = 'N';
}
strRev[str_length] = '\0';
strncpy( seed, strRev, HASHKMERLEN);
seed[HASHKMERLEN] = '\0';
if (is_kmer_valid( seed)){
index = hash_function_ref( seed);
ptr = hash_table_SR[index];
}
else
ptr = NULL;
reverseMatch = 0;
while( ptr != NULL)
{
if( abs( ptr->pos - pos) < 100000)
{
dist = hammingDistance( &( ref_seq_per_chr[ptr->pos]), strRev, str_length);
if( dist <= ( 0.05 * strlen( strRev)))
{
posMap[posMapSize] = ptr->pos;
orient[posMapSize] = REVERSE;
hammingDisMap[posMapSize] = dist;
posMapSize = posMapSize + 1;
reverseMatch = 1;
}
}
ptr = ptr->next;
if( posMapSize > 10)
ptr = NULL;
}
freeMem( strRev, str_length+1);
}
*/

if( posMapSize < 12 && posMapSize > 0)
{
for( i = 0; i < posMapSize; i++)
Expand Down
2 changes: 1 addition & 1 deletion vh/Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
CC=gcc
CFLAGS = -O3 -fno-stack-protector -g -I ../htslib -mcmodel=medium
LDFLAGS = ../htslib/libhts.a -lz -lm -lpthread
LDFLAGS = ../htslib/libhts.a -lz -lm -lpthread
SOURCES = vh_conflict.c vh_buffer.c vh_setcover.c vh_main.c vh_logger.c vh_divethandler.c vh_hash.c vh_createMaxClusterDeletion.c vh_heap.c vh_maximalCluster.c vh_createMaxClusterInversion.c vh_createMaxClusterTDup.c vh_createMaxClusterInvDup.c vh_createMaxClusterInterDup.c vh_createMaxClusterMEI.c vh_createMaxClusterNUMT.c vh_createMaxClusterInsertion.c

OBJECTS = $(SOURCES:.c=.o)
Expand Down
2 changes: 0 additions & 2 deletions vh/vh_maximalCluster.c
Original file line number Diff line number Diff line change
Expand Up @@ -514,11 +514,9 @@ int vh_outputCluster (ClustersFound * cluster, char SVtype)
list_of_written_reads[totalAddedToList][2] = (int) cluster->readMappingPtrArray[readMapCount]->editDistance;
totalAddedToList++;
if (totalAddedToList >= list_size) {
fprintf( stderr, "totaladdedto list: %d\n", totalAddedToList);
list_of_written_reads = recreate_list_of_reads(list_of_written_reads, list_size, list_size+MAXCLUSTERLIST);
list_size+=MAXCLUSTERLIST;
}
/* ARDA: there is no control here to check if totalAddedToList < MAXCLUSTERLIST. What happens if it exceeds? */
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion vh/vh_setcover.c
Original file line number Diff line number Diff line change
Expand Up @@ -1193,7 +1193,7 @@ void init(bam_info **in_bams, parameters *params)

multiLibs = ( multiLib *) getMem( sizeof( multiLib) * ( multiLibsCount + params->num_bams + 1));
multilib_array_size = multiLibsCount + params->num_bams + 1;
fprintf(stderr, "MultiLib size: %d\n", multilib_array_size);

multiLibsCount = 0;

for( j = 0; j < params->num_bams; j++)
Expand Down

0 comments on commit 34b3fbd

Please sign in to comment.