Changeset: 3ed973da6129 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=3ed973da6129 Modified Files: sql/backends/monet5/bam/bam_lib.c Branch: bamloader Log Message:
Make seq_char also work properly on insertions/deletions that occur in the CIGAR string. Furthermore, add some extra error checks. diffs (114 lines): diff --git a/sql/backends/monet5/bam/bam_lib.c b/sql/backends/monet5/bam/bam_lib.c --- a/sql/backends/monet5/bam/bam_lib.c +++ b/sql/backends/monet5/bam/bam_lib.c @@ -146,7 +146,8 @@ seq_length(int * ret, str * cigar) int result = 0; str cigar_consumable = *cigar; - if (cigar_consumable[0] == '*' && cigar_consumable[1] == '\0') { + if (cigar_consumable[0] == '\0' || + (cigar_consumable[0] == '*' && cigar_consumable[1] == '\0')) { *ret = -1; return MAL_SUCCEED; } @@ -173,44 +174,58 @@ str seq_char(str * ret, int * ref_pos, str * alg_seq, int * alg_pos, str * alg_cigar) { str cigar_consumable = *alg_cigar; - int seq_pos = 0; - bit at_character = false; - int iterate_until = *ref_pos - *alg_pos; - str result; - - if((result = GDKmalloc(2 * sizeof(char))) == NULL) { - throw(MAL, "seq_char", MAL_MALLOC_FAIL); - } - result[1] = '\0'; + int seq_pos = -1; + int cur_ref_pos = *alg_pos - 1; - if (cigar_consumable[0] == '*' && cigar_consumable[1] == '\0') { - result[0] = '\0'; - *ret = result; + if (cigar_consumable[0] == '\0' || + (cigar_consumable[0] == '*' && cigar_consumable[1] == '\0')) { + *ret = GDKstrdup(str_nil); return MAL_SUCCEED; } - while(cigar_consumable[0] != '\0' && seq_pos < iterate_until) { + while(TRUE) { int cnt; char op; int nr_chars_read; + bit advance_ref_pos; + bit advance_seq_pos; if (sscanf (cigar_consumable, "%d%c%n", &cnt, &op, &nr_chars_read) != 2) throw(MAL, "seq_char", "Error parsing CIGAR string '%s'\n", *alg_cigar); - if (op == 'M' || op == 'D' || op == 'N' || op == '=' - || op == 'X') { + advance_ref_pos = (op == 'M' || op == 'D' || + op == 'N' || op == '=' || op == 'X'); + advance_seq_pos = (op == 'M' || op == 'I'); // TODO: Find out which chars advance the seq pos + if(advance_seq_pos) { seq_pos += cnt; - if(seq_pos > iterate_until) - seq_pos = iterate_until; - at_character = true; - } else { 
at_character = false; + } + if (advance_ref_pos) { + cur_ref_pos += cnt; + if(cur_ref_pos >= *ref_pos) { + if(!advance_seq_pos) { + seq_pos = -1; + } else { + seq_pos -= (cur_ref_pos - *ref_pos); + } + break; + } } cigar_consumable += nr_chars_read; + if(cigar_consumable[0] == '\0') { + seq_pos = -1; + break; + } } - result[0] = at_character ? (*alg_seq)[seq_pos] : '\0'; - *ret = result; + if(seq_pos < 0 || seq_pos >= (int)strlen(*alg_seq)) { + *ret = GDKstrdup(str_nil); + return MAL_SUCCEED; + } + if(((*ret) = GDKmalloc(2 * sizeof(char))) == NULL) { + throw(MAL, "seq_char", MAL_MALLOC_FAIL); + } + (*ret)[0] = (*alg_seq)[seq_pos]; + (*ret)[1] = '\0'; return MAL_SUCCEED; } @@ -398,7 +413,9 @@ seq_char_bat(bat * ret, int * ref_pos, b throw(MAL, "seq_char_bat", RUNTIME_OBJECT_MISSING); if(BATcount(seqs) != BATcount(poss) || BATcount(seqs) != BATcount(cigars)) { - throw(MAL, "seq_char_bat", "Misalignment in input BATs"); + throw(MAL, "seq_char_bat", + "Misalignment in input BATs: "BUNFMT"/"BUNFMT"/"BUNFMT, + BATcount(poss), BATcount(seqs), BATcount(cigars)); } /* allocate result BAT */ @@ -428,7 +445,7 @@ seq_char_bat(bat * ret, int * ref_pos, b BBPreleaseref(result->batCacheid); return msg; } - BUNappend(result, (ptr) &r, FALSE); + BUNappend(result, (ptr) r, FALSE); ++seq; ++pos; ++cigar; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list