/sstring.h
C Header | 3435 lines | 2138 code | 313 blank | 984 comment | 417 complexity | 37c2a47faa0c3ff5df42ad761e2f7c0a MD5 | raw file
Possible License(s): GPL-3.0
Large files files are truncated, but you can click here to view the full file
- /*
- * Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
- *
- * This file is part of Bowtie 2.
- *
- * Bowtie 2 is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Bowtie 2 is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>.
- */
- #ifndef SSTRING_H_
- #define SSTRING_H_
- #include <string.h>
- #include <iostream>
- #include "assert_helpers.h"
- #include "alphabet.h"
- #include "random_source.h"
- /**
- * Four kinds of strings defined here:
- *
- * SString:
- * A fixed-length string using heap memory with size set at construction time
- * or when install() member is called.
- *
- * S2bDnaString:
- * Like SString, but stores a list of uint32_t words where each word is divided
- * into 16 2-bit slots interpreted as holding one A/C/G/T nucleotide each.
- *
- * TODO: S3bDnaString allowing N. S4bDnaString allowing nucleotide masks.
- *
- * SStringExpandable:
- * A string using heap memory where the size of the backing store is
- * automatically resized as needed. Supports operations like append, insert,
- * erase, etc.
- *
- * SStringFixed:
- * A fixed-length string using stack memory where size is set at compile
- * time.
- *
- * All string classes have some extra facilities that make it easy to print the
- * string, including when the string uses an encoded alphabet. See toZBuf()
- * and toZBufXForm().
- *
- * Global lt, eq, and gt template functions are supplied. They are capable of
- * doing lexicographical comparisons between any of the kinds of strings
- * defined here.
- */
/**
 * Length trait used by the generic sstr_* helpers.  The primary template
 * handles any string-like type that exposes a length() member.
 */
template<typename T>
class Class_sstr_len {
public:
	/** Return the number of elements reported by s.length(). */
	static inline size_t sstr_len(const T& s) {
		const size_t n = s.length();
		return n;
	}
};
- template<unsigned N>
- class Class_sstr_len<const char[N]> {
- public:
- static inline size_t sstr_len(const char s[N]) {
- return strlen(s);
- }
- };
- template<>
- class Class_sstr_len<const char *> {
- public:
- static inline size_t sstr_len(const char *s) {
- return strlen(s);
- }
- };
- template<>
- class Class_sstr_len<const unsigned char *> {
- public:
- static inline size_t sstr_len(const unsigned char *s) {
- return strlen((const char *)s);
- }
- };
- template<typename T1, typename T2>
- static inline bool sstr_eq(const T1& s1, const T2& s2) {
- size_t len1 = Class_sstr_len<T1>::sstr_len(s1);
- size_t len2 = Class_sstr_len<T2>::sstr_len(s2);
- if(len1 != len2) return false;
- for(size_t i = 0; i < len1; i++) {
- if(s1[i] != s2[i]) return false;
- }
- return true;
- }
/**
 * Return true iff s1 and s2 differ in length or in at least one element;
 * the exact negation of sstr_eq().
 */
template<typename T1, typename T2>
static inline bool sstr_neq(const T1& s1, const T2& s2) {
	const bool same = sstr_eq(s1, s2);
	return !same;
}
- /**
- * Return true iff the given suffix of s1 is equal to the given suffix of s2 up
- * to upto characters.
- */
- template<typename T1, typename T2>
- static inline bool sstr_suf_upto_eq(
- const T1& s1, size_t suf1,
- const T2& s2, size_t suf2,
- size_t upto,
- bool endlt = true)
- {
- assert_leq(suf1, Class_sstr_len<T1>::sstr_len(s1));
- assert_leq(suf2, Class_sstr_len<T2>::sstr_len(s2));
- size_t len1 = Class_sstr_len<T1>::sstr_len(s1) - suf1;
- size_t len2 = Class_sstr_len<T2>::sstr_len(s2) - suf2;
- if(len1 > upto) len1 = upto;
- if(len2 > upto) len2 = upto;
- if(len1 != len2) return false;
- for(size_t i = 0; i < len1; i++) {
- if(s1[suf1+i] != s2[suf2+i]) {
- return false;
- }
- }
- return true;
- }
/**
 * Return true iff the given suffix of s1 is NOT equal to the given suffix of
 * s2 when at most 'upto' elements of each are considered; the exact negation
 * of sstr_suf_upto_eq().
 */
template<typename T1, typename T2>
static inline bool sstr_suf_upto_neq(
	const T1& s1, size_t suf1,
	const T2& s2, size_t suf2,
	size_t upto,
	bool endlt = true)
{
	const bool same = sstr_suf_upto_eq(s1, suf1, s2, suf2, upto, endlt);
	return !same;
}
- /**
- * Return true iff s1 is less than s2.
- */
- template<typename T1, typename T2>
- static inline bool sstr_lt(const T1& s1, const T2& s2, bool endlt = true) {
- size_t len1 = Class_sstr_len<T1>::sstr_len(s1);
- size_t len2 = Class_sstr_len<T2>::sstr_len(s2);
- size_t minlen = (len1 < len2 ? len1 : len2);
- for(size_t i = 0; i < minlen; i++) {
- if(s1[i] < s2[i]) {
- return true;
- } else if(s1[i] > s2[i]) {
- return false;
- }
- }
- if(len1 == len2) return false;
- return (len1 < len2) == endlt;
- }
- /**
- * Return true iff the given suffix of s1 is less than the given suffix of s2.
- */
- template<typename T1, typename T2>
- static inline bool sstr_suf_lt(
- const T1& s1, size_t suf1,
- const T2& s2, size_t suf2,
- bool endlt = true)
- {
- assert_leq(suf1, Class_sstr_len<T1>::sstr_len(s1));
- assert_leq(suf2, Class_sstr_len<T2>::sstr_len(s2));
- size_t len1 = Class_sstr_len<T1>::sstr_len(s1) - suf1;
- size_t len2 = Class_sstr_len<T2>::sstr_len(s2) - suf2;
- size_t minlen = (len1 < len2 ? len1 : len2);
- for(size_t i = 0; i < minlen; i++) {
- if(s1[suf1+i] < s2[suf2+i]) {
- return true;
- } else if(s1[suf1+i] > s2[suf2+i]) {
- return false;
- }
- }
- if(len1 == len2) return false;
- return (len1 < len2) == endlt;
- }
- /**
- * Return true iff the given suffix of s1 is less than the given suffix of s2.
- * Treat s1 and s2 as though they have lengths len1/len2.
- */
- template<typename T1, typename T2>
- static inline bool sstr_suf_lt(
- const T1& s1, size_t suf1, size_t len1,
- const T2& s2, size_t suf2, size_t len2,
- bool endlt = true)
- {
- assert_leq(suf1, len1);
- assert_leq(suf2, len2);
- size_t left1 = len1 - suf1;
- size_t left2 = len2 - suf2;
- size_t minleft = (left1 < left2 ? left1 : left2);
- for(size_t i = 0; i < minleft; i++) {
- if(s1[suf1+i] < s2[suf2+i]) {
- return true;
- } else if(s1[suf1+i] > s2[suf2+i]) {
- return false;
- }
- }
- if(left1 == left2) return false;
- return (left1 < left2) == endlt;
- }
- /**
- * Return true iff the given suffix of s1 is less than the given suffix of s2
- * up to upto characters.
- */
- template<typename T1, typename T2>
- static inline bool sstr_suf_upto_lt(
- const T1& s1, size_t suf1,
- const T2& s2, size_t suf2,
- size_t upto,
- bool endlt = true)
- {
- assert_leq(suf1, Class_sstr_len<T1>::sstr_len(s1));
- assert_leq(suf2, Class_sstr_len<T2>::sstr_len(s2));
- size_t len1 = Class_sstr_len<T1>::sstr_len(s1) - suf1;
- size_t len2 = Class_sstr_len<T2>::sstr_len(s2) - suf2;
- if(len1 > upto) len1 = upto;
- if(len2 > upto) len2 = upto;
- size_t minlen = (len1 < len2 ? len1 : len2);
- for(size_t i = 0; i < minlen; i++) {
- if(s1[suf1+i] < s2[suf2+i]) {
- return true;
- } else if(s1[suf1+i] > s2[suf2+i]) {
- return false;
- }
- }
- if(len1 == len2) return false;
- return (len1 < len2) == endlt;
- }
- /**
- * Return true iff the given prefix of s1 is less than the given prefix of s2.
- */
- template<typename T1, typename T2>
- static inline bool sstr_pre_lt(
- const T1& s1, size_t pre1,
- const T2& s2, size_t pre2,
- bool endlt = true)
- {
- assert_leq(pre1, Class_sstr_len<T1>::sstr_len(s1));
- assert_leq(pre2, Class_sstr_len<T2>::sstr_len(s2));
- size_t len1 = pre1;
- size_t len2 = pre2;
- size_t minlen = (len1 < len2 ? len1 : len2);
- for(size_t i = 0; i < minlen; i++) {
- if(s1[i] < s2[i]) {
- return true;
- } else if(s1[i] > s2[i]) {
- return false;
- }
- }
- if(len1 == len2) return false;
- return (len1 < len2) == endlt;
- }
- /**
- * Return true iff s1 is less than or equal to s2.
- */
- template<typename T1, typename T2>
- static inline bool sstr_leq(const T1& s1, const T2& s2, bool endlt = true) {
- size_t len1 = Class_sstr_len<T1>::sstr_len(s1);
- size_t len2 = Class_sstr_len<T2>::sstr_len(s2);
- size_t minlen = (len1 < len2 ? len1 : len2);
- for(size_t i = 0; i < minlen; i++) {
- if(s1[i] < s2[i]) {
- return true;
- } else if(s1[i] > s2[i]) {
- return false;
- }
- }
- if(len1 == len2) return true;
- return (len1 < len2) == endlt;
- }
- /**
- * Return true iff the given suffix of s1 is less than or equal to the given
- * suffix of s2.
- */
- template<typename T1, typename T2>
- static inline bool sstr_suf_leq(
- const T1& s1, size_t suf1,
- const T2& s2, size_t suf2,
- bool endlt = true)
- {
- assert_leq(suf1, Class_sstr_len<T1>::sstr_len(s1));
- assert_leq(suf2, Class_sstr_len<T2>::sstr_len(s2));
- size_t len1 = Class_sstr_len<T1>::sstr_len(s1) - suf1;
- size_t len2 = Class_sstr_len<T2>::sstr_len(s2) - suf2;
- size_t minlen = (len1 < len2 ? len1 : len2);
- for(size_t i = 0; i < minlen; i++) {
- if(s1[suf1+i] < s2[suf2+i]) {
- return true;
- } else if(s1[suf1+i] > s2[suf2+i]) {
- return false;
- }
- }
- if(len1 == len2) return true;
- return (len1 < len2) == endlt;
- }
- /**
- * Return true iff the given prefix of s1 is less than or equal to the given
- * prefix of s2.
- */
- template<typename T1, typename T2>
- static inline bool sstr_pre_leq(
- const T1& s1, size_t pre1,
- const T2& s2, size_t pre2,
- bool endlt = true)
- {
- assert_leq(pre1, Class_sstr_len<T1>::sstr_len(s1));
- assert_leq(pre2, Class_sstr_len<T2>::sstr_len(s2));
- size_t len1 = pre1;
- size_t len2 = pre2;
- size_t minlen = (len1 < len2 ? len1 : len2);
- for(size_t i = 0; i < minlen; i++) {
- if(s1[i] < s2[i]) {
- return true;
- } else if(s1[i] > s2[i]) {
- return false;
- }
- }
- if(len1 == len2) return true;
- return (len1 < len2) == endlt;
- }
- /**
- * Return true iff s1 is greater than s2.
- */
- template<typename T1, typename T2>
- static inline bool sstr_gt(const T1& s1, const T2& s2, bool endlt = true) {
- size_t len1 = Class_sstr_len<T1>::sstr_len(s1);
- size_t len2 = Class_sstr_len<T2>::sstr_len(s2);
- size_t minlen = (len1 < len2 ? len1 : len2);
- for(size_t i = 0; i < minlen; i++) {
- if(s1[i] > s2[i]) {
- return true;
- } else if(s1[i] < s2[i]) {
- return false;
- }
- }
- if(len1 == len2) return false;
- return (len1 > len2) == endlt;
- }
- /**
- * Return true iff the given suffix of s1 is greater than the given suffix of
- * s2.
- */
- template<typename T1, typename T2>
- static inline bool sstr_suf_gt(
- const T1& s1, size_t suf1,
- const T2& s2, size_t suf2,
- bool endlt = true)
- {
- assert_leq(suf1, Class_sstr_len<T1>::sstr_len(s1));
- assert_leq(suf2, Class_sstr_len<T2>::sstr_len(s2));
- size_t len1 = Class_sstr_len<T1>::sstr_len(s1) - suf1;
- size_t len2 = Class_sstr_len<T2>::sstr_len(s2) - suf2;
- size_t minlen = (len1 < len2 ? len1 : len2);
- for(size_t i = 0; i < minlen; i++) {
- if(s1[suf1+i] > s2[suf2+i]) {
- return true;
- } else if(s1[suf1+i] < s2[suf2+i]) {
- return false;
- }
- }
- if(len1 == len2) return false;
- return (len1 > len2) == endlt;
- }
- /**
- * Return true iff the given prefix of s1 is greater than the given prefix of
- * s2.
- */
- template<typename T1, typename T2>
- static inline bool sstr_pre_gt(
- const T1& s1, size_t pre1,
- const T2& s2, size_t pre2,
- bool endlt = true)
- {
- assert_leq(pre1, Class_sstr_len<T1>::sstr_len(s1));
- assert_leq(pre2, Class_sstr_len<T2>::sstr_len(s2));
- size_t len1 = pre1;
- size_t len2 = pre2;
- size_t minlen = (len1 < len2 ? len1 : len2);
- for(size_t i = 0; i < minlen; i++) {
- if(s1[i] > s2[i]) {
- return true;
- } else if(s1[i] < s2[i]) {
- return false;
- }
- }
- if(len1 == len2) return false;
- return (len1 > len2) == endlt;
- }
- /**
- * Return true iff s1 is greater than or equal to s2.
- */
- template<typename T1, typename T2>
- static inline bool sstr_geq(const T1& s1, const T2& s2, bool endlt = true) {
- size_t len1 = Class_sstr_len<T1>::sstr_len(s1);
- size_t len2 = Class_sstr_len<T2>::sstr_len(s2);
- size_t minlen = (len1 < len2 ? len1 : len2);
- for(size_t i = 0; i < minlen; i++) {
- if(s1[i] > s2[i]) {
- return true;
- } else if(s1[i] < s2[i]) {
- return false;
- }
- }
- if(len1 == len2) return true;
- return (len1 > len2) == endlt;
- }
- /**
- * Return true iff the given suffix of s1 is greater than or equal to the given
- * suffix of s2.
- */
- template<typename T1, typename T2>
- static inline bool sstr_suf_geq(
- const T1& s1, size_t suf1,
- const T2& s2, size_t suf2,
- bool endlt = true)
- {
- assert_leq(suf1, Class_sstr_len<T1>::sstr_len(s1));
- assert_leq(suf2, Class_sstr_len<T2>::sstr_len(s2));
- size_t len1 = Class_sstr_len<T1>::sstr_len(s1) - suf1;
- size_t len2 = Class_sstr_len<T2>::sstr_len(s2) - suf2;
- size_t minlen = (len1 < len2 ? len1 : len2);
- for(size_t i = 0; i < minlen; i++) {
- if(s1[suf1+i] > s2[suf2+i]) {
- return true;
- } else if(s1[suf1+i] < s2[suf2+i]) {
- return false;
- }
- }
- if(len1 == len2) return true;
- return (len1 > len2) == endlt;
- }
- /**
- * Return true iff the given prefix of s1 is greater than or equal to the given
- * prefix of s2.
- */
- template<typename T1, typename T2>
- static inline bool sstr_pre_geq(
- const T1& s1, size_t pre1,
- const T2& s2, size_t pre2,
- bool endlt = true)
- {
- assert_leq(pre1, Class_sstr_len<T1>::sstr_len(s1));
- assert_leq(pre2, Class_sstr_len<T2>::sstr_len(s2));
- size_t len1 = pre1;
- size_t len2 = pre2;
- size_t minlen = (len1 < len2 ? len1 : len2);
- for(size_t i = 0; i < minlen; i++) {
- if(s1[i] > s2[i]) {
- return true;
- } else if(s1[i] < s2[i]) {
- return false;
- }
- }
- if(len1 == len2) return true;
- return (len1 > len2) == endlt;
- }
/**
 * Return a zero-terminated C-string view of s by delegating to the string's
 * own toZBuf() member.
 */
template<typename T>
static inline const char * sstr_to_cstr(const T& s) {
	const char *zbuf = s.toZBuf();
	return zbuf;
}
- template<>
- inline const char * sstr_to_cstr<std::basic_string<char> >(
- const std::basic_string<char>& s)
- {
- return s.c_str();
- }
- /**
- * Simple string class with backing memory whose size is managed by the user
- * using the constructor and install() member function. No behind-the-scenes
- * reallocation or copying takes place.
- */
- template<typename T>
- class SString {
-
- public:
- explicit SString() :
- cs_(NULL),
- printcs_(NULL),
- len_(0)
- { }
- explicit SString(size_t sz) :
- cs_(NULL),
- printcs_(NULL),
- len_(0)
- {
- resize(sz);
- }
- /**
- * Create an SStringExpandable from another SStringExpandable.
- */
- SString(const SString<T>& o) :
- cs_(NULL),
- printcs_(NULL),
- len_(0)
- {
- *this = o;
- }
- /**
- * Create an SStringExpandable from a std::basic_string of the
- * appropriate type.
- */
- explicit SString(const std::basic_string<T>& str) :
- cs_(NULL),
- printcs_(NULL),
- len_(0)
- {
- install(str.c_str(), str.length());
- }
- /**
- * Create an SStringExpandable from an array and size.
- */
- explicit SString(const T* b, size_t sz) :
- cs_(NULL),
- printcs_(NULL),
- len_(0)
- {
- install(b, sz);
- }
- /**
- * Create an SStringExpandable from a zero-terminated array.
- */
- explicit SString(const T* b) :
- cs_(NULL),
- printcs_(NULL),
- len_(0)
- {
- install(b, strlen(b));
- }
- /**
- * Destroy the expandable string object.
- */
- virtual ~SString() {
- if(cs_ != NULL) {
- delete[] cs_;
- cs_ = NULL;
- }
- if(printcs_ != NULL) {
- delete[] printcs_;
- printcs_ = NULL;
- }
- len_ = 0;
- }
- /**
- * Assignment to other SString.
- */
- SString<T>& operator=(const SString<T>& o) {
- install(o.cs_, o.len_);
- return *this;
- }
- /**
- * Assignment to other SString.
- */
- SString<T>& operator=(const std::basic_string<T>& o) {
- install(o);
- return *this;
- }
- /**
- * Resizes the string without preserving its contents.
- */
- void resize(size_t sz) {
- if(cs_ != NULL) {
- delete cs_;
- cs_ = NULL;
- }
- if(printcs_ != NULL) {
- delete printcs_;
- printcs_ = NULL;
- }
- if(sz != 0) {
- cs_ = new T[sz+1];
- }
- len_ = sz;
- }
- /**
- * Return ith character from the left of either the forward or the
- * reverse version of the read.
- */
- T windowGet(
- size_t i,
- bool fw,
- size_t depth = 0,
- size_t len = 0) const
- {
- if(len == 0) len = len_;
- assert_lt(i, len);
- assert_leq(len, len_ - depth);
- return fw ? cs_[depth+i] : cs_[depth+len-i-1];
- }
- /**
- * Return ith character from the left of either the forward or the
- * reverse-complement version of the read.
- */
- void windowGet(
- T& ret,
- bool fw,
- size_t depth = 0,
- size_t len = 0) const
- {
- if(len == 0) len = len_;
- assert_leq(len, len_ - depth);
- ret.resize(len);
- for(size_t i = 0; i < len; i++) {
- ret.set(fw ? cs_[depth+i] : cs_[depth+len-i-1], i);
- }
- }
- /**
- * Set character at index 'idx' to 'c'.
- */
- inline void set(int c, size_t idx) {
- assert_lt(idx, len_);
- cs_[idx] = c;
- }
- /**
- * Retrieve constant version of element i.
- */
- inline const T& operator[](size_t i) const {
- assert_lt(i, len_);
- return cs_[i];
- }
- /**
- * Retrieve mutable version of element i.
- */
- inline T& operator[](size_t i) {
- assert_lt(i, len_);
- return cs_[i];
- }
- /**
- * Retrieve constant version of element i.
- */
- inline const T& get(size_t i) const {
- assert_lt(i, len_);
- return cs_[i];
- }
- /**
- * Copy 'sz' bytes from buffer 'b' into this string. memcpy is used, not
- * operator=.
- */
- virtual void install(const T* b, size_t sz) {
- if(sz == 0) return;
- resize(sz);
- memcpy(cs_, b, sz * sizeof(T));
- }
- /**
- * Copy 'sz' bytes from buffer 'b' into this string. memcpy is used, not
- * operator=.
- */
- virtual void install(const std::basic_string<T>& b) {
- size_t sz = b.length();
- if(sz == 0) return;
- resize(sz);
- memcpy(cs_, b.c_str(), sz * sizeof(T));
- }
- /**
- * Copy all bytes from zero-terminated buffer 'b' into this string.
- */
- void install(const T* b) {
- install(b, strlen(b));
- }
- /**
- * Copy 'sz' bytes from buffer 'b' into this string, reversing them
- * in the process.
- */
- void installReverse(const char* b, size_t sz) {
- if(sz == 0) return;
- resize(sz);
- for(size_t i = 0; i < sz; i++) {
- cs_[i] = b[sz-i-1];
- }
- len_ = sz;
- }
- /**
- * Copy 'sz' bytes from buffer 'b' into this string, reversing them
- * in the process.
- */
- void installReverse(const SString<T>& b) {
- installReverse(b.cs_, b.len_);
- }
-
- /**
- * Return true iff the two strings are equal.
- */
- bool operator==(const SString<T>& o) {
- return sstr_eq(*this, o);
- }
- /**
- * Return true iff the two strings are not equal.
- */
- bool operator!=(const SString<T>& o) {
- return sstr_neq(*this, o);
- }
- /**
- * Return true iff this string is less than given string.
- */
- bool operator<(const SString<T>& o) {
- return sstr_lt(*this, o);
- }
- /**
- * Return true iff this string is greater than given string.
- */
- bool operator>(const SString<T>& o) {
- return sstr_gt(*this, o);
- }
- /**
- * Return true iff this string is less than or equal to given string.
- */
- bool operator<=(const SString<T>& o) {
- return sstr_leq(*this, o);
- }
- /**
- * Return true iff this string is greater than or equal to given string.
- */
- bool operator>=(const SString<T>& o) {
- return sstr_geq(*this, o);
- }
- /**
- * Reverse the buffer in place.
- */
- void reverse() {
- for(size_t i = 0; i < (len_ >> 1); i++) {
- T tmp = get(i);
- set(get(len_-i-1), i);
- set(tmp, len_-i-1);
- }
- }
- /**
- * Reverse a substring of the buffer in place.
- */
- void reverseWindow(size_t off, size_t len) {
- assert_leq(off, len_);
- assert_leq(off + len, len_);
- size_t mid = len >> 1;
- for(size_t i = 0; i < mid; i++) {
- T tmp = get(off+i);
- set(get(off+len-i-1), off+i);
- set(tmp, off+len-i-1);
- }
- }
- /**
- * Set the first len elements of the buffer to el.
- */
- void fill(size_t len, const T& el) {
- assert_leq(len, len_);
- for(size_t i = 0; i < len; i++) {
- set(el, i);
- }
- }
- /**
- * Set all elements of the buffer to el.
- */
- void fill(const T& el) {
- fill(len_, el);
- }
- /**
- * Return the length of the string.
- */
- inline size_t length() const { return len_; }
- /**
- * Clear the buffer.
- */
- void clear() { len_ = 0; }
- /**
- * Return true iff the buffer is empty.
- */
- inline bool empty() const { return len_ == 0; }
- /**
- * Put a terminator in the 'len_'th element and then return a
- * pointer to the buffer. Useful for printing.
- */
- const char* toZBufXForm(const char *xform) const {
- ASSERT_ONLY(size_t xformElts = strlen(xform));
- // Lazily allocate space for print buffer
- if(printcs_ == NULL) {
- const_cast<char*&>(printcs_) = new char[len_+1];
- }
- char* printcs = const_cast<char*>(printcs_);
- assert(printcs != NULL);
- for(size_t i = 0; i < len_; i++) {
- assert_lt(cs_[i], (int)xformElts);
- printcs[i] = xform[cs_[i]];
- }
- printcs[len_] = 0;
- return printcs_;
- }
- /**
- * Put a terminator in the 'len_'th element and then return a
- * pointer to the buffer. Useful for printing.
- */
- virtual const T* toZBuf() const {
- const_cast<T*>(cs_)[len_] = 0;
- return cs_;
- }
- /**
- * Return a const version of the raw buffer.
- */
- const T* buf() const { return cs_; }
- /**
- * Return a writeable version of the raw buffer.
- */
- T* wbuf() { return cs_; }
- protected:
- T *cs_; // +1 so that we have the option of dropping in a terminating "\0"
- char *printcs_; // +1 so that we have the option of dropping in a terminating "\0"
- size_t len_; // # elements
- };
- /**
- * Simple string class with backing memory whose size is managed by the user
- * using the constructor and install() member function. No behind-the-scenes
- * reallocation or copying takes place.
- */
- class S2bDnaString {
- public:
- explicit S2bDnaString() :
- cs_(NULL),
- printcs_(NULL),
- len_(0)
- { }
- explicit S2bDnaString(size_t sz) :
- cs_(NULL),
- printcs_(NULL),
- len_(0)
- {
- resize(sz);
- }
- /**
- * Copy another object of the same class.
- */
- S2bDnaString(const S2bDnaString& o) :
- cs_(NULL),
- printcs_(NULL),
- len_(0)
- {
- *this = o;
- }
- /**
- * Create an SStringExpandable from a std::basic_string of the
- * appropriate type.
- */
- explicit S2bDnaString(
- const std::basic_string<char>& str,
- bool chars = false,
- bool colors = false) :
- cs_(NULL),
- printcs_(NULL),
- len_(0)
- {
- if(chars) {
- if(colors) {
- installColors(str.c_str(), str.length());
- } else {
- installChars(str.c_str(), str.length());
- }
- } else {
- install(str.c_str(), str.length());
- }
- }
- /**
- * Create an SStringExpandable from an array and size.
- */
- explicit S2bDnaString(
- const char* b,
- size_t sz,
- bool chars = false,
- bool colors = false) :
- cs_(NULL),
- printcs_(NULL),
- len_(0)
- {
- if(chars) {
- if(colors) {
- installColors(b, sz);
- } else {
- installChars(b, sz);
- }
- } else {
- install(b, sz);
- }
- }
- /**
- * Create an SStringFixed from a zero-terminated string.
- */
- explicit S2bDnaString(
- const char* b,
- bool chars = false,
- bool colors = false) :
- cs_(NULL),
- printcs_(NULL),
- len_(0)
- {
- if(chars) {
- if(colors) {
- installColors(b, strlen(b));
- } else {
- installChars(b, strlen(b));
- }
- } else {
- install(b, strlen(b));
- }
- }
- /**
- * Destroy the expandable string object.
- */
- virtual ~S2bDnaString() {
- if(cs_ != NULL) {
- delete[] cs_;
- cs_ = NULL;
- }
- if(printcs_ != NULL) {
- delete[] printcs_;
- printcs_ = NULL;
- }
- len_ = 0;
- }
- /**
- * Assignment to other SString.
- */
- template<typename T>
- S2bDnaString& operator=(const T& o) {
- install(o.c_str(), o.length());
- return *this;
- }
- /**
- * Assignment from a std::basic_string
- */
- template<typename T>
- S2bDnaString& operator=(const std::basic_string<char>& o) {
- install(o);
- return *this;
- }
- /**
- * Resizes the string without preserving its contents.
- */
- void resize(size_t sz) {
- if(cs_ != NULL) {
- delete cs_;
- cs_ = NULL;
- }
- if(printcs_ != NULL) {
- delete printcs_;
- printcs_ = NULL;
- }
- len_ = sz;
- if(sz != 0) {
- cs_ = new uint32_t[nwords()];
- }
- }
- /**
- * Return DNA character corresponding to element 'idx'.
- */
- char toChar(size_t idx) const {
- int c = (int)get(idx);
- assert_range(0, 3, c);
- return "ACGT"[c];
- }
- /**
- * Return color character corresponding to element 'idx'.
- */
- char toColor(size_t idx) const {
- int c = (int)get(idx);
- assert_range(0, 3, c);
- return "0123"[c];
- }
- /**
- * Return ith character from the left of either the forward or the
- * reverse version of the read.
- */
- char windowGet(
- size_t i,
- bool fw,
- size_t depth = 0,
- size_t len = 0) const
- {
- if(len == 0) len = len_;
- assert_lt(i, len);
- assert_leq(len, len_ - depth);
- return fw ? get(depth+i) : get(depth+len-i-1);
- }
- /**
- * Return ith character from the left of either the forward or the
- * reverse-complement version of the read.
- */
- template<typename T>
- void windowGet(
- T& ret,
- bool fw,
- size_t depth = 0,
- size_t len = 0) const
- {
- if(len == 0) len = len_;
- assert_leq(len, len_ - depth);
- ret.resize(len);
- for(size_t i = 0; i < len; i++) {
- ret.set((fw ? get(depth+i) : get(depth+len-i-1)), i);
- }
- }
-
- /**
- * Return length in 32-bit words.
- */
- size_t nwords() const {
- return (len_ + 15) >> 4;
- }
- /**
- * Set character at index 'idx' to 'c'.
- */
- void set(int c, size_t idx) {
- assert_lt(idx, len_);
- assert_range(0, 3, c);
- size_t word = idx >> 4;
- size_t bpoff = (idx & 15) << 1;
- cs_[word] = cs_[word] & ~(uint32_t)(3 << bpoff);
- cs_[word] = cs_[word] | (uint32_t)(c << bpoff);
- }
- /**
- * Set character at index 'idx' to DNA char 'c'.
- */
- void setChar(int c, size_t idx) {
- assert_in(toupper(c), "ACGT");
- int bp = asc2dna[c];
- set(bp, idx);
- }
- /**
- * Set character at index 'idx' to color char 'c'.
- */
- void setColor(int c, size_t idx) {
- assert_in(toupper(c), "0123");
- int co = asc2col[c];
- set(co, idx);
- }
- /**
- * Set the ith 32-bit word to given word.
- */
- void setWord(uint32_t w, size_t i) {
- assert_lt(i, nwords());
- cs_[i] = w;
- }
- /**
- * Retrieve constant version of element i.
- */
- char operator[](size_t i) const {
- assert_lt(i, len_);
- return get(i);
- }
- /**
- * Retrieve constant version of element i.
- */
- char get(size_t i) const {
- assert_lt(i, len_);
- size_t word = i >> 4;
- size_t bpoff = (i & 15) << 1;
- return (char)((cs_[word] >> bpoff) & 3);
- }
- /**
- * Copy packed words from string 'b' into this packed string.
- */
- void install(const uint32_t* b, size_t sz) {
- if(sz == 0) return;
- resize(sz);
- memcpy(cs_, b, sizeof(uint32_t)*nwords());
- }
- /**
- * Copy 'sz' DNA characters encoded as integers from buffer 'b' into this
- * packed string.
- */
- void install(const char* b, size_t sz) {
- if(sz == 0) return;
- resize(sz);
- size_t wordi = 0;
- for(size_t i = 0; i < sz; i += 16) {
- uint32_t word = 0;
- for(int j = 0; j < 16 && (size_t)(i+j) < sz; j++) {
- uint32_t bp = (int)b[i+j];
- uint32_t shift = (uint32_t)j << 1;
- assert_range(0, 3, (int)bp);
- word |= (bp << shift);
- }
- cs_[wordi++] = word;
- }
- }
- /**
- * Copy 'sz' DNA characters from buffer 'b' into this packed string.
- */
- void installChars(const char* b, size_t sz) {
- if(sz == 0) return;
- resize(sz);
- size_t wordi = 0;
- for(size_t i = 0; i < sz; i += 16) {
- uint32_t word = 0;
- for(int j = 0; j < 16 && (size_t)(i+j) < sz; j++) {
- char c = b[i+j];
- assert_in(toupper(c), "ACGT");
- int bp = asc2dna[(int)c];
- assert_range(0, 3, (int)bp);
- uint32_t shift = (uint32_t)j << 1;
- word |= (bp << shift);
- }
- cs_[wordi++] = word;
- }
- }
- /**
- * Copy 'sz' color characters from buffer 'b' into this packed string.
- */
- void installColors(const char* b, size_t sz) {
- if(sz == 0) return;
- resize(sz);
- size_t wordi = 0;
- for(size_t i = 0; i < sz; i += 16) {
- uint32_t word = 0;
- for(int j = 0; j < 16 && (size_t)(i+j) < sz; j++) {
- char c = b[i+j];
- assert_in(c, "0123");
- int bp = asc2col[(int)c];
- assert_range(0, 3, (int)bp);
- uint32_t shift = (uint32_t)j << 1;
- word |= (bp << shift);
- }
- cs_[wordi++] = word;
- }
- }
- /**
- * Copy 'sz' DNA characters from buffer 'b' into this packed string.
- */
- void install(const char* b) {
- install(b, strlen(b));
- }
- /**
- * Copy 'sz' DNA characters from buffer 'b' into this packed string.
- */
- void installChars(const char* b) {
- installChars(b, strlen(b));
- }
- /**
- * Copy 'sz' DNA characters from buffer 'b' into this packed string.
- */
- void installColors(const char* b) {
- installColors(b, strlen(b));
- }
- /**
- * Copy 'sz' DNA characters from buffer 'b' into this packed string.
- */
- void install(const std::basic_string<char>& b) {
- install(b.c_str(), b.length());
- }
- /**
- * Copy 'sz' DNA characters from buffer 'b' into this packed string.
- */
- void installChars(const std::basic_string<char>& b) {
- installChars(b.c_str(), b.length());
- }
- /**
- * Copy 'sz' DNA characters from buffer 'b' into this packed string.
- */
- void installColors(const std::basic_string<char>& b) {
- installColors(b.c_str(), b.length());
- }
- /**
- * Copy 'sz' bytes from buffer 'b' into this string, reversing them
- * in the process.
- */
- void installReverse(const char* b, size_t sz) {
- resize(sz);
- if(sz == 0) return;
- size_t wordi = 0;
- size_t bpi = 0;
- cs_[0] = 0;
- for(size_t i =sz; i > 0; i--) {
- assert_range(0, 3, (int)b[i-1]);
- cs_[wordi] |= ((int)b[i-1] << (bpi<<1));
- if(bpi == 15) {
- wordi++;
- cs_[wordi] = 0;
- bpi = 0;
- } else bpi++;
- }
- }
- /**
- * Copy all chars from buffer of DNA characters 'b' into this string,
- * reversing them in the process.
- */
- void installReverse(const char* b) {
- installReverse(b, strlen(b));
- }
- /**
- * Copy 'sz' bytes from buffer of DNA characters 'b' into this string,
- * reversing them in the process.
- */
- void installReverseChars(const char* b, size_t sz) {
- resize(sz);
- if(sz == 0) return;
- size_t wordi = 0;
- size_t bpi = 0;
- cs_[0] = 0;
- for(size_t i =sz; i > 0; i--) {
- char c = b[i-1];
- assert_in(toupper(c), "ACGT");
- int bp = asc2dna[(int)c];
- assert_range(0, 3, bp);
- cs_[wordi] |= (bp << (bpi<<1));
- if(bpi == 15) {
- wordi++;
- cs_[wordi] = 0;
- bpi = 0;
- } else bpi++;
- }
- }
- /**
- * Copy all chars from buffer of DNA characters 'b' into this string,
- * reversing them in the process.
- */
- void installReverseChars(const char* b) {
- installReverseChars(b, strlen(b));
- }
- /**
- * Copy 'sz' bytes from buffer of color characters 'b' into this string,
- * reversing them in the process.
- */
- void installReverseColors(const char* b, size_t sz) {
- resize(sz);
- if(sz == 0) return;
- size_t wordi = 0;
- size_t bpi = 0;
- cs_[0] = 0;
- for(size_t i =sz; i > 0; i--) {
- char c = b[i-1];
- assert_in(c, "0123");
- int bp = asc2col[(int)c];
- assert_range(0, 3, bp);
- cs_[wordi] |= (bp << (bpi<<1));
- if(bpi == 15) {
- wordi++;
- cs_[wordi] = 0;
- bpi = 0;
- } else bpi++;
- }
- }
- /**
- * Copy all chars from buffer of color characters 'b' into this string,
- * reversing them in the process.
- */
- void installReverseColors(const char* b) {
- installReverseColors(b, strlen(b));
- }
- /**
- * Copy 'sz' bytes from buffer 'b' into this string, reversing them
- * in the process.
- */
- void installReverse(const S2bDnaString& b) {
- resize(b.len_);
- if(b.len_ == 0) return;
- size_t wordi = 0;
- size_t bpi = 0;
- size_t wordb = b.nwords()-1;
- size_t bpb = (b.len_-1) & 15;
- cs_[0] = 0;
- for(size_t i = b.len_; i > 0; i--) {
- int bbp = (int)((b[wordb] >> (bpb << 1)) & 3);
- assert_range(0, 3, bbp);
- cs_[wordi] |= (bbp << (bpi << 1));
- if(bpi == 15) {
- wordi++;
- cs_[wordi] = 0;
- bpi = 0;
- } else bpi++;
- if(bpb == 0) {
- wordb--;
- bpi = 15;
- } else bpi--;
- }
- }
- /**
- * Return true iff the two strings are equal.
- */
- bool operator==(const S2bDnaString& o) {
- return sstr_eq(*this, o);
- }
- /**
- * Return true iff the two strings are not equal.
- */
- bool operator!=(const S2bDnaString& o) {
- return sstr_neq(*this, o);
- }
- /**
- * Return true iff this string is less than given string.
- */
- bool operator<(const S2bDnaString& o) {
- return sstr_lt(*this, o);
- }
- /**
- * Return true iff this string is greater than given string.
- */
- bool operator>(const S2bDnaString& o) {
- return sstr_gt(*this, o);
- }
- /**
- * Return true iff this string is less than or equal to given string.
- */
- bool operator<=(const S2bDnaString& o) {
- return sstr_leq(*this, o);
- }
- /**
- * Return true iff this string is greater than or equal to given string.
- */
- bool operator>=(const S2bDnaString& o) {
- return sstr_geq(*this, o);
- }
- /**
- * Reverse the 2-bit encoded DNA string in-place.
- */
- void reverse() {
- if(len_ <= 1) return;
- size_t wordf = nwords()-1;
- size_t bpf = (len_-1) & 15;
- size_t wordi = 0;
- size_t bpi = 0;
- while(wordf > wordi || (wordf == wordi && bpf > bpi)) {
- int f = (cs_[wordf] >> (bpf << 1)) & 3;
- int i = (cs_[wordi] >> (bpi << 1)) & 3;
- cs_[wordf] &= ~(uint32_t)(3 << (bpf << 1));
- cs_[wordi] &= ~(uint32_t)(3 << (bpi << 1));
- cs_[wordf] |= (uint32_t)(i << (bpf << 1));
- cs_[wordi] |= (uint32_t)(f << (bpi << 1));
- if(bpf == 0) {
- bpf = 15;
- wordf--;
- } else bpf--;
- if(bpi == 15) {
- bpi = 0;
- wordi++;
- } else bpi++;
- }
- }
-
- /**
- * Reverse a substring of the buffer in place.
- */
- void reverseWindow(size_t off, size_t len) {
- assert_leq(off, len_);
- assert_leq(off+len, len_);
- if(len <= 1) return;
- size_t wordf = (off+len-1) >> 4;
- size_t bpf = (off+len-1) & 15;
- size_t wordi = (off ) >> 4;
- size_t bpi = (off ) & 15;
- while(wordf > wordi || (wordf == wordi && bpf > bpi)) {
- int f = (cs_[wordf] >> (bpf << 1)) & 3;
- int i = (cs_[wordi] >> (bpi << 1)) & 3;
- cs_[wordf] &= ~(uint32_t)(3 << (bpf << 1));
- cs_[wordi] &= ~(uint32_t)(3 << (bpi << 1));
- cs_[wordf] |= (uint32_t)(i << (bpf << 1));
- cs_[wordi] |= (uint32_t)(f << (bpi << 1));
- if(bpf == 0) {
- bpf = 15;
- wordf--;
- } else bpf--;
- if(bpi == 15) {
- bpi = 0;
- wordi++;
- } else bpi++;
- }
- }
- /**
- * Set the first len elements of the buffer to el.
- */
- void fill(size_t len, char el) {
- assert_leq(len, len_);
- assert_range(0, 3, (int)el);
- size_t word = 0;
- if(len > 32) {
- // Copy el throughout block
- uint32_t bl = (uint32_t)el;
- bl |= (bl << 2);
- bl |= (bl << 4);
- bl |= (bl << 8);
- bl |= (bl << 16);
- // Fill with blocks
- size_t blen = len >> 4;
- for(; word < blen; word++) {
- cs_[word] = bl;
- }
- len = len & 15;
- }
- size_t bp = 0;
- for(size_t i = 0; i < len; i++) {
- cs_[word] &= ~(uint32_t)(3 << (bp << 1));
- cs_[word] |= (uint32_t)(el << (bp << 1));
- if(bp == 15) {
- word++;
- bp = 0;
- } else bp++;
- }
- }
- /**
- * Set all elements of the buffer to el.
- */
- void fill(char el) {
- fill(len_, el);
- }
- /**
- * Return the ith character in the window defined by fw, color, depth and
- * len.
- */
- char windowGetDna(
- size_t i,
- bool fw,
- bool color,
- size_t depth = 0,
- size_t len = 0) const
- {
- if(len == 0) len = len_;
- assert_lt(i, len);
- assert_leq(len, len_ - depth);
- if(fw) {
- return get(depth+i);
- } else {
- return
- color ?
- get(depth+len-i-1) :
- compDna(get(depth+len-i-1));
- }
- }
- /**
- * Fill the given DNA buffer with the substring specified by fw,
- * color, depth and len.
- */
- template<typename T>
- void windowGetDna(
- T& buf,
- bool fw,
- bool color,
- size_t depth = 0,
- size_t len = 0) const
- {
- if(len == 0) len = len_;
- assert_leq(len, len_ - depth);
- buf.resize(len);
- for(size_t i = 0; i < len; i++) {
- buf.set(
- (fw ?
- get(depth+i) :
- (color ?
- get(depth+len-i-1) :
- compDna(get(depth+len-i-1)))), i);
- }
- }
- /**
- * Return the length of the string.
- */
- inline size_t length() const { return len_; }
- /**
- * Clear the buffer.
- */
- void clear() { len_ = 0; }
- /**
- * Return true iff the buffer is empty.
- */
- inline bool empty() const { return len_ == 0; }
- /**
- * Return a const version of the raw buffer.
- */
- const uint32_t* buf() const { return cs_; }
- /**
- * Return a writeable version of the raw buffer.
- */
- uint32_t* wbuf() { return cs_; }
- /**
- * Note: the size of the string once it's stored in the print buffer is 4
- * times as large as the string as stored in compact 2-bit-per-char words.
- */
- const char* toZBuf() const {
- if(printcs_ == NULL) {
- const_cast<char*&>(printcs_) = new char[len_+1];
- }
- char *printcs = const_cast<char*>(printcs_);
- size_t word = 0, bp = 0;
- for(size_t i = 0; i < len_; i++) {
- int c = (cs_[word] >> (bp << 1)) & 3;
- printcs[i] = "ACGT"[c];
- if(bp == 15) {
- word++;
- bp = 0;
- } else bp++;
- }
- printcs[len_] = '\0';
- return printcs_;
- }
- protected:
- uint32_t *cs_; // 2-bit packed words
- char *printcs_;
- size_t len_; // # elements
- };
- /**
- * Simple string class with backing memory that automatically expands as needed.
- */
- template<typename T, int S = 1024, int M = 2>
- class SStringExpandable {
- public:
- explicit SStringExpandable() :
- cs_(NULL),
- printcs_(NULL),
- len_(0),
- sz_(0)
- { }
- explicit SStringExpandable(size_t sz) :
- cs_(NULL),
- printcs_(NULL),
- len_(0),
- sz_(0)
- {
- expandNoCopy(sz);
- }
- /**
- * Create an SStringExpandable from another SStringExpandable.
- */
- SStringExpandable(const SStringExpandable<T, S>& o) :
- cs_(NULL),
- printcs_(NULL),
- len_(0),
- sz_(0)
- {
- *this = o;
- }
- /**
- * Create an SStringExpandable from a std::basic_string of the
- * appropriate type.
- */
- explicit SStringExpandable(const std::basic_string<T>& str) :
- cs_(NULL),
- printcs_(NULL),
- len_(0),
- sz_(0)
- {
- install(str.c_str(), str.length());
- }
- /**
- * Create an SStringExpandable from an array and size.
- */
- explicit SStringExpandable(const T* b, size_t sz) :
- cs_(NULL),
- printcs_(NULL),
- len_(0),
- sz_(0)
- {
- install(b, sz);
- }
- /**
- * Create an SStringExpandable from a zero-terminated array.
- */
- explicit SStringExpandable(const T* b) :
- cs_(NULL),
- printcs_(NULL),
- len_(0),
- sz_(0)
- {
- install(b, strlen(b));
- }
- /**
- * Destroy the expandable string object.
- */
- virtual ~SStringExpandable() {
- if(cs_ != NULL) {
- delete[] cs_;
- cs_ = NULL;
- }
- if(printcs_ != NULL) {
- delete[] printcs_;
- printcs_ = NULL;
- }
- sz_ = len_ = 0;
- }
- /**
- * Return ith character from the left of either the forward or the
- * reverse-complement version of the read.
- */
- T windowGet(
- size_t i,
- bool fw,
- size_t depth = 0,
- size_t len = 0) const
- {
- if(len == 0) len = len_;
- assert_lt(i, len);
- assert_leq(len, len_ - depth);
- return fw ? cs_[depth+i] : cs_[depth+len-i-1];
- }
- /**
- * Return ith character from the left of either the forward or the
- * reverse-complement version of the read.
- */
- void windowGet(
- T& ret,
- bool fw,
- size_t depth = 0,
- size_t len = 0) const
- {
- if(len == 0) len = len_;
- assert_leq(len, len_ - depth);
- for(size_t i = 0; i < len; i++) {
- ret.append(fw ? cs_[depth+i] : cs_[depth+len-i-1]);
- }
- }
- /**
- * Assignment to other SStringFixed.
- */
- SStringExpandable<T,S>& operator=(const SStringExpandable<T,S>& o) {
- install(o.cs_, o.len_);
- return *this;
- }
- /**
- * Assignment from a std::basic_string
- */
- SStringExpandable<T,S>& operator=(const std::basic_string<T>& o) {
- install(o.c_str(), o.length());
- return *this;
- }
- /**
- * Insert char c before position 'idx'; slide subsequent chars down.
- */
- void insert(const T& c, size_t idx) {
- assert_lt(idx, len_);
- if(sz_ < len_ + 1) expandCopy((len_ + 1 + S) * M);
- len_++;
- // Move everyone down by 1
- // len_ is the *new* length
- for(size_t i = len_; i > idx+1; i--) {
- cs_[i-1] = cs_[i-2];
- }
- cs_[idx] = c;
- }
- /**
- * Set character at index 'idx' to 'c'.
- */
- void set(int c, size_t idx) {
- assert_lt(idx, len_);
- cs_[idx] = c;
- }
- /**
- * Append char c.
- */
- void append(const T& c) {
- if(sz_ < len_ + 1) expandCopy((len_ + 1 + S) * M);
- cs_[len_++] = c;
- }
- /**
- * Delete char at position 'idx'; slide subsequent chars up.
- */
- void remove(size_t idx) {
- assert_lt(idx, len_);
- assert_gt(len_, 0);
- for(size_t i = idx; i < len_-1; i++) {
- cs_[i] = cs_[i+1];
- }
- len_--;
- }
- /**
- * Retrieve constant version of element i.
- */
- const T& operator[](size_t i) const {
- assert_lt(i, len_);
- return cs_[i];
- }
- /**
- * Retrieve mutable version of element i.
- */
- T& operator[](size_t i) {
- assert_lt(i, len_);
- return cs_[i];
- }
- /**
- * Retrieve constant version of element i.
- */
- const T& get(size_t i) const {
- assert_lt(i, len_);
- return cs_[i];
- }
- /**
- * Copy 'sz' bytes from buffer 'b' into this string.
- */
- virtual void install(const T* b, size_t sz) {
- if(sz_ < sz) expandNoCopy((sz + S) * M);
- memcpy(cs_, b, sz * sizeof(T));
- len_ = sz;
- }
- /**
- * Copy all bytes from zero-terminated buffer 'b' into this string.
- */
- void install(const T* b) { install(b, strlen(b)); }
- /**
- * Copy 'sz' bytes from buffer 'b' into this string, reversing them
- * in the process.
- */
- void installReverse(const char* b, size_t sz) {
- if(sz_ < sz) expandNoCopy((sz + S) * M);
- for(size_t i = 0; i < sz; i++) {
- cs_[i] = b[sz-i-1];
- }
- len_ = sz;
- }
- /**
- * Copy 'sz' bytes from buffer 'b' into this string, reversing them
- * in the process.
- */
- void installReverse(const SStringExpandable<T, S>& b) {
- if(sz_ < b.len_) expandNoCopy((b.len_ + S) * M);
- for(size_t i = 0; i < b.len_; i++) {
- cs_[i] = b.cs_[b.len_ - i - 1];
- }
- len_ = b.len_;
- }
- /**
- * Return true iff the two strings are equal.
- */
- bool operator==(const SStringExpandable<T, S>& o) {
- return sstr_eq(*this, o);
- }
- /**
- * Return true iff the two strings are not equal.
- */
- bool operator!=(const SStringExpandable<T, S>& o) {
- return sstr_neq(*this, o);
- }
- /**
- * Return true iff this string is less than given string.
- */
- bool operator<(const SStringExpandable<T, S>& o) {
- return sstr_lt(*this, o);
- }
- /**
- * Return true iff this string is greater than given string.
- */
- bool operator>(const SStringExpandable<T, S>& o) {
- return sstr_gt(*this, o);
- }
- /**
- * Return true iff this string is less than or equal to given string.
- */
- bool operator<=(const SStringExpandable<T, S>& o) {
- return sstr_leq(*this, o);
- }
- /**
- * Return true iff this string is greater than or equal to given string.
- */
- bool operator>=(const SStringExpandable<T, S>& o) {
- return sstr_geq(*this, o);
- }
- /**
- * Reverse the buffer in place.
- */
- void reverse() {
- for(size_t i = 0; i < (len_ >> 1); i++) {
- T tmp = get(i);
- set(get(len_-i-1), i);
- set(tmp, len_-i-1);
- }
- }
- /**
- * Reverse a substring of the buffer in place.
- */
- void reverseWindow(size_t off, size_t len) {
- assert_leq(off, len_);
- assert_leq(off + len, len_);
- size_t mid = len >> 1;
- for(size_t i = 0; i < mid; i++) {
- T tmp = get(off+i);
- set(get(off+len-i-1), off+i);
- set(tmp, off+len-i-1);
- }
- }
- /**
- * Simply resize the buffer. If the buffer is resized to be
- * longer, the newly-added elements will contain garbage and should
- * be initialized immediately.
- */
- void resize(size_t len) {
- if(sz_ < len) expandCopy((len + S) * M);
- len_ = len;
- }
- /**
- * Simply resize the buffer. If the buffer is resized to be
- * longer, new elements will be initialized with 'el'.
- */
- void resize(size_t len, const T& el) {
- if(sz_ < len) expandCopy((len + S) * M);
- if(len > len_) {
- for(size_t i = len_; i < len; i++) {
- cs_[i] = el;
- }
- }
- len_ = len;
- }
- /**
- * Set the first len elements of the buffer to el.
- */
- void fill(size_t len, const T& el) {
- assert_leq(len, len_);
- for(size_t i = 0; i < len; i++) {
- cs_[i] = el;
- }
- }
- /**
- * Set all elements of the buffer to el.
- */
- void fill(const T& el) {
- fill(len_, el);
- }
- /**
- * Trim len characters from the beginning of the string.
- */
- void trimBegin(size_t len) {
- assert_leq(len, len_);
- if(len == len_) {
- len_ = 0; return;
- }
- for(size_t i = 0; i < len_-len; i++) {
- cs_[i] = cs_[i+len];
- }
- len_ -= len;
- }
- /**
- * Trim len characters from the end of the string.
- */
- void trimEnd(size_t len) {
- if(len >= len_) len_ = 0;
- else len_ -= len;
- }
- /**
- * Copy 'sz' bytes from buffer 'b' into this string.
- */
- void append(const T* b, size_t sz) {
- if(sz_ < len_ + sz) expandCopy((len_ + sz + S) * M);
- memcpy(cs_ + len_, b, sz * sizeof(T));
- len_ += sz;
- }
- /**
- * Copy bytes from zero-terminated buffer 'b' into this string.
- */
- void append(const T* b) {
- append(b, strlen(b));
- }
- /**
- * Return the length of the string.
- */
- size_t length() const { return len_; }
- /**
- * Clear the buffer.
- */
- void clear() { len_ = 0; }
- /**
- * Return true iff the buffer is empty.
- */
- bool empty() const { return len_ == 0; }
- /**
- * Put a terminator in the 'len_'th element and then return a
- * pointer to the buffer. Useful for printing.
- */
- const char* toZBufXForm(const char *xform) const {
- ASSERT_ONLY(size_t xformElts = strlen(xform));
- if(empty()) {
- const_cast<char&>(zero_) = 0;
- return &zero_;
- }
- char* printcs = const_cast<char*>(printcs_);
- // Lazily allocate space for print buffer
- for(size_t i = 0; i < len_; i++) {
- assert_lt(cs_[i], (int)xformElts);
- printcs[i] = xform[(int)cs_[i]];
- }
- printcs[len_] = 0;
- return printcs_;
- }
- /**
- * Put a terminator in the 'len_'th element and then return a
- * pointer to the buffer. Useful for printing.
- */
- virtual const T* toZBuf() const {
- if(empty()) {
- const_cast<T&>(zeroT_) = 0;
- return &zeroT_;
- }
- assert_leq(len_, sz_);
- const_cast<T*>(cs_)[len_] = 0;
- return cs_;
- }
- /**
- * Return true iff this DNA string matches the given nucleotide
- * character string.
- */
- bool eq(const char *str) const {
- const char *self = toZBuf();
- return strcmp(str, self) == 0;
- }
- /**
- * Return a const version of the raw buffer.
- */
- const T* buf() const { return cs_; }
- /**
- * Return a writeable version of the raw buffer.
- */
- T* wbuf() { return cs_; }
- protected:
- /**
- * Allocate new, bigger buffer and copy old contents into it. If
- * requested size can be accommodated by current buffer, do nothing.
- */
- void expandCopy(size_t sz) {
- if(sz_ >= sz) return; // done!
- T *tmp = new T[sz + 1];
- char *ptmp = new char[sz + 1];
- if(cs_ != NULL) {
- memcpy(tmp, cs_, sizeof(T)*len_);
- delete[] cs_;
- }
- if(printcs_ != NULL) {
- memcpy(ptmp, printcs_, sizeof(char)*len_);
- delete[] printcs_;
- }
- cs_ = tmp;
- printcs_ = ptmp;
- sz_ = sz;
- }
- /**
- * Allocate new, bigger buffer. If requested size can be
- * accommodated by current buffer, do nothing.
- */
- void expandNoCopy(size_t sz) {
- if(sz_ >= sz) return; // done!
- if(cs_ != NULL) delete[] cs_;
- if(printcs_ != NULL) delete[] printcs_;
- cs_ = new T[sz + 1];
- printcs_ = new char[sz + 1];
- sz_ = sz;
- }
- T *cs_; // +1 so that we have the option of dropping in a terminating "\0"
- char *printcs_; // +1 so that we have the option of dropping in a terminating "\0"
- char zero_; // 0 terminator for empty string
- T zeroT_; // 0 terminator for empty string
- size_t len_; // # filled-in elements
- size_t sz_; // size capacity of cs_
- };
- /**
- * Simple string class with in-object storage.
- *
- * All copies induced by, e.g., operator=, the copy constructor,
- * install() and append(), are shallow (using memcpy/sizeof). If deep
- * copies are needed, use a different class.
- *
- * Reading from an uninitialized element results in an assert as long
- * as NDEBUG is not defined. If NDEBUG is defined, the result is
- * undefined.
- */
- template<typename T, int S>
- class SStringFixed {
- public:
- explicit SStringFixed() : len_(0) { }
- /**
- * Create an SStringFixed from another SStringFixed.
- */
- SStringFixed(const SStringFixed<T, S>& o) {
- *this = o;
- }
- /**
- * Create an SStringFixed from another SStringFixed.
- */
- explicit SStringFixed(const std::basic_string<T>& str) {
- install(str.c_str(), str.length());
- }
- /**
- * Create an SStringFixed from an array and size.
- */
- explicit SStringFixed(const T* b, size_t sz) {
- install(b, sz);
- }
- /**
- * Create an SStringFixed from a zero-terminated string.
- */
- explicit SStringFixed(const T* b) {
- install(b, strlen(b));
- }
- virtual ~SStringFixed() { } // C++ needs this
- /**
- * Retrieve constant version of element i.
- */
- inline const T& operator[](size_t i) const {
- return get(i);
- }
- /**
- * Retrieve mutable version of element i.
- */
- inline T& operator[](size_t i) {
- return get(i);
- }
- /**
- * Retrieve constant version of element i.
- */
- inline const T& get(size_t i) const {
- assert_lt(i, len_);
- return cs_[i];
- }
- /**
- * Retrieve mutable version of element i.
- */
- inline T& get(size_t i) {
- assert_lt(i, len_);
- return cs_[i];
- }
- /**
- * Return ith character from the left of either the forward or the
- * reverse-complement version of the read.
- */
- T windowGet(
- size_t i,
- bool fw,
- size_t depth = 0,
- size_t len = 0) const
- {
- if(len == 0) len = len_;
- assert_lt(i, len);
- assert_leq(len, len_ - depth);
- return fw ? cs_[depth+i] : cs_[depth+len-i-1];
- }
- /**
- * Return ith character from the left of either the forward or the
- * reverse-complement version of the read.
- */
- void windowGet(
- T& ret,
- bool fw,
- size_t depth = 0,
- size_t len = 0) const
- {
- if(len == 0) len = len_;
- assert_leq(len, len_ - depth);
- for(size_t i = 0; i < len; i++) {
- ret.append(fw ? cs_[depth+i] : cs_[depth+len-i-1]);
- }
- }
- /**
- * Assignment to other SStringFixed.
- */
- SStringFixed<T,S>& operator=(const SStringFixed<T,S>& o) {
- install(o.cs_, o.len_);
- return *this;
- }
- /**
- * Assignment from a std::basic_string
- */
- SStringFixed<T,S>& operator=(const std::basic_string<T>& o) {
- install(o);
- return *this;
- }
- /**
- * Insert char c before position 'idx'; slide subsequent chars down.
- */
- void insert(const T& c, size_t idx) {
- assert_lt(len_, S);
- assert_lt(idx, len_);
- // Move everyone down by 1
- for(int i = len_; i > idx; i--) {
- cs_[i] = cs_[i-1];
- }
- cs_[idx] = c;
- len_++;
- }
- /**
- * Set character at index 'idx' to 'c'.
- */
- void set(int c, size_t idx) {
- assert_lt(idx, len_);
- cs_[idx] = c;
- }
- /**
- * Append char c.
- */
- void append(const T& c) {
- assert_lt(len_, S);
- cs_[len_++] = c;
- }
- /**
- * Delete char at position 'idx'; slide subsequent chars up.
- */
- void remove(size_t idx) {
- assert_lt(idx, len_);
- assert_gt(len_, 0);
- for(size_t i = idx; i < len_-1; i++) {
- cs_[i] = cs_…
Large files files are truncated, but you can click here to view the full file