Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
SuffixArraySeqan.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2015.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Clemens Groepl,Andreas Bertsch$
32 // $Authors: Chris Bauer $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_DATASTRUCTURES_SUFFIXARRAYSEQAN_H
36 #define OPENMS_DATASTRUCTURES_SUFFIXARRAYSEQAN_H
37 
42 
43 #include <OpenMS/CONCEPT/Types.h>
44 #include <OpenMS/OpenMSConfig.h>
45 
46 #include <functional>
47 #include <vector>
48 
49 namespace OpenMS
50 {
55  public std::binary_function<double, double, bool>
56  {
61  explicit FloatsWithTolLess(const double& t) :
62  tol_(t) {}
67  tol_(rhs.tol_) {}
68 
75  bool operator()(double f1, double f2) const
76  {
77  return f1 < (f2 - tol_);
78  }
79 
80 protected:
81  double const& tol_;
82  };
83 
88  struct IntsInRangeLess :
89  public std::binary_function<double, double, bool>
90  {
95  IntsInRangeLess(const int& s, const int& e) :
96  start_(s), end_(e) {}
101  start_(source.start_), end_(source.end_) {}
102 
109  bool operator()(int f1, int f2) const
110  {
111  //cout<<"f1:"<<f1<<" f2:"<<f2<<" start:"<<start_<< " end:" << end_<<endl;
112  return (f2 == end_) ? f1 <= f2 - start_ : f1 < f2;
113  }
114 
115 protected:
116  int const& start_;
117  int const& end_;
118  };
119 
120 
128  class OPENMS_DLLAPI SuffixArraySeqan :
129  public SuffixArray
130  , public WeightWrapper
131  {
132 
133  typedef seqan::TopDown<seqan::ParentLinks<> > TIterSpec;
134  typedef seqan::Index<seqan::String<char>, seqan::IndexEsa<TIterSpec> > TIndex;
135  typedef seqan::Iter<TIndex, seqan::VSTree<TIterSpec> > TIter;
136 
137  // TODO ??? was: typedef seqan::Index<seqan::String<char>, seqan::Index_ESA<seqan::TopDown<seqan::ParentLinks<seqan::Preorder> > > > TIndex;
138  // @deprecated Deprecated in OpenMS 2.0 - only used by SuffixArrayPeptideFinder - used by PILISIdentification (experimental TOPP tool)
139 public:
140 
149  SuffixArraySeqan(const String& st, const String& filename, const WeightWrapper::WEIGHTMODE weight_mode = WeightWrapper::MONO);
150 
154  SuffixArraySeqan(const SuffixArraySeqan& source);
155 
159  virtual ~SuffixArraySeqan();
160 
164  String toString();
165 
178  void findSpec(std::vector<std::vector<std::pair<std::pair<SignedSize, SignedSize>, double> > >& candidates, const std::vector<double>& spec);
179 
186  bool save(const String& filename);
187 
195  bool open(const String& filename);
196 
202  void setTolerance(double t);
203 
208  double getTolerance() const;
209 
216  bool isDigestingEnd(const char aa1, const char aa2) const;
217 
223  void setTags(const std::vector<OpenMS::String>& tags);
224 
229  const std::vector<OpenMS::String>& getTags();
230 
235  void setUseTags(bool use_tags);
236 
241  bool getUseTags();
242 
247  void setNumberOfModifications(Size number_of_mods);
248 
253  Size getNumberOfModifications();
254 
255  void printStatistic();
256 
257 protected:
258 
271  inline void goNextSubTree_(TIter& it, double& m, std::stack<double>& allm, std::stack<std::map<double, SignedSize> >& mod_map)
272  {
273  // preorder dfs
274  if (!goRight(it))
275  {
276  while (true)
277  {
278  if (goUp(it))
279  {
280  m -= allm.top();
281  allm.pop();
282  mod_map.pop();
283  }
284  else
285  {
286  break;
287  }
288 
289  if (goRight(it))
290  {
291  m -= allm.top();
292  allm.pop();
293  mod_map.pop();
294  break;
295  }
296  }
297  }
298  else
299  {
300  m -= allm.top();
301  allm.pop();
302  mod_map.pop();
303  }
304  if (isRoot(it))
305  {
306  clear(it);
307  }
308  }
309 
315  inline void goNextSubTree_(TIter& it)
316  {
317  // preorder dfs
318  if (!goRight(it))
319  {
320  while (true)
321  {
322  if (!goUp(it))
323  {
324  break;
325  }
326  if (goRight(it))
327  {
328  break;
329  }
330  }
331  }
332  if (isRoot(it))
333  {
334  clear(it);
335  }
336  }
337 
350  inline void goNext_(TIter& it, double& m, std::stack<double>& allm, std::stack<std::map<double, SignedSize> >& mod_map)
351  {
352  // preorder dfs
353  if (!goDown(it))
354  {
355  goNextSubTree_(it, m, allm, mod_map);
356  }
357  }
358 
359  inline void parseTree_(TIter& it, std::vector<std::pair<SignedSize, SignedSize> >& out_number, std::vector<std::pair<SignedSize, SignedSize> >& edge_length, std::vector<SignedSize>& leafe_depth)
360  {
361  SignedSize depth = 1;
362  while (!atEnd(it))
363  {
364  SignedSize le = 0;
365  bool isLeaf = false;
366  if (length(parentEdgeLabel(it)) > 0)
367  {
368  if (countChildren(it) > 0)
369  {
370  edge_length.push_back(std::pair<SignedSize, SignedSize>(depth, length(parentEdgeLabel(it))));
371  }
372  else
373  {
374  //le <- length(representative(it));
375  //isLeaf = true;
376  }
377  }
378  if (countChildren(it) > 0)
379  {
380  out_number.push_back(std::pair<SignedSize, SignedSize>(depth, countChildren(it)));
381  }
382  else
383  {
384  leafe_depth.push_back(depth);
385  }
386  if (goDown(it))
387  {
388  depth++;
389  }
390  else if (!goRight(it))
391  {
392  while (!goRight(it))
393  {
394  goUp(it);
395  if (isLeaf)
396  {
397  edge_length.push_back(std::pair<SignedSize, SignedSize>(depth, le - length(parentEdgeLabel(it))));
398  isLeaf = false;
399  }
400  depth--;
401  if (isRoot(it)) return;
402  }
403  }
404  else
405  {
406  }
407  }
408  }
409 
410  TIndex index_;
411 
412  TIter it_;
413 
421  SignedSize findFirst_(const std::vector<double>& spec, double& m);
422 
434  SignedSize findFirst_(const std::vector<double>& spec, double& m, SignedSize start, SignedSize end);
435 
436  const String& s_;
437 
438  double masse_[255];
439 
441 
442  std::vector<String> tags_;
443 
444  bool use_tags_;
445 
446  double tol_;
447  };
448 }
449 
450 #endif //OPENMS_DATASTRUCTURES_SUFFIXARRAYSEQAN_H
void goNext_(TIter &it, double &m, std::stack< double > &allm, std::stack< std::map< double, SignedSize > > &mod_map)
overwriting goNext from seqan index_esa_stree.h for mass update during suffix array traversal ...
Definition: SuffixArraySeqan.h:350
A more convenient string class.
Definition: String.h:57
Class that uses SEQAN library for a suffix array. It can be used to find peptide Candidates for a MS ...
Definition: SuffixArraySeqan.h:128
void goNextSubTree_(TIter &it, double &m, std::stack< double > &allm, std::stack< std::map< double, SignedSize > > &mod_map)
overwriting goNextSubTree_ from seqan index_esa_stree.h for mass update during suffix array traversal...
Definition: SuffixArraySeqan.h:271
bool use_tags_
if tags are used
Definition: SuffixArraySeqan.h:444
const String & s_
reference to strings for which the suffix array is build
Definition: SuffixArraySeqan.h:436
seqan::Index< seqan::String< char >, seqan::IndexEsa< TIterSpec > > TIndex
Definition: SuffixArraySeqan.h:134
comparator for two doubles with a tolerance value
Definition: SuffixArraySeqan.h:88
double tol_
tolerance
Definition: SuffixArraySeqan.h:446
seqan::Iter< TIndex, seqan::VSTree< TIterSpec > > TIter
Definition: SuffixArraySeqan.h:135
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:128
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
FloatsWithTolLess(const double &t)
constructor
Definition: SuffixArraySeqan.h:61
int const & start_
start index
Definition: SuffixArraySeqan.h:116
seqan::TopDown< seqan::ParentLinks<> > TIterSpec
Definition: SuffixArraySeqan.h:133
IntsInRangeLess(const int &s, const int &e)
constructor
Definition: SuffixArraySeqan.h:95
bool operator()(double f1, double f2) const
implementation of the '<' operator for two doubles with the tolerance value
Definition: SuffixArraySeqan.h:75
SignedSize number_of_modifications_
number of allowed modifications
Definition: SuffixArraySeqan.h:440
void goNextSubTree_(TIter &it)
goes to the next sub tree
Definition: SuffixArraySeqan.h:315
int const & end_
end index
Definition: SuffixArraySeqan.h:117
WEIGHTMODE
Definition: WeightWrapper.h:55
double const & tol_
tolerance value
Definition: SuffixArraySeqan.h:81
Definition: WeightWrapper.h:55
IntsInRangeLess(const IntsInRangeLess &source)
copy constructor
Definition: SuffixArraySeqan.h:100
comparator for two doubles with a tolerance value
Definition: SuffixArraySeqan.h:54
std::vector< String > tags_
all tags
Definition: SuffixArraySeqan.h:442
void parseTree_(TIter &it, std::vector< std::pair< SignedSize, SignedSize > > &out_number, std::vector< std::pair< SignedSize, SignedSize > > &edge_length, std::vector< SignedSize > &leafe_depth)
Definition: SuffixArraySeqan.h:359
TIndex index_
seqan suffix array
Definition: SuffixArraySeqan.h:410
Encapsulated weight queries to simplify mono vs average weight computation.
Definition: WeightWrapper.h:50
FloatsWithTolLess(const FloatsWithTolLess &rhs)
copy constructor
Definition: SuffixArraySeqan.h:66
bool operator()(int f1, int f2) const
implementation of the '<' operator for two doubles with the tolerance value
Definition: SuffixArraySeqan.h:109
String toString(T i)
toString functions (single argument)
Definition: StringUtils.h:68
TIter it_
seqan suffix array iterator
Definition: SuffixArraySeqan.h:412
abstract class for suffix array
Definition: SuffixArray.h:50

OpenMS / TOPP release 2.0.0 Documentation generated on Fri May 29 2015 17:20:31 using doxygen 1.8.9.1