DIAlign
affinealignobj.h
1 #ifndef AFFINEALIGNOBJ_H
2 #define AFFINEALIGNOBJ_H
3 
4 #include <iostream>
5 #include <cstring>
6 #include <vector>
7 #include <algorithm>
8 
14 namespace DIAlign
15 {
16 
/// Enumerations and helper declarations used for traceback bookkeeping.
namespace Traceback
{
  /// All combinations of cumulative score-matrix name and arrow directions.
  enum TracebackType
  {
    SS = 0,
    DM = 1,
    DA = 2,
    DB = 3,
    TM = 4,
    TA = 5,
    TB = 6,
    LM = 7,
    LA = 8,
    LB = 9
  };

  /// Cumulative score-matrix names.
  enum tbJump
  {
    M = 0,
    A = 1,
    B = 2
  };

  /// Overloads << to display a TracebackType (defined out of line).
  std::ostream& operator<<(std::ostream& out, const TracebackType value);

  /// Converts TracebackType enumerators to characters (defined out of line).
  std::vector<char> EnumToChar(std::vector<TracebackType> v);
}
35 
45 {
46 private:
47  int signalA_capacity;
48  int signalB_capacity;
49 
50 public:
51  double* s_data;
52  double* M;
53  double* A;
54  double* B;
56  bool* Path;
57  bool* simPath;
58  // s_data, M, A and B should be private. Now there is a possibility of memory-leak.
59  // TODO Make above variables private.
62  double GapOpen;
63  double GapExten;
64  bool FreeEndGaps;
65  std::vector<int> indexA_aligned;
66  std::vector<int> indexB_aligned;
67  std::vector<double> score;
68  int nGaps;
69 
78  // Not a default constructor
79  AffineAlignObj(int ROW_SIZE, int COL_SIZE, bool clearMemory = true)
80  {
81  allocateMemory_(ROW_SIZE, COL_SIZE);
82 
83  // clearMemory means having default zero values.
84  // We could use a for-loop but memset is faster for contiguous location in memory.
85  // It makes byte value = unsigned(int_0)
86  if (clearMemory)
87  {
88  std::memset(s_data, 0, (ROW_SIZE -1) * (COL_SIZE-1) * sizeof(double));
89  std::memset(M, 0, ROW_SIZE * COL_SIZE * sizeof(double));
90  std::memset(A, 0, ROW_SIZE * COL_SIZE * sizeof(double));
91  std::memset(B, 0, ROW_SIZE * COL_SIZE * sizeof(double));
92  std::memset(Traceback, Traceback::SS, 3 * ROW_SIZE * COL_SIZE * sizeof(Traceback::TracebackType));
93  std::memset(Path, 0, ROW_SIZE * COL_SIZE * sizeof(bool));
94  std::memset(simPath, 0, ROW_SIZE * COL_SIZE * sizeof(bool));
95  }
96 
97  signalA_len = ROW_SIZE-1;
98  signalB_len = COL_SIZE-1;
99  GapOpen = 0.0;
100  GapExten = 0.0;
101  FreeEndGaps = true;
102  nGaps = 0;
103 
104  signalA_capacity = ROW_SIZE-1;
105  signalB_capacity = COL_SIZE-1;
106  }
107 
109  void reset(int ROW_SIZE, int COL_SIZE)
110  {
111  if (ROW_SIZE -1 > signalA_capacity || COL_SIZE -1 > signalB_capacity)
112  {
113  std::cout << "Error: cannot reset an object beyond capacity" << std::endl;
114  std::cout << ROW_SIZE << " vs " << signalA_capacity << std::endl;
115  throw 1;
116  }
117 
118  // resetting all values to zero.
119  // We could use a for-loop but memset is faster for contiguous location in memory.
120  std::memset(s_data, 0, (ROW_SIZE -1) * (COL_SIZE-1) * sizeof(double));
121  std::memset(M, 0, ROW_SIZE * COL_SIZE * sizeof(double));
122  std::memset(A, 0, ROW_SIZE * COL_SIZE * sizeof(double));
123  std::memset(B, 0, ROW_SIZE * COL_SIZE * sizeof(double));
124  std::memset(Traceback, Traceback::SS, 3 * ROW_SIZE * COL_SIZE * sizeof(Traceback::TracebackType));
125  std::memset(Path, 0, ROW_SIZE * COL_SIZE * sizeof(bool));
126  std::memset(simPath, 0, ROW_SIZE * COL_SIZE * sizeof(bool));
127 
128  signalA_len = ROW_SIZE-1;
129  signalB_len = COL_SIZE-1;
130  GapOpen = 0.0;
131  GapExten = 0.0;
132  FreeEndGaps = true;
133  indexA_aligned.clear();
134  indexB_aligned.clear();
135  score.clear();
136  nGaps = 0;
137  }
138 
143  {
144  freeMemory_();
145  signalA_len = rhs.signalA_len;
146  signalA_capacity = rhs.signalA_capacity;
147  signalB_len = rhs.signalB_len;
148  signalB_capacity = rhs.signalB_capacity;
149 
150  GapOpen = rhs.GapOpen;
151  GapExten = rhs.GapExten;
152  FreeEndGaps = rhs.FreeEndGaps;
153  indexA_aligned = rhs.indexA_aligned;
154  indexB_aligned = rhs.indexB_aligned;
155  score = rhs.score;
156  nGaps = rhs.nGaps;
157 
158  int ROW_SIZE = rhs.signalA_len + 1;
159  int COL_SIZE = rhs.signalB_len + 1;
160 
161  allocateMemory_(ROW_SIZE, COL_SIZE);
162  copyData_(rhs, ROW_SIZE, COL_SIZE);
163  return *this;
164  }
165 
170  {
171  freeMemory_();
172 
173  signalA_len = rhs.signalA_len;
174  signalA_capacity = rhs.signalA_capacity;
175  signalB_len = rhs.signalB_len;
176  signalB_capacity = rhs.signalB_capacity;
177 
178  GapOpen = rhs.GapOpen;
179  GapExten = rhs.GapExten;
180  FreeEndGaps = rhs.FreeEndGaps;
181  indexA_aligned = rhs.indexA_aligned;
182  indexB_aligned = rhs.indexB_aligned;
183  score = rhs.score;
184  nGaps = rhs.nGaps;
185 
186  int ROW_SIZE = rhs.signalA_len + 1;
187  int COL_SIZE = rhs.signalB_len + 1;
188 
189  allocateMemory_(ROW_SIZE, COL_SIZE);
190  copyData_(rhs, ROW_SIZE, COL_SIZE);
191  }
192 
195  {
196  freeMemory_();
197  }
198 
199 private:
200 
201  // Should be deleted?
202  AffineAlignObj() {}
203 
205  void freeMemory_()
206  {
207  delete[] s_data;
208  delete[] M;
209  delete[] A;
210  delete[] B;
211  delete[] Traceback;
212  delete[] Path;
213  delete[] simPath;
214  }
215 
217  void allocateMemory_(int ROW_SIZE, int COL_SIZE)
218  {
219  // new allocate memory in heap. Here we just keep the memory address in our object's member.
220  // Memory will remain valid outside of this constructor's scope.
221  // Therefore, we need to explicitly free it.
222  s_data = new double[(ROW_SIZE -1) * (COL_SIZE-1)];
223  M = new double[ROW_SIZE * COL_SIZE];
224  A = new double[ROW_SIZE * COL_SIZE];
225  B = new double[ROW_SIZE * COL_SIZE];
226  Traceback = new Traceback::TracebackType[3* ROW_SIZE * COL_SIZE];
227  Path = new bool[ROW_SIZE * COL_SIZE];
228  simPath = new bool[ROW_SIZE * COL_SIZE];
229  }
230 
232  void copyData_(const AffineAlignObj& rhs, int ROW_SIZE, int COL_SIZE)
233  {
234  std::memcpy(s_data, rhs.s_data, (ROW_SIZE -1) * (COL_SIZE-1) * sizeof(double));
235  std::memcpy(M, rhs.M, ROW_SIZE * COL_SIZE * sizeof(double));
236  std::memcpy(A, rhs.A, ROW_SIZE * COL_SIZE * sizeof(double));
237  std::memcpy(B, rhs.B, ROW_SIZE * COL_SIZE * sizeof(double));
238  std::memcpy(Traceback, rhs.Traceback, 3 *ROW_SIZE * COL_SIZE * sizeof(Traceback::TracebackType));
239  std::memcpy(Path, rhs.Path, ROW_SIZE * COL_SIZE * sizeof(bool));
240  std::memcpy(simPath, rhs.simPath, ROW_SIZE * COL_SIZE * sizeof(bool));
241  }
242 };
243 } // namespace DIAlign
244 
245 #endif // AFFINEALIGNOBJ_H
246 
247 // Brackets ([]) specify the index of an element of the array. In fact, these brackets are a dereferencing operator known as the offset operator.
248 // We use brackets to work with STL containers. In the case of arrays/pointers, they work in the same way.
249 // double* M;
250 // M[i] == *(M+i);
bool FreeEndGaps
True for Overlap alignment.
Definition: affinealignobj.h:64
bool * Path
Path matrix would represent alignment path through similarity matrix as binary-hot encoding...
Definition: affinealignobj.h:56
tbJump
cumulative score-matrix names
Definition: affinealignobj.h:27
An affine alignment object.
Definition: affinealignobj.h:44
double * s_data
similarity score matrix.
Definition: affinealignobj.h:51
bool * simPath
Not needed, will be removed.
Definition: affinealignobj.h:57
double * A
Insert in sequence A, residue in A is aligned to gap in B. A(i,j) is the best score given that Ai is ...
Definition: affinealignobj.h:53
void reset(int ROW_SIZE, int COL_SIZE)
Reset object to initial state (without allocating new memory)
Definition: affinealignobj.h:109
Generic namespace for all classes and functions of DIAlign.
Definition: affinealignment.cpp:29
AffineAlignObj(int ROW_SIZE, int COL_SIZE, bool clearMemory=true)
Constructor for AffineAlignObj.
Definition: affinealignobj.h:79
int signalB_len
Number of data-points in signal B.
Definition: affinealignobj.h:61
double * M
Match or Mismatch matrix, residues of A and B are aligned without a gap. M(i,j) = Best score up to (i...
Definition: affinealignobj.h:52
std::ostream & operator<<(std::ostream &out, const TracebackType value)
This function overloads << to display TracebackType.
Definition: affinealignobj.cpp:10
std::vector< double > score
Cumulative score along the aligned path.
Definition: affinealignobj.h:67
int signalA_len
Number of data-points in signal A.
Definition: affinealignobj.h:60
Traceback::TracebackType * Traceback
Traceback matrices store source matrix name and direction as matrices are filled with dynamic program...
Definition: affinealignobj.h:55
double GapExten
Penalty for Gap extension. For n consecutive gaps: Penalty = GapOpen + (n-1)*GapExten.
Definition: affinealignobj.h:63
std::vector< int > indexB_aligned
Aligned signalB indices after affine alignment.
Definition: affinealignobj.h:66
AffineAlignObj & operator=(const AffineAlignObj &rhs)
Overloading copy assignment operator.
Definition: affinealignobj.h:142
std::vector< char > EnumToChar(std::vector< TracebackType > v)
This function converts TracebackType Enum to characters.
Definition: affinealignobj.cpp:30
TracebackType
All combinations of cumulative score-matrix name and arrow directions.
Definition: affinealignobj.h:24
std::vector< int > indexA_aligned
Aligned signalA indices after affine alignment.
Definition: affinealignobj.h:65
~AffineAlignObj()
Destructor: frees memory.
Definition: affinealignobj.h:194
AffineAlignObj(const AffineAlignObj &rhs)
Copy constructor.
Definition: affinealignobj.h:169
double GapOpen
Penalty for Gap opening. For n consecutive gaps: Penalty = GapOpen + (n-1)*GapExten.
Definition: affinealignobj.h:62
int nGaps
Total number of gaps in the alignment path.
Definition: affinealignobj.h:68
double * B
Insert in sequence B, residue in B is aligned to gap in A. B(i,j) is the best score given that Bj is ...
Definition: affinealignobj.h:54