Quantum Gate Decomposer  v1.3
Powerful decomposition of almost any unitary into U3 and CNOT gates
dot.cpp
Go to the documentation of this file.
1 #include "qgd/dot.h"
2 #include <cstring>
3 #include <iostream>
4 #include "tbb/tbb.h"
5 #include <tbb/scalable_allocator.h>
6 
7 
8  // number of rows in matrix A, for which serialized multiplication is applied instead of parallel one
9 #define SERIAL_CUTOFF 16
10 
11 //tbb::spin_mutex my_mutex;
12 
19 Matrix
20 dot( Matrix &A, Matrix &B ) {
21 
22 #if BLAS==1
23  int NumThreads = mkl_get_max_threads();
24  MKL_Set_Num_Threads(1);
25 #elif BLAS==2
26  int NumThreads = openblas_get_num_threads();
27  openblas_set_num_threads(1);
28 #endif
29 
30 
31  // check the Matrix shapes in DEBUG mode
32  assert( check_matrices( A, B ) );
33 
34 
35 #if BLAS==1 // MKL does not support option CblasConjNoTrans so the conjugation of the matrices are done in prior.
36  if ( B.is_conjugated() && !B.is_transposed() ) {
37  Matrix tmp = Matrix( B.rows, B.cols );
38  vzConj( B.cols*B.rows, B.get_data(), tmp.get_data() );
39  B = tmp;
40  }
41 
42  if ( A.is_conjugated() && !A.is_transposed() ) {
43  Matrix tmp = Matrix( A.rows, A.cols );
44  vzConj( A.cols*A.rows, A.get_data(), tmp.get_data() );
45  A = tmp;
46  }
47 #endif
48 
49 
50  // Preparing the output Matrix
51  Matrix C;
52  if ( A.is_transposed() ){
53  if ( B.is_transposed() ) {
54  C = Matrix(A.cols, B.rows);
55  }
56  else {
57  C = Matrix(A.cols, B.cols);
58  }
59  }
60  else {
61  if ( B.is_transposed() ) {
62  C = Matrix(A.rows, B.rows);
63  }
64  else {
65  C = Matrix(A.rows, B.cols);
66  }
67  }
68 
69  // Calculating the matrix product
70  if ( A.rows <= SERIAL_CUTOFF && B.cols <= SERIAL_CUTOFF ) {
71  // creating the serial task object
72  zgemm3m_Task_serial calc_task = zgemm3m_Task_serial( A, B, C );
73  calc_task.zgemm3m_chunk();
74  }
75  else {
76  // creating the task object
77  zgemm3m_Task& calc_task = *new(tbb::task::allocate_root()) zgemm3m_Task( A, B, C );
78 
79  // starting parallel calculations
80  tbb::task::spawn_root_and_wait(calc_task);
81  }
82 
83 #if BLAS==1 //MKL
84  MKL_Set_Num_Threads(NumThreads);
85 #elif BLAS==2 //OpenBLAS
86  openblas_set_num_threads(NumThreads);
87 #endif
88 
89  return C;
90 
91 
92 }
93 
94 
95 
102 bool
104 
105 
106  if (!A.is_transposed() & !B.is_transposed()) {
107  if ( A.cols != B.rows ) {
108  std::cout << "pic::dot:: Cols of matrix A does not match rows of matrix B!" << std::endl;
109  return false;
110  }
111  }
112  else if ( A.is_transposed() & !B.is_transposed() ) {
113  if ( A.rows != B.rows ) {
114  std::cout << "pic::dot:: Cols of matrix A.transpose does not match rows of matrix B!" << std::endl;
115  return false;
116  }
117  }
118  else if ( A.is_transposed() & B.is_transposed() ) {
119  if ( A.rows != B.cols ) {
120  std::cout << "pic::dot:: Cols of matrix A.transpose does not match rows of matrix B.transpose!" << std::endl;
121  return false;
122  }
123  }
124  else if ( !A.is_transposed() & B.is_transposed() ) {
125  if ( A.cols != B.cols ) {
126  std::cout << "pic::dot:: Cols of matrix A does not match rows of matrix B.transpose!" << std::endl;
127  return false;
128  }
129  }
130 
131 
132  // check the pointer of the matrices
133  if ( A.get_data() == NULL ) {
134  std::cout << "pic::dot:: No preallocated data in matrix A!" << std::endl;
135  return false;
136  }
137  if ( B.get_data() == NULL ) {
138  std::cout << "pic::dot:: No preallocated data in matrix B!" << std::endl;
139  return false;
140  }
141 
142  return true;
143 
144 }
145 
146 
152 void
153 get_cblas_transpose( Matrix &A, CBLAS_TRANSPOSE &transpose ) {
154 
155  if ( A.is_conjugated() & A.is_transposed() ) {
156  transpose = CblasConjTrans;
157  }
158  else if ( A.is_conjugated() & !A.is_transposed() ) {
159  transpose = CblasConjNoTrans; // not present in MKL
160  }
161  else if ( !A.is_conjugated() & A.is_transposed() ) {
162  transpose = CblasTrans;
163  }
164  else {
165  transpose = CblasNoTrans;
166  }
167 
168 }
169 
170 
171 
172 
173 
174 
180 }
181 
182 
187 tbb::task*
189  return nullptr;
190 }
191 
192 
193 
194 
202 
203  A = A_in;
204  B = B_in;
205  C = C_in;
206 
207 
208  order = CblasRowMajor;
209 
210  rows.Arows_start = 0;
211  rows.Arows_end = A.rows;
212  rows.Arows = A.rows;
213  rows.Brows_start = 0;
214  rows.Brows_end = B.rows;
215  rows.Brows = B.rows;
216  rows.Crows_start = 0;
217  rows.Crows_end = C.rows;
218  rows.Crows = C.rows;
219 
220  cols.Acols_start = 0;
221  cols.Acols_end = A.cols;
222  cols.Acols = A.cols;
223  cols.Bcols_start = 0;
224  cols.Bcols_end = B.cols;
225  cols.Bcols = B.cols;
226  cols.Ccols_start = 0;
227  cols.Ccols_end = C.cols;
228  cols.Ccols = C.cols;
229 
230 }
231 
232 
242 
243  A = A_in;
244  B = B_in;
245  C = C_in;
246 
247  rows = rows_in;
248  cols = cols_in;
249 
250 }
251 
252 
253 
257 void
259 
260  // setting CBLAS transpose operations
261  CBLAS_TRANSPOSE Atranspose, Btranspose;
262  get_cblas_transpose( A, Atranspose );
263  get_cblas_transpose( B, Btranspose );
264 
268 
269 
270  // zgemm parameters
271  int m,n,k,lda,ldb,ldc;
272 
273  if ( A.is_transposed() ) {
274  m = cols.Acols;
275  k = rows.Arows;
276  lda = A.cols;
277  }
278  else {
279  m = rows.Arows;
280  k = cols.Acols;
281  lda = A.cols;
282  }
283 
284 
285 
286  if (B.is_transposed()) {
287  n = rows.Brows;
288  ldb = B.cols;
289  }
290  else {
291  n = cols.Bcols;
292  ldb = B.cols;
293  }
294 
295  ldc = C.cols;
296 
297  // parameters alpha and beta for the cblas_zgemm3m function (the input matrices are not scaled)
298  QGD_Complex16 alpha;
299  alpha.real = 1;
300  alpha.imag = 0;
301  QGD_Complex16 beta;
302  beta.real = 0;
303  beta.imag = 0;
304 
305 
306 
307  cblas_zgemm3m(CblasRowMajor, Atranspose, Btranspose, m, n, k, (double*)&alpha, (double*)A_zgemm_data, lda, (double*)B_zgemm_data, ldb, (double*)&beta, (double*)C_zgemm_data, ldc);
308 /*
309  #ifdef CBLAS
310  cblas_zgemm3m (CblasRowMajor, CblasNoTrans, CblasNoTrans, matrix_size, matrix_size, matrix_size, &alpha, A, matrix_size, B, matrix_size, &beta, C, matrix_size);
311 #else
312  cblas_zgemm (CblasRowMajor, CblasNoTrans, CblasNoTrans, matrix_size, matrix_size, matrix_size, &alpha, A, matrix_size, B, matrix_size, &beta, C, matrix_size);
313 #endif
314 */
315 
316 }
317 
318 
323 tbb::task*
325 
326 
327 
328  if ( !A.is_transposed() && rows.Arows > A.rows/8 && rows.Arows > SERIAL_CUTOFF ) {
329  // *********** divide rows of A into sub-tasks*********
330 
331  size_t rows_start = rows.Arows_start;
332  size_t rows_end = rows.Arows_end;
333  size_t rows_mid = (rows_end+rows_start)/2;
334 
335  row_indices rows_child2 = rows;
336  rows_child2.Arows_start = rows_mid;
337  rows_child2.Arows = rows_end-rows_mid;
338  rows_child2.Crows_start = rows_mid;
339  rows_child2.Crows = rows_end-rows_mid;
340 
341  Cont_Task_rowsA& cont = *new(allocate_continuation()) Cont_Task_rowsA();
342  zgemm3m_Task& calc_task = *new(cont.allocate_child()) zgemm3m_Task( A, B, C, rows_child2, cols );
343 
344  recycle_as_child_of(cont);
345 
346  rows.Arows_end = rows_mid;
347  rows.Arows = rows_mid-rows_start;
348  rows.Crows_end = rows_mid;
349  rows.Crows = rows_mid-rows_start;
350 
351  cont.set_ref_count(2);
352  tbb::task::spawn(calc_task);;
353  return this;
354 
355  }
356 
357 
358  else if ( A.is_transposed() && cols.Acols > A.cols/8 && cols.Acols > SERIAL_CUTOFF ) {
359  // *********** divide cols of B into sub-tasks*********
360 
361  size_t cols_start = cols.Acols_start;
362  size_t cols_end = cols.Acols_end;
363  size_t cols_mid = (cols_end+cols_start)/2;
364 
365  col_indices cols_child2 = cols;
366  cols_child2.Acols_start = cols_mid;
367  cols_child2.Acols = cols_end-cols_mid;
368 
369  row_indices rows_child2 = rows;
370  rows_child2.Crows_start = cols_mid;
371  rows_child2.Crows = cols_end-cols_mid;
372 
373  // creating continuation task
374  Cont_Task_rowsA& cont = *new(allocate_continuation()) Cont_Task_rowsA();
375 
376  // creating child task 2
377  zgemm3m_Task& calc_task = *new(cont.allocate_child()) zgemm3m_Task( A, B, C, rows_child2, cols_child2);
378 
379  // recycling the present task as task1
380  recycle_as_child_of(cont);
381 
382  cols.Acols_end = cols_mid;
383  cols.Acols = cols_mid-cols_start;
384 
385  rows.Crows_end = cols_mid;
386  rows.Crows = cols_mid-cols_start;
387 
388  // spawning task2 and continue with task1 on the same thread
389  cont.set_ref_count(2);
390  tbb::task::spawn(calc_task);
391  return this;
392 
393  }
394 
395 
396  else if ( !B.is_transposed() && cols.Bcols > B.cols/8 && cols.Bcols > SERIAL_CUTOFF ) {
397  // *********** divide cols of B into sub-tasks*********
398 
399  size_t cols_start = cols.Bcols_start;
400  size_t cols_end = cols.Bcols_end;
401  size_t cols_mid = (cols_end+cols_start)/2;
402 
403  col_indices cols_child2 = cols;
404  cols_child2.Bcols_start = cols_mid;
405  cols_child2.Bcols = cols_end-cols_mid;
406  cols_child2.Ccols_start = cols_mid;
407  cols_child2.Ccols = cols_end-cols_mid;
408 
409  // creating continuation task
410  Cont_Task_rowsA& cont = *new(allocate_continuation()) Cont_Task_rowsA();
411 
412  // creating child task 2
413  zgemm3m_Task& calc_task = *new(cont.allocate_child()) zgemm3m_Task( A, B, C, rows, cols_child2);
414 
415  // recycling the present task as task1
416  recycle_as_child_of(cont);
417 
418  cols.Bcols_end = cols_mid;
419  cols.Bcols = cols_mid-cols_start;
420  cols.Ccols_end = cols_mid;
421  cols.Ccols = cols_mid-cols_start;
422 
423  // spawning task2 and continue with task1 on the same thread
424  cont.set_ref_count(2);
425  tbb::task::spawn(calc_task);
426  return this;
427 
428  }
429 
430 
431  if ( B.is_transposed() && rows.Brows > B.rows/8 && rows.Brows > SERIAL_CUTOFF ) {
432  // *********** divide rows of B into sub-tasks*********
433 
434  size_t rows_start = rows.Brows_start;
435  size_t rows_end = rows.Brows_end;
436  size_t rows_mid = (rows_end+rows_start)/2;
437 
438  row_indices rows_child2 = rows;
439  rows_child2.Brows_start = rows_mid;
440  rows_child2.Brows = rows_end-rows_mid;
441 
442  col_indices cols_child2 = cols;
443  cols_child2.Ccols_start = rows_mid;
444  cols_child2.Ccols = rows_end-rows_mid;
445 
446  Cont_Task_rowsA& cont = *new(allocate_continuation()) Cont_Task_rowsA();
447  zgemm3m_Task& calc_task = *new(cont.allocate_child()) zgemm3m_Task( A, B, C, rows_child2, cols_child2 );
448 
449  recycle_as_child_of(cont);
450 
451  rows.Brows_end = rows_mid;
452  rows.Brows = rows_mid-rows_start;
453 
454  cols.Ccols_end = rows_mid;
455  cols.Ccols = rows_mid-rows_start;
456 
457  cont.set_ref_count(2);
458  tbb::task::spawn(calc_task);;
459  return this;
460 
461  }
462  else {
463  zgemm3m_chunk();
464  return nullptr;
465  }
466 
467 
468  return nullptr;
469 
470 
471 
472 } //execute
473 
474 
475 
Matrix dot(Matrix &A, Matrix &B)
Call to calculate the product of two complex matrices by calling method zgemm3m from the CBLAS librar...
Definition: dot.cpp:20
tbb::task * execute()
This function is called when a task is spawned.
Definition: dot.cpp:324
size_t Ccols_start
The firs col in matrix C participating in the multiplication sub-problem.
Definition: dot.h:117
size_t rows
The number of rows.
Definition: matrix_base.h:23
size_t Crows
The number of rows in matrix C participating in the multiplication sub-problem.
Definition: dot.h:96
Matrix C
The matrix C.
Definition: dot.h:158
size_t Ccols
The number of cols in matrix C participating in the multiplication sub-problem.
Definition: dot.h:121
void get_cblas_transpose(Matrix &A, CBLAS_TRANSPOSE &transpose)
Call to get the transpose properties of the input matrix for CBLAS calculations.
Definition: dot.cpp:153
scalar * get_data()
Call to get the pointer to the stored data.
Definition: matrix_base.h:221
Matrix B
The matrix B.
Definition: dot.h:156
Structure containing row limits for the partitioning of the matrix product calculations.
Definition: dot.h:77
Structure containing column limits for the partitioning of the matrix product calculations.
Definition: dot.h:103
tbb::task * execute()
Overriden execute function of class tbb::task.
Definition: dot.cpp:188
zgemm3m_Task_serial(Matrix &A_in, Matrix &B_in, Matrix &C_in)
Constructor of the class.
Definition: dot.cpp:201
size_t Brows_end
The last row in matrix B participating in the multiplication sub-problem. (The rows are picked from a...
Definition: dot.h:88
bool check_matrices(Matrix &A, Matrix &B)
Call to check the shape of the matrices for method dot.
Definition: dot.cpp:103
bool is_conjugated()
Call to get whether the matrix should be conjugated in CBLAS functions or not.
Definition: matrix_base.h:181
size_t Acols_start
The firs col in matrix A participating in the multiplication sub-problem.
Definition: dot.h:105
size_t Bcols_end
The last col in matrix B participating in the multiplication sub-problem. (The cols are picked from a...
Definition: dot.h:113
size_t Arows_start
The firs row in matrix A participating in the multiplication sub-problem.
Definition: dot.h:80
size_t cols
The number of columns.
Definition: matrix_base.h:25
size_t Bcols_start
The firs col in matrix B participating in the multiplication sub-problem.
Definition: dot.h:111
size_t Arows_end
The last row in matrix A participating in the multiplication sub-problem. (The rows are picked from a...
Definition: dot.h:82
size_t Acols_end
The last col in matrix A participating in the multiplication sub-problem. (The cols are picked from a...
Definition: dot.h:107
size_t Crows_start
The firs row in matrix C participating in the multiplication sub-problem.
Definition: dot.h:92
size_t Crows_end
The last row in matrix C participating in the multiplication sub-problem. (The rows are picked from a...
Definition: dot.h:94
size_t Bcols
The number of cols in matrix B participating in the multiplication sub-problem.
Definition: dot.h:115
size_t Brows
The number of rows in matrix B participating in the multiplication sub-problem.
Definition: dot.h:90
Structure type representing complex numbers in the QGD package.
Definition: QGDTypes.h:39
Empty task class to utilize task continuation for saving stack space.
Definition: dot.h:128
Cont_Task_rowsA()
Default constructor of the class.
Definition: dot.cpp:179
Class to store data of complex arrays and its properties.
Definition: matrix.h:12
size_t Ccols_end
The last col in matrix C participating in the multiplication sub-problem. (The col are picked from a ...
Definition: dot.h:119
#define CblasConjNoTrans
Definition: dot.h:14
size_t Arows
The number of rows in matrix A participating in the multiplication sub-problem.
Definition: dot.h:84
row_indices rows
Structure containing row limits for the partitioning of the matrix product calculations.
Definition: dot.h:160
col_indices cols
Structure containing column limits for the partitioning of the matrix product calculations.
Definition: dot.h:162
double real
the real part of a complex number
Definition: QGDTypes.h:41
Class to calculate a matrix product C=A*B in serial.
Definition: dot.h:150
size_t Brows_start
The firs row in matrix B participating in the multiplication sub-problem.
Definition: dot.h:86
Class to calculate a matrix product C=A*B in parallel.
Definition: dot.h:201
CBLAS_ORDER order
CBLAS storage order.
Definition: dot.h:164
Matrix A
The matrix A.
Definition: dot.h:154
size_t Acols
The number of cols in matrix A participating in the multiplication sub-problem.
Definition: dot.h:109
#define SERIAL_CUTOFF
Definition: dot.cpp:9
void zgemm3m_chunk()
Call to calculate the product of matrix chunks defined by attributes rows, cols.
Definition: dot.cpp:258
double imag
the imaginary part of a complex number
Definition: QGDTypes.h:43
bool is_transposed()
Call to get whether the matrix should be conjugated in CBLAS functions or not.
Definition: matrix_base.h:199