00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef MTL_DMAT_DMAT_MULT_INCLUDE
00013 #define MTL_DMAT_DMAT_MULT_INCLUDE
00014
00015 #include <boost/static_assert.hpp>
00016 #include <boost/type_traits.hpp>
00017 #include <boost/mpl/bool.hpp>
00018 #include <boost/utility/enable_if.hpp>
00019
00020 #include <boost/numeric/mtl/operation/set_to_zero.hpp>
00021 #include <boost/numeric/mtl/utility/range_generator.hpp>
00022 #include <boost/numeric/mtl/operation/cursor_pseudo_dot.hpp>
00023 #include <boost/numeric/mtl/operation/multi_action_block.hpp>
00024 #include <boost/numeric/mtl/operation/assign_mode.hpp>
00025 #include <boost/numeric/mtl/operation/static_size.hpp>
00026 #include <boost/numeric/mtl/operation/static_num_rows.hpp>
00027 #include <boost/numeric/mtl/operation/static_num_cols.hpp>
00028 #include <boost/numeric/mtl/utility/tag.hpp>
00029 #include <boost/numeric/mtl/utility/glas_tag.hpp>
00030 #include <boost/numeric/mtl/utility/is_row_major.hpp>
00031 #include <boost/numeric/mtl/utility/is_static.hpp>
00032 #include <boost/numeric/meta_math/loop.hpp>
00033 #include <boost/numeric/mtl/recursion/base_case_test.hpp>
00034 #include <boost/numeric/mtl/recursion/base_case_matrix.hpp>
00035 #include <boost/numeric/mtl/recursion/matrix_recursator.hpp>
00036 #include <boost/numeric/mtl/recursion/base_case_cast.hpp>
00037 #include <boost/numeric/mtl/interface/blas.hpp>
00038
00039 #include <boost/numeric/mtl/matrix/dense2D.hpp>
00040 #include <boost/numeric/mtl/operation/print_matrix.hpp>
00041 #include <boost/numeric/mtl/operation/no_op.hpp>
00042
00043 #include <iostream>
00044 #include <complex>
00045
00046 namespace mtl {
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058 template <typename MatrixA, typename MatrixB, typename MatrixC, typename Assign= assign::assign_sum,
00059 typename Backup= no_op>
00060 struct gen_cursor_dmat_dmat_mult_ft
00061 {
00062 void operator()(MatrixA const& A, MatrixB const& B, MatrixC& C)
00063 {
00064 apply(A, B, C, typename traits::category<MatrixA>::type(),
00065 typename traits::category<MatrixB>::type());
00066 }
00067
00068 private:
00069 void apply(MatrixA const& A, MatrixB const& B, MatrixC& C, tag::universe, tag::universe)
00070 {
00071 Backup()(A, B, C);
00072 }
00073
00074 void apply(MatrixA const& A, MatrixB const& B, MatrixC& C, tag::has_cursor, tag::has_cursor)
00075 {
00076
00077 typedef glas::tag::row row;
00078 typedef glas::tag::col col;
00079 typedef glas::tag::all all;
00080
00081 typedef typename traits::const_value<MatrixA>::type a_value_type;
00082 typedef typename traits::const_value<MatrixB>::type b_value_type;
00083 typedef typename traits::value<MatrixC>::type c_value_type;
00084
00085 typedef typename traits::range_generator<row, MatrixA>::type a_cur_type;
00086 typedef typename traits::range_generator<row, MatrixC>::type c_cur_type;
00087
00088 typedef typename traits::range_generator<col, MatrixB>::type b_cur_type;
00089 typedef typename traits::range_generator<all, c_cur_type>::type c_icur_type;
00090
00091 typedef typename traits::range_generator<all, a_cur_type>::type a_icur_type;
00092 typedef typename traits::range_generator<all, b_cur_type>::type b_icur_type;
00093
00094 #ifndef NDEBUG
00095 typename traits::row<MatrixA>::type row_a(A);
00096 typename traits::col<MatrixA>::type col_a(A);
00097 typename traits::row<MatrixB>::type row_b(B);
00098 typename traits::col<MatrixB>::type col_b(B);
00099 typename traits::row<MatrixC>::type row_c(C);
00100 typename traits::col<MatrixC>::type col_c(C);
00101 #else
00102 # undef MTL_DEBUG_DMAT_DMAT_MULT // doesn't work with NDEBUG
00103 #endif
00104
00105 if (Assign::init_to_zero) set_to_zero(C);
00106
00107 a_value_type a_value(A);
00108 b_value_type b_value(B);
00109 c_value_type c_value(C);
00110
00111 a_cur_type ac= begin<row>(A), aend= end<row>(A);
00112 for (c_cur_type cc= begin<row>(C); ac != aend; ++ac, ++cc) {
00113
00114 b_cur_type bc= begin<col>(B), bend= end<col>(B);
00115 for (c_icur_type cic= begin<all>(cc); bc != bend; ++bc, ++cic) {
00116
00117 typename MatrixC::value_type c_tmp(c_value(*cic));
00118 #ifdef MTL_DEBUG_DMAT_DMAT_MULT
00119 std::cout << "Calculating C[" << row_c(*cic) << "][" << col_c(*cic) << "], initial value is "
00120 << c_tmp << "\n";
00121 #endif
00122 a_icur_type aic= begin<all>(ac), aiend= end<all>(ac);
00123 for (b_icur_type bic= begin<all>(bc); aic != aiend; ++aic, ++bic) {
00124 #ifdef MTL_DEBUG_DMAT_DMAT_MULT
00125 std::cout << "Updating with A[" << row_a(*aic) << "][" << col_a(*aic) << "] /* value is "
00126 << a_value(*aic) << " */ * ";
00127 std::cout << "B[" << row_b(*bic) << "][" << col_b(*bic) << "] /* value is "
00128 << b_value(*bic) << " */ * ";
00129 #endif
00130 assert(row_a(*aic) == row_c(*cic));
00131 assert(col_b(*bic) == col_c(*cic));
00132 assert(col_a(*aic) == row_b(*bic));
00133 Assign::update(c_tmp, a_value(*aic) * b_value(*bic));
00134 #ifdef MTL_DEBUG_DMAT_DMAT_MULT
00135 std::cout << "C's current value is " << c_tmp << "\n";
00136 #endif
00137 }
00138 c_value(*cic, c_tmp);
00139 }
00140 }
00141 }
00142 };
00143
00144
00145 template <typename Assign= assign::assign_sum,
00146 typename Backup= no_op>
00147 struct gen_cursor_dmat_dmat_mult_t
00148 {
00149 template <typename MatrixA, typename MatrixB, typename MatrixC>
00150 void operator()(MatrixA const& A, MatrixB const& B, MatrixC& C)
00151 {
00152 gen_cursor_dmat_dmat_mult_ft<MatrixA, MatrixB, MatrixC, Assign, Backup>()(A, B, C);
00153 }
00154 };
00155
00156
00157
00158
00159
00160
00161 template <typename MatrixA, typename MatrixB, typename MatrixC,
00162 typename Assign= assign::assign_sum,
00163 typename Backup= gen_cursor_dmat_dmat_mult_t<Assign> >
00164 struct gen_dmat_dmat_mult_ft
00165 {
00166 void operator()(MatrixA const& A, MatrixB const& B, MatrixC& C)
00167 {
00168 apply(A, B, C, typename traits::category<MatrixA>::type(),
00169 typename traits::category<MatrixB>::type());
00170 }
00171
00172 private:
00173 void apply(MatrixA const& A, MatrixB const& B, MatrixC& C, tag::universe, tag::universe)
00174 {
00175 Backup()(A, B, C);
00176 }
00177
00178 void apply(MatrixA const& A, MatrixB const& B, MatrixC& C, tag::has_iterator, tag::has_iterator)
00179 {
00180
00181 using namespace tag;
00182 using traits::range_generator;
00183 typedef typename range_generator<row, MatrixA>::type a_cur_type;
00184 typedef typename range_generator<row, MatrixC>::type c_cur_type;
00185 typedef typename range_generator<col, MatrixB>::type b_cur_type;
00186 typedef typename range_generator<iter::all, c_cur_type>::type c_icur_type;
00187 typedef typename range_generator<const_iter::all, a_cur_type>::type a_icur_type;
00188 typedef typename range_generator<const_iter::all, b_cur_type>::type b_icur_type;
00189
00190 if (Assign::init_to_zero) set_to_zero(C);
00191
00192 a_cur_type ac= begin<row>(A), aend= end<row>(A);
00193 for (c_cur_type cc= begin<row>(C); ac != aend; ++ac, ++cc) {
00194
00195 b_cur_type bc= begin<col>(B), bend= end<col>(B);
00196 for (c_icur_type cic= begin<iter::all>(cc); bc != bend; ++bc, ++cic) {
00197
00198 typename MatrixC::value_type c_tmp(*cic);
00199 a_icur_type aic= begin<const_iter::all>(ac), aiend= end<const_iter::all>(ac);
00200 for (b_icur_type bic= begin<const_iter::all>(bc); aic != aiend; ++aic, ++bic) {
00201 Assign::update(c_tmp, *aic * *bic);
00202 }
00203 *cic= c_tmp;
00204 }
00205 }
00206 }
00207 };
00208
00209
00210 template <typename Assign= assign::assign_sum,
00211 typename Backup= gen_cursor_dmat_dmat_mult_t<Assign> >
00212 struct gen_dmat_dmat_mult_t
00213 {
00214 template <typename MatrixA, typename MatrixB, typename MatrixC>
00215 void operator()(MatrixA const& A, MatrixB const& B, MatrixC& C)
00216 {
00217 gen_dmat_dmat_mult_ft<MatrixA, MatrixB, MatrixC, Assign, Backup>()(A, B, C);
00218 }
00219 };
00220
00221
00222
00223
00224
00225
00226
00227
00228
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246
00247
00248
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258 #ifndef MTL_DMAT_DMAT_MULT_TILING1
00259 # define MTL_DMAT_DMAT_MULT_TILING1 2
00260 #endif
00261
00262 #ifndef MTL_DMAT_DMAT_MULT_TILING2
00263 # define MTL_DMAT_DMAT_MULT_TILING2 4
00264 #endif
00265
00266 #ifndef MTL_DMAT_DMAT_MULT_INNER_UNROLL
00267 # define MTL_DMAT_DMAT_MULT_INNER_UNROLL 8
00268 #endif
00269
00270
00271 template <unsigned long Index0, unsigned long Max0, unsigned long Index1, unsigned long Max1, typename Assign>
00272 struct gen_tiling_dmat_dmat_mult_block
00273 : public meta_math::loop2<Index0, Max0, Index1, Max1>
00274 {
00275 typedef meta_math::loop2<Index0, Max0, Index1, Max1> base;
00276 typedef gen_tiling_dmat_dmat_mult_block<base::next_index0, Max0, base::next_index1, Max1, Assign> next_t;
00277
00278 template <typename Value, typename ValueA, typename SizeA, typename ValueB, typename SizeB>
00279 static inline void apply(Value& tmp00, Value& tmp01, Value& tmp02, Value& tmp03, Value& tmp04,
00280 Value& tmp05, Value& tmp06, Value& tmp07, Value& tmp08, Value& tmp09,
00281 Value& tmp10, Value& tmp11, Value& tmp12, Value& tmp13, Value& tmp14, Value& tmp15,
00282 ValueA *begin_a, SizeA& ari, ValueB *begin_b, SizeB& bci)
00283 {
00284 tmp00+= begin_a[ base::index0 * ari ] * begin_b[ base::index1 * bci ];
00285 next_t::apply(tmp01, tmp02, tmp03, tmp04, tmp05, tmp06, tmp07, tmp08, tmp09,
00286 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp00,
00287 begin_a, ari, begin_b, bci);
00288 }
00289
00290 template <typename Value, typename MatrixC, typename SizeC>
00291 static inline void update(Value& tmp00, Value& tmp01, Value& tmp02, Value& tmp03, Value& tmp04,
00292 Value& tmp05, Value& tmp06, Value& tmp07, Value& tmp08, Value& tmp09,
00293 Value& tmp10, Value& tmp11, Value& tmp12, Value& tmp13, Value& tmp14, Value& tmp15,
00294 MatrixC& C, SizeC i, SizeC k)
00295 {
00296 Assign::update(C(i + base::index0, k + base::index1), tmp00);
00297 next_t::update(tmp01, tmp02, tmp03, tmp04, tmp05, tmp06, tmp07, tmp08, tmp09,
00298 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp00,
00299 C, i, k);
00300 }
00301 };
00302
00303 template <unsigned long Max0, unsigned long Max1, typename Assign>
00304 struct gen_tiling_dmat_dmat_mult_block<Max0, Max0, Max1, Max1, Assign>
00305 : public meta_math::loop2<Max0, Max0, Max1, Max1>
00306 {
00307 typedef meta_math::loop2<Max0, Max0, Max1, Max1> base;
00308
00309 template <typename Value, typename ValueA, typename SizeA, typename ValueB, typename SizeB>
00310 static inline void apply(Value& tmp00, Value& tmp01, Value& tmp02, Value& tmp03, Value& tmp04,
00311 Value& tmp05, Value& tmp06, Value& tmp07, Value& tmp08, Value& tmp09,
00312 Value& tmp10, Value& tmp11, Value& tmp12, Value& tmp13, Value& tmp14, Value& tmp15,
00313 ValueA *begin_a, SizeA& ari, ValueB *begin_b, SizeB& bci)
00314 {
00315 tmp00+= begin_a[ base::index0 * ari ] * begin_b[ base::index1 * bci ];
00316 }
00317
00318 template <typename Value, typename MatrixC, typename SizeC>
00319 static inline void update(Value& tmp00, Value& tmp01, Value& tmp02, Value& tmp03, Value& tmp04,
00320 Value& tmp05, Value& tmp06, Value& tmp07, Value& tmp08, Value& tmp09,
00321 Value& tmp10, Value& tmp11, Value& tmp12, Value& tmp13, Value& tmp14, Value& tmp15,
00322 MatrixC& C, SizeC i, SizeC k)
00323 {
00324 Assign::update(C(i + base::index0, k + base::index1), tmp00);
00325 }
00326 };
00327
00328
00329 template <typename MatrixA, typename MatrixB, typename MatrixC,
00330 unsigned long Tiling1= MTL_DMAT_DMAT_MULT_TILING1,
00331 unsigned long Tiling2= MTL_DMAT_DMAT_MULT_TILING2,
00332 typename Assign= assign::assign_sum,
00333 typename Backup= gen_dmat_dmat_mult_t<Assign> >
00334 struct gen_tiling_dmat_dmat_mult_ft
00335 {
00336 BOOST_STATIC_ASSERT((Tiling1 * Tiling2 <= 16));
00337
00338 void operator()(MatrixA const& A, MatrixB const& B, MatrixC& C)
00339 {
00340 apply(A, B, C, typename traits::category<MatrixA>::type(),
00341 typename traits::category<MatrixB>::type());
00342 }
00343
00344 private:
00345 void apply(MatrixA const& A, MatrixB const& B, MatrixC& C, tag::universe, tag::universe)
00346 {
00347 Backup()(A, B, C);
00348 }
00349
00350 void apply(MatrixA const& A, MatrixB const& B, MatrixC& C, tag::has_2D_layout, tag::has_2D_layout)
00351 {
00352
00353 if (num_rows(A) < 2 || num_cols(A) < 2 || num_cols(B) < 2) {
00354 Backup()(A, B, C);
00355 return;
00356 }
00357
00358
00359 if (Assign::init_to_zero) set_to_zero(C);
00360
00361 typedef gen_tiling_dmat_dmat_mult_block<1, Tiling1, 1, Tiling2, Assign> block;
00362 typedef typename MatrixC::size_type size_type;
00363 typedef typename MatrixC::value_type value_type;
00364 const value_type z= math::zero(C[0][0]);
00365
00366 size_type i_max= num_rows(C), i_block= Tiling1 * (i_max / Tiling1),
00367 k_max= num_cols(C), k_block= Tiling2 * (k_max / Tiling2);
00368 size_t ari= &A(1, 0) - &A(0, 0),
00369 aci= &A(0, 1) - &A(0, 0), bri= &B(1, 0) - &B(0, 0), bci= &B(0, 1) - &B(0, 0);
00370
00371
00372 for (size_type i= 0; i < i_block; i+= Tiling1)
00373 for (size_type k= 0; k < k_block; k+= Tiling2) {
00374
00375 value_type tmp00= z, tmp01= z, tmp02= z, tmp03= z, tmp04= z,
00376 tmp05= z, tmp06= z, tmp07= z, tmp08= z, tmp09= z,
00377 tmp10= z, tmp11= z, tmp12= z, tmp13= z, tmp14= z, tmp15= z;
00378 const typename MatrixA::value_type *begin_a= &A(i, 0), *end_a= begin_a + num_cols(A) * aci;
00379 const typename MatrixB::value_type *begin_b= &B(0, k);
00380
00381 for (; begin_a != end_a; begin_a+= aci, begin_b+= bri)
00382 block::apply(tmp00, tmp01, tmp02, tmp03, tmp04, tmp05, tmp06, tmp07, tmp08, tmp09,
00383 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15,
00384 begin_a, ari, begin_b, bci);
00385 block::update(tmp00, tmp01, tmp02, tmp03, tmp04, tmp05, tmp06, tmp07, tmp08, tmp09,
00386 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15,
00387 C, i, k);
00388 }
00389
00390
00391 for (size_type i= 0; i < i_block; i++)
00392 for (size_type k = k_block; k < k_max; k++) {
00393 value_type tmp00= z;
00394 const typename MatrixA::value_type *begin_a= &A(i, 0), *end_a= begin_a + num_cols(A) * aci;
00395 const typename MatrixB::value_type *begin_b= &B(0, k);
00396
00397 for (; begin_a != end_a; begin_a+= aci, begin_b+= bri)
00398 tmp00 += *begin_a * *begin_b;
00399 Assign::update(C(i, k), tmp00);
00400 }
00401
00402
00403 for (size_type i= i_block; i < i_max; i++)
00404 for (size_type k = 0; k < k_max; k++) {
00405 value_type tmp00= z;
00406 const typename MatrixA::value_type *begin_a= &A(i, 0), *end_a= begin_a + num_cols(A) * aci;
00407 const typename MatrixB::value_type *begin_b= &B(0, k);
00408
00409 for (; begin_a != end_a; begin_a+= aci, begin_b+= bri)
00410 tmp00 += *begin_a * *begin_b;
00411 Assign::update(C(i, k), tmp00);
00412 }
00413 }
00414 };
00415
00416 template <unsigned long Tiling1= MTL_DMAT_DMAT_MULT_TILING1,
00417 unsigned long Tiling2= MTL_DMAT_DMAT_MULT_TILING2,
00418 typename Assign= assign::assign_sum,
00419 typename Backup= gen_dmat_dmat_mult_t<Assign> >
00420 struct gen_tiling_dmat_dmat_mult_t
00421 {
00422 template <typename MatrixA, typename MatrixB, typename MatrixC>
00423 void operator()(MatrixA const& A, MatrixB const& B, MatrixC& C)
00424 {
00425 gen_tiling_dmat_dmat_mult_ft<
00426 MatrixA, MatrixB, MatrixC, Tiling1, Tiling2, Assign, Backup
00427 >()(A, B, C);
00428 }
00429 };
00430
00431
00432
00433
00434
00435
00436
00437
00438
00439 template <typename MatrixA, typename MatrixB, typename MatrixC,
00440 typename Assign= assign::assign_sum,
00441 typename Backup= gen_dmat_dmat_mult_t<Assign> >
00442 struct gen_tiling_44_dmat_dmat_mult_ft
00443 {
00444 void operator()(MatrixA const& A, MatrixB const& B, MatrixC& C)
00445 {
00446 apply(A, B, C, typename traits::category<MatrixA>::type(),
00447 typename traits::category<MatrixB>::type());
00448 }
00449
00450 private:
00451 void apply(MatrixA const& A, MatrixB const& B, MatrixC& C, tag::universe, tag::universe)
00452 {
00453 Backup()(A, B, C);
00454 }
00455
00456 void apply(MatrixA const& A, MatrixB const& B, MatrixC& C, tag::has_2D_layout, tag::has_2D_layout)
00457 {
00458
00459 if (num_rows(A) < 2 || num_cols(A) < 2 || num_cols(B) < 2) {
00460 Backup()(A, B, C);
00461 return;
00462 }
00463
00464
00465 if (Assign::init_to_zero) set_to_zero(C);
00466
00467 typedef typename MatrixC::size_type size_type;
00468 typedef typename MatrixC::value_type value_type;
00469
00470 const size_type Tiling1= 4, Tiling2= 4;
00471 const value_type z= math::zero(C[0][0]);
00472
00473 size_type i_max= num_rows(C), i_block= Tiling1 * (i_max / Tiling1),
00474 k_max= num_cols(C), k_block= Tiling2 * (k_max / Tiling2);
00475 size_t ari= &A(1, 0) - &A(0, 0),
00476 aci= &A(0, 1) - &A(0, 0), bri= &B(1, 0) - &B(0, 0), bci= &B(0, 1) - &B(0, 0);
00477
00478
00479 for (size_type i= 0; i < i_block; i+= Tiling1)
00480 for (size_type k= 0; k < k_block; k+= Tiling2) {
00481
00482 value_type tmp00= z, tmp01= z, tmp02= z, tmp03= z, tmp04= z,
00483 tmp05= z, tmp06= z, tmp07= z, tmp08= z, tmp09= z,
00484 tmp10= z, tmp11= z, tmp12= z, tmp13= z, tmp14= z, tmp15= z;
00485 const typename MatrixA::value_type *begin_a= &A(i, 0), *end_a= begin_a + num_cols(A) * aci;
00486 const typename MatrixB::value_type *begin_b= &B(0, k);
00487
00488 for (; begin_a != end_a; begin_a+= aci, begin_b+= bri) {
00489 tmp00+= begin_a[ 0 * ari ] * begin_b[ 0 * bci ];
00490 tmp01+= begin_a[ 0 * ari ] * begin_b[ 1 * bci ];
00491 tmp02+= begin_a[ 0 * ari ] * begin_b[ 2 * bci ];
00492 tmp03+= begin_a[ 0 * ari ] * begin_b[ 3 * bci ];
00493 tmp04+= begin_a[ 1 * ari ] * begin_b[ 0 * bci ];
00494 tmp05+= begin_a[ 1 * ari ] * begin_b[ 1 * bci ];
00495 tmp06+= begin_a[ 1 * ari ] * begin_b[ 2 * bci ];
00496 tmp07+= begin_a[ 1 * ari ] * begin_b[ 3 * bci ];
00497 tmp08+= begin_a[ 2 * ari ] * begin_b[ 0 * bci ];
00498 tmp09+= begin_a[ 2 * ari ] * begin_b[ 1 * bci ];
00499 tmp10+= begin_a[ 2 * ari ] * begin_b[ 2 * bci ];
00500 tmp11+= begin_a[ 2 * ari ] * begin_b[ 3 * bci ];
00501 tmp12+= begin_a[ 3 * ari ] * begin_b[ 0 * bci ];
00502 tmp13+= begin_a[ 3 * ari ] * begin_b[ 1 * bci ];
00503 tmp14+= begin_a[ 3 * ari ] * begin_b[ 2 * bci ];
00504 tmp15+= begin_a[ 3 * ari ] * begin_b[ 3 * bci ];
00505 }
00506 Assign::update(C(i + 0, k + 0), tmp00);
00507 Assign::update(C(i + 0, k + 1), tmp01);
00508 Assign::update(C(i + 0, k + 2), tmp02);
00509 Assign::update(C(i + 0, k + 3), tmp03);
00510 Assign::update(C(i + 1, k + 0), tmp04);
00511 Assign::update(C(i + 1, k + 1), tmp05);
00512 Assign::update(C(i + 1, k + 2), tmp06);
00513 Assign::update(C(i + 1, k + 3), tmp07);
00514 Assign::update(C(i + 2, k + 0), tmp08);
00515 Assign::update(C(i + 2, k + 1), tmp09);
00516 Assign::update(C(i + 2, k + 2), tmp10);
00517 Assign::update(C(i + 2, k + 3), tmp11);
00518 Assign::update(C(i + 3, k + 0), tmp12);
00519 Assign::update(C(i + 3, k + 1), tmp13);
00520 Assign::update(C(i + 3, k + 2), tmp14);
00521 Assign::update(C(i + 3, k + 3), tmp15);
00522 }
00523
00524
00525 for (size_type i= 0; i < i_block; i++)
00526 for (size_type k = k_block; k < k_max; k++) {
00527 value_type tmp00= z;
00528 const typename MatrixA::value_type *begin_a= &A(i, 0), *end_a= begin_a + num_cols(A) * aci;
00529 const typename MatrixB::value_type *begin_b= &B(0, k);
00530
00531 for (; begin_a != end_a; begin_a+= aci, begin_b+= bri)
00532 tmp00 += *begin_a * *begin_b;
00533 Assign::update(C(i, k), tmp00);
00534 }
00535
00536
00537 for (size_type i= i_block; i < i_max; i++)
00538 for (size_type k = 0; k < k_max; k++) {
00539 value_type tmp00= z;
00540 const typename MatrixA::value_type *begin_a= &A(i, 0), *end_a= begin_a + num_cols(A) * aci;
00541 const typename MatrixB::value_type *begin_b= &B(0, k);
00542
00543 for (; begin_a != end_a; begin_a+= aci, begin_b+= bri)
00544 tmp00 += *begin_a * *begin_b;
00545 Assign::update(C(i, k), tmp00);
00546 }
00547 }
00548 };
00549
00550 template <typename Assign= assign::assign_sum,
00551 typename Backup= gen_dmat_dmat_mult_t<Assign> >
00552 struct gen_tiling_44_dmat_dmat_mult_t
00553 {
00554 template <typename MatrixA, typename MatrixB, typename MatrixC>
00555 void operator()(MatrixA const& A, MatrixB const& B, MatrixC& C)
00556 {
00557 gen_tiling_44_dmat_dmat_mult_ft<
00558 MatrixA, MatrixB, MatrixC, Assign, Backup
00559 >()(A, B, C);
00560 }
00561 };
00562
00563
00564
00565
00566
00567
00568
00569
00570
00571
00572 template <typename MatrixA, typename MatrixB, typename MatrixC,
00573 typename Assign= assign::assign_sum,
00574 typename Backup= gen_dmat_dmat_mult_t<Assign> >
00575 struct gen_tiling_22_dmat_dmat_mult_ft
00576 {
00577 void operator()(MatrixA const& A, MatrixB const& B, MatrixC& C)
00578 {
00579 apply(A, B, C, typename traits::category<MatrixA>::type(),
00580 typename traits::category<MatrixB>::type());
00581 }
00582
00583 private:
00584 void apply(MatrixA const& A, MatrixB const& B, MatrixC& C, tag::universe, tag::universe)
00585 {
00586 Backup()(A, B, C);
00587 }
00588
00589 void apply(MatrixA const& A, MatrixB const& B, MatrixC& C, tag::has_2D_layout, tag::has_2D_layout)
00590 {
00591
00592 if (num_rows(A) < 2 || num_cols(A) < 2 || num_cols(B) < 2) {
00593 Backup()(A, B, C);
00594 return;
00595 }
00596
00597
00598 if (Assign::init_to_zero) set_to_zero(C);
00599
00600 typedef typename MatrixC::size_type size_type;
00601 typedef typename MatrixC::value_type value_type;
00602
00603 const size_type Tiling1= 2, Tiling2= 2;
00604 const value_type z= math::zero(C[0][0]);
00605
00606 size_type i_max= num_rows(C), i_block= Tiling1 * (i_max / Tiling1),
00607 k_max= num_cols(C), k_block= Tiling2 * (k_max / Tiling2);
00608 size_t ari= &A(1, 0) - &A(0, 0),
00609 aci= &A(0, 1) - &A(0, 0), bri= &B(1, 0) - &B(0, 0), bci= &B(0, 1) - &B(0, 0);
00610
00611
00612 for (size_type i= 0; i < i_block; i+= Tiling1)
00613 for (size_type k= 0; k < k_block; k+= Tiling2) {
00614
00615 value_type tmp00= z, tmp01= z, tmp02= z, tmp03= z;
00616 const typename MatrixA::value_type *begin_a= &A(i, 0), *end_a= begin_a + num_cols(A) * aci;
00617 const typename MatrixB::value_type *begin_b= &B(0, k);
00618
00619 for (; begin_a != end_a; begin_a+= aci, begin_b+= bri) {
00620 tmp00+= begin_a[ 0 ] * begin_b[ 0 ];
00621 tmp01+= begin_a[ 0 ] * begin_b[bci];
00622 tmp02+= begin_a[ari] * begin_b[ 0 ];
00623 tmp03+= begin_a[ari] * begin_b[bci];
00624 }
00625 Assign::update(C(i + 0, k + 0), tmp00);
00626 Assign::update(C(i + 0, k + 1), tmp01);
00627 Assign::update(C(i + 1, k + 0), tmp02);
00628 Assign::update(C(i + 1, k + 1), tmp03);
00629 }
00630
00631
00632 for (size_type i= 0; i < i_block; i++)
00633 for (size_type k = k_block; k < k_max; k++) {
00634 value_type tmp00= z;
00635 const typename MatrixA::value_type *begin_a= &A(i, 0), *end_a= begin_a + num_cols(A) * aci;
00636 const typename MatrixB::value_type *begin_b= &B(0, k);
00637
00638 for (; begin_a != end_a; begin_a+= aci, begin_b+= bri)
00639 tmp00 += *begin_a * *begin_b;
00640 Assign::update(C(i, k), tmp00);
00641 }
00642
00643
00644 for (size_type i= i_block; i < i_max; i++)
00645 for (size_type k = 0; k < k_max; k++) {
00646 value_type tmp00= z;
00647 const typename MatrixA::value_type *begin_a= &A(i, 0), *end_a= begin_a + num_cols(A) * aci;
00648 const typename MatrixB::value_type *begin_b= &B(0, k);
00649
00650 for (; begin_a != end_a; begin_a+= aci, begin_b+= bri)
00651 tmp00 += *begin_a * *begin_b;
00652 Assign::update(C(i, k), tmp00);
00653 }
00654 }
00655 };
00656
00657 template <typename Assign= assign::assign_sum,
00658 typename Backup= gen_dmat_dmat_mult_t<Assign> >
00659 struct gen_tiling_22_dmat_dmat_mult_t
00660 {
00661 template <typename MatrixA, typename MatrixB, typename MatrixC>
00662 void operator()(MatrixA const& A, MatrixB const& B, MatrixC& C)
00663 {
00664 gen_tiling_22_dmat_dmat_mult_ft<
00665 MatrixA, MatrixB, MatrixC, Assign, Backup
00666 >()(A, B, C);
00667 }
00668 };
00669
00670
00671
00672
00673
00674
00675
00676
00677 namespace wrec {
00678
00679 template <typename BaseMult, typename BaseTest= recursion::bound_test_static<64> >
00680 struct gen_dmat_dmat_mult_t
00681 {
00682 template <typename RecA, typename RecB, typename RecC>
00683 void operator()(RecA const& rec_a, RecB const& rec_b, RecC& rec_c)
00684 {
00685
00686 using namespace recursion;
00687 if (is_empty(rec_a) || is_empty(rec_b) || is_empty(rec_c))
00688 return;
00689
00690 if (BaseTest()(rec_a)) {
00691 typename base_case_matrix<typename RecC::matrix_type, BaseTest>::type
00692 C= base_case_cast<BaseTest>(*rec_c);
00693 BaseMult()(base_case_cast<BaseTest>(*rec_a),
00694 base_case_cast<BaseTest>(*rec_b), C);
00695 } else {
00696 RecC c_north_west= north_west(rec_c), c_north_east= north_east(rec_c),
00697 c_south_west= south_west(rec_c), c_south_east= south_east(rec_c);
00698
00699 (*this)(north_west(rec_a), north_west(rec_b), c_north_west);
00700 (*this)(north_west(rec_a), north_east(rec_b), c_north_east);
00701 (*this)(south_west(rec_a), north_east(rec_b), c_south_east);
00702 (*this)(south_west(rec_a), north_west(rec_b), c_south_west);
00703 (*this)(south_east(rec_a), south_west(rec_b), c_south_west);
00704 (*this)(south_east(rec_a), south_east(rec_b), c_south_east);
00705 (*this)(north_east(rec_a), south_east(rec_b), c_north_east);
00706 (*this)(north_east(rec_a), south_west(rec_b), c_north_west);
00707 }
00708 }
00709 };
00710
00711 }
00712
00713
00714 template <typename BaseMult,
00715 typename BaseTest= recursion::bound_test_static<64>,
00716 typename Assign= assign::assign_sum,
00717 typename Backup= gen_dmat_dmat_mult_t<Assign> >
00718 struct gen_recursive_dmat_dmat_mult_t
00719 {
00720 template <typename MatrixA, typename MatrixB, typename MatrixC>
00721 void operator()(MatrixA const& A, MatrixB const& B, MatrixC& C)
00722 {
00723 apply(A, B, C, typename traits::category<MatrixA>::type(),
00724 typename traits::category<MatrixB>::type(),
00725 typename traits::category<MatrixC>::type());
00726 }
00727
00728 private:
00729
00730 template <typename MatrixA, typename MatrixB, typename MatrixC>
00731 void apply(MatrixA const& A, MatrixB const& B, MatrixC& C, tag::universe, tag::universe, tag::universe)
00732 {
00733 Backup()(A, B, C);
00734 }
00735
00736
00737 template <typename MatrixA, typename MatrixB, typename MatrixC>
00738 void apply(MatrixA const& A, MatrixB const& B, MatrixC& C,
00739 tag::qsub_dividable, tag::qsub_dividable, tag::qsub_dividable)
00740 {
00741
00742 if (Assign::init_to_zero) set_to_zero(C);
00743
00744
00745
00746
00747 using matrix::recursator;
00748 recursator<MatrixA> rec_a(A);
00749 recursator<MatrixB> rec_b(B);
00750 recursator<MatrixC> rec_c(C);
00751 equalize_depth(rec_a, rec_b, rec_c);
00752
00753 wrec::gen_dmat_dmat_mult_t<BaseMult, BaseTest>() (rec_a, rec_b, rec_c);
00754 }
00755 };
00756
00757
00758
00759
00760
00761
00762
00763
00764
00765
00766 template <typename MatrixA, typename MatrixB, typename MatrixC, typename Assign= assign::assign_sum,
00767 typename Backup= gen_dmat_dmat_mult_t<Assign> >
00768 struct gen_platform_dmat_dmat_mult_ft
00769 : public Backup
00770 {};
00771
00772
00773 template <typename Assign= assign::assign_sum,
00774 typename Backup= gen_dmat_dmat_mult_t<Assign> >
00775 struct gen_platform_dmat_dmat_mult_t
00776 {
00777 template <typename MatrixA, typename MatrixB, typename MatrixC>
00778 void operator()(MatrixA const& A, MatrixB const& B, MatrixC& C)
00779 {
00780 gen_platform_dmat_dmat_mult_ft<MatrixA, MatrixB, MatrixC, Assign, Backup>()(A, B, C);
00781 }
00782 };
00783
00784
00785
00786
00787
00788
00789
00790 template <typename MatrixA, typename MatrixB, typename MatrixC, typename Assign= assign::assign_sum,
00791 typename Backup= gen_dmat_dmat_mult_t<Assign> >
00792 struct gen_blas_dmat_dmat_mult_ft
00793 : public Backup
00794 {};
00795
00796
00797 #ifdef MTL_HAS_BLAS
00798
00799 namespace detail {
00800
00801
00802 double inline dgemm_alpha(assign::assign_sum) { return 1.0; }
00803 double inline dgemm_alpha(assign::plus_sum) { return 1.0; }
00804 double inline dgemm_alpha(assign::minus_sum) { return -1.0; }
00805
00806
00807 double inline dgemm_beta(assign::assign_sum) { return 0.0; }
00808 double inline dgemm_beta(assign::plus_sum) { return 1.0; }
00809 double inline dgemm_beta(assign::minus_sum) { return 1.0; }
00810
00811 template <typename Value, typename ParaA, typename ParaB, typename ParaC, typename Function, typename Assign>
00812 void inline xgemm(const dense2D<Value, ParaA>& A, const dense2D<Value, ParaB>& B,
00813 dense2D<Value, ParaC>& C, Function f, Assign)
00814 {
00815
00816 int m= num_rows(A), n= num_cols(B), k= num_cols(A), lda= A.get_ldim(), ldb= B.get_ldim(), ldc= C.get_ldim();
00817 Value alpha= dgemm_alpha(Assign()), beta= dgemm_beta(Assign());
00818 char a_trans= traits::is_row_major<ParaA>::value ? 'T' : 'N',
00819 b_trans= traits::is_row_major<ParaB>::value ? 'T' : 'N';
00820
00821 if (traits::is_row_major<ParaC>::value) {
00822
00823 a_trans= 'T' + 'N' - a_trans; b_trans= 'T' + 'N' - b_trans;
00824 f(&b_trans, &a_trans, &n , &m , &k ,
00825 &alpha, &B[0][0], &ldb, &A[0][0], &lda, &beta, &C[0][0], &ldc);
00826 } else
00827 f(&a_trans, &b_trans, &m, &n, &k, &alpha, &A[0][0], &lda, &B[0][0], &ldb, &beta, &C[0][0], &ldc);
00828 }
00829
00830 }
00831
00832 template<typename ParaA, typename ParaB, typename ParaC, typename Assign, typename Backup>
00833 struct gen_blas_dmat_dmat_mult_ft<dense2D<float, ParaA>, dense2D<float, ParaB>,
00834 dense2D<float, ParaC>, Assign, Backup>
00835 {
00836 void operator()(const dense2D<float, ParaA>& A, const dense2D<float, ParaB>& B,
00837 dense2D<float, ParaC>& C)
00838 {
00839 detail::xgemm(A, B, C, MTL_BLAS_NAME(sgemm), Assign());
00840 }
00841 };
00842
00843 template<typename ParaA, typename ParaB, typename ParaC, typename Assign, typename Backup>
00844 struct gen_blas_dmat_dmat_mult_ft<dense2D<double, ParaA>, dense2D<double, ParaB>,
00845 dense2D<double, ParaC>, Assign, Backup>
00846 {
00847 void operator()(const dense2D<double, ParaA>& A, const dense2D<double, ParaB>& B,
00848 dense2D<double, ParaC>& C)
00849 {
00850 detail::xgemm(A, B, C, MTL_BLAS_NAME(dgemm), Assign());
00851 }
00852 };
00853
00854
00855 template<typename ParaA, typename ParaB, typename ParaC, typename Assign, typename Backup>
00856 struct gen_blas_dmat_dmat_mult_ft<dense2D<std::complex<float>, ParaA>, dense2D<std::complex<float>, ParaB>,
00857 dense2D<std::complex<float>, ParaC>, Assign, Backup>
00858 {
00859 void operator()(const dense2D<std::complex<float>, ParaA>& A, const dense2D<std::complex<float>, ParaB>& B,
00860 dense2D<std::complex<float>, ParaC>& C)
00861 {
00862 detail::xgemm(A, B, C, MTL_BLAS_NAME(cgemm), Assign());
00863 }
00864 };
00865
00866
00867 template<typename ParaA, typename ParaB, typename ParaC, typename Assign, typename Backup>
00868 struct gen_blas_dmat_dmat_mult_ft<dense2D<std::complex<double>, ParaA>, dense2D<std::complex<double>, ParaB>,
00869 dense2D<std::complex<double>, ParaC>, Assign, Backup>
00870 {
00871 void operator()(const dense2D<std::complex<double>, ParaA>& A, const dense2D<std::complex<double>, ParaB>& B,
00872 dense2D<std::complex<double>, ParaC>& C)
00873 {
00874 detail::xgemm(A, B, C, MTL_BLAS_NAME(zgemm), Assign());
00875 }
00876 };
00877
00878
00879
00880 #endif // MTL_HAS_BLAS
00881
00882 template <typename Assign= assign::assign_sum,
00883 typename Backup= gen_dmat_dmat_mult_t<Assign> >
00884 struct gen_blas_dmat_dmat_mult_t
00885 : public Backup
00886 {
00887 template <typename MatrixA, typename MatrixB, typename MatrixC>
00888 void operator()(MatrixA const& A, MatrixB const& B, MatrixC& C)
00889 {
00890 gen_blas_dmat_dmat_mult_ft<MatrixA, MatrixB, MatrixC, Assign, Backup>()(A, B, C);
00891 }
00892 };
00893
00894
00895
00896
00897
00898
00899 template <std::size_t SizeLimit, typename FunctorSmall, typename FunctorLarge>
00900 struct size_switch_dmat_dmat_mult_t
00901 {
00902 template <typename MatrixA, typename MatrixB, typename MatrixC>
00903 void operator()(MatrixA const& A, MatrixB const& B, MatrixC& C)
00904 {
00905 const bool all_static= traits::is_static<MatrixA>::value && traits::is_static<MatrixB>::value
00906 && traits::is_static<MatrixC>::value;
00907 apply(A, B, C, boost::mpl::bool_<all_static>());
00908 }
00909
00910 private:
00911
00912 template <typename MatrixA, typename MatrixB, typename MatrixC>
00913 void apply(MatrixA const& A, MatrixB const& B, MatrixC& C, boost::mpl::false_)
00914 {
00915 if (size(A) <= SizeLimit || size(B) <= SizeLimit || size(C) <= SizeLimit)
00916 FunctorSmall()(A, B, C);
00917 else
00918 FunctorLarge()(A, B, C);
00919 }
00920
00921
00922 template <typename MatrixA, typename MatrixB, typename MatrixC>
00923 void apply(MatrixA const& A, MatrixB const& B, MatrixC& C, boost::mpl::true_)
00924 {
00925 const bool is_small= mtl::static_size<MatrixA>::value <= SizeLimit || mtl::static_size<MatrixB>::value <= SizeLimit
00926 || mtl::static_size<MatrixC>::value <= SizeLimit;
00927 typename boost::mpl::if_c<is_small, FunctorSmall, FunctorLarge>::type()(A, B, C);
00928 }
00929 };
00930
00931
00932
00933
00934
00935
00936 template <bool IsStatic, typename FunctorStatic, typename FunctorDynamic>
00937 struct static_switch_dmat_dmat_mult_t
00938 {
00939 template <typename MatrixA, typename MatrixB, typename MatrixC>
00940 void operator()(MatrixA const& A, MatrixB const& B, MatrixC& C)
00941 {
00942 typename boost::mpl::if_c<IsStatic, FunctorStatic, FunctorDynamic>::type()(A, B, C);
00943 }
00944 };
00945
00946
00947
00948
00949
00950 template <std::size_t Index0, std::size_t Max0, std::size_t Index1, std::size_t Max1, typename Assign>
00951 struct fully_unroll_dmat_dmat_mult_init_block
00952 : public meta_math::loop2<Index0, Max0, Index1, Max1>
00953 {
00954 typedef meta_math::loop2<Index0, Max0, Index1, Max1> base;
00955 typedef fully_unroll_dmat_dmat_mult_init_block<base::next_index0, Max0, base::next_index1, Max1, Assign> next_t;
00956
00957 template <typename MatrixA, typename MatrixB, typename MatrixC>
00958 static inline void apply(MatrixA const& A, MatrixB const& B, MatrixC& C)
00959 {
00960 Assign::first_update(C[base::index0][base::index1], A[base::index0][0] * B[0][base::index1]);
00961 next_t::apply(A, B, C);
00962 }
00963 };
00964
00965 template <std::size_t Max0, std::size_t Max1, typename Assign>
00966 struct fully_unroll_dmat_dmat_mult_init_block<Max0, Max0, Max1, Max1, Assign>
00967 : public meta_math::loop2<Max0, Max0, Max1, Max1>
00968 {
00969 typedef meta_math::loop2<Max0, Max0, Max1, Max1> base;
00970
00971 template <typename MatrixA, typename MatrixB, typename MatrixC>
00972 static inline void apply(MatrixA const& A, MatrixB const& B, MatrixC& C)
00973 {
00974 Assign::first_update(C[base::index0][base::index1], A[base::index0][0] * B[0][base::index1]);
00975 }
00976 };
00977
00978
00979 template <std::size_t Index0, std::size_t Max0, std::size_t Index1, std::size_t Max1,
00980 std::size_t Index2, std::size_t Max2, typename Assign>
00981 struct fully_unroll_dmat_dmat_mult_block
00982 : public meta_math::loop3<Index0, Max0, Index1, Max1, Index2, Max2>
00983 {
00984 typedef meta_math::loop3<Index0, Max0, Index1, Max1, Index2, Max2> base;
00985 typedef fully_unroll_dmat_dmat_mult_block<base::next_index0, Max0, base::next_index1, Max1, base::next_index2, Max2, Assign> next_t;
00986
00987 template <typename MatrixA, typename MatrixB, typename MatrixC>
00988 static inline void apply(MatrixA const& A, MatrixB const& B, MatrixC& C)
00989 {
00990 Assign::update(C[base::index0][base::index1], A[base::index0][base::index2] * B[base::index2][base::index1]);
00991 next_t::apply(A, B, C);
00992 }
00993 };
00994
00995 template <std::size_t Max0, std::size_t Max1, std::size_t Max2, typename Assign>
00996 struct fully_unroll_dmat_dmat_mult_block<Max0, Max0, Max1, Max1, Max2, Max2, Assign>
00997 : public meta_math::loop3<Max0, Max0, Max1, Max1, Max2, Max2>
00998 {
00999 typedef meta_math::loop3<Max0, Max0, Max1, Max1, Max2, Max2> base;
01000
01001 template <typename MatrixA, typename MatrixB, typename MatrixC>
01002 static inline void apply(MatrixA const& A, MatrixB const& B, MatrixC& C)
01003 {
01004 Assign::update(C[base::index0][base::index1], A[base::index0][base::index2] * B[base::index2][base::index1]);
01005 }
01006 };
01007
01008 template <typename Assign= assign::assign_sum,
01009 typename Backup= gen_dmat_dmat_mult_t<Assign> >
01010 struct fully_unroll_fixes_size_dmat_dmat_mult_t
01011 {
01012
01013 template <typename MatrixA, typename MatrixB, typename MatrixC>
01014 typename boost::enable_if_c<static_size<MatrixC>::value == 0>::type
01015 operator()(MatrixA const& A, MatrixB const& B, MatrixC& C) {}
01016
01017
01018 template <typename MatrixA, typename MatrixB, typename MatrixC>
01019 typename boost::enable_if_c<static_num_cols<MatrixA>::value == 0 && static_size<MatrixC>::value != 0>::type
01020 operator()(MatrixA const& A, MatrixB const& B, MatrixC& C) { if (Assign::init_to_zero) set_to_zero(C); }
01021
01022 struct noop
01023 {
01024 template <typename MatrixA, typename MatrixB, typename MatrixC>
01025 static inline void apply(MatrixA const&, MatrixB const&, MatrixC&) {}
01026 };
01027
01028
01029 template <typename MatrixA, typename MatrixB, typename MatrixC>
01030 typename boost::enable_if_c<static_size<MatrixA>::value != 0 && static_size<MatrixC>::value != 0>::type
01031 operator()(MatrixA const& A, MatrixB const& B, MatrixC& C)
01032 {
01033 typedef typename static_num_rows<MatrixC>::type size_type;
01034 static const size_type rows_c= static_num_rows<MatrixC>::value, cols_c= static_num_cols<MatrixC>::value,
01035 cols_a= static_num_cols<MatrixA>::value;
01036
01037 fully_unroll_dmat_dmat_mult_init_block<1, rows_c, 1, cols_c, Assign>::apply(A, B, C);
01038
01039
01040 typedef fully_unroll_dmat_dmat_mult_block<1, rows_c, 1, cols_c, 2, cols_a, Assign> f2;
01041 typedef typename boost::mpl::if_c<(cols_a > 1), f2, noop>::type f3;
01042 f3::apply(A, B, C);
01043 }
01044 };
01045
01046
01047 }
01048
01049 #endif // MTL_DMAT_DMAT_MULT_INCLUDE
01050
01051
01052 #include <boost/numeric/mtl/operation/opteron/matrix_mult.hpp>
01053