00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef MTL_SMAT_DMAT_MULT_INCLUDE
00013 #define MTL_SMAT_DMAT_MULT_INCLUDE
00014
00015 #include <boost/numeric/mtl/operation/set_to_zero.hpp>
00016 #include <boost/numeric/mtl/utility/range_generator.hpp>
00017 #include <boost/numeric/mtl/concept/collection.hpp>
00018 #include <boost/numeric/mtl/utility/tag.hpp>
00019 #include <boost/numeric/mtl/utility/category.hpp>
00020 #include <boost/numeric/meta_math/loop1.hpp>
00021
00022 namespace mtl { namespace functor {
00023
00024 template <typename Assign= assign::assign_sum,
00025 typename Backup= no_op>
00026 struct gen_smat_dmat_mult
00027 {
00028 template <typename MatrixA, typename MatrixB, typename MatrixC>
00029 void operator()(MatrixA const& a, MatrixB const& b, MatrixC& c)
00030 {
00031 apply(a, b, c, typename OrientedCollection<MatrixA>::orientation());
00032 }
00033
00034 private:
00035 template <typename MatrixA, typename MatrixB, typename MatrixC>
00036 void apply(MatrixA const& a, MatrixB const& b, MatrixC& c, tag::row_major)
00037 {
00038 using namespace tag;
00039 using traits::range_generator;
00040 typedef typename range_generator<row, MatrixA>::type a_cur_type;
00041 typedef typename range_generator<row, MatrixC>::type c_cur_type;
00042 typedef typename range_generator<col, MatrixB>::type b_cur_type;
00043
00044 typedef typename range_generator<nz, a_cur_type>::type a_icur_type;
00045 typedef typename range_generator<all, b_cur_type>::type b_icur_type;
00046 typedef typename range_generator<iter::all, c_cur_type>::type c_icur_type;
00047
00048 typename traits::col<MatrixA>::type col_a(a);
00049 typename traits::const_value<MatrixA>::type value_a(a);
00050 typename traits::const_value<MatrixB>::type value_b(b);
00051
00052 if (Assign::init_to_zero) set_to_zero(c);
00053
00054 a_cur_type ac= begin<row>(a), aend= end<row>(a);
00055 for (c_cur_type cc= begin<row>(c); ac != aend; ++ac, ++cc) {
00056
00057 b_cur_type bc= begin<col>(b), bend= end<col>(b);
00058 for (c_icur_type cic= begin<iter::all>(cc); bc != bend; ++bc, ++cic) {
00059
00060 typename MatrixC::value_type c_tmp(*cic);
00061 for (a_icur_type aic= begin<nz>(ac), aiend= end<nz>(ac); aic != aiend; ++aic) {
00062
00063 typename Collection<MatrixA>::size_type ca= col_a(*aic);
00064
00065 b_icur_type bic= begin<all>(bc);
00066 bic+= ca;
00067 Assign::update(c_tmp, value_a(*aic) * value_b(*bic));
00068 }
00069 *cic= c_tmp;
00070 }
00071 }
00072 }
00073
00074
00075 template <typename MatrixA, typename MatrixB, typename MatrixC>
00076 void apply(MatrixA const& a, MatrixB const& b, MatrixC& c, tag::col_major)
00077 {
00078 using namespace tag;
00079 using traits::range_generator;
00080 typedef typename range_generator<col, MatrixA>::type a_cur_type;
00081 typedef typename range_generator<nz, a_cur_type>::type a_icur_type;
00082
00083 typename traits::row<MatrixA>::type row_a(a);
00084 typename traits::const_value<MatrixA>::type value_a(a);
00085
00086 if (Assign::init_to_zero) set_to_zero(c);
00087
00088 unsigned rb= 0;
00089 for (a_cur_type ac= begin<col>(a), aend= end<col>(a); ac != aend; ++ac, ++rb)
00090 for (a_icur_type aic= begin<nz>(ac), aiend= end<nz>(ac); aic != aiend; ++aic) {
00091 typename Collection<MatrixA>::size_type ra= row_a(*aic);
00092 typename Collection<MatrixA>::value_type va= value_a(*aic);
00093
00094 for (unsigned cb= 0; cb < num_cols(b); ++cb)
00095 Assign::update(c(ra, cb), va * b(rb, cb));
00096 }
00097 }
00098 };
00099
00100
00101
00102
00103
00104
00105
00106
// Default value of the Tiling1 template parameter of gen_tiling_smat_dmat_mult;
// a definition made before this point takes precedence.
#if !defined(MTL_SMAT_DMAT_MULT_TILING1)
#  define MTL_SMAT_DMAT_MULT_TILING1 8
#endif
00110
00111 template <unsigned long Index0, unsigned long Max0, typename Assign>
00112 struct gen_tiling_smat_dmat_mult_block
00113 : public meta_math::loop1<Index0, Max0>
00114 {
00115 typedef meta_math::loop1<Index0, Max0> base;
00116 typedef gen_tiling_smat_dmat_mult_block<base::next_index0, Max0, Assign> next_t;
00117
00118 template <typename Value, typename ValueA, typename ValueB, typename Size>
00119 static inline void apply(Value& tmp00, Value& tmp01, Value& tmp02, Value& tmp03, Value& tmp04,
00120 Value& tmp05, Value& tmp06, Value& tmp07, Value& tmp08, Value& tmp09,
00121 Value& tmp10, Value& tmp11, Value& tmp12, Value& tmp13, Value& tmp14, Value& tmp15,
00122 const ValueA& va, ValueB *begin_b, const Size& bci)
00123 {
00124 tmp00+= va * *(begin_b + base::index0 * bci);
00125 next_t::apply(tmp01, tmp02, tmp03, tmp04, tmp05, tmp06, tmp07, tmp08, tmp09,
00126 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp00,
00127 va, begin_b, bci);
00128 }
00129
00130 template <typename Value, typename MatrixC, typename SizeC>
00131 static inline void update(Value& tmp00, Value& tmp01, Value& tmp02, Value& tmp03, Value& tmp04,
00132 Value& tmp05, Value& tmp06, Value& tmp07, Value& tmp08, Value& tmp09,
00133 Value& tmp10, Value& tmp11, Value& tmp12, Value& tmp13, Value& tmp14, Value& tmp15,
00134 MatrixC& c, SizeC i, SizeC k)
00135 {
00136 Assign::update(c(i, k + base::index0), tmp00);
00137 next_t::update(tmp01, tmp02, tmp03, tmp04, tmp05, tmp06, tmp07, tmp08, tmp09,
00138 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp00,
00139 c, i, k);
00140 }
00141 };
00142
00143
00144 template <unsigned long Max0, typename Assign>
00145 struct gen_tiling_smat_dmat_mult_block<Max0, Max0, Assign>
00146 : public meta_math::loop1<Max0, Max0>
00147 {
00148 typedef meta_math::loop1<Max0, Max0> base;
00149
00150 template <typename Value, typename ValueA, typename ValueB, typename Size>
00151 static inline void apply(Value& tmp00, Value& tmp01, Value& tmp02, Value& tmp03, Value& tmp04,
00152 Value& tmp05, Value& tmp06, Value& tmp07, Value& tmp08, Value& tmp09,
00153 Value& tmp10, Value& tmp11, Value& tmp12, Value& tmp13, Value& tmp14, Value& tmp15,
00154 const ValueA& va, ValueB *begin_b, const Size& bci)
00155 {
00156 tmp00+= va * *(begin_b + base::index0 * bci);
00157 }
00158
00159 template <typename Value, typename MatrixC, typename SizeC>
00160 static inline void update(Value& tmp00, Value& tmp01, Value& tmp02, Value& tmp03, Value& tmp04,
00161 Value& tmp05, Value& tmp06, Value& tmp07, Value& tmp08, Value& tmp09,
00162 Value& tmp10, Value& tmp11, Value& tmp12, Value& tmp13, Value& tmp14, Value& tmp15,
00163 MatrixC& c, SizeC i, SizeC k)
00164 {
00165 Assign::update(c(i, k + base::index0), tmp00);
00166 }
00167 };
00168
00169
00170 template <unsigned long Tiling1= MTL_SMAT_DMAT_MULT_TILING1,
00171 typename Assign= assign::assign_sum,
00172 typename Backup= gen_smat_dmat_mult<Assign> >
00173 struct gen_tiling_smat_dmat_mult
00174 {
00175 template <typename MatrixA, typename MatrixB, typename MatrixC>
00176 void operator()(MatrixA const& a, MatrixB const& b, MatrixC& c)
00177 {
00178 apply(a, b, c, typename traits::category<MatrixC>::type());
00179 }
00180
00181 private:
00182 template <typename MatrixA, typename MatrixB, typename MatrixC>
00183 void apply(MatrixA const& a, MatrixB const& b, MatrixC& c, tag::universe)
00184 {
00185 Backup()(a, b, c);
00186 }
00187
00188 template <typename MatrixA, typename MatrixB, typename MatrixC>
00189 void apply(MatrixA const& a, MatrixB const& b, MatrixC& c, tag::has_2D_layout)
00190 {
00191 apply2(a, b, c, typename OrientedCollection<MatrixA>::orientation());
00192 }
00193
00194 template <typename MatrixA, typename MatrixB, typename MatrixC>
00195 void apply2(MatrixA const& a, MatrixB const& b, MatrixC& c, tag::col_major)
00196 {
00197
00198 Backup()(a, b, c);
00199 }
00200
00201
00202 template <typename MatrixA, typename MatrixB, typename MatrixC>
00203 void apply2(MatrixA const& a, MatrixB const& b, MatrixC& c, tag::row_major)
00204 {
00205 using namespace tag;
00206 using traits::range_generator;
00207
00208 typedef gen_tiling_smat_dmat_mult_block<1, Tiling1, Assign> block;
00209 typedef typename Collection<MatrixA>::size_type size_type;
00210 typedef typename Collection<MatrixC>::value_type value_type;
00211 const value_type z= math::zero(c[0][0]);
00212
00213 typedef typename range_generator<row, MatrixA>::type a_cur_type;
00214 typedef typename range_generator<nz, a_cur_type>::type a_icur_type;
00215
00216 typename traits::col<MatrixA>::type col_a(a);
00217 typename traits::const_value<MatrixA>::type value_a(a);
00218
00219 if (Assign::init_to_zero) set_to_zero(c);
00220
00221 size_type i_max= num_cols(b), i_block= Tiling1 * (i_max / Tiling1);
00222 size_t bci= &b(0, 1) - &b(0, 0);
00223
00224 a_cur_type ac= begin<row>(a), aend= end<row>(a);
00225 size_type rc= 0;
00226 for (a_cur_type ac= begin<row>(a), aend= end<row>(a); ac != aend; ++ac, ++rc) {
00227
00228 for (size_type i= 0; i < i_block; i+= Tiling1) {
00229
00230 value_type tmp00= z, tmp01= z, tmp02= z, tmp03= z, tmp04= z,
00231 tmp05= z, tmp06= z, tmp07= z, tmp08= z, tmp09= z,
00232 tmp10= z, tmp11= z, tmp12= z, tmp13= z, tmp14= z, tmp15= z;
00233
00234 for (a_icur_type aic= begin<nz>(ac), aiend= end<nz>(ac); aic != aiend; ++aic) {
00235 typename Collection<MatrixA>::size_type ca= col_a(*aic);
00236 typename Collection<MatrixA>::value_type va= value_a(*aic);
00237
00238
00239 const typename MatrixB::value_type *begin_b= &b(ca, i);
00240 block::apply(tmp00, tmp01, tmp02, tmp03, tmp04, tmp05, tmp06, tmp07, tmp08, tmp09,
00241 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15,
00242 va, begin_b, bci);
00243 }
00244 block::update(tmp00, tmp01, tmp02, tmp03, tmp04, tmp05, tmp06, tmp07, tmp08, tmp09,
00245 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15,
00246 c, rc, i);
00247 }
00248
00249 for (size_type i= i_block; i < i_max; i++) {
00250 value_type tmp00= z;
00251 for (a_icur_type aic= begin<nz>(ac), aiend= end<nz>(ac); aic != aiend; ++aic) {
00252 typename Collection<MatrixA>::size_type ca= col_a(aic);
00253 tmp00+= value_a(*aic) * b(ca, i);
00254 }
00255 Assign::update(c(rc, i), tmp00);
00256 }
00257 }
00258 }
00259 };
00260
00261
00262
00263
00264 }}
00265
00266 #endif // MTL_SMAT_DMAT_MULT_INCLUDE