MPQC  3.0.0-alpha
src/lib/chemistry/qc/ccr12/mtensor.h
00001 //
00002 // mtensor.h
00003 //
00004 // Copyright (C) 2009 Edward Valeev
00005 //
00006 // Author: Edward Valeev <evaleev@vt.edu>
00007 // Maintainer: EV
00008 //
00009 // This file is part of the SC Toolkit.
00010 //
00011 // The SC Toolkit is free software; you can redistribute it and/or modify
00012 // it under the terms of the GNU Library General Public License as published by
00013 // the Free Software Foundation; either version 2, or (at your option)
00014 // any later version.
00015 //
00016 // The SC Toolkit is distributed in the hope that it will be useful,
00017 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00018 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00019 // GNU Library General Public License for more details.
00020 //
00021 // You should have received a copy of the GNU Library General Public License
00022 // along with the SC Toolkit; see the file COPYING.LIB.  If not, write to
00023 // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
00024 //
00025 // The U.S. Government is granted a limited license as per AL 91-7.
00026 //
00027 
00028 #ifndef _ccr12_mtensor_h
00029 #define _ccr12_mtensor_h
00030 
00031 #include <numeric>
00032 #include <chemistry/qc/ccr12/tensor.h>
00033 #include <chemistry/qc/ccr12/ccr12_info.h>
00034 #include <math/scmat/matrix.h>
00035 
00036 namespace {
00037   void print_tile(double* data, int n01, int n23) {
00038     using namespace sc;
00039     RefSCMatrix mat = SCMatrixKit::default_matrixkit()->matrix(new SCDimension(n01),
00040                                                                new SCDimension(n23));
00041     mat.assign(data);
00042     mat.print("tile");
00043   }
00044 
00045 }
00046 
00047 namespace sc {
00048 
00050     template <typename I> inline std::pair<I,I> intersect(const std::pair<I,I>& range1, const std::pair<I,I>& range2) {
00051       I start_max = std::max(range1.first, range2.first);
00052       I fence_min = std::min(range1.second, range2.second);
00053       if (start_max < fence_min) return make_pair(start_max, fence_min);
00054       return make_pair(I(0),I(0));
00055     }
00056 
00058     template <typename I> inline bool in(const std::pair<I,I>& r, const std::pair<I,I>& range) {
00059       return r.first >= range.first && r.second <= range.second;
00060     }
00062     template <typename I> inline bool in(I i, const std::pair<I,I>& range) {
00063       return i >= range.first && i < range.second;
00064     }
00065 
00067 
00069   template <size_t NDIM>
00070   class MTensor {
00071     public:
00072       typedef CCR12_Info Info;
00073       typedef long tile_index;
00074       typedef long element_index;
00075       typedef std::pair<tile_index, tile_index> tile_range;  //< [start, fence)
00076       typedef std::vector<tile_range> tile_ranges;
00077       typedef std::pair<element_index, element_index> element_range;  //< [start, fence)
00078       typedef std::vector<element_range> element_ranges;
00079       typedef std::vector<element_index> element_index_map;
00080       static const size_t ndim = NDIM;
00081 
00085       MTensor(Info const* info,
00086               Tensor* tensor,
00087               const tile_ranges& range) :
00088                 tensor_(tensor), info_(info), range_(range)
00089       {
00090         assert(range.size() == NDIM);
00091       }
00092 
00104       void convert(const Ref<DistArray4>& src, unsigned int tbtype,
00105                    const element_index_map& eimap0,
00106                    const element_index_map& eimap1,
00107                    const element_index_map& eimap2,
00108                    const element_index_map& eimap3,
00109                    element_ranges const* mapped_element_ranges = 0,
00110                    bool src_is_2301 = false);
00111 
00121       template <typename SrcType> void convert(const SrcType& src,
00122                                                int n1,
00123                                                int n3,
00124                                                const element_index_map& eimap0,
00125                                                const element_index_map& eimap1,
00126                                                const element_index_map& eimap2,
00127                                                const element_index_map& eimap3,
00128                                                element_ranges const* mapped_element_ranges = 0) {
00129 
00130         assert(NDIM == 4);
00131         assert(mapped_element_ranges != 0);
00132 
00133         // determine which tiles map to the allowed element ranges in src
00134         const std::vector<bool> tile0_in_erange = tiles_in_erange(range_[0], eimap0, (*mapped_element_ranges)[0]);
00135         const std::vector<bool> tile1_in_erange = tiles_in_erange(range_[1], eimap1, (*mapped_element_ranges)[1]);
00136         const std::vector<bool> tile2_in_erange = tiles_in_erange(range_[2], eimap2, (*mapped_element_ranges)[2]);
00137         const std::vector<bool> tile3_in_erange = tiles_in_erange(range_[3], eimap3, (*mapped_element_ranges)[3]);
00138 
00139         // split work over tasks assuming that all tasks can access src
00140         const int nproc_with_ints = info_->mem()->n();
00141         const int me = info_->mem()->me();
00142 
00143         const size_t ntiles0 = range_[0].second - range_[0].first;
00144         const size_t ntiles1 = range_[1].second - range_[1].first;
00145         const size_t ntiles2 = range_[2].second - range_[2].first;
00146         const size_t ntiles3 = range_[3].second - range_[3].first;
00147 
00148         // if espace0 is equivalent to espace1, or espace2 equivalent to espace3,
00149         // can compute antisymmetric integrals using only (espace0 espace1| espace2 espace3)
00150         // else also need (espace0 espace1| espace3 espace2), or an equivalent
00151         const bool espace0_eq_espace1 = ((*mapped_element_ranges)[0] == (*mapped_element_ranges)[1]);
00152         const bool espace2_eq_espace3 = ((*mapped_element_ranges)[2] == (*mapped_element_ranges)[3]);
00153         const bool can_always_antisymmetrize = (espace0_eq_espace1 || espace2_eq_espace3);
00154         assert(can_always_antisymmetrize == true);  // this is likely to break the logic downstream
00155                                                     // I clearly don't understand enough at the moment
00156 
00157         // TODO check if equivalence of MTensor spaces is matched by their relationship in src
00158 
00159         // assume that src is accessible from all nodes
00160         if (1) {
00161 
00162           // how do I determine max size of the tiles?
00163           const size_t maxsize1 = info_->maxtilesize();
00164           const size_t maxtilesize = maxsize1 * maxsize1 * maxsize1 * maxsize1;
00165           double* data = info_->mem()->malloc_local_double(maxtilesize);
00166 
00167           for (long t0 = range_[0].first; t0 < range_[0].second; ++t0) {
00168             const long size0 = info_->get_range(t0);
00169             const long offset0 = info_->get_offset(t0);
00170             const long spin0 = info_->get_spin(t0);
00171             const bool in_erange0 = tile0_in_erange[t0];
00172 
00173             for (long t1 = std::max(range_[1].first, t0); t1 < range_[1].second; ++t1) {
00174               const long size1 = info_->get_range(t1);
00175               const long offset1 = info_->get_offset(t1);
00176               const long spin1 = info_->get_spin(t1);
00177               const bool in_erange1 = tile1_in_erange[t1];
00178 
00179               const bool aaaa = (spin0 == spin1);
00180 
00181               for (long t2 = range_[2].first; t2 < range_[2].second; ++t2) {
00182                 const long size2 = info_->get_range(t2);
00183                 const long offset2 = info_->get_offset(t2);
00184                 const long spin2 = info_->get_spin(t2);
00185                 const bool in_erange2 = tile2_in_erange[t2];
00186 
00187                 const bool abab = ((spin0 != spin1) && (spin0 == spin2));
00188                 const bool abba = ((spin0 != spin1) && (spin0 != spin2));
00189 
00190                 for (long t3 = std::max(range_[3].first, t2); t3 < range_[3].second; ++t3) {
00191                   const long size3 = info_->get_range(t3);
00192                   const long offset3 = info_->get_offset(t3);
00193                   const long spin3 = info_->get_spin(t3);
00194                   const bool in_erange3 = tile3_in_erange[t3];
00195 
00196                   // cartesian ordinal is used as a key for hashing tiles
00197                   const long tile_key = (t3-range_[3].first) +
00198                                         ntiles3 * ( (t2-range_[2].first) +
00199                                                     ntiles2 * ( (t1-range_[1].first) +
00200                                                                 ntiles1 * (t0-range_[0].first)
00201                                                               )
00202                                                   );
00203 
00204                   // since all procs can access integrals, use locality
00205                   if (tensor_->exists(tile_key) && tensor_->is_this_local(tile_key)) {
00206 
00207                     if ((!in_erange0 || !in_erange1 || !in_erange2 || !in_erange3))
00208                         continue;
00209 
00210                     // if erange[0] == erange[1], clearly can antisymmetrize 0 and 1
00211                     // if erange[2] == erange[3], clearly can antisymmetrize 2 and 3
00212                     bool antisymmetrize01 = espace0_eq_espace1;
00213                     bool antisymmetrize23 = espace2_eq_espace3;
00214                     // prefer to antisymmetrize 23 since can do that with 1 block of DistArray4
00215                     if (antisymmetrize23) antisymmetrize01 = false;
00216 
00217                     long size = size0 * size1 * size2 * size3;
00218                     std::fill(data, data+size, 0.0);
00219 
00220                       long i0123 = 0;
00221                       for (int i0 = 0; i0 < size0; ++i0) {
00222                         const int ii0 = eimap0[offset0 + i0];
00223                         for (int i1 = 0; i1 < size1; ++i1) {
00224                           const int ii1 = eimap1[offset1 + i1];
00225 
00226                           //if (ii0 == ii1 && aaaa) continue;
00227 
00228                           // split the work in round robin fashion by processors who have access to integrals
00229                           long i01;
00230                           if (antisymmetrize01) {
00231                             const long iii0 = std::max(ii0, ii1);
00232                             const long iii1 = std::min(ii0, ii1);
00233                             i01 = iii0 * (iii0+1)/2 + iii1;
00234                           } else {
00235                             i01 = ii0 * n1 + ii1;
00236                           }
00237                           const long i01_proc = i01 % nproc_with_ints;
00238                           if (i01_proc != me)
00239                             continue;
00240 
00241                           if (antisymmetrize23) {
00242 
00243                             for (int i2 = 0; i2 < size2; ++i2) {
00244                               const int ii2 = eimap2[offset2 + i2];
00245                               for (int i3 = 0; i3 < size3; ++i3, ++i0123) {
00246                                 const int ii3 = eimap3[offset3 + i3];
00247 
00248                                 const int i23 = ii2 * n3 + ii3;
00249                                 const int i32 = ii3 * n3 + ii2;
00250 
00251                                 if (abab) {
00252                                   const double integral_0123 = src(i01,i23);
00253                                   data[i0123] = integral_0123;
00254                                 } else if (abba) {
00255                                   const double integral_0132 = src(i01,i32);
00256                                   data[i0123] = -integral_0132;
00257                                 } else { // aaaa
00258                                   const double integral_0123 = src(i01,i23);
00259                                   const double integral_0132 = src(i01,i32);
00260                                   data[i0123] = integral_0123 - integral_0132;
00261                                 }
00262 
00263                               }
00264                             }
00265 
00266                           } // antisymmetrize23
00267 
00268                           else if (antisymmetrize01) { // means can't antisymmetrize 2 and 3
00269 
00270                             const int i10 = i1 * n1 + i0;
00271 
00272                             for (int i2 = 0; i2 < size2; ++i2) {
00273                               const int ii2 = eimap2[offset2 + i2];
00274                               for (int i3 = 0; i3 < size3; ++i3, ++i0123) {
00275                                 const int ii3 = eimap3[offset3 + i3];
00276 
00277                                 const int i23 = ii2 * n3 + ii3;
00278 
00279                                 if (abab) {
00280                                   const double integral_0123 = src(i01,i23);
00281                                   data[i0123] = integral_0123;
00282                                 } else if (abba) {
00283                                   const double integral_1023 = src(i10,i23);
00284                                   data[i0123] = -integral_1023;
00285                                 } else { // aaaa
00286                                   const double integral_0123 = src(i01,i23);
00287                                   const double integral_1023 = src(i10,i23);
00288                                   data[i0123] = integral_0123 - integral_1023;
00289                                 }
00290 
00291                               }
00292                             }
00293 
00294                           } // antisymmetrize01
00295 
00296                           else { // do not antisymmetrize
00297                             assert(false); // should not happen, but only SMITH knows :-)
00298                           }
00299 
00300                         }
00301                       }
00302 
00303                       tensor_->put_block(tile_key, data);
00304 
00305 #if 0
00306                 const double sum = std::accumulate(data, data+size, 0.0);
00307                 ExEnv::out0() << "tiles = (" << t0 << "," << t1 << "," << t2 << "," << t3
00308                               << ")  key = " << tile_key  << " sum = " << sum << std::endl;
00309 
00310                 for(int i=0; i<size; ++i) {
00311                   ExEnv::out0() << "data[" << i << "] = " << data[i] << std::endl;
00312                 }
00313 #endif
00314                   } // if this task will process this tile
00315 
00316                 } // t3
00317               } // t2
00318             } // t1
00319           } // t0
00320 
00321           info_->mem()->free_local_double(data);
00322 
00323         } // if this task can access the integrals
00324 
00325       } // MTensor<4>::convert() from RefSCMatrix or RefSymmSCMatrix
00326 
00336       template <typename SrcType> void convert(const SrcType& src0, const SrcType& src1,
00337                                                int n0,
00338                                                int n1,
00339                                                int n2,
00340                                                int n3,
00341                                                const element_index_map& eimap0,
00342                                                const element_index_map& eimap1,
00343                                                const element_index_map& eimap2,
00344                                                const element_index_map& eimap3,
00345                                                element_ranges const* mapped_element_ranges = 0,
00346                                                bool src1_is_1023 = false) {
00347 
00348         assert(NDIM == 4);
00349         assert(mapped_element_ranges != 0);
00350 
00351         // determine which tiles map to the allowed element ranges in src
00352         const std::vector<bool> tile0_in_erange = tiles_in_erange(range_[0], eimap0, (*mapped_element_ranges)[0]);
00353         const std::vector<bool> tile1_in_erange = tiles_in_erange(range_[1], eimap1, (*mapped_element_ranges)[1]);
00354         const std::vector<bool> tile2_in_erange = tiles_in_erange(range_[2], eimap2, (*mapped_element_ranges)[2]);
00355         const std::vector<bool> tile3_in_erange = tiles_in_erange(range_[3], eimap3, (*mapped_element_ranges)[3]);
00356 
00357         // split work over tasks assuming that all tasks can access src
00358         const int nproc_with_ints = info_->mem()->n();
00359         const int me = info_->mem()->me();
00360 
00361         const size_t ntiles0 = range_[0].second - range_[0].first;
00362         const size_t ntiles1 = range_[1].second - range_[1].first;
00363         const size_t ntiles2 = range_[2].second - range_[2].first;
00364         const size_t ntiles3 = range_[3].second - range_[3].first;
00365 
00366         // if espace0 is equivalent to espace1, or espace2 equivalent to espace3,
00367         // can compute antisymmetric integrals using only (espace0 espace1| espace2 espace3)
00368         // else also need (espace0 espace1| espace3 espace2), or an equivalent
00369         const bool espace0_eq_espace1 = ((*mapped_element_ranges)[0] == (*mapped_element_ranges)[1]);
00370         const bool espace2_eq_espace3 = ((*mapped_element_ranges)[2] == (*mapped_element_ranges)[3]);
00371         const bool can_always_antisymmetrize = (espace0_eq_espace1 || espace2_eq_espace3);
00372         assert(can_always_antisymmetrize == true);  // this is likely to break the logic downstream
00373                                                     // I clearly don't understand enough at the moment
00374 
00375         // TODO check if equivalence of MTensor spaces is matched by their relationship in src
00376 
00377         // assume that src is accessible from all nodes
00378         if (1) {
00379 
00380           // how do I determine max size of the tiles?
00381           const size_t maxsize1 = info_->maxtilesize();
00382           const size_t maxtilesize = maxsize1 * maxsize1 * maxsize1 * maxsize1;
00383           double* data = info_->mem()->malloc_local_double(maxtilesize);
00384 
00385           for (long t0 = range_[0].first; t0 < range_[0].second; ++t0) {
00386             const long size0 = info_->get_range(t0);
00387             const long offset0 = info_->get_offset(t0);
00388             const long spin0 = info_->get_spin(t0);
00389             const bool in_erange0 = tile0_in_erange[t0];
00390 
00391             for (long t1 = std::max(range_[1].first, t0); t1 < range_[1].second; ++t1) {
00392               const long size1 = info_->get_range(t1);
00393               const long offset1 = info_->get_offset(t1);
00394               const long spin1 = info_->get_spin(t1);
00395               const bool in_erange1 = tile1_in_erange[t1];
00396 
00397               const bool aaaa = (spin0 == spin1);
00398 
00399               for (long t2 = range_[2].first; t2 < range_[2].second; ++t2) {
00400                 const long size2 = info_->get_range(t2);
00401                 const long offset2 = info_->get_offset(t2);
00402                 const long spin2 = info_->get_spin(t2);
00403                 const bool in_erange2 = tile2_in_erange[t2];
00404 
00405                 const bool abab = ((spin0 != spin1) && (spin0 == spin2));
00406                 const bool abba = ((spin0 != spin1) && (spin0 != spin2));
00407 
00408                 for (long t3 = std::max(range_[3].first, t2); t3 < range_[3].second; ++t3) {
00409                   const long size3 = info_->get_range(t3);
00410                   const long offset3 = info_->get_offset(t3);
00411                   const long spin3 = info_->get_spin(t3);
00412                   const bool in_erange3 = tile3_in_erange[t3];
00413 
00414                   // cartesian ordinal is used as a key for hashing tiles
00415                   const long tile_key = (t3-range_[3].first) +
00416                                         ntiles3 * ( (t2-range_[2].first) +
00417                                                     ntiles2 * ( (t1-range_[1].first) +
00418                                                                 ntiles1 * (t0-range_[0].first)
00419                                                               )
00420                                                   );
00421 
00422                   // since all procs can access integrals, use locality
00423                   if (tensor_->exists(tile_key) && tensor_->is_this_local(tile_key)) {
00424 
00425                     if ((!in_erange0 || !in_erange1 || !in_erange2 || !in_erange3))
00426                         continue;
00427 
00428                     // if erange[0] == erange[1], clearly can antisymmetrize 0 and 1
00429                     // if erange[2] == erange[3], clearly can antisymmetrize 2 and 3
00430                     bool antisymmetrize01 = espace0_eq_espace1;
00431                     bool antisymmetrize23 = espace2_eq_espace3;
00432                     // prefer to antisymmetrize 23 since can do that with 1 block of DistArray4
00433                     if (antisymmetrize23) antisymmetrize01 = false;
00434 
00435                     long size = size0 * size1 * size2 * size3;
00436                     std::fill(data, data+size, 0.0);
00437 
00438                       long i0123 = 0;
00439                       for (int i0 = 0; i0 < size0; ++i0) {
00440                         const int ii0 = eimap0[offset0 + i0];
00441                         for (int i1 = 0; i1 < size1; ++i1) {
00442                           const int ii1 = eimap1[offset1 + i1];
00443 
00444                           //if (ii0 == ii1 && aaaa) continue;
00445 
00446                           // split the work in round robin fashion by processors who have access to integrals
00447                           long i01;
00448                           if (antisymmetrize01) {
00449                             const long iii0 = std::max(ii0, ii1);
00450                             const long iii1 = std::min(ii0, ii1);
00451                             i01 = iii0 * (iii0+1)/2 + iii1;
00452                           } else {
00453                             i01 = ii0 * n1 + ii1;
00454                           }
00455                           const long i01_proc = i01 % nproc_with_ints;
00456                           if (i01_proc != me)
00457                             continue;
00458 
00459                           if (antisymmetrize23) {
00460 
00461                             for (int i2 = 0; i2 < size2; ++i2) {
00462                               const int ii2 = eimap2[offset2 + i2];
00463                               for (int i3 = 0; i3 < size3; ++i3, ++i0123) {
00464                                 const int ii3 = eimap3[offset3 + i3];
00465 
00466                                 const int i23 = ii2 * n3 + ii3;
00467                                 const int i32 = ii3 * n2 + ii2;
00468 
00469                                 if (abab) {
00470                                   const double integral_0123 = src0(i01,i23);
00471                                   data[i0123] = integral_0123;
00472                                 } else if (abba) {
00473                                   const double integral_0132 = src1(i01,i32);
00474                                   data[i0123] = -integral_0132;
00475                                 } else { // aaaa
00476                                   const double integral_0123 = src0(i01,i23);
00477                                   const double integral_0132 = src1(i01,i32);
00478                                   data[i0123] = integral_0123 - integral_0132;
00479                                 }
00480 
00481                               }
00482                             }
00483 
00484                           } // antisymmetrize23
00485 
00486                           else if (antisymmetrize01) { // means can't antisymmetrize 2 and 3
00487 
00488                             const int i10 = i1 * n0 + i0;
00489 
00490                             for (int i2 = 0; i2 < size2; ++i2) {
00491                               const int ii2 = eimap2[offset2 + i2];
00492                               for (int i3 = 0; i3 < size3; ++i3, ++i0123) {
00493                                 const int ii3 = eimap3[offset3 + i3];
00494 
00495                                 const int i23 = ii2 * n3 + ii3;
00496 
00497                                 if (abab) {
00498                                   const double integral_0123 = src0(i01,i23);
00499                                   data[i0123] = integral_0123;
00500                                 } else if (abba) {
00501                                   const double integral_1023 = src1(i10,i23);
00502                                   data[i0123] = -integral_1023;
00503                                 } else { // aaaa
00504                                   const double integral_0123 = src0(i01,i23);
00505                                   const double integral_1023 = src1(i10,i23);
00506                                   data[i0123] = integral_0123 - integral_1023;
00507                                 }
00508 
00509                               }
00510                             }
00511 
00512                           } // antisymmetrize01
00513 
00514                           else { // do not antisymmetrize
00515                             assert(false); // should not happen, but only SMITH knows :-)
00516                           }
00517 
00518                         }
00519                       }
00520 
00521                       tensor_->put_block(tile_key, data);
00522 
00523 #if 0
00524                 const double sum = std::accumulate(data, data+size, 0.0);
00525                 ExEnv::out0() << "tiles = (" << t0 << "," << t1 << "," << t2 << "," << t3
00526                               << ")  key = " << tile_key  << " sum = " << sum << std::endl;
00527 
00528                 for(int i=0; i<size; ++i) {
00529                   ExEnv::out0() << "data[" << i << "] = " << data[i] << std::endl;
00530                 }
00531 #endif
00532                   } // if this task will process this tile
00533 
00534                 } // t3
00535               } // t2
00536             } // t1
00537           } // t0
00538 
00539           info_->mem()->free_local_double(data);
00540 
00541         } // if this task can access the integrals
00542 
00543       } // MTensor<4>::convert() from RefSCMatrix or RefSymmSCMatrix
00544 
00551       template <typename SrcType> void convert(const SrcType& src,
00552                                                const element_index_map& eimap0,
00553                                                const element_index_map& eimap1,
00554                                                element_ranges const* mapped_element_ranges = 0) {
00555 
00556         assert(NDIM == 2);
00557 
00558         // determine which tiles map to the allowed element ranges in src
00559         const std::vector<bool> tile0_in_erange = tiles_in_erange(range_[0], eimap0, (*mapped_element_ranges)[0]);
00560         const std::vector<bool> tile1_in_erange = tiles_in_erange(range_[1], eimap1, (*mapped_element_ranges)[1]);
00561 
00562         // split work over tasks assuming that all tasks can access src
00563         const int nproc_with_ints = info_->mem()->n();
00564         const int me = info_->mem()->me();
00565 
00566         const size_t ntiles0 = range_[0].second - range_[0].first;
00567         const size_t ntiles1 = range_[1].second - range_[1].first;
00568 
00569         // check that the needed index ranges are present in src
00570       #if 0  // assume that the user knows what she's doing
00571         element_range src_range[2];
00572         src_range[0] = this->map_range(range_[0], eimap0);
00573         src_range[1] = this->map_range(range_[1], eimap1);
00574         assert(src_range[0].second <= src->ni());
00575         assert(src_range[1].second <= src->nj());
00576       #endif
00577 
00578         // TODO check if equivalence of MTensor spaces is matched by their relationship in src
00579 
00580         // assume that src is accessible from all nodes
00581         if (1) {
00582 
00583           // how do I determine max size of the tiles?
00584           const size_t maxsize1 = info_->maxtilesize();
00585           const size_t maxtilesize = maxsize1 * maxsize1;
00586           double* data = info_->mem()->malloc_local_double(maxtilesize);
00587 
00588           for (long t0 = range_[0].first; t0 < range_[0].second; ++t0) {
00589             const long size0 = info_->get_range(t0);
00590             const long offset0 = info_->get_offset(t0);
00591             const long spin0 = info_->get_spin(t0);
00592             const bool in_erange0 = tile0_in_erange[t0];
00593 
00594             for (long t1 = range_[1].first; t1 < range_[1].second; ++t1) {
00595               const long size1 = info_->get_range(t1);
00596               const long offset1 = info_->get_offset(t1);
00597               const long spin1 = info_->get_spin(t1);
00598               const bool in_erange1 = tile1_in_erange[t1];
00599 
00600               // cartesian ordinal is used as a key for hashing tiles
00601               const long tile_key = (t1-range_[1].first) + ntiles1 * (t0-range_[0].first);
00602 
00603               // since all procs can access integrals, use locality
00604               if (tensor_->exists(tile_key) && tensor_->is_this_local(tile_key)) {
00605 
00606                 if ((!in_erange0 || !in_erange1))
00607                   continue;
00608 
00609                 long size = size0 * size1;
00610                 std::fill(data, data+size, 0.0);
00611 
00612                 long i01 = 0;
00613                 for (int i0 = 0; i0 < size0; ++i0) {
00614                   const int ii0 = eimap0[offset0 + i0];
00615                   for (int i1 = 0; i1 < size1; ++i1, ++i01) {
00616                     const int ii1 = eimap1[offset1 + i1];
00617 
00618                     data[i01] = src(ii0, ii1);
00619                   }
00620                 }
00621 
00622                 tensor_->add_block(tile_key, data);
00623 
00624 #if 0
00625                 const double sum = std::accumulate(data, data+size, 0.0);
00626                 ExEnv::out0() << "tiles = (" << t0 << "," << t1
00627                 << ")  key = " << tile_key  << " sum = " << sum << endl;
00628 #endif
00629               } // if this task will process this tile
00630 
00631             } // t1
00632           } // t0
00633 
00634           info_->mem()->free_local_double(data);
00635 
00636         } // if this task can access the integrals
00637 
00638       } // MTensor<2>::convert() from RefSCMatrix or RefSymmSCMatrix
00639 
00640       void print(const std::string& label, std::ostream& os = ExEnv::out0()) const {
00641         tensor_->print(label, os);
00642       }
00643 
00644     private:
00645       Info const* info_;
00646       Tensor* tensor_;
00647       tile_ranges range_;  // range of tiles that defines this tensor. See info_ for tiling info
00648 
00649       // given a range of indices, return pair<mmin,mmax> where mmin and mmax are the minimum and maximum mapped indices
00650       element_range map_range(const element_range& rng,
00651                               const element_index_map& eimap) {
00652         element_range result(eimap[rng.first],
00653                              eimap[rng.first]);
00654         for(element_index i=rng.first; i != rng.second; ++i) {
00655           const element_index ii = eimap[i];
00656           result.first = std::min(result.first, ii);
00657           result.second = std::max(result.second, ii);
00658         }
00659         return result;
00660       }
00661 
00662       // given tile range [start, fence), element map, and element_range, result[t] is true if all elements within
00663       // the tile map to inside element_range
00664       std::vector<bool>
00665       tiles_in_erange(const tile_range& range,
00666                       const element_index_map& eimap,
00667                       const element_range& erange) {
00668 
00669         std::vector<bool> result(range.second, false);
00670         for(long t=range.first; t!=range.second; ++t) {
00671           const long start = info_->get_offset(t);
00672           const long size = info_->get_range(t);
00673           const long fence = start + size;
00674           const element_range tile(start, fence);
00675           // map elements
00676           const element_range mapped_tile = map_range(tile, eimap);
00677           result[t] = in(mapped_tile,erange);
00678         }
00679         return result;
00680       }
00681 
00682   };
00683 
00684 } // end of namespace sc
00685 
00686 #include <chemistry/qc/ccr12/mtensor.timpl.h>
00687 
00688 #endif // end of header guard
00689 
00690 // Local Variables:
00691 // mode: c++
00692 // c-file-style: "CLJ-CONDENSED"
00693 // End:

Generated at Sat Jul 7 2012 11:52:44 for MPQC 3.0.0-alpha using the documentation package Doxygen 1.8.0.