|
MPQC
3.0.0-alpha
|
00001 // 00002 // mtensor.h 00003 // 00004 // Copyright (C) 2009 Edward Valeev 00005 // 00006 // Author: Edward Valeev <evaleev@vt.edu> 00007 // Maintainer: EV 00008 // 00009 // This file is part of the SC Toolkit. 00010 // 00011 // The SC Toolkit is free software; you can redistribute it and/or modify 00012 // it under the terms of the GNU Library General Public License as published by 00013 // the Free Software Foundation; either version 2, or (at your option) 00014 // any later version. 00015 // 00016 // The SC Toolkit is distributed in the hope that it will be useful, 00017 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00018 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00019 // GNU Library General Public License for more details. 00020 // 00021 // You should have received a copy of the GNU Library General Public License 00022 // along with the SC Toolkit; see the file COPYING.LIB. If not, write to 00023 // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 00024 // 00025 // The U.S. Government is granted a limited license as per AL 91-7. 00026 // 00027 00028 #ifndef _ccr12_mtensor_h 00029 #define _ccr12_mtensor_h 00030 00031 #include <numeric> 00032 #include <chemistry/qc/ccr12/tensor.h> 00033 #include <chemistry/qc/ccr12/ccr12_info.h> 00034 #include <math/scmat/matrix.h> 00035 00036 namespace { 00037 void print_tile(double* data, int n01, int n23) { 00038 using namespace sc; 00039 RefSCMatrix mat = SCMatrixKit::default_matrixkit()->matrix(new SCDimension(n01), 00040 new SCDimension(n23)); 00041 mat.assign(data); 00042 mat.print("tile"); 00043 } 00044 00045 } 00046 00047 namespace sc { 00048 00050 template <typename I> inline std::pair<I,I> intersect(const std::pair<I,I>& range1, const std::pair<I,I>& range2) { 00051 I start_max = std::max(range1.first, range2.first); 00052 I fence_min = std::min(range1.second, range2.second); 00053 if (start_max < fence_min) return make_pair(start_max, fence_min); 00054 return make_pair(I(0),I(0)); 00055 } 00056 00058 template <typename I> inline bool in(const std::pair<I,I>& r, const std::pair<I,I>& range) { 00059 return r.first >= range.first && r.second <= range.second; 00060 } 00062 template <typename I> inline bool in(I i, const std::pair<I,I>& range) { 00063 return i >= range.first && i < range.second; 00064 } 00065 00067 00069 template <size_t NDIM> 00070 class MTensor { 00071 public: 00072 typedef CCR12_Info Info; 00073 typedef long tile_index; 00074 typedef long element_index; 00075 typedef std::pair<tile_index, tile_index> tile_range; //< [start, fence) 00076 typedef std::vector<tile_range> tile_ranges; 00077 typedef std::pair<element_index, element_index> element_range; //< [start, fence) 00078 typedef std::vector<element_range> element_ranges; 00079 typedef std::vector<element_index> element_index_map; 00080 static const size_t ndim = NDIM; 00081 00085 MTensor(Info const* info, 00086 Tensor* tensor, 00087 const tile_ranges& range) : 00088 tensor_(tensor), info_(info), range_(range) 00089 { 00090 assert(range.size() == NDIM); 00091 } 00092 00104 void convert(const Ref<DistArray4>& src, unsigned int tbtype, 00105 const element_index_map& eimap0, 00106 const element_index_map& eimap1, 00107 const element_index_map& eimap2, 00108 const element_index_map& eimap3, 00109 element_ranges const* mapped_element_ranges = 0, 00110 bool src_is_2301 = false); 00111 00121 template <typename SrcType> void convert(const SrcType& src, 00122 int n1, 00123 int n3, 00124 const element_index_map& eimap0, 00125 const element_index_map& eimap1, 00126 const element_index_map& eimap2, 00127 const element_index_map& eimap3, 00128 element_ranges const* mapped_element_ranges = 0) { 00129 00130 assert(NDIM == 4); 00131 assert(mapped_element_ranges != 0); 00132 00133 // determine which tiles map to the allowed element ranges in src 00134 const std::vector<bool> tile0_in_erange = tiles_in_erange(range_[0], eimap0, (*mapped_element_ranges)[0]); 00135 const std::vector<bool> tile1_in_erange = tiles_in_erange(range_[1], eimap1, (*mapped_element_ranges)[1]); 00136 const std::vector<bool> tile2_in_erange = tiles_in_erange(range_[2], eimap2, (*mapped_element_ranges)[2]); 00137 const std::vector<bool> tile3_in_erange = tiles_in_erange(range_[3], eimap3, (*mapped_element_ranges)[3]); 00138 00139 // split work over tasks assuming that all tasks can access src 00140 const int nproc_with_ints = info_->mem()->n(); 00141 const int me = info_->mem()->me(); 00142 00143 const size_t ntiles0 = range_[0].second - range_[0].first; 00144 const size_t ntiles1 = range_[1].second - range_[1].first; 00145 const size_t ntiles2 = range_[2].second - range_[2].first; 00146 const size_t ntiles3 = range_[3].second - range_[3].first; 00147 00148 // if espace0 is equivalent to espace1, or espace2 equivalent to espace3, 00149 // can compute antisymmetric integrals using only (espace0 espace1| espace2 espace3) 00150 // else also need (espace0 espace1| espace3 espace2), or an equivalent 00151 const bool espace0_eq_espace1 = ((*mapped_element_ranges)[0] == (*mapped_element_ranges)[1]); 00152 const bool espace2_eq_espace3 = ((*mapped_element_ranges)[2] == (*mapped_element_ranges)[3]); 00153 const bool can_always_antisymmetrize = (espace0_eq_espace1 || espace2_eq_espace3); 00154 assert(can_always_antisymmetrize == true); // this is likely to break the logic downstream 00155 // I clearly don't understand enough at the moment 00156 00157 // TODO check if equivalence of MTensor spaces is matched by their relationship in src 00158 00159 // assume that src is accessible from all nodes 00160 if (1) { 00161 00162 // how do I determine max size of the tiles? 00163 const size_t maxsize1 = info_->maxtilesize(); 00164 const size_t maxtilesize = maxsize1 * maxsize1 * maxsize1 * maxsize1; 00165 double* data = info_->mem()->malloc_local_double(maxtilesize); 00166 00167 for (long t0 = range_[0].first; t0 < range_[0].second; ++t0) { 00168 const long size0 = info_->get_range(t0); 00169 const long offset0 = info_->get_offset(t0); 00170 const long spin0 = info_->get_spin(t0); 00171 const bool in_erange0 = tile0_in_erange[t0]; 00172 00173 for (long t1 = std::max(range_[1].first, t0); t1 < range_[1].second; ++t1) { 00174 const long size1 = info_->get_range(t1); 00175 const long offset1 = info_->get_offset(t1); 00176 const long spin1 = info_->get_spin(t1); 00177 const bool in_erange1 = tile1_in_erange[t1]; 00178 00179 const bool aaaa = (spin0 == spin1); 00180 00181 for (long t2 = range_[2].first; t2 < range_[2].second; ++t2) { 00182 const long size2 = info_->get_range(t2); 00183 const long offset2 = info_->get_offset(t2); 00184 const long spin2 = info_->get_spin(t2); 00185 const bool in_erange2 = tile2_in_erange[t2]; 00186 00187 const bool abab = ((spin0 != spin1) && (spin0 == spin2)); 00188 const bool abba = ((spin0 != spin1) && (spin0 != spin2)); 00189 00190 for (long t3 = std::max(range_[3].first, t2); t3 < range_[3].second; ++t3) { 00191 const long size3 = info_->get_range(t3); 00192 const long offset3 = info_->get_offset(t3); 00193 const long spin3 = info_->get_spin(t3); 00194 const bool in_erange3 = tile3_in_erange[t3]; 00195 00196 // cartesian ordinal is used as a key for hashing tiles 00197 const long tile_key = (t3-range_[3].first) + 00198 ntiles3 * ( (t2-range_[2].first) + 00199 ntiles2 * ( (t1-range_[1].first) + 00200 ntiles1 * (t0-range_[0].first) 00201 ) 00202 ); 00203 00204 // since all procs can access integrals, use locality 00205 if (tensor_->exists(tile_key) && tensor_->is_this_local(tile_key)) { 00206 00207 if ((!in_erange0 || !in_erange1 || !in_erange2 || !in_erange3)) 00208 continue; 00209 00210 // if erange[0] == erange[1], clearly can antisymmetrize 0 and 1 00211 // if erange[2] == erange[3], clearly can antisymmetrize 2 and 3 00212 bool antisymmetrize01 = espace0_eq_espace1; 00213 bool antisymmetrize23 = espace2_eq_espace3; 00214 // prefer to antisymmetrize 23 since can do that with 1 block of DistArray4 00215 if (antisymmetrize23) antisymmetrize01 = false; 00216 00217 long size = size0 * size1 * size2 * size3; 00218 std::fill(data, data+size, 0.0); 00219 00220 long i0123 = 0; 00221 for (int i0 = 0; i0 < size0; ++i0) { 00222 const int ii0 = eimap0[offset0 + i0]; 00223 for (int i1 = 0; i1 < size1; ++i1) { 00224 const int ii1 = eimap1[offset1 + i1]; 00225 00226 //if (ii0 == ii1 && aaaa) continue; 00227 00228 // split the work in round robin fashion by processors who have access to integrals 00229 long i01; 00230 if (antisymmetrize01) { 00231 const long iii0 = std::max(ii0, ii1); 00232 const long iii1 = std::min(ii0, ii1); 00233 i01 = iii0 * (iii0+1)/2 + iii1; 00234 } else { 00235 i01 = ii0 * n1 + ii1; 00236 } 00237 const long i01_proc = i01 % nproc_with_ints; 00238 if (i01_proc != me) 00239 continue; 00240 00241 if (antisymmetrize23) { 00242 00243 for (int i2 = 0; i2 < size2; ++i2) { 00244 const int ii2 = eimap2[offset2 + i2]; 00245 for (int i3 = 0; i3 < size3; ++i3, ++i0123) { 00246 const int ii3 = eimap3[offset3 + i3]; 00247 00248 const int i23 = ii2 * n3 + ii3; 00249 const int i32 = ii3 * n3 + ii2; 00250 00251 if (abab) { 00252 const double integral_0123 = src(i01,i23); 00253 data[i0123] = integral_0123; 00254 } else if (abba) { 00255 const double integral_0132 = src(i01,i32); 00256 data[i0123] = -integral_0132; 00257 } else { // aaaa 00258 const double integral_0123 = src(i01,i23); 00259 const double integral_0132 = src(i01,i32); 00260 data[i0123] = integral_0123 - integral_0132; 00261 } 00262 00263 } 00264 } 00265 00266 } // antisymmetrize23 00267 00268 else if (antisymmetrize01) { // means can't antisymmetrize 2 and 3 00269 00270 const int i10 = i1 * n1 + i0; 00271 00272 for (int i2 = 0; i2 < size2; ++i2) { 00273 const int ii2 = eimap2[offset2 + i2]; 00274 for (int i3 = 0; i3 < size3; ++i3, ++i0123) { 00275 const int ii3 = eimap3[offset3 + i3]; 00276 00277 const int i23 = ii2 * n3 + ii3; 00278 00279 if (abab) { 00280 const double integral_0123 = src(i01,i23); 00281 data[i0123] = integral_0123; 00282 } else if (abba) { 00283 const double integral_1023 = src(i10,i23); 00284 data[i0123] = -integral_1023; 00285 } else { // aaaa 00286 const double integral_0123 = src(i01,i23); 00287 const double integral_1023 = src(i10,i23); 00288 data[i0123] = integral_0123 - integral_1023; 00289 } 00290 00291 } 00292 } 00293 00294 } // antisymmetrize01 00295 00296 else { // do not antisymmetrize 00297 assert(false); // should not happen, but only SMITH knows :-) 00298 } 00299 00300 } 00301 } 00302 00303 tensor_->put_block(tile_key, data); 00304 00305 #if 0 00306 const double sum = std::accumulate(data, data+size, 0.0); 00307 ExEnv::out0() << "tiles = (" << t0 << "," << t1 << "," << t2 << "," << t3 00308 << ") key = " << tile_key << " sum = " << sum << std::endl; 00309 00310 for(int i=0; i<size; ++i) { 00311 ExEnv::out0() << "data[" << i << "] = " << data[i] << std::endl; 00312 } 00313 #endif 00314 } // if this task will process this tile 00315 00316 } // t3 00317 } // t2 00318 } // t1 00319 } // t0 00320 00321 info_->mem()->free_local_double(data); 00322 00323 } // if this task can access the integrals 00324 00325 } // MTensor<4>::convert() from RefSCMatrix or RefSymmSCMatrix 00326 00336 template <typename SrcType> void convert(const SrcType& src0, const SrcType& src1, 00337 int n0, 00338 int n1, 00339 int n2, 00340 int n3, 00341 const element_index_map& eimap0, 00342 const element_index_map& eimap1, 00343 const element_index_map& eimap2, 00344 const element_index_map& eimap3, 00345 element_ranges const* mapped_element_ranges = 0, 00346 bool src1_is_1023 = false) { 00347 00348 assert(NDIM == 4); 00349 assert(mapped_element_ranges != 0); 00350 00351 // determine which tiles map to the allowed element ranges in src 00352 const std::vector<bool> tile0_in_erange = tiles_in_erange(range_[0], eimap0, (*mapped_element_ranges)[0]); 00353 const std::vector<bool> tile1_in_erange = tiles_in_erange(range_[1], eimap1, (*mapped_element_ranges)[1]); 00354 const std::vector<bool> tile2_in_erange = tiles_in_erange(range_[2], eimap2, (*mapped_element_ranges)[2]); 00355 const std::vector<bool> tile3_in_erange = tiles_in_erange(range_[3], eimap3, (*mapped_element_ranges)[3]); 00356 00357 // split work over tasks assuming that all tasks can access src 00358 const int nproc_with_ints = info_->mem()->n(); 00359 const int me = info_->mem()->me(); 00360 00361 const size_t ntiles0 = range_[0].second - range_[0].first; 00362 const size_t ntiles1 = range_[1].second - range_[1].first; 00363 const size_t ntiles2 = range_[2].second - range_[2].first; 00364 const size_t ntiles3 = range_[3].second - range_[3].first; 00365 00366 // if espace0 is equivalent to espace1, or espace2 equivalent to espace3, 00367 // can compute antisymmetric integrals using only (espace0 espace1| espace2 espace3) 00368 // else also need (espace0 espace1| espace3 espace2), or an equivalent 00369 const bool espace0_eq_espace1 = ((*mapped_element_ranges)[0] == (*mapped_element_ranges)[1]); 00370 const bool espace2_eq_espace3 = ((*mapped_element_ranges)[2] == (*mapped_element_ranges)[3]); 00371 const bool can_always_antisymmetrize = (espace0_eq_espace1 || espace2_eq_espace3); 00372 assert(can_always_antisymmetrize == true); // this is likely to break the logic downstream 00373 // I clearly don't understand enough at the moment 00374 00375 // TODO check if equivalence of MTensor spaces is matched by their relationship in src 00376 00377 // assume that src is accessible from all nodes 00378 if (1) { 00379 00380 // how do I determine max size of the tiles? 00381 const size_t maxsize1 = info_->maxtilesize(); 00382 const size_t maxtilesize = maxsize1 * maxsize1 * maxsize1 * maxsize1; 00383 double* data = info_->mem()->malloc_local_double(maxtilesize); 00384 00385 for (long t0 = range_[0].first; t0 < range_[0].second; ++t0) { 00386 const long size0 = info_->get_range(t0); 00387 const long offset0 = info_->get_offset(t0); 00388 const long spin0 = info_->get_spin(t0); 00389 const bool in_erange0 = tile0_in_erange[t0]; 00390 00391 for (long t1 = std::max(range_[1].first, t0); t1 < range_[1].second; ++t1) { 00392 const long size1 = info_->get_range(t1); 00393 const long offset1 = info_->get_offset(t1); 00394 const long spin1 = info_->get_spin(t1); 00395 const bool in_erange1 = tile1_in_erange[t1]; 00396 00397 const bool aaaa = (spin0 == spin1); 00398 00399 for (long t2 = range_[2].first; t2 < range_[2].second; ++t2) { 00400 const long size2 = info_->get_range(t2); 00401 const long offset2 = info_->get_offset(t2); 00402 const long spin2 = info_->get_spin(t2); 00403 const bool in_erange2 = tile2_in_erange[t2]; 00404 00405 const bool abab = ((spin0 != spin1) && (spin0 == spin2)); 00406 const bool abba = ((spin0 != spin1) && (spin0 != spin2)); 00407 00408 for (long t3 = std::max(range_[3].first, t2); t3 < range_[3].second; ++t3) { 00409 const long size3 = info_->get_range(t3); 00410 const long offset3 = info_->get_offset(t3); 00411 const long spin3 = info_->get_spin(t3); 00412 const bool in_erange3 = tile3_in_erange[t3]; 00413 00414 // cartesian ordinal is used as a key for hashing tiles 00415 const long tile_key = (t3-range_[3].first) + 00416 ntiles3 * ( (t2-range_[2].first) + 00417 ntiles2 * ( (t1-range_[1].first) + 00418 ntiles1 * (t0-range_[0].first) 00419 ) 00420 ); 00421 00422 // since all procs can access integrals, use locality 00423 if (tensor_->exists(tile_key) && tensor_->is_this_local(tile_key)) { 00424 00425 if ((!in_erange0 || !in_erange1 || !in_erange2 || !in_erange3)) 00426 continue; 00427 00428 // if erange[0] == erange[1], clearly can antisymmetrize 0 and 1 00429 // if erange[2] == erange[3], clearly can antisymmetrize 2 and 3 00430 bool antisymmetrize01 = espace0_eq_espace1; 00431 bool antisymmetrize23 = espace2_eq_espace3; 00432 // prefer to antisymmetrize 23 since can do that with 1 block of DistArray4 00433 if (antisymmetrize23) antisymmetrize01 = false; 00434 00435 long size = size0 * size1 * size2 * size3; 00436 std::fill(data, data+size, 0.0); 00437 00438 long i0123 = 0; 00439 for (int i0 = 0; i0 < size0; ++i0) { 00440 const int ii0 = eimap0[offset0 + i0]; 00441 for (int i1 = 0; i1 < size1; ++i1) { 00442 const int ii1 = eimap1[offset1 + i1]; 00443 00444 //if (ii0 == ii1 && aaaa) continue; 00445 00446 // split the work in round robin fashion by processors who have access to integrals 00447 long i01; 00448 if (antisymmetrize01) { 00449 const long iii0 = std::max(ii0, ii1); 00450 const long iii1 = std::min(ii0, ii1); 00451 i01 = iii0 * (iii0+1)/2 + iii1; 00452 } else { 00453 i01 = ii0 * n1 + ii1; 00454 } 00455 const long i01_proc = i01 % nproc_with_ints; 00456 if (i01_proc != me) 00457 continue; 00458 00459 if (antisymmetrize23) { 00460 00461 for (int i2 = 0; i2 < size2; ++i2) { 00462 const int ii2 = eimap2[offset2 + i2]; 00463 for (int i3 = 0; i3 < size3; ++i3, ++i0123) { 00464 const int ii3 = eimap3[offset3 + i3]; 00465 00466 const int i23 = ii2 * n3 + ii3; 00467 const int i32 = ii3 * n2 + ii2; 00468 00469 if (abab) { 00470 const double integral_0123 = src0(i01,i23); 00471 data[i0123] = integral_0123; 00472 } else if (abba) { 00473 const double integral_0132 = src1(i01,i32); 00474 data[i0123] = -integral_0132; 00475 } else { // aaaa 00476 const double integral_0123 = src0(i01,i23); 00477 const double integral_0132 = src1(i01,i32); 00478 data[i0123] = integral_0123 - integral_0132; 00479 } 00480 00481 } 00482 } 00483 00484 } // antisymmetrize23 00485 00486 else if (antisymmetrize01) { // means can't antisymmetrize 2 and 3 00487 00488 const int i10 = i1 * n0 + i0; 00489 00490 for (int i2 = 0; i2 < size2; ++i2) { 00491 const int ii2 = eimap2[offset2 + i2]; 00492 for (int i3 = 0; i3 < size3; ++i3, ++i0123) { 00493 const int ii3 = eimap3[offset3 + i3]; 00494 00495 const int i23 = ii2 * n3 + ii3; 00496 00497 if (abab) { 00498 const double integral_0123 = src0(i01,i23); 00499 data[i0123] = integral_0123; 00500 } else if (abba) { 00501 const double integral_1023 = src1(i10,i23); 00502 data[i0123] = -integral_1023; 00503 } else { // aaaa 00504 const double integral_0123 = src0(i01,i23); 00505 const double integral_1023 = src1(i10,i23); 00506 data[i0123] = integral_0123 - integral_1023; 00507 } 00508 00509 } 00510 } 00511 00512 } // antisymmetrize01 00513 00514 else { // do not antisymmetrize 00515 assert(false); // should not happen, but only SMITH knows :-) 00516 } 00517 00518 } 00519 } 00520 00521 tensor_->put_block(tile_key, data); 00522 00523 #if 0 00524 const double sum = std::accumulate(data, data+size, 0.0); 00525 ExEnv::out0() << "tiles = (" << t0 << "," << t1 << "," << t2 << "," << t3 00526 << ") key = " << tile_key << " sum = " << sum << std::endl; 00527 00528 for(int i=0; i<size; ++i) { 00529 ExEnv::out0() << "data[" << i << "] = " << data[i] << std::endl; 00530 } 00531 #endif 00532 } // if this task will process this tile 00533 00534 } // t3 00535 } // t2 00536 } // t1 00537 } // t0 00538 00539 info_->mem()->free_local_double(data); 00540 00541 } // if this task can access the integrals 00542 00543 } // MTensor<4>::convert() from RefSCMatrix or RefSymmSCMatrix 00544 00551 template <typename SrcType> void convert(const SrcType& src, 00552 const element_index_map& eimap0, 00553 const element_index_map& eimap1, 00554 element_ranges const* mapped_element_ranges = 0) { 00555 00556 assert(NDIM == 2); 00557 00558 // determine which tiles map to the allowed element ranges in src 00559 const std::vector<bool> tile0_in_erange = tiles_in_erange(range_[0], eimap0, (*mapped_element_ranges)[0]); 00560 const std::vector<bool> tile1_in_erange = tiles_in_erange(range_[1], eimap1, (*mapped_element_ranges)[1]); 00561 00562 // split work over tasks assuming that all tasks can access src 00563 const int nproc_with_ints = info_->mem()->n(); 00564 const int me = info_->mem()->me(); 00565 00566 const size_t ntiles0 = range_[0].second - range_[0].first; 00567 const size_t ntiles1 = range_[1].second - range_[1].first; 00568 00569 // check that the needed index ranges are present in src 00570 #if 0 // assume that the user knows what she's doing 00571 element_range src_range[2]; 00572 src_range[0] = this->map_range(range_[0], eimap0); 00573 src_range[1] = this->map_range(range_[1], eimap1); 00574 assert(src_range[0].second <= src->ni()); 00575 assert(src_range[1].second <= src->nj()); 00576 #endif 00577 00578 // TODO check if equivalence of MTensor spaces is matched by their relationship in src 00579 00580 // assume that src is accessible from all nodes 00581 if (1) { 00582 00583 // how do I determine max size of the tiles? 00584 const size_t maxsize1 = info_->maxtilesize(); 00585 const size_t maxtilesize = maxsize1 * maxsize1; 00586 double* data = info_->mem()->malloc_local_double(maxtilesize); 00587 00588 for (long t0 = range_[0].first; t0 < range_[0].second; ++t0) { 00589 const long size0 = info_->get_range(t0); 00590 const long offset0 = info_->get_offset(t0); 00591 const long spin0 = info_->get_spin(t0); 00592 const bool in_erange0 = tile0_in_erange[t0]; 00593 00594 for (long t1 = range_[1].first; t1 < range_[1].second; ++t1) { 00595 const long size1 = info_->get_range(t1); 00596 const long offset1 = info_->get_offset(t1); 00597 const long spin1 = info_->get_spin(t1); 00598 const bool in_erange1 = tile1_in_erange[t1]; 00599 00600 // cartesian ordinal is used as a key for hashing tiles 00601 const long tile_key = (t1-range_[1].first) + ntiles1 * (t0-range_[0].first); 00602 00603 // since all procs can access integrals, use locality 00604 if (tensor_->exists(tile_key) && tensor_->is_this_local(tile_key)) { 00605 00606 if ((!in_erange0 || !in_erange1)) 00607 continue; 00608 00609 long size = size0 * size1; 00610 std::fill(data, data+size, 0.0); 00611 00612 long i01 = 0; 00613 for (int i0 = 0; i0 < size0; ++i0) { 00614 const int ii0 = eimap0[offset0 + i0]; 00615 for (int i1 = 0; i1 < size1; ++i1, ++i01) { 00616 const int ii1 = eimap1[offset1 + i1]; 00617 00618 data[i01] = src(ii0, ii1); 00619 } 00620 } 00621 00622 tensor_->add_block(tile_key, data); 00623 00624 #if 0 00625 const double sum = std::accumulate(data, data+size, 0.0); 00626 ExEnv::out0() << "tiles = (" << t0 << "," << t1 00627 << ") key = " << tile_key << " sum = " << sum << endl; 00628 #endif 00629 } // if this task will process this tile 00630 00631 } // t1 00632 } // t0 00633 00634 info_->mem()->free_local_double(data); 00635 00636 } // if this task can access the integrals 00637 00638 } // MTensor<2>::convert() from RefSCMatrix or RefSymmSCMatrix 00639 00640 void print(const std::string& label, std::ostream& os = ExEnv::out0()) const { 00641 tensor_->print(label, os); 00642 } 00643 00644 private: 00645 Info const* info_; 00646 Tensor* tensor_; 00647 tile_ranges range_; // range of tiles that defines this tensor. See info_ for tiling info 00648 00649 // given a range of indices, return pair<mmin,mmax> where mmin and mmax are the minimum and maximum mapped indices 00650 element_range map_range(const element_range& rng, 00651 const element_index_map& eimap) { 00652 element_range result(eimap[rng.first], 00653 eimap[rng.first]); 00654 for(element_index i=rng.first; i != rng.second; ++i) { 00655 const element_index ii = eimap[i]; 00656 result.first = std::min(result.first, ii); 00657 result.second = std::max(result.second, ii); 00658 } 00659 return result; 00660 } 00661 00662 // given tile range [start, fence), element map, and element_range, result[t] is true if all elements within 00663 // the tile map to inside element_range 00664 std::vector<bool> 00665 tiles_in_erange(const tile_range& range, 00666 const element_index_map& eimap, 00667 const element_range& erange) { 00668 00669 std::vector<bool> result(range.second, false); 00670 for(long t=range.first; t!=range.second; ++t) { 00671 const long start = info_->get_offset(t); 00672 const long size = info_->get_range(t); 00673 const long fence = start + size; 00674 const element_range tile(start, fence); 00675 // map elements 00676 const element_range mapped_tile = map_range(tile, eimap); 00677 result[t] = in(mapped_tile,erange); 00678 } 00679 return result; 00680 } 00681 00682 }; 00683 00684 } // end of namespace sc 00685 00686 #include <chemistry/qc/ccr12/mtensor.timpl.h> 00687 00688 #endif // end of header guard 00689 00690 // Local Variables: 00691 // mode: c++ 00692 // c-file-style: "CLJ-CONDENSED" 00693 // End: