25 #include "getfem/getfem_generic_assembly_compile_and_exec.h"
26 #include "getfem/getfem_generic_assembly_functions_and_operators.h"
31 #define GA_DEBUG_INFO(a)
// Copy v1 scaled by a into v2 (v2 <- a*v1), with the body manually
// unrolled in groups of four assignments for speed.
// NOTE(review): this extract omits some original lines — the 4-way unroll
// loop header and the remainder-loop header are not visible here.
38 template <
class VEC1,
class VEC2>
39 inline void copy_scaled_4(
const VEC1 &v1,
const scalar_type a, VEC2 &v2) {
40 auto it1 = v1.begin();
41 auto it2 = v2.begin(), it2e = v2.end(); // it2e presumably bounds a remainder loop not visible in this extract
44 *it2++ = (*it1++) * a;
45 *it2++ = (*it1++) * a;
46 *it2++ = (*it1++) * a;
47 *it2++ = (*it1++) * a;
50 *it2++ = (*it1++) * a; // remainder copy (loop header missing from extract)
// Accumulate v1 scaled by a into v2 (v2 += a*v1), 4-way unrolled;
// accumulating (+=) counterpart of copy_scaled_4.
// NOTE(review): this extract omits some original lines (loop headers).
53 template <
class VEC1,
class VEC2>
54 inline void add_scaled_4(
const VEC1 &v1,
const scalar_type a, VEC2 &v2) {
55 auto it1 = v1.begin();
56 auto it2 = v2.begin(), it2e = v2.end(); // it2e presumably bounds a remainder loop not visible here
59 *it2++ += (*it1++) * a;
60 *it2++ += (*it1++) * a;
61 *it2++ += (*it1++) * a;
62 *it2++ += (*it1++) * a;
65 *it2++ += (*it1++) * a; // remainder accumulation (loop header missing from extract)
// Copy v1 scaled by a into v2 (v2 <- a*v1), 8-way unrolled variant of
// copy_scaled_4.
// NOTE(review): this extract omits some original lines (loop headers).
68 template <
class VEC1,
class VEC2>
69 inline void copy_scaled_8(
const VEC1 &v1,
const scalar_type a, VEC2 &v2) {
70 auto it1 = v1.begin();
71 auto it2 = v2.begin(), it2e = v2.end(); // it2e presumably bounds a remainder loop not visible here
74 *it2++ = (*it1++) * a;
75 *it2++ = (*it1++) * a;
76 *it2++ = (*it1++) * a;
77 *it2++ = (*it1++) * a;
78 *it2++ = (*it1++) * a;
79 *it2++ = (*it1++) * a;
80 *it2++ = (*it1++) * a;
81 *it2++ = (*it1++) * a;
84 *it2++ = (*it1++) * a; // remainder copy (loop header missing from extract)
// Accumulate v1 scaled by a into v2 (v2 += a*v1), 8-way unrolled;
// accumulating (+=) counterpart of copy_scaled_8.
// NOTE(review): this extract omits some original lines (loop headers).
87 template <
class VEC1,
class VEC2>
88 inline void add_scaled_8(
const VEC1 &v1,
const scalar_type a, VEC2 &v2) {
89 auto it1 = v1.begin();
90 auto it2 = v2.begin(), it2e = v2.end(); // it2e presumably bounds a remainder loop not visible here
93 *it2++ += (*it1++) * a;
94 *it2++ += (*it1++) * a;
95 *it2++ += (*it1++) * a;
96 *it2++ += (*it1++) * a;
97 *it2++ += (*it1++) * a;
98 *it2++ += (*it1++) * a;
99 *it2++ += (*it1++) * a;
100 *it2++ += (*it1++) * a;
103 *it2++ += (*it1++) * a; // remainder accumulation (loop header missing from extract)
// Lexicographic strict-weak ordering of gauss_pt_corresp objects so they
// can be used as keys in ordered containers: compares pai, then node
// count, then each node, then the two geometric transformations.
// NOTE(review): the final return statement(s) of this function are not
// visible in this extract.
106 bool operator <(
const gauss_pt_corresp &gpc1,
107 const gauss_pt_corresp &gpc2) {
108 if (gpc1.pai != gpc2.pai)
109 return (gpc1.pai < gpc2.pai );
110 if (gpc1.nodes.size() != gpc2.nodes.size())
111 return (gpc1.nodes.size() < gpc2.nodes.size());
112 for (
size_type i = 0; i < gpc1.nodes.size(); ++i)
113 if (gpc1.nodes[i] != gpc2.nodes[i])
114 return (gpc1.nodes[i] < gpc2.nodes[i]);
115 if (gpc1.pgt1 != gpc2.pgt1)
116 return (gpc1.pgt1 < gpc2.pgt1);
117 if (gpc1.pgt2 != gpc2.pgt2)
118 return (gpc1.pgt2 < gpc2.pgt2);
// Lexicographic strict-weak ordering of region_mim keys: by integration
// method, then region, then psd.
122 bool operator <(
const ga_instruction_set::region_mim &rm1,
123 const ga_instruction_set::region_mim &rm2) {
124 if (rm1.mim() != rm2.mim())
return (rm1.mim() < rm2.mim());
125 if (rm1.region() != rm2.region())
return (rm1.region() < rm2.region());
126 return (rm1.psd() < rm2.psd());
// Assembly instruction: copy the qdim im_data values attached to the
// current integration point (index ipt in vector U) into tensor t, after
// asserting that the im_data is used with its original integration
// method and that data exist for the current point.
// NOTE(review): this extract omits some original lines (member
// declarations, exec() header, closing braces).
133 struct ga_instruction_extract_local_im_data :
public ga_instruction {
136 papprox_integration &pai;
137 const base_vector &U;
138 const fem_interpolation_context &ctx;
141 GA_DEBUG_INFO(
"Instruction: extract local im data");
145 GMM_ASSERT1(imd.linked_mesh_im().int_method_of_element(cv)
146 ->approx_method() == pai,
"Im data have to be used only "
147 "on their original integration method.");
149 size_type ipt = imd.filtered_index_of_point(cv, ctx.ii());
151 "Im data with no data on the current integration point.");
152 auto it = U.begin()+ipt*qdim; // start of the qdim values for this point
153 std::copy(it, it+qdim, t.begin());
156 ga_instruction_extract_local_im_data
157 (base_tensor &t_,
const im_data &imd_,
const base_vector &U_,
158 papprox_integration &pai_,
const fem_interpolation_context &ctx_,
160 : t(t_), imd(imd_), pai(pai_), U(U_), ctx(ctx_), qdim(qdim_),
// Assembly instruction: extract ("slice") the local dof values of the
// current element from the global vector U into the local coeff vector,
// using the qmult1/qmult2 vectorization multipliers.
// NOTE(review): this extract omits some original lines (member
// declarations, the slicing call's receiver, closing braces).
165 struct ga_instruction_slice_local_dofs :
public ga_instruction {
167 const base_vector &U;
168 const fem_interpolation_context &ctx;
172 GA_DEBUG_INFO(
"Instruction: Slice local dofs");
173 GMM_ASSERT1(qmult1 != 0 && qmult2 != 0,
"Internal error");
175 coeff, qmult1, qmult2); // tail of the (partially missing) slicing call
178 ga_instruction_slice_local_dofs(
const mesh_fem &mf_,
const base_vector &U_,
179 const fem_interpolation_context &ctx_,
182 : mf(mf_), U(U_), ctx(ctx_), coeff(coeff_),
183 qmult1(qmult1_), qmult2(qmult2_) {}
// Assembly instruction: refresh the cached fem_precomp (pfp) when the fem
// of the current element or the geotrans precomp point table has changed,
// pulling a matching precomp from fp_pool.
// NOTE(review): this extract omits some original lines (member
// declarations, closing braces).
186 struct ga_instruction_update_pfp :
public ga_instruction {
188 const fem_interpolation_context &ctx;
189 fem_precomp_pool &fp_pool;
193 GA_DEBUG_INFO(
"Instruction: Pfp update");
194 if (ctx.have_pgp()) {
196 ? ctx.convex_num() : mf.convex_index().first_true(); // fall back to the first convex of the mesh_fem
197 pfem pf = mf.fem_of_element(cv);
198 if (!pfp || pf != pfp->get_pfem() ||
199 ctx.pgp()->get_ppoint_tab() != pfp->get_ppoint_tab()) {
200 pfp = fp_pool(pf, ctx.pgp()->get_ppoint_tab()); // fetch/update cached precomp
208 ga_instruction_update_pfp(
const mesh_fem &mf_, pfem_precomp &pfp_,
209 const fem_interpolation_context &ctx_,
210 fem_precomp_pool &fp_pool_)
211 : mf(mf_), ctx(ctx_), fp_pool(fp_pool_), pfp(pfp_) {}
// Assembly instruction: resize the first dimension of tensor t to the
// number of local dofs of the current element times the vectorization
// multiplier Qmult = qdim / target_dim.  mfg (pointer-to-pointer) takes
// precedence over mfn when set, allowing the mesh_fem to be redirected at
// run time (e.g. for variable groups).
// NOTE(review): this extract omits some original lines (member
// declarations, closing braces).
214 struct ga_instruction_first_ind_tensor :
public ga_instruction {
216 const fem_interpolation_context &ctx;
218 const mesh_fem *mfn, **mfg; // static mesh_fem vs. dynamically selected one
221 GA_DEBUG_INFO(
"Instruction: adapt first index of tensor");
222 const mesh_fem &mf = *(mfg ? *mfg : mfn);
223 GA_DEBUG_ASSERT(mfg ? *mfg : mfn,
"Internal error");
224 size_type cv_1 = ctx.is_convex_num_valid()
225 ? ctx.convex_num() : mf.convex_index().first_true();
226 pfem pf = mf.fem_of_element(cv_1);
227 GMM_ASSERT1(pf,
"An element without finite element method defined");
228 size_type Qmult = qdim / pf->target_dim();
230 if (t.sizes()[0] != s)
231 { bgeot::multi_index mi = t.sizes(); mi[0] = s; t.adjust_sizes(mi); } // only reshape when size actually changed
235 ga_instruction_first_ind_tensor(base_tensor &t_,
236 const fem_interpolation_context &ctx_,
238 const mesh_fem **mfg_)
239 : t(t_), ctx(ctx_), qdim(qdim_), mfn(mfn_), mfg(mfg_) {}
// Assembly instruction: resize the second dimension of tensor t to the
// number of local dofs of the current element times Qmult, mirroring
// ga_instruction_first_ind_tensor (from which it inherits) for index 1.
// NOTE(review): this extract omits some original lines (exec() header,
// closing braces).
242 struct ga_instruction_second_ind_tensor
243 :
public ga_instruction_first_ind_tensor {
246 GA_DEBUG_INFO(
"Instruction: adapt second index of tensor");
247 const mesh_fem &mf = *(mfg ? *mfg : mfn);
248 size_type cv_1 = ctx.is_convex_num_valid()
249 ? ctx.convex_num() : mf.convex_index().first_true();
250 pfem pf = mf.fem_of_element(cv_1);
251 GMM_ASSERT1(pf,
// Fix: user-visible error message said "methode"; corrected to "method"
// for consistency with the identical assertions in
// ga_instruction_first_ind_tensor and ga_instruction_two_first_ind_tensor.
"An element without finite element method defined");
252 size_type Qmult = qdim / pf->target_dim();
254 if (t.sizes()[1] != s)
255 { bgeot::multi_index mi = t.sizes(); mi[1] = s; t.adjust_sizes(mi); } // only reshape when size actually changed
259 ga_instruction_second_ind_tensor(base_tensor &t_,
260 fem_interpolation_context &ctx_,
262 const mesh_fem **mfg_)
263 : ga_instruction_first_ind_tensor(t_, ctx_, qdim_, mfn_, mfg_) {}
// Assembly instruction: resize the two first dimensions of tensor t for
// a bilinear (matrix) term — dimension 0 from the test-function mesh_fem
// (ctx1/mf1/qdim1) and dimension 1 from the second one (ctx2/mf2/qdim2).
// As elsewhere, the mfg pointers-to-pointer override mfn when set.
// NOTE(review): this extract omits some original lines (exec() header,
// t.adjust_sizes call, closing braces).
267 struct ga_instruction_two_first_ind_tensor :
public ga_instruction {
269 const fem_interpolation_context &ctx1, &ctx2;
271 const mesh_fem *mfn1, **mfg1;
273 const mesh_fem *mfn2, **mfg2;
276 GA_DEBUG_INFO(
"Instruction: adapt two first indices of tensor");
277 const mesh_fem &mf1 = *(mfg1 ? *mfg1 : mfn1);
278 const mesh_fem &mf2 = *(mfg2 ? *mfg2 : mfn2);
279 size_type cv_1 = ctx1.is_convex_num_valid()
280 ? ctx1.convex_num() : mf1.convex_index().first_true();
281 size_type cv_2 = ctx2.is_convex_num_valid()
282 ? ctx2.convex_num() : mf2.convex_index().first_true();
283 pfem pf1 = mf1.fem_of_element(cv_1);
284 GMM_ASSERT1(pf1,
"An element without finite element method defined");
285 pfem pf2 = mf2.fem_of_element(cv_2);
286 GMM_ASSERT1(pf2,
"An element without finite element method defined");
287 size_type Qmult1 = qdim1 / pf1->target_dim();
288 size_type s1 = pf1->nb_dof(cv_1) * Qmult1;
289 size_type Qmult2 = qdim2 / pf2->target_dim();
290 size_type s2 = pf2->nb_dof(cv_2) * Qmult2;
291 GMM_ASSERT1(s1 > 0 && s2 >0,
"Element without degrees of freedom");
292 if (t.sizes()[0] != s1 || t.sizes()[1] != s2) {
293 bgeot::multi_index mi = t.sizes();
294 mi[0] = s1; mi[1] = s2; // adjust_sizes call not visible in this extract
300 ga_instruction_two_first_ind_tensor
301 (base_tensor &t_,
const fem_interpolation_context &ctx1_,
302 const fem_interpolation_context &ctx2_,
303 size_type qdim1_,
const mesh_fem *mfn1_,
const mesh_fem **mfg1_,
304 size_type qdim2_,
const mesh_fem *mfn2_,
const mesh_fem **mfg2_)
305 : t(t_), ctx1(ctx1_), ctx2(ctx2_), qdim1(qdim1_), mfn1(mfn1_),
306 mfg1(mfg1_), qdim2(qdim2_), mfn2(mfn2_), mfg2(mfg2_) {}
// Assembly instruction: evaluate component n of the current point X
// (real coordinates) into scalar t.
// NOTE(review): the exec() body that performs the assignment is not
// visible in this extract.
310 struct ga_instruction_X_component :
public ga_instruction {
312 const fem_interpolation_context &ctx;
316 GA_DEBUG_INFO(
"Instruction: X component");
321 ga_instruction_X_component
322 (scalar_type &t_,
const fem_interpolation_context &ctx_,
size_type n_)
323 : t(t_), ctx(ctx_), n(n_) {}
// Assembly instruction: copy the real coordinates of the current point
// (ctx.xreal()) into tensor t.
326 struct ga_instruction_X :
public ga_instruction {
328 const fem_interpolation_context &ctx;
331 GA_DEBUG_INFO(
"Instruction: X");
332 GA_DEBUG_ASSERT(t.size() == ctx.xreal().size(),
"dimensions mismatch");
333 gmm::copy(ctx.xreal(), t.as_vector());
337 ga_instruction_X(base_tensor &t_,
const fem_interpolation_context &ctx_)
338 : t(t_), ctx(ctx_) {}
// Assembly instruction: copy an externally maintained small vector
// (referenced, so it may change between executions) into tensor t.
341 struct ga_instruction_copy_small_vect :
public ga_instruction {
343 const base_small_vector &vec;
346 GA_DEBUG_INFO(
"Instruction: copy small vector");
347 GMM_ASSERT1(t.size() == vec.size(),
"Invalid vector size.");
348 gmm::copy(vec, t.as_vector());
351 ga_instruction_copy_small_vect(base_tensor &t_,
352 const base_small_vector &vec_)
353 : t(t_), vec(vec_) {}
// Assembly instruction: copy the outward unit normal vector into t.
// Same copy as the base class, but with a more specific error message
// when the normal is unavailable (not on a boundary, or transformation
// failed, leaving vec with the wrong size).
356 struct ga_instruction_copy_Normal :
public ga_instruction_copy_small_vect {
359 GA_DEBUG_INFO(
"Instruction: unit normal vector");
360 GMM_ASSERT1(t.size() == vec.size(),
"Invalid outward unit normal "
361 "vector. Possible reasons: not on boundary or "
362 "transformation failed.");
363 gmm::copy(vec, t.as_vector());
366 ga_instruction_copy_Normal(base_tensor &t_,
367 const base_small_vector &Normal_)
368 : ga_instruction_copy_small_vect(t_, Normal_) {}
// Assembly instruction: compute the unit normal vector to a level-set at
// the current point via the mesh_im_level_set, then copy it into t.
// vec is a mutable scratch buffer sized like t at construction.
371 struct ga_instruction_level_set_normal_vector :
public ga_instruction {
373 const mesh_im_level_set *mimls;
374 const fem_interpolation_context &ctx;
375 base_small_vector vec; // scratch, refilled on every exec
378 GA_DEBUG_INFO(
"Instruction: unit normal vector to a level-set");
379 mimls->compute_normal_vector(ctx, vec);
380 GMM_ASSERT1(t.size() == vec.size(),
"Invalid outward unit normal "
381 "vector. Possible reasons: not on boundary or "
382 "transformation failed.");
383 gmm::copy(vec, t.as_vector());
386 ga_instruction_level_set_normal_vector
387 (base_tensor &t_,
const mesh_im_level_set *mimls_,
388 const fem_interpolation_context &ctx_)
389 : t(t_), mimls(mimls_), ctx(ctx_), vec(t.size()) {}
// Assembly instruction: write the (externally computed) element size es
// into the scalar tensor t.
// NOTE(review): the assignment line and constructor body are not visible
// in this extract.
392 struct ga_instruction_element_size :
public ga_instruction {
397 GA_DEBUG_INFO(
"Instruction: element_size");
398 GMM_ASSERT1(t.size() == 1,
"Invalid element size.");
402 ga_instruction_element_size(base_tensor &t_, scalar_type &es_)
// Assembly instruction: copy the gradient of the geometric transformation
// K of the current element (ctx.K()) into tensor t.
406 struct ga_instruction_element_K :
public ga_instruction {
408 const fem_interpolation_context &ctx;
411 GA_DEBUG_INFO(
"Instruction: element_K");
412 GMM_ASSERT1(t.size() == (ctx.K()).size(),
"Invalid tensor size.");
413 gmm::copy(ctx.K().as_vector(), t.as_vector());
416 ga_instruction_element_K(base_tensor &t_,
417 const fem_interpolation_context &ct)
// Assembly instruction: copy matrix B of the geometric transformation of
// the current element (ctx.B()) into tensor t; mirrors element_K.
421 struct ga_instruction_element_B :
public ga_instruction {
423 const fem_interpolation_context &ctx;
426 GA_DEBUG_INFO(
"Instruction: element_B");
427 GMM_ASSERT1(t.size() == (ctx.B()).size(),
"Invalid tensor size.");
428 gmm::copy(ctx.B().as_vector(), t.as_vector());
431 ga_instruction_element_B(base_tensor &t_,
432 const fem_interpolation_context &ct)
// Assembly instruction: evaluate the base (shape) function values at the
// current point into t.  Uses the precomputed table (pfp) when a geotrans
// precomp is available, otherwise evaluates directly after selecting the
// fem of the current element.  Also serves as base class for the grad /
// hess / xfem variants below.
// NOTE(review): this extract omits some original lines (member
// declarations, the non-pgp evaluation call, closing braces).
436 struct ga_instruction_val_base :
public ga_instruction {
438 fem_interpolation_context &ctx;
440 const pfem_precomp &pfp;
443 GA_DEBUG_INFO(
"Instruction: compute value of base functions");
448 if (ctx.have_pgp()) ctx.pfp_base_value(t, pfp); // fast path via precomputed values
450 ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
451 GMM_ASSERT1(ctx.pf(),
"Undefined finite element method");
457 ga_instruction_val_base(base_tensor &tt, fem_interpolation_context &ct,
458 const mesh_fem &mf_,
const pfem_precomp &pfp_)
459 : t(tt), ctx(ct), mf(mf_), pfp(pfp_) {}
// Assembly instruction: base function values evaluated on the "plus"
// side (+1) of an xfem discontinuity; the previous xfem side is restored
// after evaluation.
// NOTE(review): this extract omits some original lines (member
// declarations, the base_value call, closing braces).
462 struct ga_instruction_xfem_plus_val_base :
public ga_instruction {
464 fem_interpolation_context &ctx;
469 GA_DEBUG_INFO(
"Instruction: compute value of base functions");
470 if (ctx.have_pgp()) ctx.set_pfp(pfp);
471 else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
472 GMM_ASSERT1(ctx.pf(),
"Undefined finite element method");
473 int old_xfem_side = ctx.xfem_side();
474 ctx.set_xfem_side(1); // evaluate on the plus side
476 ctx.set_xfem_side(old_xfem_side); // restore previous side
480 ga_instruction_xfem_plus_val_base(base_tensor &tt,
481 fem_interpolation_context &ct,
482 const mesh_fem &mf_, pfem_precomp &pfp_)
483 : t(tt), ctx(ct), mf(mf_), pfp(pfp_) {}
// Assembly instruction: base function values on the "minus" side (-1) of
// an xfem discontinuity; mirror of the plus-side variant above.
// NOTE(review): this extract omits some original lines (member
// declarations, the base_value call, closing braces).
486 struct ga_instruction_xfem_minus_val_base :
public ga_instruction {
488 fem_interpolation_context &ctx;
493 GA_DEBUG_INFO(
"Instruction: compute value of base functions");
494 if (ctx.have_pgp()) ctx.set_pfp(pfp);
495 else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
496 GMM_ASSERT1(ctx.pf(),
"Undefined finite element method");
497 int old_xfem_side = ctx.xfem_side();
498 ctx.set_xfem_side(-1); // evaluate on the minus side
500 ctx.set_xfem_side(old_xfem_side); // restore previous side
504 ga_instruction_xfem_minus_val_base
505 (base_tensor &tt, fem_interpolation_context &ct,
506 const mesh_fem &mf_, pfem_precomp &pfp_)
507 : t(tt), ctx(ct), mf(mf_), pfp(pfp_) {}
// Assembly instruction: gradient of base functions at the current point;
// reuses the members of ga_instruction_val_base, with the precomputed
// fast path when a geotrans precomp is available.
// NOTE(review): this extract omits some original lines (exec() header,
// closing braces).
510 struct ga_instruction_grad_base :
public ga_instruction_val_base {
513 GA_DEBUG_INFO(
"Instruction: compute gradient of base functions");
518 if (ctx.have_pgp()) ctx.pfp_grad_base_value(t, pfp); // fast path via precomputed values
520 ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
521 GMM_ASSERT1(ctx.pf(),
"Undefined finite element method");
522 ctx.grad_base_value(t);
527 ga_instruction_grad_base(base_tensor &tt, fem_interpolation_context &ct,
528 const mesh_fem &mf_, pfem_precomp &pfp_)
529 : ga_instruction_val_base(tt, ct, mf_, pfp_)
// Assembly instruction: gradient of base functions on the "plus" side
// (+1) of an xfem discontinuity; previous side restored after the call.
// NOTE(review): this extract omits some original lines (exec() header,
// closing braces).
533 struct ga_instruction_xfem_plus_grad_base :
public ga_instruction_val_base {
536 GA_DEBUG_INFO(
"Instruction: compute gradient of base functions");
537 if (ctx.have_pgp()) ctx.set_pfp(pfp);
538 else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
539 GMM_ASSERT1(ctx.pf(),
"Undefined finite element method");
540 int old_xfem_side = ctx.xfem_side();
541 ctx.set_xfem_side(1);
542 ctx.grad_base_value(t);
543 ctx.set_xfem_side(old_xfem_side);
547 ga_instruction_xfem_plus_grad_base
548 (base_tensor &tt, fem_interpolation_context &ct,
549 const mesh_fem &mf_, pfem_precomp &pfp_)
550 : ga_instruction_val_base(tt, ct, mf_, pfp_)
// Assembly instruction: gradient of base functions on the "minus" side
// (-1) of an xfem discontinuity; mirror of the plus-side variant.
// NOTE(review): this extract omits some original lines (exec() header,
// closing braces).
554 struct ga_instruction_xfem_minus_grad_base :
public ga_instruction_val_base {
557 GA_DEBUG_INFO(
"Instruction: compute gradient of base functions");
558 if (ctx.have_pgp()) ctx.set_pfp(pfp);
559 else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
560 GMM_ASSERT1(ctx.pf(),
"Undefined finite element method");
561 int old_xfem_side = ctx.xfem_side();
562 ctx.set_xfem_side(-1);
563 ctx.grad_base_value(t);
564 ctx.set_xfem_side(old_xfem_side);
568 ga_instruction_xfem_minus_grad_base
569 (base_tensor &tt, fem_interpolation_context &ct,
570 const mesh_fem &mf_, pfem_precomp &pfp_)
571 : ga_instruction_val_base(tt, ct, mf_, pfp_)
// Assembly instruction: Hessian of base functions at the current point.
// NOTE(review): this extract omits some original lines (exec() header,
// closing braces).
576 struct ga_instruction_hess_base :
public ga_instruction_val_base {
579 GA_DEBUG_INFO(
"Instruction: compute Hessian of base functions");
580 if (ctx.have_pgp()) ctx.set_pfp(pfp);
581 else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
582 GMM_ASSERT1(ctx.pf(),
"Undefined finite element method");
583 ctx.hess_base_value(t);
587 ga_instruction_hess_base(base_tensor &tt, fem_interpolation_context &ct,
588 const mesh_fem &mf_, pfem_precomp &pfp_)
589 : ga_instruction_val_base(tt, ct, mf_, pfp_)
// Assembly instruction: Hessian of base functions on the "plus" side
// (+1) of an xfem discontinuity; previous side restored after the call.
// NOTE(review): this extract omits some original lines (exec() header,
// closing braces).
593 struct ga_instruction_xfem_plus_hess_base :
public ga_instruction_val_base {
596 GA_DEBUG_INFO(
"Instruction: compute Hessian of base functions");
597 if (ctx.have_pgp()) ctx.set_pfp(pfp);
598 else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
599 GMM_ASSERT1(ctx.pf(),
"Undefined finite element method");
600 int old_xfem_side = ctx.xfem_side();
601 ctx.set_xfem_side(1);
602 ctx.hess_base_value(t);
603 ctx.set_xfem_side(old_xfem_side);
607 ga_instruction_xfem_plus_hess_base
608 (base_tensor &tt, fem_interpolation_context &ct,
609 const mesh_fem &mf_, pfem_precomp &pfp_)
610 : ga_instruction_val_base(tt, ct, mf_, pfp_)
// Assembly instruction: Hessian of base functions on the "minus" side
// (-1) of an xfem discontinuity; mirror of the plus-side variant.
// NOTE(review): this extract omits some original lines (exec() header,
// closing braces).
614 struct ga_instruction_xfem_minus_hess_base :
public ga_instruction_val_base {
617 GA_DEBUG_INFO(
"Instruction: compute Hessian of base functions");
618 if (ctx.have_pgp()) ctx.set_pfp(pfp);
619 else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
620 GMM_ASSERT1(ctx.pf(),
"Undefined finite element method");
621 int old_xfem_side = ctx.xfem_side();
622 ctx.set_xfem_side(-1);
623 ctx.hess_base_value(t);
624 ctx.set_xfem_side(old_xfem_side);
628 ga_instruction_xfem_minus_hess_base
629 (base_tensor &tt, fem_interpolation_context &ct,
630 const mesh_fem &mf_, pfem_precomp &pfp_)
631 : ga_instruction_val_base(tt, ct, mf_, pfp_)
// Assembly instruction: value of a fem variable at the current point,
// t = sum_j coeff[j] * Z[j], where Z holds the base function values and
// coeff the local dof values.  Three code paths are visible: scalar case
// (result accumulated into `a`, aliasing t[0]), vector fem with
// target_dim == 1, and the general vectorized case with multiplier
// Qmult.  If the element has no dofs (ndof == 0), t is cleared.
// NOTE(review): this extract omits some original lines (member
// declarations, several branch/loop headers, closing braces).
635 struct ga_instruction_val :
public ga_instruction {
638 const base_tensor &Z;
639 const base_vector &coeff;
643 GA_DEBUG_INFO(
"Instruction: variable value");
645 if (!ndof) {
gmm::clear(t.as_vector());
return 0; } // no dofs on this element: result is zero
646 GA_DEBUG_ASSERT(t.size() == qdim,
"dimensions mismatch");
649 GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof,
650 "Wrong size for coeff vector");
651 auto itc = coeff.begin();
auto itZ = Z.begin();
652 a = (*itc++) * (*itZ++); // scalar case: dot(coeff, Z) into a (= t[0])
653 while (itc != coeff.end()) a += (*itc++) * (*itZ++);
656 if (target_dim == 1) {
657 GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*qdim,
658 "Wrong size for coeff vector");
659 auto itc = coeff.begin();
auto itZ = Z.begin();
660 for (
auto it = t.begin(); it != t.end(); ++it)
661 *it = (*itc++) * (*itZ); // initialize with first base function
663 for (
size_type j = 1; j < ndof; ++j, ++itZ) {
664 for (
auto it = t.begin(); it != t.end(); ++it)
665 *it += (*itc++) * (*itZ); // accumulate remaining base functions
669 GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*Qmult,
670 "Wrong size for coeff vector");
673 auto itc = coeff.begin();
676 for (
size_type q = 0; q < Qmult; ++q, ++itc) {
677 for (
size_type r = 0; r < target_dim; ++r)
678 *it++ += (*itc) * Z[j + r*ndof]; // general vectorized case
686 ga_instruction_val(base_tensor &tt,
const base_tensor &Z_,
688 : a(tt[0]), t(tt), Z(Z_), coeff(co), qdim(q) {}
// Assembly instruction: gradient of a fem variable at the current point,
// combining the local dof values (coeff) with the base function
// gradients Z.  Same three code paths as ga_instruction_val (scalar,
// target_dim == 1, general Qmult); t is cleared when ndof == 0.
// NOTE(review): this extract omits some original lines (exec() header,
// several loop headers, closing braces).
691 struct ga_instruction_grad :
public ga_instruction_val {
694 GA_DEBUG_INFO(
"Instruction: gradient");
696 if (!ndof) {
gmm::clear(t.as_vector());
return 0; } // no dofs on this element: gradient is zero
699 GA_DEBUG_ASSERT(t.size() == N,
"dimensions mismatch");
700 GA_DEBUG_ASSERT(coeff.size() == ndof,
"Wrong size for coeff vector");
701 auto itZ = Z.begin();
702 for (
auto it = t.begin(); it != t.end(); ++it) { // scalar case: one component per derivative direction
703 auto itc = coeff.begin();
704 *it = (*itc++) * (*itZ++);
705 while (itc != coeff.end()) *it += (*itc++) * (*itZ++);
709 if (target_dim == 1) {
710 GA_DEBUG_ASSERT(t.size() == N*qdim,
"dimensions mismatch");
711 GA_DEBUG_ASSERT(coeff.size() == ndof*qdim,
712 "Wrong size for coeff vector");
714 auto itZ = Z.begin();
auto it = t.begin() + q;
717 auto itc = coeff.begin() + q; // stride qdim through coeff for component q
718 *it = (*itc) * (*itZ++);
720 { itc += qdim; *it += (*itc) * (*itZ++); }
725 GA_DEBUG_ASSERT(t.size() == N*qdim,
"dimensions mismatch");
726 GA_DEBUG_ASSERT(coeff.size() == ndof*Qmult,
727 "Wrong size for coeff vector");
730 auto itZ = Z.begin();
732 for (
size_type r = 0; r < target_dim; ++r)
734 t[r + q*target_dim + k*qdim] += coeff[j*Qmult+q] * (*itZ++); // general vectorized case
741 ga_instruction_grad(base_tensor &tt,
const base_tensor &Z_,
743 : ga_instruction_val(tt, Z_, co, q)
// Assembly instruction: Hessian of a fem variable at the current point,
// combining local dof values (coeff) with base function second
// derivatives Z over the NN = N*N derivative pairs.  Same three code
// paths as ga_instruction_val; t is cleared when ndof == 0.
// NOTE(review): this extract omits some original lines (exec() header,
// several loop headers, closing braces).
748 struct ga_instruction_hess :
public ga_instruction_val {
751 GA_DEBUG_INFO(
"Instruction: Hessian");
753 if (!ndof) {
gmm::clear(t.as_vector());
return 0; } // no dofs on this element: Hessian is zero
754 size_type NN = gmm::sqr(t.sizes().back()); // N*N second-derivative entries
755 GA_DEBUG_ASSERT(NN == Z.sizes()[2],
"Internal error");
757 GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof,
758 "Wrong size for coeff vector");
759 auto it = Z.begin();
auto itt = t.begin();
760 for (
size_type kl = 0; kl < NN; ++kl, ++itt) { // scalar case: one entry per derivative pair
761 *itt = scalar_type(0);
762 for (
auto itc = coeff.begin(); itc != coeff.end(); ++itc, ++it)
763 *itt += (*itc) * (*it);
765 GMM_ASSERT1(itt == t.end(),
"dimensions mismatch");
768 if (target_dim == 1) {
769 GA_DEBUG_ASSERT(t.size() == NN*qdim,
"dimensions mismatch");
770 GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*qdim,
771 "Wrong size for coeff vector");
774 base_tensor::const_iterator it = Z.begin();
776 for (
size_type j = 0; j < ndof; ++j, ++it)
777 t[q + kl*qdim] += coeff[j*qdim+q] * (*it);
781 GA_DEBUG_ASSERT(t.size() == NN*qdim,
"dimensions mismatch");
782 GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*Qmult,
783 "Wrong size for coeff vector");
786 base_tensor::const_iterator it = Z.begin();
788 for (
size_type r = 0; r < target_dim; ++r)
789 for (
size_type j = 0; j < ndof; ++j, ++it)
790 t[r + q*target_dim + kl*qdim] += coeff[j*Qmult+q] * (*it); // general vectorized case
797 ga_instruction_hess(base_tensor &tt,
const base_tensor &Z_,
799 : ga_instruction_val(tt, Z_, co, q)
// Assembly instruction: divergence of a vector fem variable at the
// current point — the trace of the gradient, accumulated into the scalar
// t[0] from coeff and the gradient table Z.  Requires
// Qmult*target_dim == N with one of the two factors equal to 1.
// NOTE(review): this extract omits some original lines (exec() header,
// loop headers, closing braces).
803 struct ga_instruction_diverg :
public ga_instruction_val {
806 GA_DEBUG_INFO(
"Instruction: divergence");
808 if (!ndof) {
gmm::clear(t.as_vector());
return 0; } // no dofs on this element: divergence is zero
812 GA_DEBUG_ASSERT(Qmult*target_dim == N && (Qmult == 1 || target_dim == 1),
813 "Dimensions mismatch for divergence operator");
814 GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*Qmult,
815 "Wrong size for coeff vector");
817 t[0] = scalar_type(0);
818 base_tensor::const_iterator it = Z.begin();
821 if (k) it += (N*ndof + 1); // jump to the next diagonal entry d(phi_j)_k/dx_k
824 t[0] += coeff[j] * (*it);
832 t[0] += coeff[j*N+k] * (*it); // vectorized (Qmult) branch
838 ga_instruction_diverg(base_tensor &tt,
const base_tensor &Z_,
840 : ga_instruction_val(tt, Z_, co, q)
// Assembly instruction: value of test functions — copies the base
// function table Z into t, expanding it block-diagonally by the
// vectorization multiplier Qmult when qdim > target_dim (each scalar
// base value is replicated on Qmult shifted positions, remaining entries
// zero-filled).  Also serves as base class for the grad/hess/diverg
// test-function variants below.
// NOTE(review): this extract omits some original lines (member
// declarations, several loop headers, closing braces).
844 struct ga_instruction_copy_val_base :
public ga_instruction {
846 const base_tensor &Z;
850 GA_DEBUG_INFO(
"Instruction: value of test functions");
852 GA_DEBUG_ASSERT(t.size() == Z.size(),
"Wrong size for base vector");
853 std::copy(Z.begin(), Z.end(), t.begin()); // Qmult == 1: plain copy
858 std::copy(Z.begin(), Z.end(), t.begin());
860 if (target_dim == 1) {
862 GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
863 "Wrong size for base vector");
864 std::fill(t.begin(), t.end(), scalar_type(0)); // zero, then scatter values
865 auto itZ = Z.begin();
870 for (
size_type i = 0; i < ndof; ++i, ++itZ) {
874 for (
size_type j = 1; j < Qmult; ++j) { it2 += sss; *it2 = *itZ; } // replicate along the block diagonal
878 GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
879 "Wrong size for base vector");
880 std::fill(t.begin(), t.end(), scalar_type(0));
881 auto itZ = Z.begin();
882 size_type s = t.sizes()[0], ss = s * Qmult, sss = s+1; // strides for the scatter
885 for (
size_type k = 0; k < target_dim; ++k) {
886 auto it = t.begin() + (ss * k);
887 for (
size_type i = 0; i < ndof; ++i, ++itZ) {
892 { it2 += sss; *it2 = *itZ; }
901 ga_instruction_copy_val_base(base_tensor &tt,
const base_tensor &Z_,
// Assembly instruction: gradient of test functions — copies the base
// function gradient table Z into t with the same Qmult block-diagonal
// vectorization as ga_instruction_copy_val_base, repeated per derivative
// direction l (stride ssss).
// NOTE(review): this extract omits some original lines (exec() header,
// several loop headers, closing braces).
905 struct ga_instruction_copy_grad_base :
public ga_instruction_copy_val_base {
908 GA_DEBUG_INFO(
"Instruction: gradient of test functions");
910 std::copy(Z.begin(), Z.end(), t.begin()); // Qmult == 1: plain copy
915 std::copy(Z.begin(), Z.end(), t.begin());
917 if (target_dim == 1) {
920 GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
921 "Wrong size for gradient vector");
922 std::fill(t.begin(), t.end(), scalar_type(0));
923 base_tensor::const_iterator itZ = Z.begin();
924 size_type s = t.sizes()[0], sss = s+1, ssss = s*target_dim*Qmult; // scatter strides
928 base_tensor::iterator it = t.begin() + (ssss*l); // block for derivative direction l
929 for (
size_type i = 0; i < ndof; ++i, ++itZ) {
931 base_tensor::iterator it2 = it;
933 for (
size_type j = 1; j < Qmult; ++j) { it2+=sss; *it2=*itZ; } // replicate along the block diagonal
939 GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
940 "Wrong size for gradient vector");
941 std::fill(t.begin(), t.end(), scalar_type(0));
942 base_tensor::const_iterator itZ = Z.begin();
943 size_type s = t.sizes()[0], ss = s * Qmult, sss = s+1;
948 for (
size_type k = 0; k < target_dim; ++k) {
949 base_tensor::iterator it = t.begin() + (ss * k + ssss*l);
950 for (
size_type i = 0; i < ndof; ++i, ++itZ) {
952 base_tensor::iterator it2 = it;
954 for (
size_type j = 1; j < Qmult; ++j) { it2+=sss; *it2=*itZ; }
963 ga_instruction_copy_grad_base(base_tensor &tt,
const base_tensor &Z_,
965 : ga_instruction_copy_val_base(tt,Z_,q) {}
// Assembly instruction: vectorized value of test functions — scalar base
// values from Z scattered block-diagonally with multiplier qdim
// (specialized form of copy_val_base for a scalar fem vectorized to
// qdim components).
// NOTE(review): this extract omits some original lines (member
// declarations, loop headers, closing braces).
968 struct ga_instruction_copy_vect_val_base :
public ga_instruction {
970 const base_tensor &Z;
974 GA_DEBUG_INFO(
"Instruction: vectorized value of test functions");
977 GA_DEBUG_ASSERT(t.size() == Z.size() * qdim * qdim,
978 "Wrong size for base vector");
980 auto itZ = Z.begin();
985 for (
size_type i = 0; i < ndof; ++i, ++itZ) {
989 for (
size_type j = 1; j < qdim; ++j) { it2 += sss; *it2 = *itZ; } // replicate along the block diagonal
994 ga_instruction_copy_vect_val_base(base_tensor &tt,
const base_tensor &Z_,
// Assembly instruction: vectorized gradient of test functions — gradient
// table Z scattered block-diagonally with multiplier qdim, one block per
// derivative direction l (stride ssss).
// NOTE(review): this extract omits some original lines (fill/outer loop
// headers, closing braces).
998 struct ga_instruction_copy_vect_grad_base
999 :
public ga_instruction_copy_vect_val_base {
1001 virtual int exec() {
1002 GA_DEBUG_INFO(
"Instruction: vectorized gradient of test functions");
1005 GA_DEBUG_ASSERT(t.size() == Z.size() * qdim * qdim,
1006 "Wrong size for gradient vector");
1008 base_tensor::const_iterator itZ = Z.begin();
1009 size_type s = t.sizes()[0], sss = s+1, ssss = s*qdim; // scatter strides
1013 base_tensor::iterator it = t.begin() + (ssss*l); // block for derivative direction l
1014 for (
size_type i = 0; i < ndof; ++i, ++itZ) {
1016 base_tensor::iterator it2 = it;
1018 for (
size_type j = 1; j < qdim; ++j) { it2+=sss; *it2=*itZ; } // replicate along the block diagonal
1024 ga_instruction_copy_vect_grad_base(base_tensor &tt,
const base_tensor &Z_,
1026 : ga_instruction_copy_vect_val_base(tt,Z_,q) {}
// Assembly instruction: Hessian of test functions — second-derivative
// table Z copied (Qmult == 1) or scattered block-diagonally with
// multiplier Qmult over the NNdim second-derivative/target-dim blocks.
// NOTE(review): this extract omits some original lines (branch headers,
// closing braces).
1029 struct ga_instruction_copy_hess_base :
public ga_instruction_copy_val_base {
1031 virtual int exec() {
1032 GA_DEBUG_INFO(
"Instruction: Hessian of test functions");
1036 gmm::copy(Z.as_vector(), t.as_vector()); // Qmult == 1: plain copy
1039 GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
1040 "Wrong size for Hessian vector");
1042 base_tensor::const_iterator itZ = Z.begin();
1043 size_type s = t.sizes()[0], ss = s * Qmult, sss = s+1; // scatter strides
1046 size_type NNdim = Z.sizes()[2]*target_dim; // second-derivative entries times target_dim
1047 for (
size_type klm = 0; klm < NNdim; ++klm) {
1048 base_tensor::iterator it = t.begin() + (ss * klm);
1049 for (
size_type i = 0; i < ndof; ++i, ++itZ) {
1051 base_tensor::iterator it2 = it;
1053 for (
size_type j = 1; j < Qmult; ++j) { it2 += sss; *it2 = *itZ; } // replicate along the block diagonal
1060 ga_instruction_copy_hess_base(base_tensor &tt,
const base_tensor &Z_,
1062 : ga_instruction_copy_val_base(tt, Z_, q) {}
// Assembly instruction: divergence of test functions — builds the
// ndof*Qmult divergence vector from the gradient table Z, picking the
// diagonal derivative entries.  Requires Qmult*target_dim == N with one
// of the two factors equal to 1.
// NOTE(review): this extract omits some original lines (loop headers and
// the accumulation statements, closing braces).
1065 struct ga_instruction_copy_diverg_base :
public ga_instruction_copy_val_base {
1067 virtual int exec() {
1068 GA_DEBUG_INFO(
"Instruction: divergence of test functions");
1073 GA_DEBUG_ASSERT(Qmult*target_dim == N && (Qmult == 1 || target_dim == 1),
1074 "Dimensions mismatch for divergence operator");
1075 GA_DEBUG_ASSERT(t.size() == ndof * Qmult,
1076 "Wrong size for divergence vector");
1078 base_tensor::const_iterator itZ = Z.begin();
1082 base_tensor::iterator it = t.begin();
1083 if (l) itZ += target_dim*ndof+1; // advance to the next diagonal derivative entry
1085 if (i) { ++it; ++itZ; }
1092 base_tensor::iterator it = t.begin() + j; // vectorized (Qmult) branch
1095 if (i) { it += Qmult; ++itZ; }
1103 ga_instruction_copy_diverg_base(base_tensor &tt,
const base_tensor &Z_,
1105 : ga_instruction_copy_val_base(tt, Z_, q) {}
// Helper mixin holding the state for an elementary transformation:
// caches the transformation matrix M per convex (icv) and maps the
// original local dofs (coeff_in) to transformed ones (coeff_out) via
// coeff_out = M * coeff_in.  Combined with val/grad/hess/diverg
// instructions through multiple inheritance below.
// NOTE(review): this extract omits some original lines (the
// do_transformation signature, member declarations, closing braces).
1108 struct ga_instruction_elementary_trans {
1109 const base_vector &coeff_in;
1110 base_vector coeff_out;
1111 pelementary_transformation elemtrans;
1112 const mesh_fem &mf1, &mf2;
1113 const fem_interpolation_context &ctx;
1118 if (icv != ctx.convex_num() || M.size() == 0) { // recompute M only when the element changes
1119 M.base_resize(m, n);
1120 icv = ctx.convex_num();
1121 elemtrans->give_transformation(mf1, mf2, icv, M);
1123 coeff_out.resize(gmm::mat_nrows(M));
1124 gmm::mult(M, coeff_in, coeff_out); // coeff_out = M * coeff_in
1127 ga_instruction_elementary_trans
1128 (
const base_vector &co, pelementary_transformation e,
1129 const mesh_fem &mf1_,
const mesh_fem &mf2_,
1130 const fem_interpolation_context &ctx_, base_matrix &M_,
1132 : coeff_in(co), elemtrans(e), mf1(mf1_), mf2(mf2_), ctx(ctx_),
1134 ~ga_instruction_elementary_trans() {};
// Assembly instruction: variable value with an elementary transformation
// applied first — transforms coeff_in into coeff_out (which the
// ga_instruction_val base reads as its coeff), then delegates to
// ga_instruction_val::exec().
1137 struct ga_instruction_elementary_trans_val
1138 :
public ga_instruction_val, ga_instruction_elementary_trans {
1140 virtual int exec() {
1141 GA_DEBUG_INFO(
"Instruction: variable value with elementary "
1145 do_transformation(coeff_in.size(), ndof*Qmult); // fills coeff_out
1146 return ga_instruction_val::exec();
1149 ga_instruction_elementary_trans_val
1150 (base_tensor &tt,
const base_tensor &Z_,
const base_vector &co,
size_type q,
1151 pelementary_transformation e,
const mesh_fem &mf1_,
const mesh_fem &mf2_,
1152 fem_interpolation_context &ctx_, base_matrix &M_,
size_type &icv_)
1153 : ga_instruction_val(tt, Z_, coeff_out, q),
1154 ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
// Assembly instruction: gradient with an elementary transformation —
// transforms the local dofs, then delegates to
// ga_instruction_grad::exec().
1157 struct ga_instruction_elementary_trans_grad
1158 :
public ga_instruction_grad, ga_instruction_elementary_trans {
1160 virtual int exec() {
1161 GA_DEBUG_INFO(
"Instruction: gradient with elementary transformation");
1164 do_transformation(coeff_in.size(), ndof*Qmult); // fills coeff_out
1165 return ga_instruction_grad::exec();
1168 ga_instruction_elementary_trans_grad
1169 (base_tensor &tt,
const base_tensor &Z_,
const base_vector &co,
size_type q,
1170 pelementary_transformation e,
const mesh_fem &mf1_,
const mesh_fem &mf2_,
1171 fem_interpolation_context &ctx_, base_matrix &M_,
size_type &icv_)
1172 : ga_instruction_grad(tt, Z_, coeff_out, q),
1173 ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
// Assembly instruction: Hessian with an elementary transformation —
// transforms the local dofs, then delegates to
// ga_instruction_hess::exec().
1176 struct ga_instruction_elementary_trans_hess
1177 :
public ga_instruction_hess, ga_instruction_elementary_trans {
1179 virtual int exec() {
1180 GA_DEBUG_INFO(
"Instruction: Hessian with elementary transformation");
1183 do_transformation(coeff_in.size(), ndof*Qmult); // fills coeff_out
1184 return ga_instruction_hess::exec();
1187 ga_instruction_elementary_trans_hess
1188 (base_tensor &tt,
const base_tensor &Z_,
const base_vector &co,
size_type q,
1189 pelementary_transformation e,
const mesh_fem &mf1_,
const mesh_fem &mf2_,
1190 fem_interpolation_context &ctx_, base_matrix &M_,
size_type &icv_)
1191 : ga_instruction_hess(tt, Z_, coeff_out, q),
1192 ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
// Assembly instruction: divergence with an elementary transformation —
// transforms the local dofs, then delegates to
// ga_instruction_diverg::exec().
1195 struct ga_instruction_elementary_trans_diverg
1196 :
public ga_instruction_diverg, ga_instruction_elementary_trans {
1198 virtual int exec() {
1199 GA_DEBUG_INFO(
"Instruction: divergence with elementary transformation");
1202 do_transformation(coeff_in.size(), ndof*Qmult); // fills coeff_out
1203 return ga_instruction_diverg::exec();
1206 ga_instruction_elementary_trans_diverg
1207 (base_tensor &tt,
const base_tensor &Z_,
const base_vector &co,
size_type q,
1208 pelementary_transformation e,
const mesh_fem &mf1_,
const mesh_fem &mf2_,
1209 fem_interpolation_context &ctx_, base_matrix &M_,
size_type &icv_)
1210 : ga_instruction_diverg(tt, Z_, coeff_out, q),
1211 ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
// Assembly instruction: resolve which concrete variable of variable
// group `gname` applies on the currently interpolated mesh (inin.m) and
// refresh the cached variable_group_info (vgi): variable name, mesh_fem,
// dof vector U (using the extended copy for reduced mesh_fems), dof
// interval I, and the multiplier alpha.  Short-circuits when the cached
// mesh is unchanged.
// NOTE(review): this extract omits some original lines (the early-return
// body, else branch header, closing braces).
1214 struct ga_instruction_update_group_info :
public ga_instruction {
1215 const ga_workspace &workspace;
1216 const ga_instruction_set &gis;
1217 const ga_instruction_set::interpolate_info &inin;
1218 const std::string gname;
1219 ga_instruction_set::variable_group_info &vgi;
1221 virtual int exec() {
1222 GA_DEBUG_INFO(
"Instruction: Update group info for "+gname);
1223 if (vgi.cached_mesh && vgi.cached_mesh == inin.m) // cache hit: nothing to update
1226 vgi.cached_mesh = inin.m;
1227 const std::string &varname
1228 = inin.m ? workspace.variable_in_group(gname, *(inin.m))
1229 : workspace.first_variable_of_group(gname);
1230 vgi.varname = &varname;
1231 vgi.mf = workspace.associated_mf(varname);
1232 GA_DEBUG_ASSERT(vgi.mf,
"Group variable should always have a mesh_fem");
1233 vgi.reduced_mf = vgi.mf->is_reduced();
1234 if (vgi.reduced_mf) { // reduced mesh_fem: use the extended dof vector
1235 const auto it = gis.really_extended_vars.find(varname);
1236 GA_DEBUG_ASSERT(it != gis.really_extended_vars.end(),
1237 "Variable " << varname <<
" not in extended variables");
1238 vgi.U = &(it->second);
1239 vgi.I = &(workspace.temporary_interval_of_variable(varname));
1241 vgi.U = &(workspace.value(varname));
1242 vgi.I = &(workspace.interval_of_variable(varname));
1244 vgi.alpha = workspace.factor_of_variable(varname);
1248 ga_instruction_update_group_info
1249 (
const ga_workspace &workspace_,
const ga_instruction_set &gis_,
1250 const ga_instruction_set::interpolate_info &inin_,
1251 const std::string &gname_, ga_instruction_set::variable_group_info &vgi_)
1252 : workspace(workspace_), gis(gis_), inin(inin_), gname(gname_), vgi(vgi_)
// Assembly instruction: filter after an interpolate transformation —
// lets execution pass when the transformation's point type matches
// (pt_type == size_type(-1) acting as a "any non-zero type" wildcard),
// otherwise the point is filtered out.
// NOTE(review): this extract omits some original lines (the pass/filter
// branch bodies, member declarations, closing braces).
1256 struct ga_instruction_interpolate_filter :
public ga_instruction {
1258 const ga_instruction_set::interpolate_info &inin;
1262 virtual int exec() {
1263 GA_DEBUG_INFO(
"Instruction: interpolated filter");
1264 if ((pt_type ==
size_type(-1) && inin.pt_type) || // wildcard: any non-zero point type passes
1265 (pt_type !=
size_type(-1) && inin.pt_type == pt_type)) {
1266 GA_DEBUG_INFO(
"Instruction: interpolated filter: pass");
1270 GA_DEBUG_INFO(
"Instruction: interpolated filter: filtered");
1277 ga_instruction_interpolate_filter
1278 (base_tensor &t_,
const ga_instruction_set::interpolate_info &inin_,
1280 : t(t_), inin(inin_), pt_type(ind_), nb(nb_) {}
// Assembly instruction: copy a small vector produced by an interpolate
// transformation into t, after checking that the transformation yielded
// a valid element (when it carries a context).
1283 struct ga_instruction_copy_interpolated_small_vect :
public ga_instruction {
1285 const base_small_vector &vec;
1286 const ga_instruction_set::interpolate_info &inin;
1288 virtual int exec() {
1289 GA_DEBUG_INFO(
"Instruction: copy small vector");
1290 GMM_ASSERT1(!(inin.has_ctx) || inin.ctx.is_convex_num_valid(),
1291 "Invalid element, probably transformation failed");
1292 GMM_ASSERT1(t.size() == vec.size(),
"Invalid vector size.");
1293 gmm::copy(vec, t.as_vector());
1296 ga_instruction_copy_interpolated_small_vect
1297 (base_tensor &t_,
const base_small_vector &vec_,
1298 const ga_instruction_set::interpolate_info &inin_)
1299 : t(t_), vec(vec_), inin(inin_) {}
// Common base for the interpolation of a variable through an interpolate
// transformation: resolves the mesh_fem and dof vector (group indirection via
// mfg/Ug, otherwise the direct mfn/Un), checks the target element, and sets
// up the fem (pre-computation via fp_pool when a pgp is available) before a
// derived class performs the actual value/grad/hess/diverg interpolation.
// NOTE(review): extracted listing — some members (t, m, qdim, ipt, coeff) and
// trailing statements of exec() are not visible in this fragment.
1302 struct ga_instruction_interpolate :
public ga_instruction {
1305 const mesh_fem *mfn, **mfg;
1306 const base_vector *Un, **Ug;
1307 fem_interpolation_context &ctx;
1311 fem_precomp_pool &fp_pool;
1312 ga_instruction_set::interpolate_info &inin;
1314 virtual int exec() {
1315 GMM_ASSERT1(ctx.is_convex_num_valid(),
"No valid element for the "
1316 "transformation. Probably transformation failed");
// Group variables carry an indirection: prefer *mfg / *Ug when set.
1317 const mesh_fem &mf = *(mfg ? *mfg : mfn);
1318 const base_vector &U = *(Ug ? *Ug : Un);
1319 GMM_ASSERT1(&(mf.linked_mesh()) == *m,
"Interpolation of a variable "
1320 "on another mesh than the one it is defined on");
1322 pfem pf = mf.fem_of_element(ctx.convex_num());
1323 GMM_ASSERT1(pf,
"Undefined finite element method");
// With a geometric pre-computation available, cache/reuse the fem
// pre-computation for this mesh_fem.
1324 if (ctx.have_pgp()) {
1326 inin.pfps[&mf] = fp_pool(pf, ctx.pgp()->get_ppoint_tab());
1327 ctx.set_pfp(inin.pfps[&mf]);
1334 ga_instruction_interpolate
1335 (base_tensor &tt,
const mesh **m_,
const mesh_fem *mfn_,
1336 const mesh_fem **mfg_,
const base_vector *Un_,
const base_vector **Ug_,
1338 fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
1339 : t(tt), m(m_), mfn(mfn_), mfg(mfg_), Un(Un_), Ug(Ug_),
1340 ctx(ctx_), qdim(q), ipt(ipt_), fp_pool(fp_pool_), inin(inin_) {}
// Interpolated value of a variable: after the base class resolves fem and
// context, evaluates the fem interpolation of the coefficients into t.
1343 struct ga_instruction_interpolate_val :
public ga_instruction_interpolate {
1345 virtual int exec() {
1346 GA_DEBUG_INFO(
"Instruction: interpolated variable value");
1347 ga_instruction_interpolate::exec();
1348 ctx.pf()->interpolation(ctx, coeff, t.as_vector(), dim_type(qdim));
1353 ga_instruction_interpolate_val
1354 (base_tensor &tt,
const mesh **m_,
const mesh_fem *mfn_,
1355 const mesh_fem **mfg_,
const base_vector *Un_,
const base_vector **Ug_,
1357 fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
1358 : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_,ctx_, q, ipt_,
// Interpolated gradient of a variable: computes the qdim x N gradient matrix
// through the fem and copies it into the target tensor.
1363 struct ga_instruction_interpolate_grad :
public ga_instruction_interpolate {
1365 virtual int exec() {
1366 GA_DEBUG_INFO(
"Instruction: interpolated variable grad");
1367 ga_instruction_interpolate::exec();
1368 base_matrix v(qdim, ctx.N());
1369 ctx.pf()->interpolation_grad(ctx, coeff, v, dim_type(qdim));
1370 gmm::copy(v.as_vector(), t.as_vector());
1374 ga_instruction_interpolate_grad
1375 (base_tensor &tt,
const mesh **m_,
const mesh_fem *mfn_,
1376 const mesh_fem **mfg_,
const base_vector *Un_,
const base_vector **Ug_,
1378 fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
1379 : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
// Interpolated Hessian of a variable: computes the qdim x (N*N) Hessian
// through the fem and copies it into the target tensor.
1384 struct ga_instruction_interpolate_hess :
public ga_instruction_interpolate {
1386 virtual int exec() {
1387 GA_DEBUG_INFO(
"Instruction: interpolated variable hessian");
1388 ga_instruction_interpolate::exec();
1389 base_matrix v(qdim, ctx.N()*ctx.N());
1390 ctx.pf()->interpolation_hess(ctx, coeff, v, dim_type(qdim));
1391 gmm::copy(v.as_vector(), t.as_vector());
1395 ga_instruction_interpolate_hess
1396 (base_tensor &tt,
const mesh **m_,
const mesh_fem *mfn_,
1397 const mesh_fem **mfg_,
const base_vector *Un_,
const base_vector **Ug_,
1399 fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
1400 : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
// Interpolated divergence of a variable: scalar result written into t[0].
1405 struct ga_instruction_interpolate_diverg :
public ga_instruction_interpolate {
1407 virtual int exec() {
1408 GA_DEBUG_INFO(
"Instruction: interpolated variable divergence");
1409 ga_instruction_interpolate::exec();
1410 ctx.pf()->interpolation_diverg(ctx, coeff, t[0]);
1414 ga_instruction_interpolate_diverg
1415 (base_tensor &tt,
const mesh **m_,
const mesh_fem *mfn_,
1416 const mesh_fem **mfg_,
const base_vector *Un_,
const base_vector **Ug_,
1418 fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
1419 : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
// Common base for interpolated *test-function* instructions: validates the
// target element reached by the transformation, resolves the mesh_fem
// (group indirection via mfg), and installs the fem on inin.ctx — using a
// cached fem pre-computation from fp_pool when inin.ctx has a pgp, plain
// set_pf otherwise. Derived classes then evaluate base values on inin.ctx.
1424 struct ga_instruction_interpolate_base {
1427 const mesh_fem *mfn, **mfg;
1429 ga_instruction_set::interpolate_info &inin;
1430 fem_precomp_pool &fp_pool;
1432 virtual int exec() {
1433 GMM_ASSERT1(inin.ctx.is_convex_num_valid(),
"No valid element for "
1434 "the transformation. Probably transformation failed");
1435 const mesh_fem &mf = *(mfg ? *mfg : mfn);
1436 GMM_ASSERT1(&(mf.linked_mesh()) == *m,
"Interpolation of a variable "
1437 "on another mesh than the one it is defined on");
1439 pfem pf = mf.fem_of_element(inin.ctx.convex_num());
1440 GMM_ASSERT1(pf,
"Undefined finite element method");
1442 if (inin.ctx.have_pgp()) {
1444 inin.pfps[&mf] = fp_pool(pf, inin.ctx.pgp()->get_ppoint_tab());
1445 inin.ctx.set_pfp(inin.pfps[&mf]);
1447 inin.ctx.set_pf(pf);
1452 ga_instruction_interpolate_base
1453 (
const mesh **m_,
const mesh_fem *mfn_,
const mesh_fem **mfg_,
1454 const size_type &ipt_, ga_instruction_set::interpolate_info &inin_,
1455 fem_precomp_pool &fp_pool_)
1456 : m(m_), mfn(mfn_), mfg(mfg_), ipt(ipt_), inin(inin_),
1457 fp_pool(fp_pool_) {}
// Interpolated base (test-function) values: sets up the fem on inin.ctx,
// evaluates real base values into ZZ, then delegates the copy to
// ga_instruction_copy_val_base.
1460 struct ga_instruction_interpolate_val_base
1461 :
public ga_instruction_copy_val_base, ga_instruction_interpolate_base {
1463 virtual int exec() {
1464 GA_DEBUG_INFO(
"Instruction: interpolated base value");
1465 ga_instruction_interpolate_base::exec();
1466 inin.ctx.pf()->real_base_value(inin.ctx, ZZ);
1467 return ga_instruction_copy_val_base::exec();
1470 ga_instruction_interpolate_val_base
1471 (base_tensor &t_,
const mesh **m_,
const mesh_fem *mfn_,
1473 ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
1474 : ga_instruction_copy_val_base(t_, ZZ, q),
1475 ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
// Interpolated base gradients: evaluates real gradient base values into ZZ,
// then delegates to ga_instruction_copy_grad_base.
1479 struct ga_instruction_interpolate_grad_base
1480 :
public ga_instruction_copy_grad_base, ga_instruction_interpolate_base {
1482 virtual int exec() {
1483 GA_DEBUG_INFO(
"Instruction: interpolated base grad");
1484 ga_instruction_interpolate_base::exec();
1485 inin.ctx.pf()->real_grad_base_value(inin.ctx, ZZ);
1486 return ga_instruction_copy_grad_base::exec();
1489 ga_instruction_interpolate_grad_base
1490 (base_tensor &t_,
const mesh **m_,
const mesh_fem *mfn_,
1492 ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
1493 : ga_instruction_copy_grad_base(t_, ZZ, q),
1494 ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
// Interpolated base Hessians: evaluates real Hessian base values into ZZ,
// then delegates to ga_instruction_copy_hess_base.
1498 struct ga_instruction_interpolate_hess_base
1499 :
public ga_instruction_copy_hess_base, ga_instruction_interpolate_base {
1501 virtual int exec() {
1502 GA_DEBUG_INFO(
"Instruction: interpolated base hessian");
1503 ga_instruction_interpolate_base::exec();
1504 inin.ctx.pf()->real_hess_base_value(inin.ctx, ZZ);
1505 return ga_instruction_copy_hess_base::exec();
1508 ga_instruction_interpolate_hess_base
1509 (base_tensor &t_,
const mesh **m_,
const mesh_fem *mfn_,
1511 ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
1512 : ga_instruction_copy_hess_base(t_, ZZ, q),
1513 ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
// Interpolated base divergence: note it evaluates *gradient* base values
// into ZZ (divergence is extracted by ga_instruction_copy_diverg_base).
1517 struct ga_instruction_interpolate_diverg_base
1518 :
public ga_instruction_copy_diverg_base, ga_instruction_interpolate_base {
1520 virtual int exec() {
1521 GA_DEBUG_INFO(
"Instruction: interpolated base divergence");
1522 ga_instruction_interpolate_base::exec();
1523 inin.ctx.pf()->real_grad_base_value(inin.ctx, ZZ);
1524 return ga_instruction_copy_diverg_base::exec();
1527 ga_instruction_interpolate_diverg_base
1528 (base_tensor &t_,
const mesh **m_,
const mesh_fem *mfn_,
1530 ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
1531 : ga_instruction_copy_diverg_base(t_, ZZ, q),
1532 ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
// Common base for elementary-transformation instructions: lazily computes
// the element-wise transformation matrix M (re-computed only when the
// current convex changes or M is empty) and applies it to the test-function
// tensor via mat_reduction on dimension 0.
1537 struct ga_instruction_elementary_trans_base {
1540 pelementary_transformation elemtrans;
1541 const mesh_fem &mf1, &mf2;
1542 const fem_interpolation_context &ctx;
// icv caches the convex for which M was last computed.
1547 if (icv != ctx.convex_num() || M.size() == 0) {
1548 M.base_resize(m, n);
1549 icv = ctx.convex_num();
1550 elemtrans->give_transformation(mf1, mf2, icv, M);
1552 t_out.mat_reduction(t_in, M, 0);
1555 ga_instruction_elementary_trans_base
1556 (base_tensor &t_, pelementary_transformation e,
const mesh_fem &mf1_,
1557 const mesh_fem &mf2_,
1558 const fem_interpolation_context &ctx_, base_matrix &M_,
size_type &icv_)
1559 : t_out(t_), elemtrans(e), mf1(mf1_), mf2(mf2_), ctx(ctx_),
// Value of test functions with an elementary transformation applied: copies
// base values into t_in, then reduces with the per-element matrix.
1563 struct ga_instruction_elementary_trans_val_base
1564 :
public ga_instruction_copy_val_base,
1565 ga_instruction_elementary_trans_base {
1567 virtual int exec() {
1568 GA_DEBUG_INFO(
"Instruction: value of test functions with elementary "
1572 t_in.adjust_sizes(Qmult*ndof, Qmult*Z.sizes()[1]);
1573 ga_instruction_copy_val_base::exec();
1574 do_transformation(t_out.sizes()[0], ndof*Qmult);
1578 ga_instruction_elementary_trans_val_base
1579 (base_tensor &t_,
const base_tensor &Z_,
size_type q,
1580 pelementary_transformation e,
const mesh_fem &mf1_,
const mesh_fem &mf2_,
1581 fem_interpolation_context &ctx_, base_matrix &M_,
size_type &icv_)
1582 : ga_instruction_copy_val_base(t_in, Z_, q),
1583 ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
// Gradient of test functions with an elementary transformation applied.
1587 struct ga_instruction_elementary_trans_grad_base
1588 :
public ga_instruction_copy_grad_base,
1589 ga_instruction_elementary_trans_base {
1591 virtual int exec() {
1592 GA_DEBUG_INFO(
"Instruction: gradient of test functions with elementary "
1596 t_in.adjust_sizes(Qmult*ndof, Qmult*Z.sizes()[1], Z.sizes()[2]);
1597 ga_instruction_copy_grad_base::exec();
1598 do_transformation(t_out.sizes()[0], ndof*Qmult);
1602 ga_instruction_elementary_trans_grad_base
1603 (base_tensor &t_,
const base_tensor &Z_,
size_type q,
1604 pelementary_transformation e,
const mesh_fem &mf1_,
const mesh_fem &mf2_,
1605 fem_interpolation_context &ctx_, base_matrix &M_,
size_type &icv_)
1606 : ga_instruction_copy_grad_base(t_in, Z_, q),
1607 ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
// Hessian of test functions with an elementary transformation applied.
1611 struct ga_instruction_elementary_trans_hess_base
1612 :
public ga_instruction_copy_hess_base,
1613 ga_instruction_elementary_trans_base {
1615 virtual int exec() {
1616 GA_DEBUG_INFO(
"Instruction: Hessian of test functions with elementary "
1620 t_in.adjust_sizes(Qmult*ndof, Qmult*Z.sizes()[1], Z.sizes()[2]);
1621 ga_instruction_copy_hess_base::exec();
1622 do_transformation(t_out.sizes()[0], ndof*Qmult);
1626 ga_instruction_elementary_trans_hess_base
1627 (base_tensor &t_,
const base_tensor &Z_,
size_type q,
1628 pelementary_transformation e,
const mesh_fem &mf1_,
const mesh_fem &mf2_,
1629 fem_interpolation_context &ctx_, base_matrix &M_,
size_type &icv_)
1630 : ga_instruction_copy_hess_base(t_in, Z_, q),
1631 ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
// Divergence of test functions with an elementary transformation applied.
1635 struct ga_instruction_elementary_trans_diverg_base
1636 :
public ga_instruction_copy_diverg_base,
1637 ga_instruction_elementary_trans_base {
1639 virtual int exec() {
1640 GA_DEBUG_INFO(
"Instruction: divergence of test functions with elementary "
1644 t_in.adjust_sizes(Qmult*ndof);
1645 ga_instruction_copy_diverg_base::exec();
1646 do_transformation(t_out.sizes()[0], ndof*Qmult);
1650 ga_instruction_elementary_trans_diverg_base
1651 (base_tensor &t_,
const base_tensor &Z_,
size_type q,
1652 pelementary_transformation e,
const mesh_fem &mf1_,
const mesh_fem &mf2_,
1653 fem_interpolation_context &ctx_, base_matrix &M_,
size_type &icv_)
1654 : ga_instruction_copy_diverg_base(t_in, Z_, q),
1655 ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
// Component-wise addition of two tensors of equal size: t = tc1 + tc2.
1660 struct ga_instruction_add :
public ga_instruction {
1662 const base_tensor &tc1, &tc2;
1663 virtual int exec() {
1664 GA_DEBUG_INFO(
"Instruction: addition");
1665 GA_DEBUG_ASSERT(t.size() == tc1.size(),
1666 "internal error " << t.size() <<
" != " << tc1.size());
1667 GA_DEBUG_ASSERT(t.size() == tc2.size(),
1668 "internal error " << t.size() <<
" != " << tc2.size());
1669 gmm::add(tc1.as_vector(), tc2.as_vector(), t.as_vector());
1672 ga_instruction_add(base_tensor &t_,
1673 const base_tensor &tc1_,
const base_tensor &tc2_)
1674 : t(t_), tc1(tc1_), tc2(tc2_) {}
// In-place accumulation: t += tc1.
1677 struct ga_instruction_add_to :
public ga_instruction {
1679 const base_tensor &tc1;
1680 virtual int exec() {
1681 GA_DEBUG_INFO(
"Instruction: addition");
1682 GA_DEBUG_ASSERT(t.size() == tc1.size(),
"internal error " << t.size()
1683 <<
" incompatible with " << tc1.size());
1684 gmm::add(tc1.as_vector(), t.as_vector());
1687 ga_instruction_add_to(base_tensor &t_,
const base_tensor &tc1_)
1688 : t(t_), tc1(tc1_) {}
// Scaled in-place accumulation: t += coeff * tc1. The coefficient is held by
// reference so its current value at execution time is used.
1691 struct ga_instruction_add_to_coeff :
public ga_instruction {
1693 const base_tensor &tc1;
1695 virtual int exec() {
1696 GA_DEBUG_INFO(
"Instruction: addition with scale");
1697 GA_DEBUG_ASSERT(t.size() == tc1.size(),
"internal error " << t.size()
1698 <<
" incompatible with " << tc1.size());
1699 gmm::add(gmm::scaled(tc1.as_vector(), coeff), t.as_vector());
1702 ga_instruction_add_to_coeff(base_tensor &t_,
const base_tensor &tc1_,
1703 scalar_type &coeff_)
1704 : t(t_), tc1(tc1_), coeff(coeff_) {}
// Component-wise subtraction: t = tc1 - tc2 (implemented as add of tc2
// scaled by -1).
1707 struct ga_instruction_sub :
public ga_instruction {
1709 const base_tensor &tc1, &tc2;
1710 virtual int exec() {
1711 GA_DEBUG_INFO(
"Instruction: subtraction");
1712 GA_DEBUG_ASSERT(t.size() == tc1.size() && t.size() == tc2.size(),
1714 gmm::add(tc1.as_vector(), gmm::scaled(tc2.as_vector(), scalar_type(-1)),
1718 ga_instruction_sub(base_tensor &t_,
1719 const base_tensor &tc1_,
const base_tensor &tc2_)
1720 : t(t_), tc1(tc1_), tc2(tc2_) {}
// In-place negation of a tensor: t = -t.
1723 struct ga_instruction_opposite :
public ga_instruction {
1725 virtual int exec() {
1726 GA_DEBUG_INFO(
"Instruction: multiplication with -1");
1727 gmm::scale(t.as_vector(), scalar_type(-1));
1730 ga_instruction_opposite(base_tensor &t_) : t(t_) {}
// Debug instruction: prints the assembly-tree term and the tensor value at
// the current Gauss point / element to stdout.
1733 struct ga_instruction_print_tensor :
public ga_instruction {
1735 pga_tree_node pnode;
1736 const fem_interpolation_context &ctx;
1738 virtual int exec() {
1739 GA_DEBUG_INFO(
"Instruction: tensor print");
1740 cout <<
"Print term "; ga_print_node(pnode, cout);
1741 cout <<
" on Gauss point " << ipt <<
"/" << nbpt <<
" of element "
1742 << ctx.convex_num() <<
": " << t << endl;
1745 ga_instruction_print_tensor(base_tensor &t_, pga_tree_node pnode_,
1746 const fem_interpolation_context &ctx_,
1748 : t(t_), pnode(pnode_), ctx(ctx_), nbpt(nbpt_), ipt(ipt_) {}
// Plain tensor copy: t = tc1 (sizes assumed compatible).
1751 struct ga_instruction_copy_tensor :
public ga_instruction {
1753 const base_tensor &tc1;
1754 virtual int exec() {
1755 GA_DEBUG_INFO(
"Instruction: tensor copy");
1756 std::copy(tc1.begin(), tc1.end(), t.begin());
1760 ga_instruction_copy_tensor(base_tensor &t_,
const base_tensor &tc1_)
1761 : t(t_), tc1(tc1_) {}
// Zero-fills the target tensor.
1764 struct ga_instruction_clear_tensor :
public ga_instruction {
1766 virtual int exec() {
1767 GA_DEBUG_INFO(
"Instruction: clear tensor");
1768 std::fill(t.begin(), t.end(), scalar_type(0));
1771 ga_instruction_clear_tensor(base_tensor &t_) : t(t_) {}
// Tensor copy that tolerates an empty source (gmm::copy is a no-op on a
// zero-size vector).
1774 struct ga_instruction_copy_tensor_possibly_void :
public ga_instruction {
1776 const base_tensor &tc1;
1777 virtual int exec() {
1778 GA_DEBUG_INFO(
"Instruction: tensor copy possibly void");
1780 gmm::copy(tc1.as_vector(), t.as_vector());
1785 ga_instruction_copy_tensor_possibly_void(base_tensor &t_,
1786 const base_tensor &tc1_)
1787 : t(t_), tc1(tc1_) {}
// Scalar copy: t = t1 (assignment line not visible in this fragment).
1790 struct ga_instruction_copy_scalar :
public ga_instruction {
1791 scalar_type &t;
const scalar_type &t1;
1792 virtual int exec() {
1793 GA_DEBUG_INFO(
"Instruction: scalar copy");
1797 ga_instruction_copy_scalar(scalar_type &t_,
const scalar_type &t1_)
// Fixed-size vector copy: t = t1 (copy line not visible in this fragment).
1801 struct ga_instruction_copy_vect :
public ga_instruction {
1803 const base_vector &t1;
1804 virtual int exec() {
1805 GA_DEBUG_INFO(
"Instruction: fixed size tensor copy");
1809 ga_instruction_copy_vect(base_vector &t_,
const base_vector &t1_)
// Trace of the two trailing n x n indices of tc1: for each leading-index
// entry of t, sums the n diagonal elements (stride s between diagonal
// entries — s not visible in this fragment).
1813 struct ga_instruction_trace :
public ga_instruction {
1815 const base_tensor &tc1;
1818 virtual int exec() {
1819 GA_DEBUG_INFO(
"Instruction: Trace");
1820 GA_DEBUG_ASSERT(t.size()*n*n == tc1.size(),
"Wrong sizes");
1822 auto it = t.begin();
1823 auto it1 = tc1.begin();
1824 for (; it != t.end(); ++it, ++it1) {
1827 for (
size_type i = 1; i < n; ++i) { it2 += s; *it += *it2; }
1832 ga_instruction_trace(base_tensor &t_,
const base_tensor &tc1_,
size_type n_)
1833 : t(t_), tc1(tc1_), n(n_) {}
// Deviator of the trailing n x n matrix part of tc1: copies tc1 to t, then
// for each matrix subtracts the mean of its diagonal (trace / n) from each
// diagonal entry.
1836 struct ga_instruction_deviator :
public ga_instruction {
1838 const base_tensor &tc1;
1841 virtual int exec() {
1842 GA_DEBUG_INFO(
"Instruction: Deviator");
1843 GA_DEBUG_ASSERT(t.size() == tc1.size(),
"Wrong sizes");
1845 gmm::copy(tc1.as_vector(), t.as_vector());
1849 base_tensor::iterator it = t.begin();
1850 base_tensor::const_iterator it1 = tc1.begin();
1851 for (; j < nb; ++it, ++it1, ++j) {
// Accumulate the diagonal of this matrix slice into tr.
1853 base_tensor::const_iterator it2 = it1;
1855 for (
size_type i = 1; i < n; ++i) { it2 += s; tr += *it2; }
1856 tr /= scalar_type(n);
// Subtract the mean diagonal value from each diagonal entry of t.
1858 base_tensor::iterator it3 = it;
1860 for (
size_type i = 1; i < n; ++i) { it3 += s; *it3 -= tr; }
1865 ga_instruction_deviator(base_tensor &t_,
const base_tensor &tc1_,
1867 : t(t_), tc1(tc1_), n(n_) {}
// Transposes the n1 x n2 index pair of tc1 into t (loop nest partially
// visible in this fragment; nn is the remaining block size).
1870 struct ga_instruction_transpose :
public ga_instruction {
1872 const base_tensor &tc1;
1874 virtual int exec() {
1875 GA_DEBUG_INFO(
"Instruction: transpose");
1876 GA_DEBUG_ASSERT(t.size() == tc1.size(),
"Wrong sizes");
1879 auto it = t.begin();
1886 for (
size_type l = 0; l < n0; ++l, ++it)
1891 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
1894 ga_instruction_transpose(base_tensor &t_,
const base_tensor &tc1_,
1896 : t(t_), tc1(tc1_), n1(n1_), n2(n2_), nn(nn_) {}
// Swaps two tensor indices of sizes nn1 and nn2 (with intermediate index
// blocks ii2, ii3); ii1 is the innermost block size derived from t.size().
1899 struct ga_instruction_swap_indices :
public ga_instruction {
1901 const base_tensor &tc1;
1903 virtual int exec() {
1904 GA_DEBUG_INFO(
"Instruction: swap indices");
1905 GA_DEBUG_ASSERT(t.size() == tc1.size(),
"Wrong sizes");
1906 size_type ii1 = t.size() / (nn1*nn2*ii2*ii3);
1908 auto it = t.begin();
1913 size_type ind = j*ii1+k*ii1*nn1+l*ii1*nn1*ii2+i*ii1*nn1*ii2*nn2;
1914 for (
size_type m = 0; m < ii1; ++m, ++it)
1917 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
1920 ga_instruction_swap_indices(base_tensor &t_,
const base_tensor &tc1_,
1923 : t(t_), tc1(tc1_), nn1(n1_), nn2(n2_), ii2(i2_), ii3(i3_) {}
// Moves one tensor index (of size nn) to the last position (loop nest
// partially visible in this fragment).
1926 struct ga_instruction_index_move_last :
public ga_instruction {
1928 const base_tensor &tc1;
1930 virtual int exec() {
1931 GA_DEBUG_INFO(
"Instruction: swap indices");
1932 GA_DEBUG_ASSERT(t.size() == tc1.size(),
"Wrong sizes");
1935 auto it = t.begin();
1939 for (
size_type k = 0; k < ii1; ++k, ++it)
1942 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
1945 ga_instruction_index_move_last(base_tensor &t_,
const base_tensor &tc1_,
1947 : t(t_), tc1(tc1_), nn(n_), ii2(i2_) {}
// Transposes an n1 x n2 matrix block of tc1 into t, without test-function
// indices (s2 is the per-block offset; its computation is not visible in
// this fragment).
1950 struct ga_instruction_transpose_no_test :
public ga_instruction {
1952 const base_tensor &tc1;
1954 virtual int exec() {
1955 GA_DEBUG_INFO(
"Instruction: transpose");
1956 GA_DEBUG_ASSERT(t.size() == tc1.size(),
"Wrong sizes");
1958 auto it = t.begin();
1963 for (
size_type k = 0; k < n2; ++k, ++it)
1964 *it = tc1[s2 + k*n1];
1967 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
1970 ga_instruction_transpose_no_test(base_tensor &t_,
const base_tensor &tc1_,
1973 : t(t_), tc1(tc1_), n1(n1_), n2(n2_), nn(nn_) {}
// Copies tc1 into t while swapping the two leading test-function indices
// (sizes s1 and s2).
1976 struct ga_instruction_transpose_test :
public ga_instruction {
1978 const base_tensor &tc1;
1979 virtual int exec() {
1980 GA_DEBUG_INFO(
"Instruction: copy tensor and transpose test functions");
1981 GA_DEBUG_ASSERT(t.size() == tc1.size(),
"Wrong sizes");
1982 GA_DEBUG_ASSERT(t.sizes().size() >= 2,
"Wrong sizes");
1984 size_type s1 = t.sizes()[0], s2 = t.sizes()[1], s3 = s1*s2;
1986 base_tensor::iterator it = t.begin();
1989 for (
size_type i = 0; i < s1; ++i, ++it)
1990 *it = tc1[j+s2*i+k*s3];
1993 ga_instruction_transpose_test(base_tensor &t_,
const base_tensor &tc1_)
1994 : t(t_), tc1(tc1_) {}
// Symmetric part over the last two indices: t = 0.5*(tc1 + tc1^T).
1997 struct ga_instruction_sym :
public ga_instruction {
1999 const base_tensor &tc1;
2000 virtual int exec() {
2001 GA_DEBUG_INFO(
"Instruction: symmetric part");
2002 GA_DEBUG_ASSERT(t.size() == tc1.size(),
"Wrong sizes");
2004 size_type s1 = t.sizes()[order-2], s2 = t.sizes()[order-1];
2008 base_tensor::iterator it = t.begin() + s*(i + s1*j);
2009 base_tensor::const_iterator it1 = tc1.begin() + s*(i + s1*j),
2010 it1T = tc1.begin() + s*(j + s2*i);
2011 for (
size_type k = 0; k < s; ++k) *it++ = 0.5*(*it1++ + *it1T++);
2015 ga_instruction_sym(base_tensor &t_,
const base_tensor &tc1_)
2016 : t(t_), tc1(tc1_) {}
// Skew-symmetric part over the last two indices: t = 0.5*(tc1 - tc1^T).
2019 struct ga_instruction_skew :
public ga_instruction {
2021 const base_tensor &tc1;
2022 virtual int exec() {
2023 GA_DEBUG_INFO(
"Instruction: skew-symmetric part");
2024 GA_DEBUG_ASSERT(t.size() == tc1.size(),
"Wrong sizes");
2026 size_type s1 = t.sizes()[order-2], s2 = t.sizes()[order-1];
2030 base_tensor::iterator it = t.begin() + s*(i + s1*j);
2031 base_tensor::const_iterator it1 = tc1.begin() + s*(i + s1*j),
2032 it1T = tc1.begin() + s*(j + s2*i);
2033 for (
size_type k = 0; k < s; ++k) *it++ = 0.5*(*it1++ - *it1T++);
2037 ga_instruction_skew(base_tensor &t_,
const base_tensor &tc1_)
2038 : t(t_), tc1(tc1_) {}
// Scalar addition t = c + d (result line not visible in this fragment).
2041 struct ga_instruction_scalar_add :
public ga_instruction {
2043 const scalar_type &c, &d;
2044 virtual int exec() {
2045 GA_DEBUG_INFO(
"Instruction: scalar addition");
2049 ga_instruction_scalar_add(scalar_type &t_,
const scalar_type &c_,
2050 const scalar_type &d_)
2051 : t(t_), c(c_), d(d_) {}
// Scalar subtraction t = c - d (result line not visible in this fragment).
2054 struct ga_instruction_scalar_sub :
public ga_instruction {
2056 const scalar_type &c, &d;
2057 virtual int exec() {
2058 GA_DEBUG_INFO(
"Instruction: scalar subtraction");
2062 ga_instruction_scalar_sub(scalar_type &t_,
const scalar_type &c_,
2063 const scalar_type &d_)
2064 : t(t_), c(c_), d(d_) {}
// Scalar product t = c * d (result line not visible in this fragment).
2067 struct ga_instruction_scalar_scalar_mult :
public ga_instruction {
2069 const scalar_type &c, &d;
2070 virtual int exec() {
2071 GA_DEBUG_INFO(
"Instruction: scalar multiplication");
2075 ga_instruction_scalar_scalar_mult(scalar_type &t_,
const scalar_type &c_,
2076 const scalar_type &d_)
2077 : t(t_), c(c_), d(d_) {}
// Scalar division t = c / d (result line not visible in this fragment).
2080 struct ga_instruction_scalar_scalar_div :
public ga_instruction {
2082 const scalar_type &c, &d;
2083 virtual int exec() {
2084 GA_DEBUG_INFO(
"Instruction: scalar division");
2088 ga_instruction_scalar_scalar_div(scalar_type &t_,
const scalar_type &c_,
2089 const scalar_type &d_)
2090 : t(t_), c(c_), d(d_) {}
// Tensor scaled by a scalar: t = c * tc1.
2093 struct ga_instruction_scalar_mult :
public ga_instruction {
2094 base_tensor &t, &tc1;
2095 const scalar_type &c;
2096 virtual int exec() {
2097 GA_DEBUG_INFO(
"Instruction: multiplication of a tensor by a scalar " << c);
2098 gmm::copy(gmm::scaled(tc1.as_vector(), c), t.as_vector());
2101 ga_instruction_scalar_mult(base_tensor &t_, base_tensor &tc1_,
2102 const scalar_type &c_)
2103 : t(t_), tc1(tc1_), c(c_) {}
// Tensor divided by a scalar: t = tc1 / c, element by element.
2106 struct ga_instruction_scalar_div :
public ga_instruction {
2107 base_tensor &t, &tc1;
2108 const scalar_type &c;
2109 virtual int exec() {
2110 GA_DEBUG_INFO(
"Instruction: division of a tensor by a scalar");
2111 GA_DEBUG_ASSERT(t.size() == tc1.size(),
"Wrong sizes");
2113 base_tensor::iterator it = t.begin(), it1 = tc1.begin();
2114 for (; it != t.end(); ++it, ++it1) *it = *it1/c;
2117 ga_instruction_scalar_div(base_tensor &t_, base_tensor &tc1_,
2118 const scalar_type &c_)
2119 : t(t_), tc1(tc1_), c(c_) {}
// 3D cross product between two test-function-valued fields: tc1 and tc2
// each hold 3 components of n1 resp. n2 test functions; the result holds
// 3*n1*n2 entries. The inv flag selects the sign (the two loop nests below
// differ only by the sign of each component).
2123 struct ga_instruction_cross_product_tf :
public ga_instruction {
2124 base_tensor &t, &tc1, &tc2;
2126 virtual int exec() {
2127 GA_DEBUG_INFO(
"Instruction: Cross product with test functions");
2129 size_type n1 = tc1.size() / 3, n2 = tc2.size() / 3, nn=n1*n2;
2130 GA_DEBUG_ASSERT(t.size() == nn*3,
"Bad tensor size for cross product");
2131 size_type mm=2*nn, n1_2 = 2*n1, n2_2 = 2*n2;
2132 base_tensor::iterator it = t.begin(), it2 = tc2.begin();
// Inverted-sign variant (tc2 x tc1 ordering).
2135 for (
size_type i = 0; i < n2; ++i, ++it2) {
2136 base_tensor::iterator it1 = tc1.begin();
2137 for (
size_type j = 0; j < n1; ++j, ++it, ++it1) {
2138 *it = - it1[n1] *it2[n2_2] + it1[n1_2]*it2[n2];
2139 it[nn] = - it1[n1_2]*it2[0] + it1[0] *it2[n2_2];
2140 it[mm] = - it1[0] *it2[n2] + it1[n1] *it2[0];
// Direct variant (tc1 x tc2 ordering).
2144 for (
size_type i = 0; i < n2; ++i, ++it2) {
2145 base_tensor::iterator it1 = tc1.begin();
2146 for (
size_type j = 0; j < n1; ++j, ++it, ++it1) {
2147 *it = it1[n1] *it2[n2_2] - it1[n1_2]*it2[n2];
2148 it[nn] = it1[n1_2]*it2[0] - it1[0] *it2[n2_2];
2149 it[mm] = it1[0] *it2[n2] - it1[n1] *it2[0];
2155 ga_instruction_cross_product_tf(base_tensor &t_, base_tensor &tc1_,
2156 base_tensor &tc2_,
bool inv_)
2157 : t(t_), tc1(tc1_), tc2(tc2_), inv(inv_) {}
// Plain 3D cross product of two size-3 tensors: t = tc1 x tc2.
2161 struct ga_instruction_cross_product :
public ga_instruction {
2162 base_tensor &t, &tc1, &tc2;
2163 virtual int exec() {
2164 GA_DEBUG_INFO(
"Instruction: Cross product with test functions");
2165 GA_DEBUG_ASSERT(t.size() == 3 && tc1.size() == 3 && tc2.size() == 3,
2166 "Bad tensor size for cross product");
2167 t[0] = tc1[1]*tc2[2] - tc1[2]*tc2[1];
2168 t[1] = tc1[2]*tc2[0] - tc1[0]*tc2[2];
2169 t[2] = tc1[0]*tc2[1] - tc1[1]*tc2[0];
2172 ga_instruction_cross_product(base_tensor &t_, base_tensor &tc1_,
2174 : t(t_), tc1(tc1_), tc2(tc2_) {}
// Component-wise multiplication broadcast over the leading block of tc1:
// t[m + s1_1*i] = tc1[m + s1_1*i] * tc2[i].
2180 struct ga_instruction_dotmult :
public ga_instruction {
2181 base_tensor &t, &tc1, &tc2;
2182 virtual int exec() {
2183 GA_DEBUG_INFO(
"Instruction: componentwise multiplication");
2184 size_type s2 = tc2.size(), s1_1 = tc1.size() / s2;
2185 GA_DEBUG_ASSERT(t.size() == s1_1*s2,
"Wrong sizes");
2187 base_tensor::iterator it = t.begin();
2189 for (
size_type m = 0; m < s1_1; ++m, ++it)
2190 *it = tc1[m+s1_1*i] * tc2[i];
2193 ga_instruction_dotmult(base_tensor &t_, base_tensor &tc1_,
2195 : t(t_), tc1(tc1_), tc2(tc2_) {}
// Component-wise division broadcast over the leading block of tc1:
// t[m + s1_1*i] = tc1[m + s1_1*i] / tc2[i].
2198 struct ga_instruction_dotdiv :
public ga_instruction {
2199 base_tensor &t, &tc1, &tc2;
2200 virtual int exec() {
2201 GA_DEBUG_INFO(
"Instruction: componentwise division");
2202 size_type s2 = tc2.size(), s1_1 = tc1.size() / s2;
2203 GA_DEBUG_ASSERT(t.size() == s1_1*s2,
"Wrong sizes");
2205 base_tensor::iterator it = t.begin();
2207 for (
size_type m = 0; m < s1_1; ++m, ++it)
2208 *it = tc1[m+s1_1*i] / tc2[i];
2211 ga_instruction_dotdiv(base_tensor &t_, base_tensor &tc1_,
2213 : t(t_), tc1(tc1_), tc2(tc2_) {}
// Specific component-wise multiplication used when test-function indices
// interleave: combines tc1 and tc2 entries that share the trailing index i.
2217 struct ga_instruction_dotmult_spec :
public ga_instruction {
2218 base_tensor &t, &tc1, &tc2;
2219 virtual int exec() {
2220 GA_DEBUG_INFO(
"Instruction: specific componentwise multiplication");
2221 size_type s2_1 = tc2.sizes()[0], s2_2 = tc2.size() / s2_1;
2224 base_tensor::iterator it = t.begin();
2227 for (
size_type m = 0; m < s1_1; ++m, ++it)
2228 *it = tc1[m+s1_1*i] * tc2[n+s2_1*i];
2229 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
2232 ga_instruction_dotmult_spec(base_tensor &t_, base_tensor &tc1_,
2234 : t(t_), tc1(tc1_), tc2(tc2_) {}
// Single contraction of one tensor with itself over an index pair of size
// nn (with intermediate block sizes ii2, ii3): sums the nn "diagonal"
// entries for each remaining multi-index.
2238 struct ga_instruction_contract_1_1 :
public ga_instruction {
2239 base_tensor &t, &tc1;
2241 virtual int exec() {
2242 GA_DEBUG_INFO(
"Instruction: single contraction on a single tensor");
2244 size_type ii1 = tc1.size() / (nn*nn*ii2*ii3);
2246 base_tensor::iterator it = t.begin();
2249 for (
size_type k = 0; k < ii1; ++k, ++it) {
2250 *it = scalar_type(0);
2251 size_type pre_ind = k+j*ii1*nn+i*ii1*nn*ii2*nn;
2253 *it += tc1[pre_ind+n*ii1+n*ii1*nn*ii2];
2256 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
2259 ga_instruction_contract_1_1(base_tensor &t_, base_tensor &tc1_,
2261 : t(t_), tc1(tc1_), nn(n_), ii2(i2_), ii3(i3_) {}
// Single contraction of two tensors over one index of size nn, with block
// sizes ii1/ii2 around the contracted index of tc1 and ii3/ii4 for tc2;
// ift1/ift2 are the test-function (leading) block sizes.
2265 struct ga_instruction_contract_2_1 :
public ga_instruction {
2266 base_tensor &t, &tc1, &tc2;
2268 virtual int exec() {
2269 GA_DEBUG_INFO(
"Instruction: single contraction on two tensors");
2271 size_type ift1 = tc1.size() / (nn*ii1*ii2);
2272 size_type ift2 = tc2.size() / (nn*ii3*ii4);
2274 base_tensor::iterator it = t.begin();
2280 for (
size_type q = 0; q < ift1; ++q, ++it) {
2281 *it = scalar_type(0);
2282 size_type ind1 = q+l*ift1+k*ift1*ii1*nn;
2283 size_type ind2 = p+j*ift2+i*ift2*ii3*nn;
2285 *it += tc1[ind1+n*ift1*ii1] * tc2[ind2+n*ift2*ii3];
2288 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
2291 ga_instruction_contract_2_1(base_tensor &t_, base_tensor &tc1_,
2295 : t(t_), tc1(tc1_), tc2(tc2_), nn(n_),
2296 ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_) {}
// Same single contraction as ga_instruction_contract_2_1 but with the
// output index order reversed (innermost loop runs over tc2's leading
// block ift2 instead of tc1's ift1).
2300 struct ga_instruction_contract_2_1_rev :
public ga_instruction {
2301 base_tensor &t, &tc1, &tc2;
2303 virtual int exec() {
2304 GA_DEBUG_INFO(
"Instruction: single contraction on two tensors");
2306 size_type ift1 = tc1.size() / (nn*ii1*ii2);
2307 size_type ift2 = tc2.size() / (nn*ii3*ii4);
2309 base_tensor::iterator it = t.begin();
2315 for (
size_type p = 0; p < ift2; ++p, ++it) {
2316 *it = scalar_type(0);
2317 size_type ind1 = q+l*ift1+k*ift1*ii1*nn;
2318 size_type ind2 = p+j*ift2+i*ift2*ii3*nn;
2320 *it += tc1[ind1+n*ift1*ii1] * tc2[ind2+n*ift2*ii3];
2323 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
2326 ga_instruction_contract_2_1_rev(base_tensor &t_, base_tensor &tc1_,
2330 : t(t_), tc1(tc1_), tc2(tc2_), nn(n_),
2331 ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_) {}
// Double contraction of two tensors over two indices of sizes nn1 and nn2;
// inv_tc2 swaps the strides sn1/sn2 to account for a transposed index order
// in tc2. Block sizes ii1..ii6 position the contracted indices.
2335 struct ga_instruction_contract_2_2 :
public ga_instruction {
2336 base_tensor &t, &tc1, &tc2;
2337 size_type nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6;
2339 virtual int exec() {
2340 GA_DEBUG_INFO(
"Instruction: single contraction on two tensors");
2342 size_type ift1 = tc1.size() / (nn1*nn2*ii1*ii2*ii3);
2343 size_type ift2 = tc2.size() / (nn1*nn2*ii3*ii4*ii5);
2345 size_type sn1 = ift2*ii4, sn2 = ift2*ii4*nn1*ii5;
2346 if (inv_tc2) std::swap(sn1, sn2);
2348 base_tensor::iterator it = t.begin();
2356 for (
size_type s = 0; s < ift1; ++s, ++it) {
2357 *it = scalar_type(0);
2359 = s+q*ift1+p*ift1*ii1*nn1+l*ift1*ii1*nn1*ii2*nn2;
2361 = r+k*ift2+j*ift2*ii4*nn1+i*ift2*ii4*nn1*ii5*nn2;
2364 *it += tc1[ind1+n1*ift1*ii1+n2*ift1*ii1*nn1*ii2]
2365 * tc2[ind2+n1*sn1+n2*sn2];
2368 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
2371 ga_instruction_contract_2_2(base_tensor &t_, base_tensor &tc1_,
2377 : t(t_), tc1(tc1_), tc2(tc2_), nn1(n1_), nn2(n2_),
2378 ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_), ii5(i5_), ii6(i6_),
// Same double contraction as ga_instruction_contract_2_2 but with the
// output index order reversed (innermost loop over tc2's leading block).
2383 struct ga_instruction_contract_2_2_rev :
public ga_instruction {
2384 base_tensor &t, &tc1, &tc2;
2385 size_type nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6;
2387 virtual int exec() {
2388 GA_DEBUG_INFO(
"Instruction: single contraction on two tensors");
2390 size_type ift1 = tc1.size() / (nn1*nn2*ii1*ii2*ii3);
2391 size_type ift2 = tc2.size() / (nn1*nn2*ii3*ii4*ii5);
2393 size_type sn1 = ift2*ii4, sn2 = ift2*ii4*nn1*ii5;
2394 if (inv_tc2) std::swap(sn1, sn2);
2396 base_tensor::iterator it = t.begin();
2404 for (
size_type r = 0; r < ift2; ++r, ++it) {
2405 *it = scalar_type(0);
2407 = s+q*ift1+p*ift1*ii1*nn1+l*ift1*ii1*nn1*ii2*nn2;
2409 = r+k*ift2+j*ift2*ii4*nn1+i*ift2*ii4*nn1*ii5*nn2;
2412 *it += tc1[ind1+n1*ift1*ii1+n2*ift1*ii1*nn1*ii2]
2413 * tc2[ind2+n1*sn1+n2*sn2];
2416 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
2419 ga_instruction_contract_2_2_rev(base_tensor &t_, base_tensor &tc1_,
2425 : t(t_), tc1(tc1_), tc2(tc2_), nn1(n1_), nn2(n2_),
2426 ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_), ii5(i5_), ii6(i6_),
// Order-one contraction (dot product / matrix multiplication) over a shared
// index of size n: t(i,k) = sum_j tc1(i,j) * tc2(j,k).
2432 struct ga_instruction_matrix_mult :
public ga_instruction {
2433 base_tensor &t, &tc1, &tc2;
2435 virtual int exec() {
2436 GA_DEBUG_INFO(
"Instruction: order one contraction "
2437 "(dot product or matrix multiplication)");
2442 base_tensor::iterator it = t.begin();
2444 for (
size_type i = 0; i < s1; ++i, ++it) {
2445 *it = scalar_type(0);
2447 *it += tc1[i+j*s1] * tc2[j+k*n];
2449 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
2452 ga_instruction_matrix_mult(base_tensor &t_, base_tensor &tc1_,
2454 : t(t_), tc1(tc1_), tc2(tc2_), n(n_) {}
// Specific order-one contraction used when test-function indices interleave
// with the contracted index (parameters n, m, p shape the index layout).
2458 struct ga_instruction_matrix_mult_spec :
public ga_instruction {
2459 base_tensor &t, &tc1, &tc2;
2462 virtual int exec() {
2463 GA_DEBUG_INFO(
"Instruction: specific order one contraction "
2464 "(dot product or matrix multiplication)");
2468 base_tensor::iterator it = t.begin();
2472 for (
size_type i = 0; i < q; ++i, ++it) {
2473 *it = scalar_type(0);
2475 *it += tc1[i+k*q+s*q*m] * tc2[j+s*l+r*l*n];
2477 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
2480 ga_instruction_matrix_mult_spec(base_tensor &t_, base_tensor &tc1_,
2483 : t(t_), tc1(tc1_), tc2(tc2_), n(n_), m(m_), p(p_) {}
// Second "specific" order-one contraction: identical accumulation formula to
// ga_instruction_matrix_mult_spec, but the innermost output loop runs over j
// (a tc2-side index) instead of i — i.e. the output dimension ordering is
// transposed relative to the _spec variant.
2487 struct ga_instruction_matrix_mult_spec2 :
public ga_instruction {
2488 base_tensor &t, &tc1, &tc2;
2491 virtual int exec() {
2492 GA_DEBUG_INFO(
"Instruction: specific order one contraction "
2493 "(dot product or matrix multiplication)");
2497 base_tensor::iterator it = t.begin();
2501 for (
size_type j = 0; j < l; ++j, ++it) {
2502 *it = scalar_type(0);
// Same indexing as the _spec variant; s is the contracted index.
2504 *it += tc1[i+k*q+s*q*m] * tc2[j+s*l+r*l*n];
2506 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
2509 ga_instruction_matrix_mult_spec2(base_tensor &t_, base_tensor &tc1_,
2512 : t(t_), tc1(tc1_), tc2(tc2_), n(n_), m(m_), p(p_) {}
// General contraction of size nn between tc1 (s1 x nn) and tc2 (s2 x nn),
// producing t of size s1*s2. Two alternative implementations are visible:
// a BLAS path calling gmm::dgemm_ and a generic hand-rolled loop — they are
// presumably selected by a preprocessor conditional elided from this listing
// (TODO confirm against the full source).
2516 struct ga_instruction_contraction :
public ga_instruction {
2517 base_tensor &t, &tc1, &tc2;
2519 virtual int exec() {
2520 GA_DEBUG_INFO(
"Instruction: contraction operation of size " << nn);
// --- BLAS path: t(m x n) = tc1(k x m)^T'-style product via dgemm.
2522 long m = int(tc1.size()/nn), k = int(nn), n = int(tc2.size()/nn);
2523 long lda = m, ldb = n, ldc = m;
2524 char T =
'T', N =
'N';
2525 scalar_type
alpha(1), beta(0);
2526 gmm::dgemm_(&N, &T, &m, &n, &k, &alpha, &(tc1[0]), &lda, &(tc2[0]), &ldb,
2527 &beta, &(t[0]), &ldc);
// --- Generic path: accumulate each output entry by striding both inputs
// by s1/s2 over the nn contracted slices.
2529 size_type s1 = tc1.size()/nn, s2 = tc2.size()/nn;
2530 GA_DEBUG_ASSERT(t.size() == s1*s2,
"Internal error");
2532 auto it1=tc1.begin(), it2=tc2.begin(), it2end=it2 + s2;
2533 for (
auto it = t.begin(); it != t.end(); ++it) {
2534 auto it11 = it1, it22 = it2;
2535 scalar_type a = (*it11) * (*it22);
2537 { it11 += s1; it22 += s2; a += (*it11) * (*it22); }
// Advance the (it1, it2) pair in odometer fashion: it2 fast, it1 slow.
2539 ++it2;
if (it2 == it2end) { it2 = tc2.begin(), ++it1; }
2551 ga_instruction_contraction(base_tensor &t_, base_tensor &tc1_,
2553 : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
// Contraction of size n*q optimized for a second tensor with sparsity
// type 2 (vectorized structure): tc2's q-component block structure lets the
// inner accumulation stride by s1_qq/s2_qq instead of touching every entry.
// NOTE(review): s1_qq/s2_qq and some loop headers are declared on elided lines.
2557 struct ga_instruction_contraction_opt0_2 :
public ga_instruction {
2558 base_tensor &t, &tc1, &tc2;
2560 virtual int exec() {
2561 GA_DEBUG_INFO(
"Instruction: contraction operation of size " << n*q <<
2562 " optimized for vectorized second tensor of type 2");
2563 size_type nn = n*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn, s2_q = s2/q;
2565 GA_DEBUG_ASSERT(t.size() == s1*s2,
"Internal error");
2567 auto it = t.begin(), it1 = tc1.begin();
2568 for (
size_type i = 0; i < s1; ++i, ++it1) {
2569 auto it2 = tc2.begin();
2573 for (
size_type l = 0; l < q; ++l, ++it) {
2575 auto ittt1 = itt1, ittt2 = it2;
2576 *it = *ittt1 * (*ittt2);
// Remaining n-1 terms of the contraction, strided by the q-block sizes.
2578 ittt1 += s1_qq, ittt2 += s2_qq; *it += *ittt1 * (*ittt2);
2589 ga_instruction_contraction_opt0_2(base_tensor &t_, base_tensor &tc1_,
2592 : t(t_), tc1(tc1_), tc2(tc2_), n(n_), q(q_) {}
// Same algorithm as ga_instruction_contraction_opt0_2 but with the
// contraction size N fixed as a compile-time template parameter (template
// header elided in this listing), so the inner accumulation unrolls.
2597 struct ga_instruction_contraction_opt0_2_unrolled :
public ga_instruction {
2598 base_tensor &t, &tc1, &tc2;
2600 virtual int exec() {
2601 GA_DEBUG_INFO(
"Instruction: unrolled contraction of size " << N*q <<
2602 " optimized for vectorized second tensor of type 2");
2603 size_type nn = N*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn, s2_q = s2/q;
2605 GA_DEBUG_ASSERT(t.size() == s1*s2,
"Internal error");
2607 auto it = t.begin(), it1 = tc1.begin();
2608 for (
size_type i = 0; i < s1; ++i, ++it1) {
2609 auto it2 = tc2.begin();
2613 for (
size_type l = 0; l < q; ++l, ++it) {
2615 auto ittt1 = itt1, ittt2 = it2;
2616 *it = *ittt1 * (*ittt2);
// Unrolled accumulation over the remaining N-1 contracted slices.
2618 ittt1 += s1_qq, ittt2 += s2_qq; *it += *ittt1 * (*ittt2);
2625 ga_instruction_contraction_opt0_2_unrolled(base_tensor &t_, base_tensor &tc1_,
2627 : t(t_), tc1(tc1_), tc2(tc2_), q(q_) {}
// Doubly-unrolled variant of opt0_2: both the contraction size N and the
// vectorization dimension Q are compile-time template parameters, so neither
// appears as a runtime constructor argument.
2631 template <
int N,
int Q>
2632 struct ga_instruction_contraction_opt0_2_dunrolled :
public ga_instruction {
2633 base_tensor &t, &tc1, &tc2;
2634 virtual int exec() {
2635 GA_DEBUG_INFO(
"Instruction: unrolled contraction of size " << N*Q
2636 <<
" optimized for vectorized second tensor of type 2");
2637 size_type s1 = tc1.size()/(N*Q), s2 = tc2.size()/(N*Q), s2_q = s2/Q;
2639 GA_DEBUG_ASSERT(t.size() == s1*s2,
"Internal error");
2641 auto it = t.begin(), it1 = tc1.begin();
2642 for (
size_type i = 0; i < s1; ++i, ++it1) {
2643 auto it2 = tc2.begin();
2647 for (
size_type l = 0; l < Q; ++l, ++it) {
2649 auto ittt1 = itt1, ittt2 = it2;
2650 *it = *ittt1 * (*ittt2);
// Compile-time unrolled accumulation over the remaining N-1 slices.
2652 ittt1 += s1_qq, ittt2 += s2_qq; *it += *ittt1 * (*ittt2);
2659 ga_instruction_contraction_opt0_2_dunrolled
2660 (base_tensor &t_, base_tensor &tc1_, base_tensor &tc2_)
2661 : t(t_), tc1(tc1_), tc2(tc2_) {}
// Mirror of opt0_2 with the roles swapped: here it is the FIRST tensor tc1
// that has the vectorized (type 2) structure, hence the s1_q/s1_qq block
// strides on tc1 and a plain j-loop over tc2's s2 entries.
2665 struct ga_instruction_contraction_opt2_0 :
public ga_instruction {
2666 base_tensor &t, &tc1, &tc2;
2668 virtual int exec() {
2669 GA_DEBUG_INFO(
"Instruction: contraction operation of size " << n*q <<
2670 " optimized for vectorized second tensor of type 2");
2671 size_type nn = n*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn;
2672 size_type s1_q = s1/q, s1_qq = s1*q, s2_qq = s2*q;
2673 GA_DEBUG_ASSERT(t.size() == s1*s2,
"Internal error");
2675 auto it = t.begin();
// Outer loops over i (s1_q) and l (q) are partially elided in this listing.
2677 auto it1 = tc1.begin() + i*q;
2679 auto it2 = tc2.begin() + l*s2;
2680 for (
size_type j = 0; j < s2; ++j, ++it, ++it2) {
2681 auto itt1 = it1, itt2 = it2;
2682 *it = *itt1 * (*itt2);
2684 itt1 += s1_qq, itt2 += s2_qq; *it += *itt1 * (*itt2);
2691 ga_instruction_contraction_opt2_0(base_tensor &t_, base_tensor &tc1_,
2694 : t(t_), tc1(tc1_), tc2(tc2_), n(n_), q(q_) { }
// opt2_0 with compile-time contraction size N (template header elided);
// q remains a runtime constructor parameter.
2699 struct ga_instruction_contraction_opt2_0_unrolled :
public ga_instruction {
2700 base_tensor &t, &tc1, &tc2;
2702 virtual int exec() {
2703 GA_DEBUG_INFO(
"Instruction: unrolled contraction of size " << N*q
2704 <<
" optimized for vectorized second tensor of type 2");
2705 size_type nn = N*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn;
2706 size_type s1_q = s1/q, s1_qq = s1*q, s2_qq = s2*q;
2707 GA_DEBUG_ASSERT(t.size() == s1*s2,
"Internal error");
2709 auto it = t.begin(), it1 = tc1.begin();
2710 for (
size_type i = 0; i < s1_q; ++i, it1 += q) {
2712 auto it2 = tc2.begin() + l*s2;
2713 for (
size_type j = 0; j < s2; ++j, ++it, ++it2) {
2714 auto itt1 = it1, itt2 = it2;
2715 *it = *itt1 * (*itt2);
// Unrolled accumulation over the remaining N-1 contracted slices.
2717 itt1 += s1_qq, itt2 += s2_qq; *it += *itt1 * (*itt2);
2724 ga_instruction_contraction_opt2_0_unrolled(base_tensor &t_, base_tensor &tc1_,
2726 : t(t_), tc1(tc1_), tc2(tc2_), q(q_) {}
// Doubly-unrolled variant of opt2_0: both N (contraction size) and Q
// (vectorization dimension) are compile-time parameters.
2730 template <
int N,
int Q>
2731 struct ga_instruction_contraction_opt2_0_dunrolled :
public ga_instruction {
2732 base_tensor &t, &tc1, &tc2;
2733 virtual int exec() {
2734 GA_DEBUG_INFO(
"Instruction: unrolled contraction of size " << N*Q
2735 <<
" optimized for vectorized second tensor of type 2");
2736 size_type s1 = tc1.size()/(N*Q), s2 = tc2.size()/(N*Q);
2737 size_type s1_q = s1/Q, s1_qq = s1*Q, s2_qq = s2*Q;
2738 GA_DEBUG_ASSERT(t.size() == s1*s2,
"Internal error");
2740 auto it = t.begin(), it1 = tc1.begin();
2741 for (
size_type i = 0; i < s1_q; ++i, it1 += Q) {
2743 auto it2 = tc2.begin() + l*s2;
2744 for (
size_type j = 0; j < s2; ++j, ++it, ++it2) {
2745 auto itt1 = it1, itt2 = it2;
2746 *it = *itt1 * (*itt2);
// Compile-time unrolled accumulation over the remaining N-1 slices.
2748 itt1 += s1_qq, itt2 += s2_qq; *it += *itt1 * (*itt2);
2755 ga_instruction_contraction_opt2_0_dunrolled
2756 (base_tensor &t_, base_tensor &tc1_, base_tensor &tc2_)
2757 : t(t_), tc1(tc1_), tc2(tc2_) {}
// Contraction of size nn optimized for a second tensor with sparsity type 1:
// each nonzero of tc2 multiplies a whole column of tc1, so the inner loop
// writes nn outputs per tc2 entry instead of accumulating.
2761 struct ga_instruction_contraction_opt0_1 :
public ga_instruction {
2762 base_tensor &t, &tc1, &tc2;
2764 virtual int exec() {
2765 GA_DEBUG_INFO(
"Instruction: contraction operation of size " << nn <<
2766 " optimized for vectorized second tensor of type 1");
2767 size_type ss1=tc1.size(), s1 = ss1/nn, s2=tc2.size()/nn, s2_n=s2/nn;
2769 auto it = t.begin(), it1 = tc1.begin();
2770 for (
size_type i = 0; i < s1; ++i, ++it1) {
2771 auto it2 = tc2.begin();
// First product written directly, then the remaining terms stride tc1 by s1.
2775 *it++ = (*itt1) * (*it2);
2777 { itt1 += s1; *it++ = (*itt1) * (*it2); }
2782 ga_instruction_contraction_opt0_1(base_tensor &t_, base_tensor &tc1_,
2784 : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
// Recursive compile-time helper: writes it[k] = it1[k*s1] * a for
// k = N-1 .. 0. The <1> specialization below terminates the recursion.
2787 template<
int N>
inline void reduc_elem_unrolled_opt1_
2788 (
const base_vector::iterator &it,
const base_vector::iterator &it1,
2790 it[N-1] = it1[(N-1)*s1] * a;
2791 reduc_elem_unrolled_opt1_<N-1>(it, it1, a, s1);
// Base case N == 1: single scaled copy, no further recursion.
2793 template<>
inline void reduc_elem_unrolled_opt1_<1>
2794 (
const base_vector::iterator &it,
const base_vector::iterator &it1,
2796 { *it = (*it1) * a; }
// Unrolled version of opt0_1 (template parameter N elided from this listing):
// delegates the N scaled writes per tc2 entry to reduc_elem_unrolled_opt1_<N>.
2800 struct ga_instruction_contraction_opt0_1_unrolled :
public ga_instruction {
2801 base_tensor &t, &tc1, &tc2;
2802 virtual int exec() {
2803 GA_DEBUG_INFO(
"Instruction: unrolled contraction operation of size " << N
2804 <<
" optimized for vectorized second tensor of type 1");
2805 size_type s1 = tc1.size()/N, s2 = tc2.size()/N;
2806 auto it = t.begin(), it1 = tc1.begin();
2807 for (
size_type i = 0; i < s1; ++i, ++it1) {
2808 auto it2 = tc2.begin(), it2e = it2 + s2;
// One unrolled block of N outputs per stride-N step through tc2.
2809 for (; it2 != it2e; it2 += N, it += N)
2810 reduc_elem_unrolled_opt1_<N>(it, it1, *it2, s1);
2814 ga_instruction_contraction_opt0_1_unrolled(base_tensor &t_, base_tensor &tc1_,
2816 : t(t_), tc1(tc1_), tc2(tc2_) {}
// Contraction where BOTH tensors have sparsity type 1: each product
// a = tc1[...] * tc2[...] is replicated along a diagonal of the output,
// written at stride s2_1 = s2+1 (nn copies per product).
2820 struct ga_instruction_contraction_opt1_1 :
public ga_instruction {
2821 base_tensor &t, &tc1, &tc2;
2823 virtual int exec() {
2824 GA_DEBUG_INFO(
"Instruction: contraction operation of size " << nn <<
2825 " optimized for both vectorized tensor of type 1");
2826 size_type s1 = tc1.size()/nn, s2 = tc2.size()/nn, s2_1 = s2+1;
2827 GA_DEBUG_ASSERT(t.size() == s2*s1,
"Internal error");
// NOTE(review): the surrounding gmm::clear of t and the i/j loop headers
// are on lines elided from this listing.
2831 auto it2 = tc2.begin();
2834 auto it1 = tc1.begin(), it = t.begin() + j*nn;
2836 if (i) { it1 += nn, it += s2*nn; }
2837 scalar_type a = (*it1) * (*it2);
// Scatter a along the diagonal: first two writes explicit, rest looped.
2839 *itt = a; itt += s2_1; *itt = a;
2840 for (
size_type k = 2; k < nn; ++k) { itt += s2_1; *itt = a; }
2845 ga_instruction_contraction_opt1_1(base_tensor &t_, base_tensor &tc1_,
2847 : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
// Recursive compile-time dot product of N strided elements:
// sum over k of it1[k*s1] * it2[k*s2]; the <1> specialization terminates.
2852 template<
int N>
inline scalar_type reduc_elem_unrolled__
2853 (base_tensor::iterator &it1, base_tensor::iterator &it2,
2855 return (it1[(N-1)*s1])*(it2[(N-1)*s2])
2856 + reduc_elem_unrolled__<N-1>(it1, it2, s1, s2);
// Base case N == 1: plain product of the two current elements.
2858 template<>
inline scalar_type reduc_elem_unrolled__<1>
2859 (base_tensor::iterator &it1, base_tensor::iterator &it2,
2861 {
return (*it1)*(*it2); }
// Fully generic contraction with compile-time size N: every output entry is
// a reduc_elem_unrolled__<N> strided dot product; the (it1, it2) pair is
// advanced odometer-style (it2 fast, it1 slow), as in ga_instruction_contraction.
2864 template<
int N>
struct ga_instruction_contraction_unrolled
2865 :
public ga_instruction {
2866 base_tensor &t, &tc1, &tc2;
2867 virtual int exec() {
2868 GA_DEBUG_INFO(
"Instruction: unrolled contraction operation of size " << N);
2869 size_type s1 = tc1.size()/N, s2 = tc2.size()/N;
2870 GA_DEBUG_ASSERT(t.size() == s1*s2,
"Internal error, " << t.size()
2871 <<
" != " << s1 <<
"*" << s2);
2872 base_tensor::iterator it1=tc1.begin(), it2=tc2.begin(), it2end=it2 + s2;
2873 for (base_tensor::iterator it = t.begin(); it != t.end(); ++it) {
2874 *it = reduc_elem_unrolled__<N>(it1, it2, s1, s2);
// Odometer advance: wrap it2 back to the start and step it1.
2875 ++it2;
if (it2 == it2end) { it2 = tc2.begin(), ++it1; }
2879 ga_instruction_contraction_unrolled(base_tensor &t_, base_tensor &tc1_,
2881 : t(t_), tc1(tc1_), tc2(tc2_) {}
// Doubly-recursive helper: emits S2 consecutive outputs, each an
// N-term strided dot product, recursing on S2. The <N, 0> specializations
// below are the recursion terminators for every N used (1..16) — C++ cannot
// partially specialize function templates, hence one explicit terminator per N.
2884 template<
int N,
int S2>
inline void reduc_elem_d_unrolled__
2885 (base_tensor::iterator &it, base_tensor::iterator &it1,
2887 *it++ = reduc_elem_unrolled__<N>(it1, it2, s1, s2);
2888 reduc_elem_d_unrolled__<N, S2-1>(it, it1, ++it2, s1, s2);
// Terminators: <N, 0> does nothing, for N = 1 .. 16.
2893 template<>
inline void reduc_elem_d_unrolled__<1, 0>
2894 (base_tensor::iterator &, base_tensor::iterator &,
2896 template<>
inline void reduc_elem_d_unrolled__<2, 0>
2897 (base_tensor::iterator &, base_tensor::iterator &,
2899 template<>
inline void reduc_elem_d_unrolled__<3, 0>
2900 (base_tensor::iterator &, base_tensor::iterator &,
2902 template<>
inline void reduc_elem_d_unrolled__<4, 0>
2903 (base_tensor::iterator &, base_tensor::iterator &,
2905 template<>
inline void reduc_elem_d_unrolled__<5, 0>
2906 (base_tensor::iterator &, base_tensor::iterator &,
2908 template<>
inline void reduc_elem_d_unrolled__<6, 0>
2909 (base_tensor::iterator &, base_tensor::iterator &,
2911 template<>
inline void reduc_elem_d_unrolled__<7, 0>
2912 (base_tensor::iterator &, base_tensor::iterator &,
2914 template<>
inline void reduc_elem_d_unrolled__<8, 0>
2915 (base_tensor::iterator &, base_tensor::iterator &,
2917 template<>
inline void reduc_elem_d_unrolled__<9, 0>
2918 (base_tensor::iterator &, base_tensor::iterator &,
2920 template<>
inline void reduc_elem_d_unrolled__<10, 0>
2921 (base_tensor::iterator &, base_tensor::iterator &,
2923 template<>
inline void reduc_elem_d_unrolled__<11, 0>
2924 (base_tensor::iterator &, base_tensor::iterator &,
2926 template<>
inline void reduc_elem_d_unrolled__<12, 0>
2927 (base_tensor::iterator &, base_tensor::iterator &,
2929 template<>
inline void reduc_elem_d_unrolled__<13, 0>
2930 (base_tensor::iterator &, base_tensor::iterator &,
2932 template<>
inline void reduc_elem_d_unrolled__<14, 0>
2933 (base_tensor::iterator &, base_tensor::iterator &,
2935 template<>
inline void reduc_elem_d_unrolled__<15, 0>
2936 (base_tensor::iterator &, base_tensor::iterator &,
2938 template<>
inline void reduc_elem_d_unrolled__<16, 0>
2939 (base_tensor::iterator &, base_tensor::iterator &,
// Doubly-unrolled contraction instruction: contraction size N and second
// dimension S2 are both compile-time constants; each of the s1 outer steps
// emits S2 outputs via reduc_elem_d_unrolled__<N, S2>.
2944 template<
int N,
int S2>
struct ga_ins_red_d_unrolled
2945 :
public ga_instruction {
2946 base_tensor &t, &tc1, &tc2;
2947 virtual int exec() {
2948 GA_DEBUG_INFO(
"Instruction: doubly unrolled contraction operation of size "
2950 size_type s1 = tc1.size()/N, s2 = tc2.size()/N;
// Runtime s2 must agree with the compile-time unroll factor S2.
2951 GA_DEBUG_ASSERT(s2 == S2,
"Internal error");
2952 GA_DEBUG_ASSERT(t.size() == s1*s2,
"Internal error, " << t.size()
2953 <<
" != " << s1 <<
"*" << s2);
2954 base_tensor::iterator it = t.begin(), it1 = tc1.begin();
2955 for (
size_type ii = 0; ii < s1; ++ii, ++it1) {
2956 base_tensor::iterator it2 = tc2.begin();
2957 reduc_elem_d_unrolled__<N, S2>(it, it1, it2, s1, s2);
2959 GA_DEBUG_ASSERT(it == t.end(),
"Internal error");
2962 ga_ins_red_d_unrolled(base_tensor &t_, base_tensor &tc1_, base_tensor &tc2_)
2963 : t(t_), tc1(tc1_), tc2(tc2_) {}
// Factory: picks the best contraction instruction for the given operand
// sparsity patterns and contraction size n, preferring (in order) the
// opt1_1, opt0_1*, opt0_2*, opt2_0* specializations, then size-unrolled
// generic instructions (n = 2..16), then the fully generic fallback.
// NOTE(review): generated listing — the switch/case labels and many argument
// lists are on elided lines; read structure from the full source.
2967 pga_instruction ga_instruction_contraction_switch
2968 (assembly_tensor &t_, assembly_tensor &tc1_, assembly_tensor &tc2_,
2970 base_tensor &t = t_.tensor(), &tc1 = tc1_.tensor(), &tc2 = tc2_.tensor();
// Both operands sparsity-1 with qdim == n: diagonal-replication kernel.
2972 if (tc1_.sparsity() == 1 && tc2_.sparsity() == 1 &&
2973 tc1_.qdim() == n && tc2_.qdim() == n) {
2975 t_.set_sparsity(10, tc1_.qdim());
2976 return std::make_shared<ga_instruction_contraction_opt1_1>(t, tc1, tc2, n);
// Second operand sparsity-1: unrolled opt0_1 for n = 2..5, generic otherwise.
2979 if (tc2_.sparsity() == 1) {
2982 return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<2>>
2985 return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<3>>
2988 return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<4>>
2991 return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<5>>
2994 return std::make_shared<ga_instruction_contraction_opt0_1>(t,tc1,tc2, n);
// Second operand sparsity-2: doubly-unrolled opt0_2 where both small
// dimensions are in {1..3} x {2..4}, singly-unrolled up to 5, else runtime.
2997 if (tc2_.sparsity() == 2) {
2999 size_type n2 = (tc2.sizes().size() > 2) ? tc2.sizes()[1] : 1;
3006 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,2>>
3010 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,3>>
3014 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,4>>
3017 return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<1>>
3024 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,2>>
3028 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,3>>
3032 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,4>>
3035 return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<2>>
3042 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,2>>
3046 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,3>>
3050 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,4>>
3053 return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<3>>
3057 return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<4>>
3060 return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<5>>
3063 return std::make_shared<ga_instruction_contraction_opt0_2>
// First operand sparsity-2: mirror of the previous branch with opt2_0*.
3068 if (tc1_.sparsity() == 2) {
3070 size_type n1 = (tc1.sizes().size() > 2) ? tc1.sizes()[1] : 1;
3077 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,2>>
3081 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,3>>
3085 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,4>>
3088 return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<1>>
3095 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,2>>
3099 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,3>>
3103 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,4>>
3106 return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<2>>
3113 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,2>>
3117 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,3>>
3121 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,4>>
3124 return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<3>>
// NOTE(review): opt2_0_unrolled<3> appears twice in a row here (lines 3124
// and 3127); the elided case labels between them would show whether the
// second occurrence should be a different unroll factor — verify against
// the full source before assuming a copy-paste bug.
3127 return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<3>>
3130 return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<4>>
3133 return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<5>>
3136 return std::make_shared<ga_instruction_contraction_opt2_0>
3137 (t,tc1,tc2, n1, q1);
// No sparsity optimization possible: dispatch on n to a size-unrolled
// generic contraction (2..16), else the runtime-sized fallback.
3143 case 2 :
return std::make_shared<ga_instruction_contraction_unrolled< 2>>
3145 case 3 :
return std::make_shared<ga_instruction_contraction_unrolled< 3>>
3147 case 4 :
return std::make_shared<ga_instruction_contraction_unrolled< 4>>
3149 case 5 :
return std::make_shared<ga_instruction_contraction_unrolled< 5>>
3151 case 6 :
return std::make_shared<ga_instruction_contraction_unrolled< 6>>
3153 case 7 :
return std::make_shared<ga_instruction_contraction_unrolled< 7>>
3155 case 8 :
return std::make_shared<ga_instruction_contraction_unrolled< 8>>
3157 case 9 :
return std::make_shared<ga_instruction_contraction_unrolled< 9>>
3159 case 10 :
return std::make_shared<ga_instruction_contraction_unrolled<10>>
3161 case 11 :
return std::make_shared<ga_instruction_contraction_unrolled<11>>
3163 case 12 :
return std::make_shared<ga_instruction_contraction_unrolled<12>>
3165 case 13 :
return std::make_shared<ga_instruction_contraction_unrolled<13>>
3167 case 14 :
return std::make_shared<ga_instruction_contraction_unrolled<14>>
3169 case 15 :
return std::make_shared<ga_instruction_contraction_unrolled<15>>
3171 case 16 :
return std::make_shared<ga_instruction_contraction_unrolled<16>>
3173 default :
return std::make_shared<ga_instruction_contraction>
// Factory for the "uniform" (same element type over the whole mesh) case.
// The sparsity-based selection mirrors ga_instruction_contraction_switch
// exactly; the final dispatch additionally fixes the second tensor dimension
// s2 at compile time via ga_ins_red_d_unrolled<N, S2> (N = 2..4, S2 = 1..10),
// falling back to the non-uniform switch for any other combination.
// NOTE(review): generated listing — outer switch headers and case labels for
// the ga_ins_red_d_unrolled dispatch are on elided lines.
3178 pga_instruction ga_uniform_instruction_contraction_switch
3179 (assembly_tensor &t_, assembly_tensor &tc1_, assembly_tensor &tc2_,
3181 base_tensor &t = t_.tensor(), &tc1 = tc1_.tensor(), &tc2 = tc2_.tensor();
// Same sparsity-driven fast paths as the non-uniform switch.
3183 if (tc1_.sparsity() == 1 && tc2_.sparsity() == 1 &&
3184 tc1_.qdim() == n && tc2_.qdim() == n) {
3186 t_.set_sparsity(10, tc1_.qdim());
3187 return std::make_shared<ga_instruction_contraction_opt1_1>(t,tc1,tc2,n);
3189 if (tc2_.sparsity() == 1) {
3192 return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<2>>
3195 return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<3>>
3198 return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<4>>
3201 return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<5>>
3204 return std::make_shared<ga_instruction_contraction_opt0_1>(t,tc1,tc2, n);
3207 if (tc2_.sparsity() == 2) {
3209 size_type n2 = (tc2.sizes().size() > 2) ? tc2.sizes()[1] : 1;
3216 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,2>>
3220 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,3>>
3224 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,4>>
3227 return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<1>>
3234 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,2>>
3238 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,3>>
3242 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,4>>
3245 return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<2>>
3252 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,2>>
3256 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,3>>
3260 std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,4>>
3263 return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<3>>
3267 return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<4>>
3270 return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<5>>
3273 return std::make_shared<ga_instruction_contraction_opt0_2>
3278 if (tc1_.sparsity() == 2) {
3280 size_type n1 = (tc1.sizes().size() > 2) ? tc1.sizes()[1] : 1;
3287 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,2>>
3291 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,3>>
3295 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,4>>
3298 return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<1>>
3305 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,2>>
3309 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,3>>
3313 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,4>>
3316 return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<2>>
3323 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,2>>
3327 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,3>>
3331 std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,4>>
3334 return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<3>>
// NOTE(review): same duplicated opt2_0_unrolled<3> as in the non-uniform
// switch (lines 3334/3337) — the elided case labels determine whether this
// is intentional; verify against the full source.
3337 return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<3>>
3340 return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<4>>
3343 return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<5>>
3346 return std::make_shared<ga_instruction_contraction_opt2_0>
3347 (t,tc1,tc2, n1, q1);
// Doubly-unrolled dispatch: inner switch on N (2..4) per fixed s2 (1..10);
// anything else falls back to the runtime-sized non-uniform switch.
3357 case 2:
return std::make_shared<ga_ins_red_d_unrolled<2,1>>(t, tc1, tc2);
3358 case 3:
return std::make_shared<ga_ins_red_d_unrolled<3,1>>(t, tc1, tc2);
3359 case 4:
return std::make_shared<ga_ins_red_d_unrolled<4,1>>(t, tc1, tc2);
3360 default:
return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3364 case 2:
return std::make_shared<ga_ins_red_d_unrolled<2,2>>(t, tc1, tc2);
3365 case 3:
return std::make_shared<ga_ins_red_d_unrolled<3,2>>(t, tc1, tc2);
3366 case 4:
return std::make_shared<ga_ins_red_d_unrolled<4,2>>(t, tc1, tc2);
3367 default:
return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3371 case 2:
return std::make_shared<ga_ins_red_d_unrolled<2,3>>(t, tc1, tc2);
3372 case 3:
return std::make_shared<ga_ins_red_d_unrolled<3,3>>(t, tc1, tc2);
3373 case 4:
return std::make_shared<ga_ins_red_d_unrolled<4,3>>(t, tc1, tc2);
3374 default:
return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3378 case 2:
return std::make_shared<ga_ins_red_d_unrolled<2,4>>(t, tc1, tc2);
3379 case 3:
return std::make_shared<ga_ins_red_d_unrolled<3,4>>(t, tc1, tc2);
3380 case 4:
return std::make_shared<ga_ins_red_d_unrolled<4,4>>(t, tc1, tc2);
3381 default:
return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3385 case 2:
return std::make_shared<ga_ins_red_d_unrolled<2,5>>(t, tc1, tc2);
3386 case 3:
return std::make_shared<ga_ins_red_d_unrolled<3,5>>(t, tc1, tc2);
3387 case 4:
return std::make_shared<ga_ins_red_d_unrolled<4,5>>(t, tc1, tc2);
3388 default:
return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3392 case 2:
return std::make_shared<ga_ins_red_d_unrolled<2,6>>(t, tc1, tc2);
3393 case 3:
return std::make_shared<ga_ins_red_d_unrolled<3,6>>(t, tc1, tc2);
3394 case 4:
return std::make_shared<ga_ins_red_d_unrolled<4,6>>(t, tc1, tc2);
3395 default:
return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3399 case 2:
return std::make_shared<ga_ins_red_d_unrolled<2,7>>(t, tc1, tc2);
3400 case 3:
return std::make_shared<ga_ins_red_d_unrolled<3,7>>(t, tc1, tc2);
3401 case 4:
return std::make_shared<ga_ins_red_d_unrolled<4,7>>(t, tc1, tc2);
3402 default:
return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3406 case 2:
return std::make_shared<ga_ins_red_d_unrolled<2,8>>(t, tc1, tc2);
3407 case 3:
return std::make_shared<ga_ins_red_d_unrolled<3,8>>(t, tc1, tc2);
3408 case 4:
return std::make_shared<ga_ins_red_d_unrolled<4,8>>(t, tc1, tc2);
3409 default:
return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3413 case 2:
return std::make_shared<ga_ins_red_d_unrolled<2,9>>(t, tc1, tc2);
3414 case 3:
return std::make_shared<ga_ins_red_d_unrolled<3,9>>(t, tc1, tc2);
3415 case 4:
return std::make_shared<ga_ins_red_d_unrolled<4,9>>(t, tc1, tc2);
3416 default:
return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3420 case 2:
return std::make_shared<ga_ins_red_d_unrolled<2,10>>(t, tc1, tc2);
3421 case 3:
return std::make_shared<ga_ins_red_d_unrolled<3,10>>(t, tc1, tc2);
3422 case 4:
return std::make_shared<ga_ins_red_d_unrolled<4,10>>(t, tc1, tc2);
3423 default:
return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3425 default:
return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
// "Specific" contraction: contracts the middle dimension (size nn, index j)
// of tc1 (s1 x s11 x nn) against tc2 (s2-leading, nn-trailing), keeping the
// surrounding indices m, i, n in the output.
3431 struct ga_instruction_spec_contraction :
public ga_instruction {
3432 base_tensor &t, &tc1, &tc2;
3434 virtual int exec() {
3435 GA_DEBUG_INFO(
"Instruction: specific contraction operation of "
3437 size_type s1 = tc1.sizes()[0], s11 = tc1.size() / (s1*nn), s111 = s1*s11;
3439 base_tensor::iterator it = t.begin();
3442 for (
size_type m = 0; m < s1; ++m, ++it) {
3443 *it = scalar_type(0);
// Sum over the contracted index j (outer loops over n, i elided here).
3445 *it += tc1[m+i*s1+j*s111] * tc2[n+j*s2];
3447 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
3450 ga_instruction_spec_contraction(base_tensor &t_, base_tensor &tc1_,
3452 : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
// Second "specific" contraction: both operands carry the contracted trailing
// dimension (size nn, index k); the analogous s111/s222 block strides are
// computed for each, and the output's innermost index n comes from tc2.
3456 struct ga_instruction_spec2_contraction :
public ga_instruction {
3457 base_tensor &t, &tc1, &tc2;
3459 virtual int exec() {
3460 GA_DEBUG_INFO(
"Instruction: second specific contraction operation of "
3462 size_type s1 = tc1.sizes()[0], s11 = tc1.size() / (s1*nn), s111 = s1*s11;
3463 size_type s2 = tc2.sizes()[0], s22 = tc2.size() / (s2*nn), s222 = s2*s22;
3464 base_tensor::iterator it = t.begin();
3468 for (
size_type n = 0; n < s2; ++n, ++it) {
3469 *it = scalar_type(0);
// Sum over contracted index k (outer loops over i, j, m elided here).
3471 *it += tc1[m+i*s1+k*s111] * tc2[n+j*s2+k*s222];
3473 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
3476 ga_instruction_spec2_contraction(base_tensor &t_, base_tensor &tc1_,
3478 : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
// Simple tensor (outer) product: t[i + j*s1] = tc1[i] * tc2[j], produced by
// the odometer walk below (it1 fast over tc1, it2 slow over tc2).
3482 struct ga_instruction_simple_tmult :
public ga_instruction {
3483 base_tensor &t, &tc1, &tc2;
3484 virtual int exec() {
3485 GA_DEBUG_INFO(
"Instruction: simple tensor product");
3487 GA_DEBUG_ASSERT(t.size() == s1 * tc2.size(),
"Wrong sizes");
3488 base_tensor::iterator it2=tc2.begin(), it1=tc1.begin(), it1end=it1 + s1;
3489 for (base_tensor::iterator it = t.begin(); it != t.end(); ++it) {
3490 *it = *(it2) * (*it1);
// Odometer advance: wrap it1 back to the start and step it2.
3491 ++it1;
if (it1 == it1end) { it1 = tc1.begin(), ++it2; }
3495 ga_instruction_simple_tmult(base_tensor &t_, base_tensor &tc1_,
3497 : t(t_), tc1(tc1_), tc2(tc2_) {}
// Recursive compile-time helper: writes S1 consecutive products of tc1
// entries with the single fixed tc2 entry *it2; the <0> specialization
// terminates the recursion.
3500 template<
int S1>
inline void tmult_elem_unrolled__
3501 (base_tensor::iterator &it, base_tensor::iterator &it1,
3502 base_tensor::iterator &it2) {
3503 *it++ = (*it1++)*(*it2);
3504 tmult_elem_unrolled__<S1-1>(it, it1, it2);
// Base case S1 == 0: nothing left to emit.
3506 template<>
inline void tmult_elem_unrolled__<0>
3507 (base_tensor::iterator &, base_tensor::iterator &,
3508 base_tensor::iterator &) { }
// Tensor product with the first operand's size S1 fixed at compile time:
// for each tc2 entry, emits an unrolled block of S1 products.
3511 template<
int S1>
struct ga_instruction_simple_tmult_unrolled
3512 :
public ga_instruction {
3513 base_tensor &t, &tc1, &tc2;
3514 virtual int exec() {
3516 GA_DEBUG_ASSERT(tc1.size() == S1,
3517 "Wrong sizes " << tc1.size() <<
" != " << S1);
3518 GA_DEBUG_INFO(
"Instruction: simple tensor product, unrolled with "
3519 << S1 <<
" operations");
3520 GA_DEBUG_ASSERT(t.size() == S1 * s2,
3521 "Wrong sizes " << t.size() <<
" != " << S1 <<
"*" << s2);
3522 base_tensor::iterator it = t.begin(), it2 = tc2.begin();
3523 for (
size_type ii = 0; ii < s2; ++ii, ++it2) {
3524 base_tensor::iterator it1 = tc1.begin();
// S1 products per tc2 entry, fully unrolled at compile time.
3525 tmult_elem_unrolled__<S1>(it, it1, it2);
3527 GA_DEBUG_ASSERT(it == t.end(),
"Internal error");
3530 ga_instruction_simple_tmult_unrolled(base_tensor &t_, base_tensor &tc1_,
3532 : t(t_), tc1(tc1_), tc2(tc2_) {}
// Factory: dispatches on tc1.size() to a compile-time-unrolled tensor
// product (2..16), falling back to the runtime-sized instruction otherwise.
3535 pga_instruction ga_uniform_instruction_simple_tmult
3536 (base_tensor &t, base_tensor &tc1, base_tensor &tc2) {
3537 switch(tc1.size()) {
3538 case 2 :
return std::make_shared<ga_instruction_simple_tmult_unrolled< 2>>
3540 case 3 :
return std::make_shared<ga_instruction_simple_tmult_unrolled< 3>>
3542 case 4 :
return std::make_shared<ga_instruction_simple_tmult_unrolled< 4>>
3544 case 5 :
return std::make_shared<ga_instruction_simple_tmult_unrolled< 5>>
3546 case 6 :
return std::make_shared<ga_instruction_simple_tmult_unrolled< 6>>
3548 case 7 :
return std::make_shared<ga_instruction_simple_tmult_unrolled< 7>>
3550 case 8 :
return std::make_shared<ga_instruction_simple_tmult_unrolled< 8>>
3552 case 9 :
return std::make_shared<ga_instruction_simple_tmult_unrolled< 9>>
3554 case 10 :
return std::make_shared<ga_instruction_simple_tmult_unrolled<10>>
3556 case 11 :
return std::make_shared<ga_instruction_simple_tmult_unrolled<11>>
3558 case 12 :
return std::make_shared<ga_instruction_simple_tmult_unrolled<12>>
3560 case 13 :
return std::make_shared<ga_instruction_simple_tmult_unrolled<13>>
3562 case 14 :
return std::make_shared<ga_instruction_simple_tmult_unrolled<14>>
3564 case 15 :
return std::make_shared<ga_instruction_simple_tmult_unrolled<15>>
3566 case 16 :
return std::make_shared<ga_instruction_simple_tmult_unrolled<16>>
// Runtime-sized fallback for tc1.size() outside 2..16.
3568 default :
return std::make_shared<ga_instruction_simple_tmult>
// "Specific" tensor product with interleaved index order: both operands are
// viewed as (s*_1 x s*_2) blocks and the output mixes the block indices
// (m, n innermost, i, j outermost — outer loop headers elided here).
3575 struct ga_instruction_spec_tmult :
public ga_instruction {
3576 base_tensor &t, &tc1, &tc2;
3578 virtual int exec() {
3579 GA_DEBUG_INFO(
"Instruction: specific tensor product");
3580 GA_DEBUG_ASSERT(t.size() == tc1.size() * tc2.size(),
"Wrong sizes");
3584 base_tensor::iterator it = t.begin();
3588 for (
size_type m = 0; m < s1_1; ++m, ++it)
3589 *it = tc1[m+i*s1_1] * tc2[n+j*s2_1];
3590 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
3593 ga_instruction_spec_tmult(base_tensor &t_, base_tensor &tc1_,
3596 : t(t_), tc1(tc1_), tc2(tc2_), s1_2(s1_2_), s2_2(s2_2_) {}
// Second "specific" tensor product: tc1 is indexed by a single flat index i
// while tc2 is split into (s2_1 x s2_2) blocks; output interleaves m (fast,
// from tc2), i, then j (outer loop headers elided here).
3600 struct ga_instruction_spec2_tmult :
public ga_instruction {
3601 base_tensor &t, &tc1, &tc2;
3602 virtual int exec() {
3603 GA_DEBUG_INFO(
"Instruction: second specific tensor product");
3604 GA_DEBUG_ASSERT(t.size() == tc1.size() * tc2.size(),
"Wrong sizes");
3606 size_type s2_1 = tc2.sizes()[0], s2_2 = tc2.size() / s2_1;
3608 base_tensor::iterator it = t.begin();
3611 for (
size_type m = 0; m < s2_1; ++m, ++it)
3612 *it = tc1[i] * tc2[m+j*s2_1];
3613 GA_DEBUG_ASSERT(it == t.end(),
"Wrong sizes");
3616 ga_instruction_spec2_tmult(base_tensor &t_, base_tensor &tc1_,
3618 : t(t_), tc1(tc1_), tc2(tc2_) {}
// Gathers precomputed scalar components (one pointer per output entry) into
// the result tensor — used for explicit matrices/vectors written in the
// assembly language.
3623 struct ga_instruction_simple_c_matrix :
public ga_instruction {
3625 std::vector<scalar_type *> components;
3626 virtual int exec() {
3627 GA_DEBUG_INFO(
"Instruction: gathering components for explicit "
3629 GA_DEBUG_ASSERT(t.size() == components.size(),
"Wrong sizes");
// One dereference per output entry; pointers refer to values computed by
// earlier instructions.
3630 for (
size_type i = 0; i < components.size(); ++i)
3631 t[i] = *(components[i]);
3634 ga_instruction_simple_c_matrix(base_tensor &t_,
3635 std::vector<scalar_type *> &components_)
3636 : t(t_), components(components_) {}
// Explicit matrix whose components involve test functions: each component is
// a whole sub-tensor of size s (or a scalar, which is then broadcast over
// the s slots of its block).
3639 struct ga_instruction_c_matrix_with_tests :
public ga_instruction {
3641 const std::vector<const base_tensor *> components;
3642 virtual int exec() {
3643 GA_DEBUG_INFO(
"Instruction: gathering components for explicit "
3644 "matrix with tests functions");
3645 size_type s = t.size() / components.size();
3646 GA_DEBUG_ASSERT(s,
"Wrong sizes");
3647 base_tensor::iterator it = t.begin();
3648 for (
size_type i = 0; i < components.size(); ++i) {
3649 const base_tensor &t1 = *(components[i]);
3650 if (t1.size() > 1) {
3651 GA_DEBUG_ASSERT(t1.size() == s,
"Wrong sizes, " << t1.size()
// Full-size component: copy its s entries verbatim.
3653 for (
size_type j = 0; j < s; ++j) *it++ = t1[j];
// Scalar component: broadcast its single value over the block.
3655 for (
size_type j = 0; j < s; ++j) *it++ = t1[0];
3660 ga_instruction_c_matrix_with_tests
3661 (base_tensor &t_,
const std::vector<const base_tensor *> &components_)
3662 : t(t_), components(components_) {}
// Assembly instruction: evaluates a one-argument predefined function
// (through a raw function pointer f1) on the scalar c, result stored in t.
// NOTE(review): the assignment statement of exec() is not visible in this
// extraction.
3665 struct ga_instruction_eval_func_1arg_1res :
public ga_instruction {
3667 const scalar_type &c;
3668 pscalar_func_onearg f1;
3669 virtual int exec() {
3670 GA_DEBUG_INFO(
"Instruction: evaluation of a one argument "
3671 "predefined function on a scalar");
3675 ga_instruction_eval_func_1arg_1res(scalar_type &t_,
const scalar_type &c_,
3676 pscalar_func_onearg f1_)
3677 : t(t_), c(c_), f1(f1_) {}
// Assembly instruction: same as ga_instruction_eval_func_1arg_1res but the
// function is given as a ga_predef_function object F (expression-defined
// function) instead of a raw function pointer.  NOTE(review): the
// assignment statement of exec() is not visible in this extraction.
3680 struct ga_instruction_eval_func_1arg_1res_expr :
public ga_instruction {
3682 const scalar_type &c;
3683 const ga_predef_function &F;
3684 virtual int exec() {
3685 GA_DEBUG_INFO(
"Instruction: evaluation of a one argument "
3686 "predefined function on a scalar");
3690 ga_instruction_eval_func_1arg_1res_expr(scalar_type &t_,
3691 const scalar_type &c_,
3692 const ga_predef_function &F_)
3693 : t(t_), c(c_), F(F_) {}
// Assembly instruction: applies a one-argument predefined function
// (function pointer f1) entrywise to tensor tc1, storing results in t.
3696 struct ga_instruction_eval_func_1arg :
public ga_instruction {
3697 base_tensor &t, &tc1;
3698 pscalar_func_onearg f1;
3699 virtual int exec() {
3700 GA_DEBUG_INFO(
"Instruction: evaluation of a one argument "
3701 "predefined function on tensor");
3702 GA_DEBUG_ASSERT(t.size() == tc1.size(),
"Wrong sizes");
3703 for (
size_type i = 0; i < t.size(); ++i) t[i] = (*f1)(tc1[i]);
3706 ga_instruction_eval_func_1arg(base_tensor &t_, base_tensor &c_,
3707 pscalar_func_onearg f1_)
3708 : t(t_), tc1(c_), f1(f1_) {}
// Assembly instruction: applies an expression-defined predefined function F
// entrywise to tensor tc1, storing results in t.
3711 struct ga_instruction_eval_func_1arg_expr :
public ga_instruction {
3712 base_tensor &t, &tc1;
3713 const ga_predef_function &F;
3714 virtual int exec() {
3715 GA_DEBUG_INFO(
"Instruction: evaluation of a one argument "
3716 "predefined function on tensor");
3717 GA_DEBUG_ASSERT(t.size() == tc1.size(),
"Wrong sizes");
3718 for (
size_type i = 0; i < t.size(); ++i) t[i] = F(tc1[i]);
3721 ga_instruction_eval_func_1arg_expr(base_tensor &t_, base_tensor &c_,
3722 const ga_predef_function &F_)
3723 : t(t_), tc1(c_), F(F_) {}
// Assembly instruction: evaluates a two-argument predefined function
// (function pointer f2) on the two scalars c and d, result in t.
// NOTE(review): the assignment statement of exec() is not visible in this
// extraction.
3726 struct ga_instruction_eval_func_2arg_1res :
public ga_instruction {
3728 const scalar_type &c, &d;
3729 pscalar_func_twoargs f2;
3730 virtual int exec() {
3731 GA_DEBUG_INFO(
"Instruction: evaluation of a two arguments "
3732 "predefined function on two scalar");
3736 ga_instruction_eval_func_2arg_1res(scalar_type &t_,
const scalar_type &c_,
3737 const scalar_type &d_,
3738 pscalar_func_twoargs f2_)
3739 : t(t_), c(c_), d(d_), f2(f2_) {}
// Assembly instruction: evaluates an expression-defined two-argument
// function F on the two scalars c and d, result in t.  NOTE(review): the
// assignment statement of exec() is not visible in this extraction.
3742 struct ga_instruction_eval_func_2arg_1res_expr :
public ga_instruction {
3744 const scalar_type &c, &d;
3745 const ga_predef_function &F;
3746 virtual int exec() {
3747 GA_DEBUG_INFO(
"Instruction: evaluation of a two arguments "
3748 "predefined function on two scalar");
3752 ga_instruction_eval_func_2arg_1res_expr(scalar_type &t_,
3753 const scalar_type &c_,
3754 const scalar_type &d_,
3755 const ga_predef_function &F_)
3756 : t(t_), c(c_), d(d_), F(F_) {}
// Assembly instruction: two-argument function applied with a scalar first
// argument (tc1[0]) and tensor second argument tc2, entrywise into t.
3759 struct ga_instruction_eval_func_2arg_first_scalar :
public ga_instruction {
3760 base_tensor &t, &tc1, &tc2;
3761 pscalar_func_twoargs f2;
3762 virtual int exec() {
3763 GA_DEBUG_INFO(
"Instruction: evaluation of a two arguments "
3764 "predefined function on one scalar and one tensor");
3765 GA_DEBUG_ASSERT(t.size() == tc2.size(),
"Wrong sizes");
3766 for (
size_type i = 0; i < t.size(); ++i) t[i] = (*f2)(tc1[0], tc2[i]);
3769 ga_instruction_eval_func_2arg_first_scalar
3770 (base_tensor &t_, base_tensor &c_, base_tensor &d_,
3771 pscalar_func_twoargs f2_)
3772 : t(t_), tc1(c_), tc2(d_), f2(f2_) {}
// Assembly instruction: expression-defined two-argument function applied
// with a scalar first argument (tc1[0]) and tensor second argument tc2.
3775 struct ga_instruction_eval_func_2arg_first_scalar_expr
3776 :
public ga_instruction {
3777 base_tensor &t, &tc1, &tc2;
3778 const ga_predef_function &F;
3779 virtual int exec() {
3780 GA_DEBUG_INFO(
"Instruction: evaluation of a two arguments "
3781 "predefined function on one scalar and one tensor");
3782 GA_DEBUG_ASSERT(t.size() == tc2.size(),
"Wrong sizes");
3783 for (
size_type i = 0; i < t.size(); ++i) t[i] = F(tc1[0], tc2[i]);
3786 ga_instruction_eval_func_2arg_first_scalar_expr
3787 (base_tensor &t_, base_tensor &c_, base_tensor &d_,
3788 const ga_predef_function &F_)
3789 : t(t_), tc1(c_), tc2(d_), F(F_) {}
// Assembly instruction: two-argument function applied with tensor first
// argument tc1 and a scalar second argument (tc2[0]), entrywise into t.
3792 struct ga_instruction_eval_func_2arg_second_scalar :
public ga_instruction {
3793 base_tensor &t, &tc1, &tc2;
3794 pscalar_func_twoargs f2;
3795 virtual int exec() {
3796 GA_DEBUG_INFO(
"Instruction: evaluation of a two arguments "
3797 "predefined function on one tensor and one scalar");
3798 GA_DEBUG_ASSERT(t.size() == tc1.size(),
"Wrong sizes");
3799 for (
size_type i = 0; i < t.size(); ++i) t[i] = (*f2)(tc1[i], tc2[0]);
3802 ga_instruction_eval_func_2arg_second_scalar(base_tensor &t_,
3805 pscalar_func_twoargs f2_)
3806 : t(t_), tc1(c_), tc2(d_), f2(f2_) {}
// Assembly instruction: expression-defined two-argument function applied
// with tensor first argument tc1 and scalar second argument (tc2[0]).
3809 struct ga_instruction_eval_func_2arg_second_scalar_expr
3810 :
public ga_instruction {
3811 base_tensor &t, &tc1, &tc2;
3812 const ga_predef_function &F;
3813 virtual int exec() {
3814 GA_DEBUG_INFO(
"Instruction: evaluation of a two arguments "
3815 "predefined function on one tensor and one scalar");
3816 GA_DEBUG_ASSERT(t.size() == tc1.size(),
"Wrong sizes");
3817 for (
size_type i = 0; i < t.size(); ++i) t[i] = F(tc1[i], tc2[0]);
3820 ga_instruction_eval_func_2arg_second_scalar_expr
3821 (base_tensor &t_, base_tensor &c_, base_tensor &d_,
3822 const ga_predef_function &F_)
3823 : t(t_), tc1(c_), tc2(d_), F(F_) {}
// Assembly instruction: two-argument function applied entrywise to two
// tensors of identical size, t[i] = f2(tc1[i], tc2[i]).
3826 struct ga_instruction_eval_func_2arg :
public ga_instruction {
3827 base_tensor &t, &tc1, &tc2;
3828 pscalar_func_twoargs f2;
3829 virtual int exec() {
3830 GA_DEBUG_INFO(
"Instruction: evaluation of a two arguments "
3831 "predefined function on two tensors");
3832 GA_DEBUG_ASSERT(t.size() == tc1.size() && t.size() == tc2.size(),
3835 for (
size_type i = 0; i < t.size(); ++i) t[i] = (*f2)(tc1[i], tc2[i]);
3838 ga_instruction_eval_func_2arg(base_tensor &t_, base_tensor &c_,
3839 base_tensor &d_, pscalar_func_twoargs f2_)
3840 : t(t_), tc1(c_), tc2(d_), f2(f2_) {}
// Assembly instruction: expression-defined two-argument function applied
// entrywise to two tensors of identical size, t[i] = F(tc1[i], tc2[i]).
3843 struct ga_instruction_eval_func_2arg_expr :
public ga_instruction {
3844 base_tensor &t, &tc1, &tc2;
3845 const ga_predef_function &F;
3846 virtual int exec() {
3847 GA_DEBUG_INFO(
"Instruction: evaluation of a two arguments "
3848 "predefined function on two tensors");
3849 GA_DEBUG_ASSERT(t.size() == tc1.size() && t.size() == tc2.size(),
3852 for (
size_type i = 0; i < t.size(); ++i) t[i] = F(tc1[i], tc2[i]);
3855 ga_instruction_eval_func_2arg_expr(base_tensor &t_, base_tensor &c_,
3857 const ga_predef_function &F_)
3858 : t(t_), tc1(c_), tc2(d_), F(F_) {}
// Assembly instruction: evaluates a nonlinear operator OP on its argument
// list, result in t.  NOTE(review): the OP.value(...) call of exec() is
// not visible in this extraction.
3861 struct ga_instruction_eval_OP :
public ga_instruction {
3863 const ga_nonlinear_operator &OP;
3864 ga_nonlinear_operator::arg_list args;
3865 virtual int exec() {
3866 GA_DEBUG_INFO(
"Instruction: operator evaluation");
3870 ga_instruction_eval_OP(base_tensor &t_,
const ga_nonlinear_operator &OP_,
3871 ga_nonlinear_operator::arg_list &args_)
3872 : t(t_), OP(OP_), args(args_) {}
// Assembly instruction: evaluates the derivative of nonlinear operator OP
// with respect to argument index der1, result in t.
3875 struct ga_instruction_eval_derivative_OP :
public ga_instruction {
3877 const ga_nonlinear_operator &OP;
3878 ga_nonlinear_operator::arg_list args;
3880 virtual int exec() {
3881 GA_DEBUG_INFO(
"Instruction: operator derivative evaluation");
3882 OP.derivative(args, der1, t);
3885 ga_instruction_eval_derivative_OP(base_tensor &t_,
3886 const ga_nonlinear_operator &OP_,
3887 ga_nonlinear_operator::arg_list &args_,
3889 : t(t_), OP(OP_), args(args_), der1(der1_) {}
// Assembly instruction: evaluates the second derivative of nonlinear
// operator OP with respect to argument indices der1 and der2, result in t.
3892 struct ga_instruction_eval_second_derivative_OP :
public ga_instruction {
3894 const ga_nonlinear_operator &OP;
3895 ga_nonlinear_operator::arg_list args;
3897 virtual int exec() {
3898 GA_DEBUG_INFO(
"Instruction: operator second derivative evaluation");
3899 OP.second_derivative(args, der1, der2, t);
3902 ga_instruction_eval_second_derivative_OP
3903 (base_tensor &t_,
const ga_nonlinear_operator &OP_,
3905 : t(t_), OP(OP_), args(args_), der1(der1_), der2(der2_) {}
// Assembly instruction: extracts a slice of tc1 into t.  Iterates a
// multi-index mi3 over t's sizes and maps the free indices of mi (through
// the `indices` table) onto positions of tc1.  NOTE(review): the
// statements copying tc1(mi) into t are not visible in this extraction.
3908 struct ga_instruction_tensor_slice :
public ga_instruction {
3909 base_tensor &t, &tc1;
3910 bgeot::multi_index mi, indices;
3911 virtual int exec() {
3912 GA_DEBUG_INFO(
"Instruction: tensor slice");
3914 for (bgeot::multi_index mi3(order); !mi3.finished(t.sizes());
3915 mi3.incrementation(t.sizes())) {
3917 mi[indices[j]] = mi3[j];
3922 ga_instruction_tensor_slice(base_tensor &t_, base_tensor &tc1_,
3923 bgeot::multi_index &mi_,
3924 bgeot::multi_index &indices_)
3925 : t(t_), tc1(tc1_), mi(mi_), indices(indices_) {}
// Assembly instruction: invokes an interpolate transformation `trans` at
// the current Gauss point and fills the interpolate_info `inin` with the
// transformed point (pt_y), target element context (ctx), unit normal and
// validity flags.  The branching on pt_type (point found in an element /
// found outside / not found) is partially missing from this extraction;
// the visible statements show the per-case updates of inin.
3928 struct ga_instruction_transformation_call :
public ga_instruction {
3929 const ga_workspace &workspace;
3930 ga_instruction_set::interpolate_info &inin;
3931 pinterpolate_transformation trans;
3932 fem_interpolation_context &ctx;
3933 const base_small_vector &Normal;
3937 virtual int exec() {
3938 GA_DEBUG_INFO(
"Instruction: call interpolate transformation");
// Delegate to the transformation object; it reports the point type and
// fills cv/face_num/P_ref plus the transformed normal and derivatives.
3942 inin.pt_type = trans->transform(workspace, m, ctx, Normal, &(inin.m), cv,
3943 face_num, P_ref, inin.Normal,
3944 inin.derivatives, compute_der);
// Success case: rebuild the target-element interpolation context.
3947 inin.m->points_of_convex(cv, inin.G);
3948 inin.ctx.change((inin.m)->trans_of_convex(cv),
3949 0, P_ref, inin.G, cv, face_num);
3950 inin.has_ctx =
true;
// Normalize the transformed normal when present, otherwise clear it.
3953 gmm::scale(inin.Normal, 1.0/gmm::vect_norm2(inin.Normal));
3955 inin.Normal.resize(0);
3956 inin.pt_y = inin.ctx.xreal();
// Point located but without a usable element context.
3958 inin.ctx.invalid_convex_num();
3959 inin.Normal.resize(0);
3961 inin.has_ctx =
false;
// Failure case: invalidate everything.
3964 inin.ctx.invalid_convex_num();
3965 inin.Normal.resize(0);
3966 inin.pt_y.resize(0);
3967 inin.has_ctx =
false;
3969 GA_DEBUG_INFO(
"Instruction: end of call interpolate transformation");
3972 ga_instruction_transformation_call
3973 (
const ga_workspace &w, ga_instruction_set::interpolate_info &i,
3974 pinterpolate_transformation t, fem_interpolation_context &ctxx,
3975 const base_small_vector &No,
const mesh &mm,
bool compute_der_)
3976 : workspace(w), inin(i), trans(t), ctx(ctxx), Normal(No), m(mm),
3977 compute_der(compute_der_) {}
// Assembly instruction: interpolate-transformation call specialized for the
// "neighbor element" transformation on an inner face.  When the geometric
// precomputations allow it, the Gauss-point correspondence between the two
// elements sharing the face is computed once per (pgt1, pgt2, node-pairing)
// key and cached in `neighbor_corresp`; otherwise it falls back to the
// generic trans->transform() path.  Several control-flow lines are missing
// from this extraction; comments describe only the visible statements.
3980 struct ga_instruction_neighbor_transformation_call :
public ga_instruction {
3981 const ga_workspace &workspace;
3982 ga_instruction_set::interpolate_info &inin;
3983 pinterpolate_transformation trans;
3984 fem_interpolation_context &ctx;
3985 base_small_vector dummy_normal;
3988 papprox_integration &pai;
3990 std::map<gauss_pt_corresp, bgeot::pstored_point_tab> &neighbor_corresp;
3992 virtual int exec() {
3993 bool cancel_optimization =
false;
3994 GA_DEBUG_INFO(
"Instruction: call interpolate neighbor transformation");
// Optimized path requires a geotrans precomputation and a fixed
// integration method (not built on the fly).
3996 if (!(ctx.have_pgp()) || !pai || pai->is_built_on_the_fly()
3997 || cancel_optimization) {
3998 inin.ctx.invalid_convex_num();
// Locate the element on the other side of face f of element cv.
4003 auto adj_face = m.adjacent_face(cv, f);
4005 GMM_WARNING2(
"Adjacent face not found, "
4006 "probably an non-interior face");
4007 inin.ctx.invalid_convex_num();
// Build the cache key: both geometric transformations plus the
// pairing of face nodes between the two elements.
4009 gauss_pt_corresp gpc;
4010 gpc.pgt1 = m.trans_of_convex(cv);
4011 gpc.pgt2 = m.trans_of_convex(adj_face.cv);
4013 auto inds_pt1 = m.ind_points_of_face_of_convex(cv, f);
4014 auto inds_pt2 = m.ind_points_of_face_of_convex(adj_face.cv,
4016 auto str1 = gpc.pgt1->structure();
4017 auto str2 = gpc.pgt2->structure();
4018 size_type nbptf1 = str1->nb_points_of_face(f);
4019 size_type nbptf2 = str2->nb_points_of_face(adj_face.f);
4020 gpc.nodes.resize(nbptf1*2);
4021 for (
size_type i = 0; i < nbptf1; ++i) {
4022 gpc.nodes[2*i] = str1->ind_points_of_face(f)[i];
4024 for (
size_type j = 0; j < nbptf2; ++j) {
4025 if (inds_pt2[j] == inds_pt1[i]) {
4026 gpc.nodes[2*i+1] = str2->ind_points_of_face(adj_face.f)[j];
4031 GMM_ASSERT1(found,
"Internal error");
4033 bgeot::pstored_point_tab pspt = 0;
4034 auto itm = neighbor_corresp.find(gpc);
4035 if (itm != neighbor_corresp.end()) {
// Cache miss: invert the neighbor's geometric transformation at each
// Gauss point of the face to get its reference coordinates P_ref.
4038 size_type nbpt = pai->nb_points_on_face(f);
4040 gic.init(m.points_of_convex(adj_face.cv), gpc.pgt2);
4041 size_type first_ind = pai->ind_first_point_on_face(f);
4043 &spt = *(pai->pintegration_points());
4045 m.points_of_convex(cv, G);
4046 fem_interpolation_context ctx_x(gpc.pgt1, 0, spt[0], G, cv, f);
4047 std::vector<base_node> P_ref(nbpt);
4050 ctx_x.set_xref(spt[first_ind+i]);
4051 bool converged =
true;
4052 gic.
invert(ctx_x.xreal(), P_ref[i], converged);
// Tolerate small negative is_in values (point on the boundary).
4053 bool is_in = (gpc.pgt2->convex_ref()->is_in(P_ref[i]) < 1E-4);
4054 GMM_ASSERT1(is_in && converged,
"Geometric transformation "
4055 "inversion has failed in neighbor transformation");
4057 pspt = store_point_tab(P_ref);
4058 neighbor_corresp[gpc] = pspt;
// Install the neighbor-element context from the cached point table.
4060 m.points_of_convex(adj_face.cv, inin.G);
4061 bgeot::pgeotrans_precomp pgp = gp_pool(gpc.pgt2, pspt);
4062 inin.ctx.change(pgp, 0, 0, inin.G, adj_face.cv, adj_face.f);
4067 if (inin.ctx.have_pgp() && inin.ctx.is_convex_num_valid()) {
4068 inin.ctx.set_ii(ipt);
4070 inin.has_ctx =
true;
4071 inin.pt_y = inin.ctx.xreal();
4073 gmm::scale(inin.Normal, 1.0/gmm::vect_norm2(inin.Normal));
// Fallback: generic transformation call (as in
// ga_instruction_transformation_call).
4080 inin.pt_type = trans->transform(workspace, m, ctx, dummy_normal,
4081 &(inin.m), cv, face_num, P_ref,
4082 dummy_normal, inin.derivatives,
4086 inin.m->points_of_convex(cv, inin.G);
4087 inin.ctx.change((inin.m)->trans_of_convex(cv),
4088 0, P_ref, inin.G, cv, face_num);
4089 inin.has_ctx =
true;
4092 gmm::scale(inin.Normal, 1.0/gmm::vect_norm2(inin.Normal));
4094 inin.Normal.resize(0);
4095 inin.pt_y = inin.ctx.xreal();
4097 inin.ctx.invalid_convex_num();
4099 inin.has_ctx =
false;
4102 inin.ctx.invalid_convex_num();
4103 inin.Normal.resize(0);
4104 inin.pt_y.resize(0);
4105 inin.has_ctx =
false;
4108 GA_DEBUG_INFO(
"Instruction: end of call neighbor interpolate "
4112 ga_instruction_neighbor_transformation_call
4113 (
const ga_workspace &w, ga_instruction_set::interpolate_info &i,
4114 pinterpolate_transformation t, fem_interpolation_context &ctxx,
4115 const mesh &mm,
size_type &ipt_, papprox_integration &pai_,
4117 std::map<gauss_pt_corresp, bgeot::pstored_point_tab> &neighbor_corresp_)
4118 : workspace(w), inin(i), trans(t), ctx(ctxx), m(mm),
4119 ipt(ipt_), pai(pai_), gp_pool(gp_pool_),
4120 neighbor_corresp(neighbor_corresp_) {}
// Assembly instruction: accumulates a scalar term (weighted by the current
// integration coefficient) into E.  NOTE(review): the accumulation
// statement of exec() is not visible in this extraction.
4124 struct ga_instruction_scalar_assembly :
public ga_instruction {
4125 const base_tensor &t;
4126 scalar_type &E, &coeff;
4127 virtual int exec() {
4128 GA_DEBUG_INFO(
"Instruction: scalar term assembly");
4132 ga_instruction_scalar_assembly(base_tensor &t_, scalar_type &E_,
4133 scalar_type &coeff_)
4134 : t(t_), E(E_), coeff(coeff_) {}
// Assembly instruction: assembles a vector (residual) term for a fem
// variable.  Accumulates coeff*t into a local buffer `elem` over the Gauss
// points of the current element (using the unrolled copy/add_scaled_4
// helpers), then scatters `elem` into the global vector V at the dofs of
// the element.  The two constructors cover the variable-group case
// (references I/mf/reduced_mf supplied externally) and the plain-variable
// case (I__/mf__ stored locally).
4137 struct ga_instruction_vector_assembly_mf :
public ga_instruction
4139 const base_tensor &t;
4140 base_vector &VI, &Vi;
4141 const fem_interpolation_context &ctx;
4142 const gmm::sub_interval *
const&I, *
const I__;
4143 const mesh_fem *
const&mf, *
const mf__;
4144 const bool &reduced_mf;
4145 const scalar_type &coeff;
4148 const bool interpolate;
4149 virtual int exec() {
4150 GA_DEBUG_INFO(
"Instruction: vector term assembly for fem variable");
4151 bool empty_weight = (coeff == scalar_type(0));
// First Gauss point (or interpolate mode): (re)initialize elem.
4152 if (ipt == 0 || interpolate) {
// resize(0) first so that the following resize value-initializes.
4153 if (empty_weight) elem.resize(0);
4154 elem.resize(t.size());
4156 copy_scaled_4(t, coeff, elem);
4157 }
else if (!empty_weight)
4159 add_scaled_4(t, coeff, elem);
// Last Gauss point: scatter the accumulated element vector.
4161 if (ipt == nbpt-1 || interpolate) {
4162 GA_DEBUG_ASSERT(mf,
"Internal error");
4163 if (!ctx.is_convex_num_valid())
return 0;
// qmult: number of vector components per scalar basic dof.
4166 if (qmult > 1) qmult /= mf->fem_of_element(cv_1)->target_dim();
// Reduced mesh_fems assemble into the unreduced buffer Vi first.
4167 base_vector &V = reduced_mf ? Vi : VI;
4168 GA_DEBUG_ASSERT(V.size() >= I->first() + mf->nb_basic_dof(),
4169 "Bad assembly vector size " << V.size() <<
">=" <<
4170 I->first() <<
"+"<< mf->nb_basic_dof());
4171 auto itr = elem.cbegin();
4172 auto itw = V.begin() + I->first();
4173 for (
const auto &dof : mf->ind_scalar_basic_dof_of_element(cv_1))
4175 *(itw+dof+q) += *itr++;
4176 GMM_ASSERT1(itr == elem.end(),
"Internal error");
4181 ga_instruction_vector_assembly_mf
4182 (
const base_tensor &t_, base_vector &VI_, base_vector &Vi_,
4183 const fem_interpolation_context &ctx_,
4184 const gmm::sub_interval *&I_,
const mesh_fem *&mf_,
4185 const bool &reduced_mf_,
4188 : t(t_), VI(VI_), Vi(Vi_), ctx(ctx_),
4189 I(I_), I__(nullptr), mf(mf_), mf__(nullptr), reduced_mf(reduced_mf_),
4190 coeff(coeff_), nbpt(nbpt_), ipt(ipt_), interpolate(interpolate_) {}
4192 ga_instruction_vector_assembly_mf
4193 (
const base_tensor &t_, base_vector &V_,
4194 const fem_interpolation_context &ctx_,
4195 const gmm::sub_interval &I_,
const mesh_fem &mf_,
4198 : t(t_), VI(V_), Vi(V_), ctx(ctx_),
4199 I(I__), I__(&I_), mf(mf__), mf__(&mf_), reduced_mf(false_),
4200 coeff(coeff_), nbpt(nbpt_), ipt(ipt_), interpolate(interpolate_) {}
// Constant referenced by reduced_mf in the plain-variable constructor.
4202 const bool false_=
false;
// Assembly instruction: assembles a vector term for an im_data variable.
// Writes (or accumulates) coeff*t into V at the offset of the current
// Gauss point's filtered index within interval I.
4205 struct ga_instruction_vector_assembly_imd :
public ga_instruction {
4206 const base_tensor &t;
4208 const fem_interpolation_context &ctx;
4209 const gmm::sub_interval &I;
4213 const bool initialize;
4214 virtual int exec() {
4215 GA_DEBUG_INFO(
"Instruction: vector term assembly for im_data variable");
// Offset of this Gauss point's data inside the variable's interval.
4217 size_type i = t.size() * imd.filtered_index_of_point(cv, ipt);
4218 GMM_ASSERT1(i+t.size() <= I.size(),
4219 "Internal error "<<i<<
"+"<<t.size()<<
" <= "<<I.size());
4220 auto itw = V.begin() + I.first() + i;
// initialize chooses assignment vs accumulation.
4222 for (
const auto &val : t.as_vector())
4225 for (
const auto &val : t.as_vector())
4226 *itw++ += coeff*val;
4229 ga_instruction_vector_assembly_imd
4230 (
const base_tensor &t_, base_vector &V_,
4231 const fem_interpolation_context &ctx_,
const gmm::sub_interval &I_,
4232 const im_data &imd_, scalar_type &coeff_,
const size_type &ipt_,
4233 bool initialize_=
false)
4234 : t(t_), V(V_), ctx(ctx_), I(I_), imd(imd_), coeff(coeff_), ipt(ipt_),
4235 initialize(initialize_)
// Assembly instruction: assembles a vector term for a fixed-size variable
// by adding coeff*t into the sub-vector of V indexed by interval I.
4239 struct ga_instruction_vector_assembly :
public ga_instruction {
4240 const base_tensor &t;
4242 const gmm::sub_interval &I;
4244 virtual int exec() {
4245 GA_DEBUG_INFO(
"Instruction: vector term assembly for "
4246 "fixed size variable");
4247 gmm::add(gmm::scaled(t.as_vector(), coeff), gmm::sub_vector(V, I));
4250 ga_instruction_vector_assembly(
const base_tensor &t_, base_vector &V_,
4251 const gmm::sub_interval &I_,
4252 scalar_type &coeff_)
4253 : t(t_), V(V_), I(I_), coeff(coeff_) {}
// Assembly instruction: assigns the tensor t into the im_data storage of V
// at the current element and integration-point index.
4256 struct ga_instruction_assignment :
public ga_instruction {
4257 const base_tensor &t;
4259 const fem_interpolation_context &ctx;
4261 virtual int exec() {
4262 GA_DEBUG_INFO(
"Instruction: Assignement to im_data");
4263 imd->set_tensor(V, ctx.convex_num(), ctx.ii(), t);
4266 ga_instruction_assignment(
const base_tensor &t_, base_vector &V_,
4267 const fem_interpolation_context &ctx_,
4268 const im_data *imd_)
4269 : t(t_), V(V_), ctx(ctx_), imd(imd_) {}
// Assembly instruction: reads back into t the residual entries of V that
// correspond to the im_data dofs of the current Gauss point.
4272 struct ga_instruction_extract_residual_on_imd_dofs :
public ga_instruction {
4274 const base_vector &V;
4275 const fem_interpolation_context &ctx;
4276 const gmm::sub_interval &I;
4279 virtual int exec() {
4280 GA_DEBUG_INFO(
"Instruction: extract residual for im_data variable");
4283 size_type i = t.size() * imd.filtered_index_of_point(cv, ipt);
4284 GMM_ASSERT1(i+t.size() <= I.size(),
4285 "Internal error "<<i<<
"+"<<t.size()<<
" <= "<<I.size());
// Copy the contiguous slice V[ifirst+i ...] into t.
4286 for (
auto &&val : t.as_vector())
4287 val = V[ifirst+(i++)];
4290 ga_instruction_extract_residual_on_imd_dofs
4291 (base_tensor &t_,
const base_vector &V_,
4292 const fem_interpolation_context &ctx_,
const gmm::sub_interval &I_,
4293 const im_data &imd_,
const size_type &ipt_)
4294 : t(t_), V(V_), ctx(ctx_), I(I_), imd(imd_), ipt(ipt_)
// Generic element-matrix scatter: adds each entry of `elem` larger in
// magnitude than `threshold` into K(dof1, dof2).  The dofs1_sort and N
// parameters are unused in this generic version (they serve the rsvector
// specialization below).  NOTE(review): the loops over dofs1/dofs2 are
// not visible in this extraction.
4299 template <
class MAT>
4300 inline void add_elem_matrix
4301 (MAT &K,
const std::vector<size_type> &dofs1,
4302 const std::vector<size_type> &dofs2, std::vector<size_type> &,
4303 const base_vector &elem, scalar_type threshold,
size_type ) {
4305 base_vector::const_iterator it = elem.cbegin();
4308 if (gmm::abs(*it) > threshold)
4309 K(dof1, dof2) += *it;
// Element-matrix scatter specialized for gmm rsvector-based sparse
// matrices: rows of dofs1 are first sorted (insertion sort into
// dofs1_sort) so that each column of K can be filled with a single ordered
// merge, inserting new entries in place.  Entries below `threshold` are
// dropped.  NOTE(review): several loop headers are missing from this
// extraction; comments describe only the visible statements.
4321 inline void add_elem_matrix
4323 const std::vector<size_type> &dofs1,
const std::vector<size_type> &dofs2,
4324 std::vector<size_type> &dofs1_sort,
4325 const base_vector &elem, scalar_type threshold,
size_type N) {
// Insertion sort of the row-dof indices.
4329 dofs1_sort.resize(s1);
4332 while (j > 0 && dofs1[i] < dofs1[dofs1_sort[k]])
4333 { dofs1_sort[j] = dofs1_sort[k]; j--; k--; }
4342 gmm::elt_rsvector_<scalar_type> ev;
4345 base_vector::const_iterator it = elem.cbegin();
4348 if (first) first =
false;
// Direct access to the column's element storage.
4350 std::vector<gmm::elt_rsvector_<scalar_type>> &col = K[dof2];
4354 col.reserve(maxest);
4357 if (gmm::abs(ev.e) > threshold) {
4366 if (gmm::abs(ev.e) > threshold) {
// Binary-search-style positioning of ev.c within the sorted column.
4373 if (col[l].c < ev.c) {
4381 auto itc = col.begin() + ind;
4382 if (ind != nb && itc->c == ev.c)
// Large shifts indicate a pathological insertion pattern.
4385 if (nb - ind > 1300)
4386 GMM_WARNING2(
"Inefficient addition of element in rsvector with "
4387 << col.size() - ind <<
" non-zero entries");
// Shift the tail one slot to make room for the new entry.
4390 itc = col.begin() + ind;
4391 auto ite = col.end();
4394 for (; ite != itc; --ite) { --itee; *ite = *itee; }
// Variant of the rsvector element-matrix scatter for the case where the
// row dofs form a contiguous range (so no row sorting is needed).  Same
// ordered-merge insertion into each column K[dof2], dropping entries
// below `threshold`.  NOTE(review): several loop headers are missing from
// this extraction.
4407 inline void add_elem_matrix_contiguous_rows
4410 const std::vector<size_type> &dofs2,
4411 const base_vector &elem, scalar_type threshold) {
4413 gmm::elt_rsvector_<scalar_type> ev;
4415 base_vector::const_iterator it = elem.cbegin();
4418 if (first) first =
false;
4420 std::vector<gmm::elt_rsvector_<scalar_type>> &col = K[dof2];
4427 if (gmm::abs(ev.e) > threshold) {
4436 if (gmm::abs(ev.e) > threshold) {
4443 if (col[l].c < ev.c) {
4451 auto itc = col.begin() + ind;
4452 if (ind != nb && itc->c == ev.c)
4455 if (nb - ind > 1300)
4456 GMM_WARNING2(
"Inefficient addition of element in rsvector with "
4457 << col.size() - ind <<
" non-zero entries");
4460 itc = col.begin() + ind;
4461 auto ite = col.end();
4464 for (; ite != itc; --ite) { --itee; *ite = *itee; }
// Fills `dofs` with global dof indices: each entry starts at `ifirst` and
// is offset by the element's scalar basic dofs, expanded by the vector
// multiplier qmult when > 1.
4476 inline void populate_dofs_vector
4477 (std::vector<size_type> &dofs,
4479 const getfem::mesh::ind_set &mfdofs)
4481 dofs.assign(size, ifirst);
4482 auto itd = dofs.begin();
// qmult == 1: one slot per scalar dof; otherwise qmult slots per dof.
4484 for (
const auto &dof : mfdofs) *itd++ += dof;
4486 for (
const auto &dof : mfdofs)
4487 for (
size_type q = 0; q < qmult; ++q) *itd++ += dof + q;
// Scalar-field overload: fills `dofs` with ifirst + each basic dof of the
// element (no vector multiplier).
4490 inline void populate_dofs_vector
4492 const getfem::mesh::ind_set &mfdofs)
4494 dofs.assign(size, ifirst);
4495 auto itd = dofs.begin();
4496 for (
const auto &dof : mfdofs) *itd++ += dof;
// Fills `dofs` with the contiguous range ifirst, ifirst+1, ..., as used by
// im_data and fixed-size variables.
4500 inline void populate_contiguous_dofs_vector
4503 dofs.assign(size, ifirst);
4504 for (
size_type i=0; i < size; ++i) dofs[i] += i;
// Common base for the matrix-assembly instructions: holds the local
// element matrix buffer `elem`, dof index scratch vectors, and the helper
// add_tensor_to_element_matrix which accumulates coeff*alpha1*alpha2 * t
// into `elem` across Gauss points.
4507 struct ga_instruction_matrix_assembly_base :
public ga_instruction {
4508 const base_tensor &t;
4509 const fem_interpolation_context &ctx1, &ctx2;
4510 const scalar_type &alpha1, &alpha2, &coeff;
4514 std::vector<size_type> dofs1, dofs2, dofs1_sort;
4515 void add_tensor_to_element_matrix(
bool initialize,
bool empty_weight) {
// On initialization, resize(0) first so the resize below
// value-initializes when the weight is zero.
4517 if (empty_weight) elem.resize(0);
4518 elem.resize(t.size());
4520 copy_scaled_4(t, coeff*alpha1*alpha2, elem);
4521 }
else if (!empty_weight)
4524 add_scaled_4(t, coeff*alpha1*alpha2, elem);
4526 ga_instruction_matrix_assembly_base
4527 (
const base_tensor &t_,
4528 const fem_interpolation_context &ctx1_,
4529 const fem_interpolation_context &ctx2_,
4530 const scalar_type &a1,
const scalar_type &a2,
const scalar_type &coeff_,
4532 : t(t_), ctx1(ctx1_), ctx2(ctx2_), alpha1(a1), alpha2(a2),
4533 coeff(coeff_), nbpt(nbpt_), ipt(ipt_), interpolate(interpolate_),
4534 dofs1(0), dofs2(0), dofs1_sort(0)
// Constant used by derived classes for non-reduced mesh_fem references.
4537 const bool false_=
false;
// Assembly instruction: matrix term between two fem variables.  The target
// matrix among Krr/Kru/Kur/Kuu is selected by whether each mesh_fem is
// reduced.  Accumulates over Gauss points in `elem`, then scatters into
// the chosen matrix at the row/column dofs of the two elements, skipping
// when the element matrix is numerically zero.  The four constructors
// cover variable-group vs plain variables on either side.
4542 struct ga_instruction_matrix_assembly_mf_mf
4543 :
public ga_instruction_matrix_assembly_base
4545 model_real_sparse_matrix &Krr, &Kru, &Kur, &Kuu;
4546 const gmm::sub_interval *
const&I1, *
const&I2, *
const I1__, *
const I2__;
4547 const mesh_fem *
const&mf1, *
const&mf2, *
const mf1__, *
const mf2__;
4548 const bool &reduced_mf1, &reduced_mf2;
4549 virtual int exec() {
4550 GA_DEBUG_INFO(
"Instruction: matrix term assembly mf-mf");
4551 if (!ctx1.is_convex_num_valid() || !ctx2.is_convex_num_valid())
return 0;
4553 bool initialize = (ipt == 0 || interpolate);
4554 bool empty_weight = (coeff == scalar_type(0));
4555 add_tensor_to_element_matrix(initialize, empty_weight);
// Scatter on the last Gauss point (or every point when interpolating).
4557 if (ipt == nbpt-1 || interpolate) {
4558 model_real_sparse_matrix &K = reduced_mf1 ? (reduced_mf2 ? Kuu : Kur)
4559 : (reduced_mf2 ? Kru : Krr);
4560 GA_DEBUG_ASSERT(I1->size() && I2->size(),
"Internal error");
// ninf: infinity norm of elem; skip scatter if identically zero.
4563 if (ninf == scalar_type(0))
return 0;
4565 size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
4566 size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
4567 size_type ifirst1 = I1->first(), ifirst2 = I2->first();
4571 if (qmult1 > 1) qmult1 /= mf1->fem_of_element(cv1)->target_dim();
4572 populate_dofs_vector(dofs1, s1, ifirst1, qmult1,
4573 mf1->ind_scalar_basic_dof_of_element(cv1));
// Same mesh_fem and element on both sides: reuse dofs1 (shifted if
// the intervals differ).
4574 if (mf1 == mf2 && cv1 == cv2) {
4575 if (ifirst1 == ifirst2) {
4576 add_elem_matrix(K, dofs1, dofs1, dofs1_sort, elem, ninf*1E-14, N);
4578 populate_dofs_vector(dofs2, dofs1.size(), ifirst2 - ifirst1, dofs1);
4579 add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
4582 N = std::max(N, ctx2.N());
4584 if (qmult2 > 1) qmult2 /= mf2->fem_of_element(cv2)->target_dim();
4585 populate_dofs_vector(dofs2, s2, ifirst2, qmult2,
4586 mf2->ind_scalar_basic_dof_of_element(cv2));
4587 add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
4593 ga_instruction_matrix_assembly_mf_mf
4594 (
const base_tensor &t_,
4595 model_real_sparse_matrix &Krr_, model_real_sparse_matrix &Kru_,
4596 model_real_sparse_matrix &Kur_, model_real_sparse_matrix &Kuu_,
4597 const fem_interpolation_context &ctx1_,
4598 const fem_interpolation_context &ctx2_,
4599 const ga_instruction_set::variable_group_info &vgi1,
4600 const ga_instruction_set::variable_group_info &vgi2,
4603 : ga_instruction_matrix_assembly_base
4604 (t_, ctx1_, ctx2_, vgi1.
alpha, vgi2.
alpha, coeff_, nbpt_, ipt_,
4606 Krr(Krr_), Kru(Kru_), Kur(Kur_), Kuu(Kuu_),
4607 I1(vgi1.I), I2(vgi2.I), I1__(nullptr), I2__(nullptr),
4608 mf1(vgi1.mf), mf2(vgi2.mf), mf1__(nullptr), mf2__(nullptr),
4609 reduced_mf1(vgi1.reduced_mf), reduced_mf2(vgi2.reduced_mf) {}
4611 ga_instruction_matrix_assembly_mf_mf
4612 (
const base_tensor &t_,
4613 model_real_sparse_matrix &Kxr_, model_real_sparse_matrix &Kxu_,
4614 const fem_interpolation_context &ctx1_,
4615 const fem_interpolation_context &ctx2_,
4616 const gmm::sub_interval &I1_,
const mesh_fem &mf1_,
const scalar_type &a1,
4617 const ga_instruction_set::variable_group_info &vgi2,
4620 : ga_instruction_matrix_assembly_base
4621 (t_, ctx1_, ctx2_, a1, vgi2.
alpha, coeff_, nbpt_, ipt_, interpolate_),
4622 Krr(Kxr_), Kru(Kxu_), Kur(Kxr_), Kuu(Kxu_),
4623 I1(I1__), I2(vgi2.I), I1__(&I1_), I2__(nullptr),
4624 mf1(mf1__), mf2(vgi2.mf), mf1__(&mf1_), mf2__(nullptr),
4625 reduced_mf1(false_), reduced_mf2(vgi2.reduced_mf) {}
4627 ga_instruction_matrix_assembly_mf_mf
4628 (
const base_tensor &t_,
4629 model_real_sparse_matrix &Krx_, model_real_sparse_matrix &Kux_,
4630 const fem_interpolation_context &ctx1_,
4631 const fem_interpolation_context &ctx2_,
4632 const ga_instruction_set::variable_group_info &vgi1,
4633 const gmm::sub_interval &I2_,
const mesh_fem &mf2_,
const scalar_type &a2,
4636 : ga_instruction_matrix_assembly_base
4637 (t_, ctx1_, ctx2_, vgi1.
alpha, a2, coeff_, nbpt_, ipt_, interpolate_),
4638 Krr(Krx_), Kru(Krx_), Kur(Kux_), Kuu(Kux_),
4639 I1(vgi1.I), I2(I2__), I1__(nullptr), I2__(&I2_),
4640 mf1(vgi1.mf), mf2(mf2__), mf1__(nullptr), mf2__(&mf2_),
4641 reduced_mf1(vgi1.reduced_mf), reduced_mf2(false_) {}
4643 ga_instruction_matrix_assembly_mf_mf
4644 (
const base_tensor &t_, model_real_sparse_matrix &K_,
4645 const fem_interpolation_context &ctx1_,
4646 const fem_interpolation_context &ctx2_,
4647 const gmm::sub_interval &I1_,
const mesh_fem &mf1_,
const scalar_type &a1,
4648 const gmm::sub_interval &I2_,
const mesh_fem &mf2_,
const scalar_type &a2,
4651 : ga_instruction_matrix_assembly_base
4652 (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, interpolate_),
4653 Krr(K_), Kru(K_), Kur(K_), Kuu(K_),
4654 I1(I1__), I2(I2__), I1__(&I1_), I2__(&I2_),
4655 mf1(mf1__), mf2(mf2__), mf1__(&mf1_), mf2__(&mf2_),
4656 reduced_mf1(false_), reduced_mf2(false_) {}
// Assembly instruction: matrix term between an im_data (or fixed-size)
// variable (rows) and a fem variable (columns).  Row dofs are contiguous
// (offset by the Gauss point's filtered index when imd1 is set); column
// dofs come from the mesh_fem.  Always assembles per Gauss point
// (initialize = true).
4660 struct ga_instruction_matrix_assembly_imd_mf
4661 :
public ga_instruction_matrix_assembly_base
4663 model_real_sparse_matrix &Kxr, &Kxu;
4664 const gmm::sub_interval *I1, *I2__, *
const &I2;
4665 const im_data *imd1;
4666 const mesh_fem *
const mf2__, *
const &mf2;
4667 const bool &reduced_mf2;
4668 virtual int exec() {
4669 GA_DEBUG_INFO(
"Instruction: matrix term assembly "
4670 "(imdata or fixed size)-mf");
4671 if (!ctx1.is_convex_num_valid() || !ctx2.is_convex_num_valid())
return 0;
4673 bool empty_weight = (coeff == scalar_type(0));
4674 add_tensor_to_element_matrix(
true, empty_weight);
4677 if (ninf == scalar_type(0))
return 0;
4679 model_real_sparse_matrix &K = reduced_mf2 ? Kxu : Kxr;
4680 GA_DEBUG_ASSERT(I1->size() && I2->size(),
"Internal error");
4681 size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
4682 size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
4683 size_type ifirst1 = I1->first(), ifirst2 = I2->first();
// im_data rows: shift by this Gauss point's slot.
4684 if (imd1) ifirst1 += s1 * imd1->filtered_index_of_point(cv1, ipt);
4686 populate_contiguous_dofs_vector(dofs1, s1, ifirst1);
4688 if (qmult2 > 1) qmult2 /= mf2->fem_of_element(cv2)->target_dim();
4689 populate_dofs_vector(dofs2, s2, ifirst2, qmult2,
4690 mf2->ind_scalar_basic_dof_of_element(cv2));
4691 add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, ctx2.N());
4695 ga_instruction_matrix_assembly_imd_mf
4696 (
const base_tensor &t_,
4697 model_real_sparse_matrix &Kxr_, model_real_sparse_matrix &Kxu_,
4698 const fem_interpolation_context &ctx1_,
4699 const fem_interpolation_context &ctx2_,
4700 const gmm::sub_interval &I1_,
const im_data *imd1_,
const scalar_type &a1,
4701 const ga_instruction_set::variable_group_info &vgi2,
4702 const scalar_type &coeff_,
const size_type &ipt_)
4703 : ga_instruction_matrix_assembly_base
4704 (t_, ctx1_, ctx2_, a1, vgi2.
alpha, coeff_, zero_, ipt_, false),
4705 Kxr(Kxr_), Kxu(Kxu_), I1(&I1_), I2__(nullptr), I2(vgi2.I),
4706 imd1(imd1_), mf2__(nullptr), mf2(vgi2.mf), reduced_mf2(vgi2.reduced_mf)
4709 ga_instruction_matrix_assembly_imd_mf
4710 (
const base_tensor &t_, model_real_sparse_matrix &K_,
4711 const fem_interpolation_context &ctx1_,
4712 const fem_interpolation_context &ctx2_,
4713 const gmm::sub_interval &I1_,
const im_data *imd1_,
const scalar_type &a1,
4714 const gmm::sub_interval &I2_,
const mesh_fem &mf2_,
const scalar_type &a2,
4715 const scalar_type &coeff_,
const size_type &ipt_)
4716 : ga_instruction_matrix_assembly_base
4717 (t_, ctx1_, ctx2_, a1, a2, coeff_, zero_, ipt_, false),
4718 Kxr(K_), Kxu(K_), I1(&I1_), I2__(&I2_), I2(I2__),
4719 imd1(imd1_), mf2__(&mf2_), mf2(mf2__), reduced_mf2(false_) {}
// Assembly instruction: matrix term between a fem variable (rows) and an
// im_data (or fixed-size) variable (columns) — the mirror image of
// ga_instruction_matrix_assembly_imd_mf.  Column dofs are contiguous
// (shifted by the Gauss point's filtered index when imd2 is set).
4722 struct ga_instruction_matrix_assembly_mf_imd
4723 :
public ga_instruction_matrix_assembly_base
4725 model_real_sparse_matrix &Krx, &Kux;
4726 const gmm::sub_interval *
const &I1, *
const I1__, *I2;
4727 const mesh_fem *
const &mf1, *
const mf1__;
4728 const bool &reduced_mf1;
4729 const im_data *imd2;
4730 virtual int exec() {
4731 GA_DEBUG_INFO(
"Instruction: matrix term assembly "
4732 "mf-(imdata or fixed size)");
4733 if (!ctx1.is_convex_num_valid() || !ctx2.is_convex_num_valid())
return 0;
4735 bool empty_weight = (coeff == scalar_type(0));
4736 add_tensor_to_element_matrix(
true, empty_weight);
4739 if (ninf == scalar_type(0))
return 0;
4741 model_real_sparse_matrix &K = reduced_mf1 ? Kux : Krx;
4742 GA_DEBUG_ASSERT(I1->size() && I2->size(),
"Internal error");
4743 size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
4744 size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
4745 size_type ifirst1 = I1->first(), ifirst2 = I2->first();
// im_data columns: shift by this Gauss point's slot.
4746 if (imd2) ifirst2 += s2 * imd2->filtered_index_of_point(cv2, ipt);
4749 if (qmult1 > 1) qmult1 /= mf1->fem_of_element(cv1)->target_dim();
4750 populate_dofs_vector(dofs1, s1, ifirst1, qmult1,
4751 mf1->ind_scalar_basic_dof_of_element(cv1));
4752 populate_contiguous_dofs_vector(dofs2, s2, ifirst2);
4753 add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, ctx1.N());
4757 ga_instruction_matrix_assembly_mf_imd
4758 (
const base_tensor &t_,
4759 model_real_sparse_matrix &Krx_, model_real_sparse_matrix &Kux_,
4760 const fem_interpolation_context &ctx1_,
4761 const fem_interpolation_context &ctx2_,
4762 const ga_instruction_set::variable_group_info &vgi1,
4763 const gmm::sub_interval &I2_,
const im_data *imd2_,
const scalar_type &a2,
4764 const scalar_type &coeff_,
const size_type &ipt_)
4765 : ga_instruction_matrix_assembly_base
4766 (t_, ctx1_, ctx2_, vgi1.
alpha, a2, coeff_, zero_, ipt_, false),
4767 Krx(Krx_), Kux(Kux_), I1(vgi1.I), I1__(nullptr), I2(&I2_),
4768 mf1(vgi1.mf), mf1__(nullptr), reduced_mf1(vgi1.reduced_mf), imd2(imd2_)
4771 ga_instruction_matrix_assembly_mf_imd
4772 (
const base_tensor &t_, model_real_sparse_matrix &K_,
4773 const fem_interpolation_context &ctx1_,
4774 const fem_interpolation_context &ctx2_,
4775 const gmm::sub_interval &I1_,
const mesh_fem &mf1_,
const scalar_type &a1,
4776 const gmm::sub_interval &I2_,
const im_data *imd2_,
const scalar_type &a2,
4777 const scalar_type &coeff_,
const size_type &ipt_)
4778 : ga_instruction_matrix_assembly_base
4779 (t_, ctx1_, ctx2_, a1, a2, coeff_, zero_, ipt_, false),
4780 Krx(K_), Kux(K_), I1(I1__), I1__(&I1_), I2(&I2_),
4781 mf1(mf1__), mf1__(&mf1_), reduced_mf1(false_), imd2(imd2_) {}
// Assembly instruction for a matrix term where both rows and columns come
// from im_data variables (or fixed-size variables when the corresponding
// imd pointer is null, per the debug label).
// NOTE(review): doc-extraction listing — some statements (likely the
// "if (imd1)/(imd2)" guards around lines 4806/4808) and closing braces
// were elided; comments describe only the visible code.
4786 struct ga_instruction_matrix_assembly_imd_imd
4787 :
public ga_instruction_matrix_assembly_base
4789 model_real_sparse_matrix &K;
4790 const gmm::sub_interval &I1, &I2;
4791 const im_data *imd1, *imd2;
4792 virtual int exec() {
4793 GA_DEBUG_INFO(
"Instruction: matrix term assembly "
4794 "(imdata or fixed size)-(imdata or fixed size)");
4795 GA_DEBUG_ASSERT(I1.size() && I2.size(),
"Internal error");
// A zero weight still initializes elem via the base-class helper.
4797 bool empty_weight = (coeff == scalar_type(0));
4798 add_tensor_to_element_matrix(
true, empty_weight);
// Skip scatter when the element matrix is identically zero.
4801 if (ninf == scalar_type(0))
return 0;
4803 size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
4804 size_type ifirst1 = I1.first(), ifirst2 = I2.first();
// Offset each interval to the block of the current element/Gauss point
// (presumably guarded by "if (imd1)" / "if (imd2)" on elided lines —
// TODO confirm against the original source).
4806 ifirst1 += s1 * imd1->filtered_index_of_point(ctx1.convex_num(), ipt);
4808 ifirst2 += s2 * imd2->filtered_index_of_point(ctx2.convex_num(), ipt);
4810 populate_contiguous_dofs_vector(dofs2, s2, ifirst2);
// Rows are contiguous as well, so a specialized scatter is used.
4811 add_elem_matrix_contiguous_rows(K, ifirst1, s1, dofs2, elem, ninf*1E-14);
// Constructor: nbpt is fixed to zero_ since accumulation over Gauss
// points is not used for this instruction (assembled per point).
4814 ga_instruction_matrix_assembly_imd_imd
4815 (
const base_tensor &t_, model_real_sparse_matrix &K_,
4816 const fem_interpolation_context &ctx1_,
4817 const fem_interpolation_context &ctx2_,
4818 const gmm::sub_interval &I1_,
const im_data *imd1_,
const scalar_type &a1,
4819 const gmm::sub_interval &I2_,
const im_data *imd2_,
const scalar_type &a2,
4820 const scalar_type &coeff_,
const size_type &ipt_)
4821 : ga_instruction_matrix_assembly_base
4822 (t_, ctx1_, ctx2_, a1, a2, coeff_, zero_, ipt_, false),
4823 K(K_), I1(I1_), I2(I2_), imd1(imd1_), imd2(imd2_) {}
// Optimized assembly for the common case of two scalar mesh_fems without
// interpolation: the element matrix is accumulated over all Gauss points
// (copy on the first point, add afterwards — the "ipt == 0" branch was
// elided by the extraction) and scattered into K only once, at the last
// Gauss point (ipt == nbpt-1).
// NOTE(review): doc-extraction listing — several lines and closing braces
// are missing; comments describe only the visible code.
4827 struct ga_instruction_matrix_assembly_standard_scalar
4828 :
public ga_instruction_matrix_assembly_base
4830 model_real_sparse_matrix &K;
4831 const gmm::sub_interval &I1, &I2;
4832 const mesh_fem *pmf1, *pmf2;
4833 virtual int exec() {
4834 GA_DEBUG_INFO(
"Instruction: matrix term assembly for standard "
// First Gauss point (branch header elided): initialize elem.
4837 elem.resize(t.size());
// copy_scaled_4/add_scaled_4 are the 4-way unrolled kernels defined
// earlier in this file.
4839 copy_scaled_4(t, coeff*alpha1*alpha2, elem);
// Subsequent Gauss points: accumulate.
4843 add_scaled_4(t, coeff*alpha1*alpha2, elem);
// Scatter into the global matrix only after the last Gauss point.
4845 if (ipt == nbpt-1) {
4846 GA_DEBUG_ASSERT(I1.size() && I2.size(),
"Internal error");
4849 if (ninf == scalar_type(0))
return 0;
4851 size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num(), N=ctx1.N();
// Scalar fem: row dofs are exactly the scalar basic dofs of cv1.
4853 auto &ct1 = pmf1->ind_scalar_basic_dof_of_element(cv1);
4854 GA_DEBUG_ASSERT(ct1.size() == t.sizes()[0],
"Internal error");
4855 populate_dofs_vector(dofs1, ct1.size(), I1.first(), ct1);
// Same fem and same element: reuse dofs1 (possibly shifted) for the
// columns instead of querying the mesh_fem again.
4857 if (pmf2 == pmf1 && cv1 == cv2) {
4858 if (I1.first() == I2.first()) {
4859 add_elem_matrix(K, dofs1, dofs1, dofs1_sort, elem, ninf*1E-14, N);
4861 populate_dofs_vector(dofs2, dofs1.size(), I2.first() - I1.first(),
4863 add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
// General case: build the column dof list from pmf2/cv2.
4867 auto &ct2 = pmf2->ind_scalar_basic_dof_of_element(cv2);
4868 GA_DEBUG_ASSERT(ct2.size() == t.sizes()[1],
"Internal error");
4869 populate_dofs_vector(dofs2, ct2.size(), I2.first(), ct2);
4870 add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
// Constructor: unlike the imd variants, nbpt_ is passed so exec() can
// detect the last Gauss point of the element.
4875 ga_instruction_matrix_assembly_standard_scalar
4876 (
const base_tensor &t_, model_real_sparse_matrix &K_,
4877 const fem_interpolation_context &ctx1_,
4878 const fem_interpolation_context &ctx2_,
4879 const gmm::sub_interval &I1_,
const gmm::sub_interval &I2_,
4880 const mesh_fem *mfn1_,
const mesh_fem *mfn2_,
4881 const scalar_type &a1,
const scalar_type &a2,
const scalar_type &coeff_,
4883 : ga_instruction_matrix_assembly_base
4884 (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, false),
4885 K(K_), I1(I1_), I2(I2_), pmf1(mfn1_), pmf2(mfn2_) {}
// Optimized assembly for two vector-valued mesh_fems without
// interpolation. Same accumulate-then-scatter scheme as the scalar
// variant, but with 8-way unrolled kernels and qmult-based expansion of
// scalar basic dofs into vector dofs.
// NOTE(review): doc-extraction listing — the "ipt == 0" branch headers,
// the qmult1/qmult2 declarations and closing braces were elided; comments
// describe only the visible code.
4888 struct ga_instruction_matrix_assembly_standard_vector
4889 :
public ga_instruction_matrix_assembly_base
4891 model_real_sparse_matrix &K;
4892 const gmm::sub_interval &I1, &I2;
4893 const mesh_fem *pmf1, *pmf2;
4894 virtual int exec() {
4895 GA_DEBUG_INFO(
"Instruction: matrix term assembly for standard "
// First Gauss point: initialize the accumulated element matrix.
4898 elem.resize(t.size());
4899 copy_scaled_8(t, coeff*alpha1*alpha2, elem);
// Later Gauss points: accumulate.
4904 add_scaled_8(t, coeff*alpha1*alpha2, elem);
// Scatter only once per element, at the last Gauss point.
4906 if (ipt == nbpt-1) {
4907 GA_DEBUG_ASSERT(I1.size() && I2.size(),
"Internal error");
4910 if (ninf == scalar_type(0))
return 0;
4911 size_type s1 = t.sizes()[0], s2 = t.sizes()[1], N = ctx1.N();
4913 size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
// qmult1 (declaration elided) expands scalar basic dofs to the
// vectorized numbering of variable 1.
4916 if (qmult1 > 1) qmult1 /= pmf1->fem_of_element(cv1)->target_dim();
4917 populate_dofs_vector(dofs1, s1, I1.first(), qmult1,
4918 pmf1->ind_scalar_basic_dof_of_element(cv1));
// Identical test/trial space on the same element: reuse dofs1.
4920 if (pmf2 == pmf1 && cv1 == cv2 && I1.first() == I2.first()) {
4921 add_elem_matrix(K, dofs1, dofs1, dofs1_sort, elem, ninf*1E-14, N);
// Same fem/element but different intervals: shift dofs1 by the
// interval offset to obtain the column dofs.
4923 if (pmf2 == pmf1 && cv1 == cv2) {
4924 populate_dofs_vector(dofs2, dofs1.size(), I2.first() - I1.first(),
// General case: build column dofs from pmf2/cv2 with its own qmult.
4929 if (qmult2 > 1) qmult2 /= pmf2->fem_of_element(cv2)->target_dim();
4930 populate_dofs_vector(dofs2, s2, I2.first(), qmult2,
4931 pmf2->ind_scalar_basic_dof_of_element(cv2));
4933 add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
4938 ga_instruction_matrix_assembly_standard_vector
4939 (
const base_tensor &t_, model_real_sparse_matrix &K_,
4940 const fem_interpolation_context &ctx1_,
4941 const fem_interpolation_context &ctx2_,
4942 const gmm::sub_interval &I1_,
const gmm::sub_interval &I2_,
4943 const mesh_fem *mfn1_,
const mesh_fem *mfn2_,
4944 const scalar_type &a1,
const scalar_type &a2,
const scalar_type &coeff_,
4946 : ga_instruction_matrix_assembly_base
4947 (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, false),
4948 K(K_), I1(I1_), I2(I2_), pmf1(mfn1_), pmf2(mfn2_) {}
// Further-optimized assembly for vector fems whose element tensor has the
// sparsity of "format 10" (block-diagonal in the qdim components): only
// one scalar entry per QQ x QQ component block is nonzero, so exec()
// compresses t into an ss1 x ss2 scalar matrix and scatters it QQ times
// with the dof indices incremented by one component each pass.
// QQ is a compile-time template parameter (its template header was elided
// by the extraction); the static_assert below restricts it to 2 or 3.
// NOTE(review): doc-extraction listing — loop headers over j, the
// "ipt == 0" branch and closing braces were elided; comments describe
// only the visible code.
4952 struct ga_instruction_matrix_assembly_standard_vector_opt10
4953 :
public ga_instruction_matrix_assembly_base
4955 model_real_sparse_matrix &K;
4956 const gmm::sub_interval &I1, &I2;
4957 const mesh_fem *pmf1, *pmf2;
4958 virtual int exec() {
4959 GA_DEBUG_INFO(
"Instruction: matrix term assembly for standard "
4960 "vector fems optimized for format 10 qdim " << QQ);
// ss1/ss2: number of scalar (per-component) dofs on each side.
4962 size_type ss1 = t.sizes()[0]/QQ, ss2 = t.sizes()[1]/QQ;
4963 scalar_type e = coeff*alpha1*alpha2;
// First Gauss point: gather every QQ-th entry of t (stride s1_q,
// defined on an elided line) into the compressed element matrix.
4965 elem.resize(ss1*ss2);
4966 auto itel = elem.begin();
4968 auto it = t.begin() + j*s1_q;
4969 for (
size_type i = 0; i < ss1; ++i, it += QQ)
4970 *itel++ = (*it) * e;
// Later Gauss points: same gather, accumulated.
4973 auto itel = elem.begin();
4975 auto it = t.begin() + j*s1_q;
4976 for (
size_type i = 0; i < ss1; ++i, it += QQ)
4977 *itel++ += (*it) * e;
// Scatter at the last Gauss point only.
4980 if (ipt == nbpt-1) {
4981 GA_DEBUG_ASSERT(I1.size() && I2.size(),
"Internal error");
4984 if (ninf == scalar_type(0))
return 0;
4986 size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
4987 size_type i1 = I1.first(), i2 = I2.first();
4989 populate_dofs_vector(dofs1, ss1, i1,
4990 pmf1->ind_scalar_basic_dof_of_element(cv1));
// When both sides address the same dofs, dofs2 is never built and
// dofs2_ below aliases dofs1.
4991 bool same_dofs(pmf2 == pmf1 && cv1 == cv2 && i1 == i2);
4995 populate_dofs_vector(dofs2, ss2, i2,
4996 pmf2->ind_scalar_basic_dof_of_element(cv2));
4998 std::vector<size_type> &dofs2_ = same_dofs ? dofs1 : dofs2;
// Component 0 scatter, then shift all dof indices by one and repeat
// for component 1 (and, for QQ == 3 presumably under an elided guard,
// component 2) — TODO confirm the guard in the original source.
4999 add_elem_matrix(K, dofs1, dofs2_, dofs1_sort, elem, ninf, N);
5000 for (
size_type i = 0; i < ss1; ++i) (dofs1[i])++;
5001 if (!same_dofs)
for (
size_type i = 0; i < ss2; ++i) (dofs2[i])++;
5002 add_elem_matrix(K, dofs1, dofs2_, dofs1_sort, elem, ninf, N);
5004 for (
size_type i = 0; i < ss1; ++i) (dofs1[i])++;
5005 if (!same_dofs)
for (
size_type i = 0; i < ss2; ++i) (dofs2[i])++;
5006 add_elem_matrix(K, dofs1, dofs2_, dofs1_sort, elem, ninf, N);
5012 ga_instruction_matrix_assembly_standard_vector_opt10
5013 (
const base_tensor &t_, model_real_sparse_matrix &Kn_,
5014 const fem_interpolation_context &ctx1_,
5015 const fem_interpolation_context &ctx2_,
5016 const gmm::sub_interval &In1_,
const gmm::sub_interval &In2_,
5017 const mesh_fem *mfn1_,
const mesh_fem *mfn2_,
5018 const scalar_type &a1,
const scalar_type &a2,
const scalar_type &coeff_,
5020 : ga_instruction_matrix_assembly_base
5021 (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, false),
5022 K(Kn_), I1(In1_), I2(In2_), pmf1(mfn1_), pmf2(mfn2_)
// Only qdim 2 and 3 are implemented (two resp. three scatter passes).
5024 static_assert(QQ >= 2 && QQ <=3,
5025 "Template implemented only for QQ=2 and QQ=3");
// Static-condensation instruction: eliminates a cluster Q of internal
// (condensed) variables from the local linear system. On each exec() it
// (1) gathers the QxQ diagonal blocks KQQloc into the dense matrix
// invKqqqq, (2) inverts it in place with bgeot::lu_inverse, (3) forms
// Kqqjj = invKqqqq * KQJ and Rqq = invKqqqq * RQ, and (4) writes the
// results back, partitioned per variable, into KQJprime and RQprime.
// partQ/partJ hold {index, start, end} triples describing how the
// concatenated qq/jj ranges map onto the individual variables.
// NOTE(review): doc-extraction listing — many lines (branch headers,
// zero-initializations, closing braces) were elided; comments describe
// only the visible code.
5030 struct ga_instruction_condensation_sub :
public ga_instruction {
// Output blocks (written in step 4); not owned by this instruction.
5033 gmm::dense_matrix<base_tensor *> KQJprime;
5034 std::vector<base_tensor *> RQprime;
// Local snapshots of the input block pointers, taken in the constructor.
5035 gmm::dense_matrix<base_tensor const *> KQQloc, KQJloc;
5036 base_tensor invKqqqq, Kqqjj;
5038 std::vector<std::array<size_type,3>> partQ, partJ;
// coeff: current assembly weight; Rqq entries are divided by it on
// write-back (the tensors presumably carry a coeff factor — TODO confirm).
5039 const scalar_type &coeff;
5040 virtual int exec() {
5041 GA_DEBUG_INFO(
"Instruction: variable cluster subdiagonal condensation");
// Step 1: gather the available KQQ blocks into invKqqqq.
5043 for (
const auto &qqq1 : partQ) {
5044 size_type q1 = qqq1[0], qq1start = qqq1[1], qq1end = qqq1[2];
5045 for (
const auto &qqq2 : partQ) {
5046 size_type q2 = qqq2[0], qq2start = qqq2[1], qq2end = qqq2[2];
5047 if (KQQloc(q1,q2)) {
5048 auto itr = KQQloc(q1,q2)->cbegin();
5049 GMM_ASSERT1(KQQloc(q1,q2)->size()
5050 == (qq1end-qq1start)*(qq2end-qq2start),
// Column-major copy (qq2 outer, qq1 inner) matching tensor layout.
5052 for (
size_type qq2=qq2start; qq2 < qq2end; ++qq2)
5053 for (
size_type qq1=qq1start; qq1 < qq1end; ++qq1)
5054 invKqqqq(qq1,qq2) = *itr++;
// Step 2: in-place LU inversion of the gathered square block.
5059 bgeot::lu_inverse(&(invKqqqq[0]), invKqqqq.size(0));
// Resize the per-variable output blocks; new_j (declared on an elided
// line) is the common column count of the KQJ blocks of column j.
5063 for (
auto &&jjj : partJ) {
5066 for (
const auto &qqq : partQ) {
5070 GMM_ASSERT1(new_j == KQJloc(q,j)->size(1),
"Internal error");
5072 new_j = KQJloc(q,j)->size(1);
5076 for (
const auto &qqq : partQ) {
5078 KQJprime(q,j)->adjust_sizes(qqq[2]-qqq[1], new_j);
// Step 3a: Kqqjj = invKqqqq * KQJ (accumulated block by block).
5085 Kqqjj.adjust_sizes(partQ.back()[2], partJ.back()[2]);
5091 for (
const auto &jjj : partJ) {
5092 size_type j = jjj[0], jjstart = jjj[1], jjend = jjj[2];
5093 for (
const auto &qqq2 : partQ) {
5094 size_type q2 = qqq2[0], qq2start = qqq2[1], qq2end = qqq2[2];
5096 auto itr = KQJloc(q2,j)->begin();
5097 for (
size_type jj=jjstart; jj < jjend; ++jj) {
5098 for (
size_type qq2=qq2start; qq2 < qq2end; ++qq2, ++itr) {
5099 for (
size_type qq1=0; qq1 < partQ.back()[2]; ++qq1) {
5100 Kqqjj(qq1,jj) += invKqqqq(qq1,qq2)*(*itr);
5105 GMM_ASSERT1(itr == KQJloc(q2,j)->cend(),
"Internal error");
// Step 3b: Rqq = invKqqqq * RQ (reading the rhs blocks RQprime).
5109 for (
const auto &qqq2 : partQ) {
5110 size_type q2 = qqq2[0], qq2start = qqq2[1], qq2end = qqq2[2];
5112 auto itr = RQprime[q2]->cbegin();
5113 for (
size_type qq2=qq2start; qq2 < qq2end; ++qq2, ++itr) {
5114 for (
size_type qq1=0; qq1 < invKqqqq.size(0); ++qq1)
5115 Rqq[qq1] += invKqqqq(qq1,qq2)*(*itr);
5117 GMM_ASSERT1(itr == RQprime[q2]->cend(),
"Internal error");
// Step 4: write back the condensed blocks, per variable q1. RQprime is
// overwritten with Rqq/coeff; KQJprime receives the columns of Kqqjj.
5123 for (
const auto &qqq1 : partQ) {
5124 size_type q1 = qqq1[0], qq1start = qqq1[1], qq1end = qqq1[2];
5126 auto itw = RQprime[q1]->begin();
5127 for (
size_type qq1=qq1start; qq1 < qq1end; ++qq1)
5128 *itw++ = Rqq[qq1]/coeff;
5130 for (
const auto &jjj2 : partJ) {
5131 size_type j2 = jjj2[0], jj2start = jjj2[1], jj2end = jjj2[2];
5132 auto itw = KQJprime(q1,j2)->begin();
5133 for (
size_type jj2=jj2start; jj2 < jj2end; ++jj2)
5134 for (
size_type qq1=qq1start; qq1 < qq1end; ++qq1)
5135 *itw++ = Kqqjj(qq1,jj2);
// Constructor: snapshots the block-pointer tables, builds the partJ/partQ
// partitions (offset computation partially elided), sizes invKqqqq from
// the KQQ block sizes, and allocates the Rqq work vector.
5141 ga_instruction_condensation_sub(gmm::dense_matrix<base_tensor *> &KQJpr,
5142 std::vector<base_tensor *> &RQpr,
5143 const gmm::dense_matrix<base_tensor *> &KQQ,
5144 const gmm::dense_matrix<base_tensor *> &KQJ,
5145 const std::set<size_type> &Qset,
5146 const scalar_type &coeff_)
5147 : KQJprime(KQJpr), RQprime(RQpr), coeff(coeff_)
5150 KQQloc.resize(KQQ.nrows(), KQQ.ncols());
5151 KQJloc.resize(KQJ.nrows(), KQJ.ncols());
5152 for (
size_type i=0; i < KQQ.as_vector().size(); ++i) KQQloc[i] = KQQ[i];
5153 for (
size_type i=0; i < KQJ.as_vector().size(); ++i) KQJloc[i] = KQJ[i];
5155 for (
size_type j=0; j < KQJ.ncols(); ++j)
5158 partJ.push_back(std::array<size_type,3>{j,0,0});
5164 partQ.push_back(std::array<size_type,3>{q,0,0});
// Consistency check: every KQQ block touching q1 must agree on its size.
5166 for (
auto &qqq1 : partQ) {
5171 GMM_ASSERT1(new_q == KQQ(q1,q2)->size(0) &&
5172 new_q == KQQ(q2,q1)->size(1),
"Internal error");
5174 new_q = KQQ(q1,q2)->size(0);
5179 invKqqqq.adjust_sizes(partQ.back()[2], partQ.back()[2]);
5180 Rqq.resize(partQ.back()[2]);
// Condensation contribution to a kept (non-condensed) matrix block:
// accumulates Kij -= sum_k KiQ[k] * KQj[k], i.e. subtracts the Schur
// complement terms from the retained block Kij.
// NOTE(review): doc-extraction listing — the loops over k and jj, the
// declarations of m/n/qqsize and closing braces were elided; comments
// describe only the visible code.
5186 struct ga_instruction_condensation_super_K :
public ga_instruction {
// Paired factor blocks: KiQ[k] is m x qqsize, KQj[k] is qqsize x n.
5188 std::vector<base_tensor *> KiQ, KQj;
5191 virtual int exec() {
5192 GA_DEBUG_INFO(
"Instruction: contribution of condensation to kept part");
5196 Kij.adjust_sizes(m,n);
// For each factor pair k (loop header elided):
5199 const base_tensor &K1 = *KiQ[k], &K2 = *KQj[k];
5201 GMM_ASSERT1(K1.size(0) == m && K2.size(1) == n && K2.size(0) == qqsize,
// Column-major accumulation: it walks Kij column jj (outer loop over
// jj elided), subtracting the (K1*K2)(ii,jj) dot products.
5204 base_tensor::iterator it = Kij.begin();
5206 for (
size_type ii = 0; ii < m; ++ii, ++it)
5207 for (
size_type qq = 0; qq < qqsize; ++qq)
5208 *it -= K1[ii+qq*m] * K2[qq+jj*qqsize];
5209 GA_DEBUG_ASSERT(it == Kij.end(),
"Wrong sizes");
// Constructor: one KQj block per KiQ block is required.
5213 ga_instruction_condensation_super_K(base_tensor &Kij_,
5214 const std::vector<base_tensor *> KiQ_,
5215 const std::vector<base_tensor *> KQj_)
5216 : Kij(Kij_), KiQ(KiQ_), KQj(KQj_)
5219 GMM_ASSERT1(KiQ.size() == KQj.size(),
"Internal error");
// Condensation contribution to a kept right-hand-side block: accumulates
// Ri -= sum_k KiQ[k] * RQpr[k] (matrix-vector analogue of the super_K
// instruction above).
// NOTE(review): doc-extraction listing — the loop over k, the m/qqsize
// declarations and closing braces were elided; comments describe only the
// visible code.
5223 struct ga_instruction_condensation_super_R :
public ga_instruction {
// Paired blocks: KiQ[k] is m x qqsize, RQpr[k] is a length-qqsize vector.
5225 std::vector<base_tensor *> KiQ, RQpr;
5228 virtual int exec() {
5229 GA_DEBUG_INFO(
"Instruction: contribution of condensation to primary rhs");
// For each factor pair k (loop header elided):
5235 const base_tensor &K1 = *KiQ[k], &R2 = *RQpr[k];
5237 GMM_ASSERT1(K1.size(0) == m && R2.size(0) == qqsize,
"Internal error");
5238 base_tensor::iterator it = Ri.begin();
5239 for (
size_type ii = 0; ii < m; ++ii, ++it)
5240 for (
size_type qq = 0; qq < qqsize; ++qq)
5241 *it -= K1[ii+qq*m] * R2[qq];
5242 GA_DEBUG_ASSERT(it == Ri.end(),
"Wrong sizes");
// Constructor: one rhs block per KiQ block is required.
5246 ga_instruction_condensation_super_R(base_tensor &Ri_,
5247 const std::vector<base_tensor *> KiQ_,
5248 const std::vector<base_tensor *> RQpr_)
5249 : Ri(Ri_), KiQ(KiQ_), RQpr(RQpr_)
5252 GMM_ASSERT1(KiQ.size() == RQpr.size(),
"Internal error");
// Makes the (basic-dof-extended) value of variable `varname` available in
// gis.extended_vars. Variable groups are handled by recursing over their
// members. For a reduced mesh_fem the reduced value is expanded into
// gis.really_extended_vars via mf->extend_vector (the resize of U, around
// original line 5271, was elided by the extraction); otherwise the
// workspace value is referenced directly. Already-extended variables are
// skipped via the extended_vars.count check.
// NOTE(review): doc-extraction listing — closing braces and at least one
// statement are missing; comments describe only the visible code.
5260 static void extend_variable_in_gis(
const ga_workspace &workspace,
5261 const std::string &varname,
5262 ga_instruction_set &gis) {
// A group is extended member by member.
5263 if (workspace.variable_group_exists(varname)) {
5264 for (
const std::string &v : workspace.variable_group(varname))
5265 extend_variable_in_gis(workspace, v, gis);
5266 }
else if (gis.extended_vars.count(varname) == 0) {
5267 const mesh_fem *mf = workspace.associated_mf(varname);
// NOTE(review): mf is dereferenced without a null check — presumably
// callers only reach here for fem variables; confirm against callers.
5268 if (mf->is_reduced()) {
// n: number of components per basic dof of the extended vector.
5269 auto n = (mf->get_qdim() == 1) ? workspace.qdim(varname) : 1;
5270 base_vector &U = gis.really_extended_vars[varname];
5272 mf->extend_vector(workspace.value(varname), U);
5273 gis.extended_vars[varname] = &(gis.really_extended_vars[varname]);
// Non-reduced fem: no copy needed, point at the workspace value.
5275 gis.extended_vars[varname] = &(workspace.value(varname));
// Removes pnode (and, recursively, all of its children) from the
// hash-value-indexed node list used for common-subexpression detection
// during compilation. Only the entry equal to pnode itself is erased from
// the bucket of its hash value; other nodes sharing the hash remain.
// NOTE(review): doc-extraction listing — the closing brace of the erase
// loop was elided; comments describe only the visible code.
5280 static void ga_clear_node_list
5281 (pga_tree_node pnode, std::map<scalar_type,
5282 std::list<pga_tree_node> > &node_list) {
5283 std::list<pga_tree_node> &loc_node_list = node_list[pnode->hash_value];
// erase() returns the next iterator, so the loop header does not
// increment; ++it happens only when no erase took place.
5284 for (std::list<pga_tree_node>::iterator it = loc_node_list.begin();
5285 it != loc_node_list.end(); ) {
5286 if (*it == pnode) it = loc_node_list.erase(it);
else ++it;
// Recurse into the subtree.
5288 for (
size_type i = 0; i < pnode->children.size(); ++i)
5289 ga_clear_node_list(pnode->children[i], node_list);
5294 static void ga_compile_node(
const pga_tree_node pnode,
5295 ga_workspace &workspace,
5296 ga_instruction_set &gis,
5297 ga_instruction_set::region_mim_instructions &rmi,
5298 const mesh &m,
bool function_case,
5299 ga_if_hierarchy &if_hierarchy) {
5301 if (pnode->node_type == GA_NODE_PREDEF_FUNC ||
5302 pnode->node_type == GA_NODE_OPERATOR ||
5303 pnode->node_type == GA_NODE_SPEC_FUNC ||
5304 pnode->node_type == GA_NODE_CONSTANT ||
5305 pnode->node_type == GA_NODE_ALLINDICES ||
5306 pnode->node_type == GA_NODE_RESHAPE ||
5307 pnode->node_type == GA_NODE_SWAP_IND ||
5308 pnode->node_type == GA_NODE_IND_MOVE_LAST ||
5309 pnode->node_type == GA_NODE_CONTRACT)
return;
5313 pga_instruction pgai;
5314 ga_if_hierarchy *pif_hierarchy = &if_hierarchy;
5315 ga_if_hierarchy new_if_hierarchy;
5317 const mesh_fem *mf1 = 0, *mf2 = 0;
5318 const mesh_fem **mfg1 = 0, **mfg2 = 0;
5319 fem_interpolation_context *pctx1 = 0, *pctx2 = 0;
5320 bool tensor_to_clear =
false;
5321 bool tensor_to_adapt =
false;
5323 if (pnode->test_function_type) {
5324 if (pnode->name_test1.size())
5325 mf1 = workspace.associated_mf(pnode->name_test1);
5328 const std::string &intn1 = pnode->interpolate_name_test1;
5330 if (workspace.secondary_domain_exists(intn1)) {
5331 pctx1 = &(rmi.secondary_domain_infos.ctx);
5333 tensor_to_adapt =
true;
5334 pctx1 = &(rmi.interpolate_infos[intn1].ctx);
5335 if (workspace.variable_group_exists(pnode->name_test1)) {
5336 ga_instruction_set::variable_group_info &vgi =
5337 rmi.interpolate_infos[intn1].groups_info[pnode->name_test1];
5344 if (pnode->name_test2.size())
5345 mf2 = workspace.associated_mf(pnode->name_test2);
5348 const std::string &intn2 = pnode->interpolate_name_test2;
5350 if (workspace.secondary_domain_exists(intn2)) {
5351 pctx2 = &(rmi.secondary_domain_infos.ctx);
5353 tensor_to_adapt =
true;
5354 pctx2 = &(rmi.interpolate_infos[intn2].ctx);
5355 if (workspace.variable_group_exists(pnode->name_test2)) {
5356 ga_instruction_set::variable_group_info &vgi =
5357 rmi.interpolate_infos[intn2].groups_info[pnode->name_test2];
5368 pnode->t.set_to_original(); pnode->t.set_sparsity(0, 0);
5369 bool is_uniform =
false;
5370 if (pnode->test_function_type == 1) {
5372 pgai = std::make_shared<ga_instruction_first_ind_tensor>
5373 (pnode->tensor(), *pctx1, pnode->qdim1, mf1, mfg1);
5374 if (mf1 && mf1->is_uniform())
5375 { is_uniform =
true; pctx1->invalid_convex_num(); }
5376 }
else if (pnode->test_function_type == 2) {
5378 pgai = std::make_shared<ga_instruction_first_ind_tensor>
5379 (pnode->tensor(), *pctx2, pnode->qdim2, mf2, mfg2);
5380 if (mf2 && mf2->is_uniform())
5381 { is_uniform =
true; pctx2->invalid_convex_num(); }
5382 }
else if (pnode->test_function_type == 3) {
5383 if ((mf1 || mfg1) && (mf2 || mfg2)) {
5384 pgai = std::make_shared<ga_instruction_two_first_ind_tensor>
5385 (pnode->tensor(), *pctx1, *pctx2, pnode->qdim1, mf1, mfg1,
5386 pnode->qdim2, mf2, mfg2);
5387 if (mf1 && mf1->is_uniform() && mf2 && mf2->is_uniform()) {
5389 pctx1->invalid_convex_num();
5390 pctx2->invalid_convex_num();
5392 }
else if (mf1 || mfg1) {
5393 pgai = std::make_shared<ga_instruction_first_ind_tensor>
5394 (pnode->tensor(), *pctx1, pnode->qdim1, mf1, mfg1);
5395 if (mf1 && mf1->is_uniform())
5396 { is_uniform =
true; pctx1->invalid_convex_num(); }
5397 }
else if (mf2 || mfg2) {
5398 pgai = std::make_shared<ga_instruction_second_ind_tensor>
5399 (pnode->tensor(), *pctx2, pnode->qdim2, mf2, mfg2);
5400 if (mf2 && mf2->is_uniform())
5401 { is_uniform =
true; pctx2->invalid_convex_num(); }
5406 pnode->t.set_to_original();
5407 if (rmi.node_list.count(pnode->hash_value) != 0) {
5408 for (pga_tree_node &pnode1 : rmi.node_list[pnode->hash_value]) {
5412 if (sub_tree_are_equal(pnode, pnode1, workspace, 1)) {
5413 pnode->t.set_to_copy(pnode1->t);
5416 if (sub_tree_are_equal(pnode, pnode1, workspace, 2)) {
5418 if (pnode->nb_test_functions() == 2) {
5422 else { rmi.instructions.push_back(std::move(pgai)); }
5424 pgai = std::make_shared<ga_instruction_transpose_test>
5425 (pnode->tensor(), pnode1->tensor());
5426 rmi.instructions.push_back(std::move(pgai));
5428 pnode->t.set_to_copy(pnode1->t);
5433 std::stringstream ss;
5434 ss <<
"Detected wrong equivalent nodes:" << endl;
5435 ga_print_node(pnode, ss);
5436 ss << endl <<
" and " << endl;
5437 ga_print_node(pnode1, ss);
5438 ss << endl <<
"No problem, but hash values could be adapted." << endl;
5439 GMM_TRACE2(ss.str());
5444 if (is_uniform) { pgai->exec(); }
5446 if (tensor_to_adapt)
5447 rmi.instructions.push_back(std::move(pgai));
5449 rmi.elt_instructions.push_back(std::move(pgai));
5453 size_type interpolate_filter_inst = rmi.instructions.size();
5454 if (pnode->node_type == GA_NODE_INTERPOLATE_FILTER) {
5455 pgai = pga_instruction();
5456 rmi.instructions.push_back(std::move(pgai));
5457 if_hierarchy.increment();
5458 new_if_hierarchy.child_of(if_hierarchy);
5459 pif_hierarchy = &new_if_hierarchy;
5462 for (
size_type i = 0; i < pnode->children.size(); ++i)
5463 ga_compile_node(pnode->children[i], workspace, gis, rmi, m,
5464 function_case, *pif_hierarchy);
5466 if (pnode->node_type == GA_NODE_INTERPOLATE_FILTER) {
5467 const std::string &intn = pnode->interpolate_name;
5468 ga_instruction_set::interpolate_info &inin = rmi.interpolate_infos[intn];
5469 pgai = std::make_shared<ga_instruction_interpolate_filter>
5470 (pnode->tensor(), inin, pnode->nbc1,
5471 int(rmi.instructions.size() - interpolate_filter_inst));
5472 rmi.instructions[interpolate_filter_inst].swap(pgai);
5473 pgai = std::make_shared<ga_instruction_copy_tensor>
5474 (pnode->tensor(), pnode->children[0]->tensor());
5475 rmi.instructions.push_back(std::move(pgai));
5476 ga_clear_node_list(pnode->children[0], rmi.node_list);
5479 static scalar_type minus = -scalar_type(1);
5480 size_type nbch = pnode->children.size();
5481 pga_tree_node child0 = (nbch > 0) ? pnode->children[0] : 0;
5482 pga_tree_node child1 = (nbch > 1) ? pnode->children[1] : 0;
5483 bgeot::multi_index mi;
5484 const bgeot::multi_index &size0 = child0 ? child0->t.sizes() : mi;
5486 size_type dim0 = child0 ? child0->tensor_order() : 0;
5487 size_type dim1 = child1 ? child1->tensor_order() : 0;
5489 switch (pnode->node_type) {
5491 case GA_NODE_PREDEF_FUNC:
case GA_NODE_OPERATOR:
case GA_NODE_SPEC_FUNC:
5492 case GA_NODE_CONSTANT:
case GA_NODE_ALLINDICES:
case GA_NODE_ZERO:
5493 case GA_NODE_RESHAPE:
case GA_NODE_CROSS_PRODUCT:
5494 case GA_NODE_SWAP_IND:
case GA_NODE_IND_MOVE_LAST:
5495 case GA_NODE_CONTRACT:
case GA_NODE_INTERPOLATE_FILTER:
5499 GMM_ASSERT1(!function_case,
5500 "No use of X is allowed in scalar functions");
5502 GA_DEBUG_ASSERT(pnode->tensor().size() == 1,
"dimensions mismatch");
5503 GMM_ASSERT1(pnode->nbc1 <= m.dim(),
5504 "Bad index for X in expression");
5505 pgai = std::make_shared<ga_instruction_X_component>
5506 (pnode->tensor()[0], gis.ctx, pnode->nbc1-1);
5508 if (pnode->tensor().size() != m.dim())
5509 pnode->init_vector_tensor(m.dim());
5510 pgai = std::make_shared<ga_instruction_X>(pnode->tensor(), gis.ctx);
5512 rmi.instructions.push_back(std::move(pgai));
5515 case GA_NODE_ELT_SIZE:
5516 GMM_ASSERT1(!function_case,
5517 "No use of element_size is allowed in functions");
5518 if (pnode->tensor().size() != 1) pnode->init_scalar_tensor(0);
5519 pgai = std::make_shared<ga_instruction_element_size>
5520 (pnode->tensor(), gis.elt_size);
5521 gis.need_elt_size =
true;
5522 rmi.instructions.push_back(std::move(pgai));
5526 GMM_ASSERT1(!function_case,
5527 "No use of element_K is allowed in functions");
5528 pgai = std::make_shared<ga_instruction_element_K>(pnode->tensor(),
5530 rmi.instructions.push_back(std::move(pgai));
5534 GMM_ASSERT1(!function_case,
5535 "No use of element_B is allowed in functions");
5536 pgai = std::make_shared<ga_instruction_element_B>(pnode->tensor(),
5538 rmi.instructions.push_back(std::move(pgai));
5541 case GA_NODE_NORMAL:
5543 GMM_ASSERT1(!function_case,
5544 "No use of Normal is allowed in functions");
5545 if (pnode->tensor().size() != m.dim())
5546 pnode->init_vector_tensor(m.dim());
5547 const mesh_im_level_set *mimls
5548 =
dynamic_cast<const mesh_im_level_set *
>(rmi.im);
5549 if (mimls && mimls->location()==mesh_im_level_set::INTEGRATE_BOUNDARY) {
5551 pgai = std::make_shared<ga_instruction_level_set_normal_vector>
5552 (pnode->tensor(), mimls, gis.ctx);
5553 rmi.instructions.push_back(std::move(pgai));
5555 pgai = std::make_shared<ga_instruction_copy_Normal>
5556 (pnode->tensor(), gis.Normal);
5557 rmi.instructions.push_back(std::move(pgai));
5562 case GA_NODE_INTERPOLATE_X:
5563 case GA_NODE_INTERPOLATE_NORMAL:
5564 GMM_ASSERT1(!function_case,
5565 "No use of Interpolate is allowed in functions");
5566 if (pnode->tensor().size() != m.dim())
5567 pnode->init_vector_tensor(m.dim());
5568 if (pnode->node_type == GA_NODE_INTERPOLATE_X)
5569 pgai = std::make_shared<ga_instruction_copy_interpolated_small_vect>
5571 rmi.interpolate_infos[pnode->interpolate_name].pt_y,
5572 rmi.interpolate_infos[pnode->interpolate_name]);
5573 else if (pnode->node_type == GA_NODE_INTERPOLATE_NORMAL)
5574 pgai = std::make_shared<ga_instruction_copy_Normal>
5576 rmi.interpolate_infos[pnode->interpolate_name].Normal);
5577 rmi.instructions.push_back(std::move(pgai));
5580 case GA_NODE_INTERPOLATE_ELT_K:
5581 case GA_NODE_INTERPOLATE_ELT_B:
5582 GMM_ASSERT1(!function_case,
5583 "No use of Interpolate is allowed in functions");
5584 if (pnode->node_type == GA_NODE_INTERPOLATE_ELT_K)
5585 pgai = std::make_shared<ga_instruction_element_K>
5587 rmi.interpolate_infos[pnode->interpolate_name].ctx);
5588 else if (pnode->node_type == GA_NODE_INTERPOLATE_ELT_B)
5589 pgai = std::make_shared<ga_instruction_element_B>
5591 rmi.interpolate_infos[pnode->interpolate_name].ctx);
5592 rmi.instructions.push_back(std::move(pgai));
5595 case GA_NODE_SECONDARY_DOMAIN_X:
5596 case GA_NODE_SECONDARY_DOMAIN_NORMAL:
5598 GMM_ASSERT1(!function_case,
5599 "No use of Secondary_domain is allowed in functions");
5600 auto psd = workspace.secondary_domain(pnode->interpolate_name);
5601 size_type sddim = psd->mim().linked_mesh().dim();
5602 if (pnode->tensor().size() != sddim)
5603 pnode->init_vector_tensor(sddim);
5604 if (pnode->node_type == GA_NODE_SECONDARY_DOMAIN_X)
5605 pgai = std::make_shared<ga_instruction_X>
5606 (pnode->tensor(), rmi.secondary_domain_infos.ctx);
5607 else if (pnode->node_type == GA_NODE_SECONDARY_DOMAIN_NORMAL)
5608 pgai = std::make_shared<ga_instruction_copy_Normal>
5609 (pnode->tensor(), rmi.secondary_domain_infos.Normal);
5610 rmi.instructions.push_back(std::move(pgai));
5614 case GA_NODE_VAL:
case GA_NODE_GRAD:
5615 case GA_NODE_HESS:
case GA_NODE_DIVERG:
5616 case GA_NODE_ELEMENTARY_VAL:
case GA_NODE_ELEMENTARY_GRAD:
5617 case GA_NODE_ELEMENTARY_HESS:
case GA_NODE_ELEMENTARY_DIVERG:
5618 case GA_NODE_XFEM_PLUS_VAL:
case GA_NODE_XFEM_PLUS_GRAD:
5619 case GA_NODE_XFEM_PLUS_HESS:
case GA_NODE_XFEM_PLUS_DIVERG:
5620 case GA_NODE_XFEM_MINUS_VAL:
case GA_NODE_XFEM_MINUS_GRAD:
5621 case GA_NODE_XFEM_MINUS_HESS:
case GA_NODE_XFEM_MINUS_DIVERG:
5623 bool is_elementary = (pnode->node_type == GA_NODE_ELEMENTARY_VAL ||
5624 pnode->node_type == GA_NODE_ELEMENTARY_GRAD ||
5625 pnode->node_type == GA_NODE_ELEMENTARY_HESS ||
5626 pnode->node_type == GA_NODE_ELEMENTARY_DIVERG);
5627 if (function_case) {
5628 GMM_ASSERT1(!is_elementary,
5629 "No elementary transformation is allowed in functions");
5630 GMM_ASSERT1(pnode->node_type != GA_NODE_XFEM_PLUS_VAL &&
5631 pnode->node_type != GA_NODE_XFEM_PLUS_GRAD &&
5632 pnode->node_type != GA_NODE_XFEM_PLUS_HESS &&
5633 pnode->node_type != GA_NODE_XFEM_PLUS_DIVERG,
5634 "Xfem_plus not allowed in functions");
5635 GMM_ASSERT1(pnode->node_type != GA_NODE_XFEM_MINUS_VAL &&
5636 pnode->node_type != GA_NODE_XFEM_MINUS_GRAD &&
5637 pnode->node_type != GA_NODE_XFEM_MINUS_HESS &&
5638 pnode->node_type != GA_NODE_XFEM_MINUS_DIVERG,
5639 "Xfem_plus not allowed in functions");
5640 const mesh_fem *mf = workspace.associated_mf(pnode->name);
5641 const im_data *imd = workspace.associated_im_data(pnode->name);
5642 GMM_ASSERT1(!mf,
"No fem expression is allowed in "
5643 "function expression");
5644 GMM_ASSERT1(!imd,
"No integration method data is allowed in "
5645 "function expression");
5646 if (gmm::vect_size(workspace.value(pnode->name)) == 1)
5647 pgai = std::make_shared<ga_instruction_copy_scalar>
5648 (pnode->tensor()[0], (workspace.value(pnode->name))[0]);
5650 pgai = std::make_shared<ga_instruction_copy_vect>
5651 (pnode->tensor().as_vector(), workspace.value(pnode->name));
5652 rmi.instructions.push_back(std::move(pgai));
5654 const mesh_fem *mf = workspace.associated_mf(pnode->name), *mfo=mf;
5655 const im_data *imd = workspace.associated_im_data(pnode->name);
5657 if (is_elementary) {
5658 mf = workspace.associated_mf(pnode->elementary_target);
5659 GMM_ASSERT1(mf && mfo,
5660 "Wrong context for elementary transformation");
5661 GMM_ASSERT1(&(mfo->linked_mesh()) == &(m),
5662 "The finite element of variable " << pnode->name
5663 <<
" has to be defined on the same mesh as the "
5664 <<
"integration method or interpolation used");
5668 GMM_ASSERT1(pnode->node_type == GA_NODE_VAL,
5669 "Only values can be extracted on im_data (no " <<
5670 "gradient, Hessian, xfem or elementary tranformation" <<
5672 pgai = std::make_shared<ga_instruction_extract_local_im_data>
5673 (pnode->tensor(), *imd, workspace.value(pnode->name),
5674 gis.pai, gis.ctx, workspace.qdim(pnode->name));
5675 rmi.instructions.push_back(std::move(pgai));
5677 GMM_ASSERT1(mf,
"Internal error");
5679 GMM_ASSERT1(&(mf->linked_mesh()) == &(m),
5680 "The finite element of variable " <<
5681 (is_elementary ? pnode->elementary_target : pnode->name)
5682 <<
" has to be defined on the same mesh as the "
5683 <<
"integration method or interpolation used");
5686 if (rmi.local_dofs.count(pnode->name) == 0) {
5687 rmi.local_dofs[pnode->name] = base_vector(1);
5688 extend_variable_in_gis(workspace, pnode->name, gis);
5691 if (qmult2 > 1 && !(mfo->is_uniformly_vectorized()))
5693 pgai = std::make_shared<ga_instruction_slice_local_dofs>
5694 (*mfo, *(gis.extended_vars[pnode->name]), gis.ctx,
5695 rmi.local_dofs[pnode->name],
5696 workspace.qdim(pnode->name) / mfo->get_qdim(), qmult2);
5697 rmi.elt_instructions.push_back(std::move(pgai));
5701 if (mf->is_uniform()) {
5702 if (rmi.pfps.count(mf) == 0) {
5704 pgai = std::make_shared<ga_instruction_update_pfp>
5705 (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
5706 rmi.begin_instructions.push_back(std::move(pgai));
5708 }
else if (rmi.pfps.count(mf) == 0 ||
5709 !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
5710 rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
5712 pgai = std::make_shared<ga_instruction_update_pfp>
5713 (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
5714 rmi.instructions.push_back(std::move(pgai));
5718 pgai = pga_instruction();
5719 switch (pnode->node_type) {
5720 case GA_NODE_VAL:
case GA_NODE_ELEMENTARY_VAL:
5721 if (rmi.base.count(mf) == 0 ||
5722 !if_hierarchy.is_compatible(rmi.base_hierarchy[mf])) {
5723 rmi.base_hierarchy[mf].push_back(if_hierarchy);
5724 pgai = std::make_shared<ga_instruction_val_base>
5725 (rmi.base[mf], gis.ctx, *mf, rmi.pfps[mf]);
5728 case GA_NODE_XFEM_PLUS_VAL:
5729 if (rmi.xfem_plus_base.count(mf) == 0 ||
5730 !if_hierarchy.is_compatible(rmi.xfem_plus_base_hierarchy[mf]))
5732 rmi.xfem_plus_base_hierarchy[mf].push_back(if_hierarchy);
5733 pgai = std::make_shared<ga_instruction_xfem_plus_val_base>
5734 (rmi.xfem_plus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
5737 case GA_NODE_XFEM_MINUS_VAL:
5738 if (rmi.xfem_minus_base.count(mf) == 0 ||
5739 !if_hierarchy.is_compatible(rmi.xfem_minus_base_hierarchy[mf]))
5741 rmi.xfem_minus_base_hierarchy[mf].push_back(if_hierarchy);
5742 pgai = std::make_shared<ga_instruction_xfem_minus_val_base>
5743 (rmi.xfem_minus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
5746 case GA_NODE_GRAD:
case GA_NODE_DIVERG:
5747 case GA_NODE_ELEMENTARY_GRAD:
case GA_NODE_ELEMENTARY_DIVERG:
5748 if (rmi.grad.count(mf) == 0 ||
5749 !if_hierarchy.is_compatible(rmi.grad_hierarchy[mf])) {
5750 rmi.grad_hierarchy[mf].push_back(if_hierarchy);
5751 pgai = std::make_shared<ga_instruction_grad_base>
5752 (rmi.grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
5755 case GA_NODE_XFEM_PLUS_GRAD:
case GA_NODE_XFEM_PLUS_DIVERG:
5756 if (rmi.xfem_plus_grad.count(mf) == 0 ||
5757 !if_hierarchy.is_compatible(rmi.xfem_plus_grad_hierarchy[mf]))
5759 rmi.xfem_plus_grad_hierarchy[mf].push_back(if_hierarchy);
5760 pgai = std::make_shared<ga_instruction_xfem_plus_grad_base>
5761 (rmi.xfem_plus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
5764 case GA_NODE_XFEM_MINUS_GRAD:
case GA_NODE_XFEM_MINUS_DIVERG:
5765 if (rmi.xfem_minus_grad.count(mf) == 0 ||
5766 !if_hierarchy.is_compatible(rmi.xfem_minus_grad_hierarchy[mf]))
5768 rmi.xfem_minus_grad_hierarchy[mf].push_back(if_hierarchy);
5769 pgai = std::make_shared<ga_instruction_xfem_minus_grad_base>
5770 (rmi.xfem_minus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
5773 case GA_NODE_HESS:
case GA_NODE_ELEMENTARY_HESS:
5774 if (rmi.hess.count(mf) == 0 ||
5775 !if_hierarchy.is_compatible(rmi.hess_hierarchy[mf])) {
5776 rmi.hess_hierarchy[mf].push_back(if_hierarchy);
5777 pgai = std::make_shared<ga_instruction_hess_base>
5778 (rmi.hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
5781 case GA_NODE_XFEM_PLUS_HESS:
5782 if (rmi.xfem_plus_hess.count(mf) == 0 ||
5783 !if_hierarchy.is_compatible(rmi.xfem_plus_hess_hierarchy[mf]))
5785 rmi.xfem_plus_hess_hierarchy[mf].push_back(if_hierarchy);
5786 pgai = std::make_shared<ga_instruction_xfem_plus_hess_base>
5787 (rmi.xfem_plus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
5790 case GA_NODE_XFEM_MINUS_HESS:
5791 if (rmi.xfem_minus_hess.count(mf) == 0 ||
5792 !if_hierarchy.is_compatible(rmi.xfem_minus_hess_hierarchy[mf]))
5794 rmi.xfem_minus_hess_hierarchy[mf].push_back(if_hierarchy);
5795 pgai = std::make_shared<ga_instruction_xfem_minus_hess_base>
5796 (rmi.xfem_minus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
5800 default : GMM_ASSERT1(
false,
"Internal error");
5802 if (pgai) rmi.instructions.push_back(std::move(pgai));
5805 switch (pnode->node_type) {
5807 pgai = std::make_shared<ga_instruction_val>
5808 (pnode->tensor(), rmi.base[mf], rmi.local_dofs[pnode->name],
5809 workspace.qdim(pnode->name));
5812 pgai = std::make_shared<ga_instruction_grad>
5813 (pnode->tensor(), rmi.grad[mf],
5814 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5817 pgai = std::make_shared<ga_instruction_hess>
5818 (pnode->tensor(), rmi.hess[mf],
5819 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5821 case GA_NODE_DIVERG:
5822 pgai = std::make_shared<ga_instruction_diverg>
5823 (pnode->tensor(), rmi.grad[mf],
5824 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5826 case GA_NODE_XFEM_PLUS_VAL:
5827 pgai = std::make_shared<ga_instruction_val>
5828 (pnode->tensor(), rmi.xfem_plus_base[mf],
5829 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5831 case GA_NODE_XFEM_PLUS_GRAD:
5832 pgai = std::make_shared<ga_instruction_grad>
5833 (pnode->tensor(), rmi.xfem_plus_grad[mf],
5834 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5836 case GA_NODE_XFEM_PLUS_HESS:
5837 pgai = std::make_shared<ga_instruction_hess>
5838 (pnode->tensor(), rmi.xfem_plus_hess[mf],
5839 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5841 case GA_NODE_XFEM_PLUS_DIVERG:
5842 pgai = std::make_shared<ga_instruction_diverg>
5843 (pnode->tensor(), rmi.xfem_plus_grad[mf],
5844 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5846 case GA_NODE_XFEM_MINUS_VAL:
5847 pgai = std::make_shared<ga_instruction_val>
5848 (pnode->tensor(), rmi.xfem_minus_base[mf],
5849 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5851 case GA_NODE_XFEM_MINUS_GRAD:
5852 pgai = std::make_shared<ga_instruction_grad>
5853 (pnode->tensor(), rmi.xfem_minus_grad[mf],
5854 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5856 case GA_NODE_XFEM_MINUS_HESS:
5857 pgai = std::make_shared<ga_instruction_hess>
5858 (pnode->tensor(), rmi.xfem_minus_hess[mf],
5859 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5861 case GA_NODE_XFEM_MINUS_DIVERG:
5862 pgai = std::make_shared<ga_instruction_diverg>
5863 (pnode->tensor(), rmi.xfem_minus_grad[mf],
5864 rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5866 case GA_NODE_ELEMENTARY_VAL:
5868 ga_instruction_set::elementary_trans_info &eti
5869 = rmi.elementary_trans_infos
5870 [std::make_tuple(pnode->elementary_name, mfo, mf)];
5872 std::make_shared<ga_instruction_elementary_trans_val>
5873 (pnode->tensor(), rmi.base[mf],
5874 rmi.local_dofs[pnode->name],
5875 workspace.qdim(pnode->elementary_target),
5876 workspace.elementary_transformation(pnode->elementary_name),
5877 *mfo, *mf, gis.ctx, eti.M, eti.icv);
5880 case GA_NODE_ELEMENTARY_GRAD:
5882 ga_instruction_set::elementary_trans_info &eti
5883 = rmi.elementary_trans_infos
5884 [std::make_tuple(pnode->elementary_name, mfo, mf)];
5886 std::make_shared<ga_instruction_elementary_trans_grad>
5887 (pnode->tensor(), rmi.grad[mf],
5888 rmi.local_dofs[pnode->name],
5889 workspace.qdim(pnode->elementary_target),
5890 workspace.elementary_transformation(pnode->elementary_name),
5891 *mfo, *mf, gis.ctx, eti.M, eti.icv);
5894 case GA_NODE_ELEMENTARY_HESS:
5896 ga_instruction_set::elementary_trans_info &eti
5897 = rmi.elementary_trans_infos
5898 [std::make_tuple(pnode->elementary_name, mfo, mf)];
5900 std::make_shared<ga_instruction_elementary_trans_hess>
5901 (pnode->tensor(), rmi.hess[mf],
5902 rmi.local_dofs[pnode->name],
5903 workspace.qdim(pnode->elementary_target),
5904 workspace.elementary_transformation(pnode->elementary_name),
5905 *mfo, *mf, gis.ctx, eti.M, eti.icv);
5908 case GA_NODE_ELEMENTARY_DIVERG:
5910 ga_instruction_set::elementary_trans_info &eti
5911 = rmi.elementary_trans_infos
5912 [std::make_tuple(pnode->elementary_name, mfo, mf)];
5914 std::make_shared<ga_instruction_elementary_trans_diverg>
5915 (pnode->tensor(), rmi.grad[mf],
5916 rmi.local_dofs[pnode->name],
5917 workspace.qdim(pnode->elementary_target),
5918 workspace.elementary_transformation(pnode->elementary_name),
5919 *mfo, *mf, gis.ctx, eti.M, eti.icv);
5924 rmi.instructions.push_back(std::move(pgai));
5930 case GA_NODE_SECONDARY_DOMAIN_VAL:
case GA_NODE_SECONDARY_DOMAIN_GRAD:
5931 case GA_NODE_SECONDARY_DOMAIN_HESS:
case GA_NODE_SECONDARY_DOMAIN_DIVERG:
5933 GMM_ASSERT1(!function_case,
"internal error");
5934 const mesh_fem *mf = workspace.associated_mf(pnode->name);
5935 const im_data *imd = workspace.associated_im_data(pnode->name);
5936 const std::string &intn = pnode->interpolate_name;
5937 auto &sdi = rmi.secondary_domain_infos;
5939 fem_interpolation_context *pctx = &(sdi.ctx);
5940 papprox_integration pai = sdi.pai;
5941 psecondary_domain psd = workspace.secondary_domain(intn);
5944 pgai = std::make_shared<ga_instruction_extract_local_im_data>
5945 (pnode->tensor(), *imd, workspace.value(pnode->name),
5946 pai, *pctx, workspace.qdim(pnode->name));
5947 rmi.instructions.push_back(std::move(pgai));
5949 GMM_ASSERT1(mf,
"Internal error");
5950 GMM_ASSERT1(&(mf->linked_mesh()) == &(psd->mim().linked_mesh()),
5951 "The finite element of variable " << pnode->name <<
5952 " has to be defined on the same mesh as the "
5953 "integration method or interpolation used on the "
5954 "secondary domain");
5957 if (sdi.local_dofs.count(pnode->name) == 0) {
5958 sdi.local_dofs[pnode->name] = base_vector(1);
5959 extend_variable_in_gis(workspace, pnode->name, gis);
5961 if (qmult2 > 1 && !(mf->is_uniformly_vectorized()))
5963 pgai = std::make_shared<ga_instruction_slice_local_dofs>
5964 (*mf, *(gis.extended_vars[pnode->name]), *pctx,
5965 sdi.local_dofs[pnode->name],
5966 workspace.qdim(pnode->name) / mf->get_qdim(), qmult2);
5967 rmi.elt_instructions.push_back(std::move(pgai));
5971 if (mf->is_uniform()) {
5972 if (sdi.pfps.count(mf) == 0) {
5974 pgai = std::make_shared<ga_instruction_update_pfp>
5975 (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
5976 rmi.begin_instructions.push_back(std::move(pgai));
5978 }
else if (sdi.pfps.count(mf) == 0 ||
5979 !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
5980 rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
5982 pgai = std::make_shared<ga_instruction_update_pfp>
5983 (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
5984 rmi.instructions.push_back(std::move(pgai));
5988 pgai = pga_instruction();
5989 switch (pnode->node_type) {
5990 case GA_NODE_SECONDARY_DOMAIN_VAL:
5991 if (sdi.base.count(mf) == 0 ||
5992 !(if_hierarchy.is_compatible(rmi.base_hierarchy[mf]))) {
5993 rmi.base_hierarchy[mf].push_back(if_hierarchy);
5994 pgai = std::make_shared<ga_instruction_val_base>
5995 (sdi.base[mf], *pctx, *mf, sdi.pfps[mf]);
5998 case GA_NODE_SECONDARY_DOMAIN_GRAD:
5999 case GA_NODE_SECONDARY_DOMAIN_DIVERG:
6000 if (sdi.grad.count(mf) == 0 ||
6001 !(if_hierarchy.is_compatible(rmi.grad_hierarchy[mf]))) {
6002 rmi.grad_hierarchy[mf].push_back(if_hierarchy);
6003 pgai = std::make_shared<ga_instruction_grad_base>
6004 (sdi.grad[mf], *pctx, *mf, sdi.pfps[mf]);
6007 case GA_NODE_SECONDARY_DOMAIN_HESS:
6008 if (sdi.hess.count(mf) == 0 ||
6009 !(if_hierarchy.is_compatible(rmi.hess_hierarchy[mf]))) {
6010 rmi.hess_hierarchy[mf].push_back(if_hierarchy);
6011 pgai = std::make_shared<ga_instruction_hess_base>
6012 (sdi.hess[mf], *pctx, *mf, sdi.pfps[mf]);
6015 default : GMM_ASSERT1(
false,
"Internal error");
6017 if (pgai) rmi.instructions.push_back(std::move(pgai));
6020 switch (pnode->node_type) {
6021 case GA_NODE_SECONDARY_DOMAIN_VAL:
6022 pgai = std::make_shared<ga_instruction_val>
6023 (pnode->tensor(), sdi.base[mf], sdi.local_dofs[pnode->name],
6024 workspace.qdim(pnode->name));
6026 case GA_NODE_SECONDARY_DOMAIN_GRAD:
6027 pgai = std::make_shared<ga_instruction_grad>
6028 (pnode->tensor(), sdi.grad[mf],
6029 sdi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6031 case GA_NODE_SECONDARY_DOMAIN_HESS:
6032 pgai = std::make_shared<ga_instruction_hess>
6033 (pnode->tensor(), sdi.hess[mf],
6034 sdi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6036 case GA_NODE_SECONDARY_DOMAIN_DIVERG:
6037 pgai = std::make_shared<ga_instruction_diverg>
6038 (pnode->tensor(), sdi.grad[mf],
6039 sdi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6043 rmi.instructions.push_back(std::move(pgai));
6048 case GA_NODE_INTERPOLATE_VAL:
case GA_NODE_INTERPOLATE_GRAD:
6049 case GA_NODE_INTERPOLATE_HESS:
case GA_NODE_INTERPOLATE_DIVERG:
6051 extend_variable_in_gis(workspace, pnode->name, gis);
6053 const mesh_fem *mfn = workspace.associated_mf(pnode->name), **mfg = 0;
6054 const std::string &intn = pnode->interpolate_name;
6055 const base_vector *Un = gis.extended_vars[pnode->name], **Ug = 0;
6056 fem_interpolation_context *pctx = &(rmi.interpolate_infos[intn].ctx);
6057 const mesh **m2 = &(rmi.interpolate_infos[intn].m);
6058 if (workspace.variable_group_exists(pnode->name)) {
6059 ga_instruction_set::variable_group_info &vgi =
6060 rmi.interpolate_infos[intn].groups_info[pnode->name];
6061 mfg = &(vgi.mf); mfn = 0; Ug = &(vgi.U); Un = 0;
6064 if (pnode->node_type == GA_NODE_INTERPOLATE_VAL) {
6066 pgai = std::make_shared<ga_instruction_interpolate_val>
6067 (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
6068 workspace.qdim(pnode->name),
6069 gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
6070 }
else if (pnode->node_type == GA_NODE_INTERPOLATE_GRAD) {
6072 pgai = std::make_shared<ga_instruction_interpolate_grad>
6073 (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
6074 workspace.qdim(pnode->name),
6075 gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
6076 }
else if (pnode->node_type == GA_NODE_INTERPOLATE_HESS) {
6078 pgai = std::make_shared<ga_instruction_interpolate_hess>
6079 (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
6080 workspace.qdim(pnode->name),
6081 gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
6083 pgai = std::make_shared<ga_instruction_interpolate_diverg>
6084 (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
6085 workspace.qdim(pnode->name),
6086 gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
6088 rmi.instructions.push_back(std::move(pgai));
6092 case GA_NODE_INTERPOLATE_DERIVATIVE:
6093 GMM_ASSERT1(!function_case,
6094 "No use of Interpolate is allowed in functions");
6095 pgai = std::make_shared<ga_instruction_copy_tensor_possibly_void>
6097 rmi.interpolate_infos[pnode->interpolate_name_der]
6098 .derivatives[var_trans_pair(pnode->name, pnode->interpolate_name)]);
6099 rmi.instructions.push_back(std::move(pgai));
6102 case GA_NODE_VAL_TEST:
case GA_NODE_GRAD_TEST:
6103 case GA_NODE_HESS_TEST:
case GA_NODE_DIVERG_TEST:
6104 case GA_NODE_ELEMENTARY_VAL_TEST:
case GA_NODE_ELEMENTARY_GRAD_TEST:
6105 case GA_NODE_ELEMENTARY_HESS_TEST:
case GA_NODE_ELEMENTARY_DIVERG_TEST:
6106 case GA_NODE_XFEM_PLUS_VAL_TEST:
case GA_NODE_XFEM_PLUS_GRAD_TEST:
6107 case GA_NODE_XFEM_PLUS_HESS_TEST:
case GA_NODE_XFEM_PLUS_DIVERG_TEST:
6108 case GA_NODE_XFEM_MINUS_VAL_TEST:
case GA_NODE_XFEM_MINUS_GRAD_TEST:
6109 case GA_NODE_XFEM_MINUS_HESS_TEST:
case GA_NODE_XFEM_MINUS_DIVERG_TEST:
6113 bool is_elementary = (pnode->node_type==GA_NODE_ELEMENTARY_VAL_TEST ||
6114 pnode->node_type==GA_NODE_ELEMENTARY_GRAD_TEST ||
6115 pnode->node_type==GA_NODE_ELEMENTARY_HESS_TEST ||
6116 pnode->node_type==GA_NODE_ELEMENTARY_DIVERG_TEST);
6117 const mesh_fem *mf = workspace.associated_mf(pnode->name), *mfo=mf;
6118 if (is_elementary) {
6119 mf = workspace.associated_mf(pnode->elementary_target);
6120 GMM_ASSERT1(mf && mfo,
6121 "Wrong context for elementary transformation");
6122 GMM_ASSERT1(&(mfo->linked_mesh()) == &(m),
6123 "The finite element of variable " << pnode->name
6124 <<
" has to be defined on the same mesh as the "
6125 <<
"integration method or interpolation used");
6129 GMM_ASSERT1(&(mf->linked_mesh()) == &(m),
6130 "The finite element of variable " <<
6131 (is_elementary ? pnode->elementary_target : pnode->name)
6132 <<
" and the applied integration method have to be"
6133 <<
" defined on the same mesh");
6137 if (rmi.pfps.count(mf) == 0) {
6139 pgai = std::make_shared<ga_instruction_update_pfp>
6140 (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
6141 rmi.begin_instructions.push_back(std::move(pgai));
6143 }
else if (rmi.pfps.count(mf) == 0 ||
6144 !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
6145 rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
6147 pgai = std::make_shared<ga_instruction_update_pfp>
6148 (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
6149 rmi.instructions.push_back(std::move(pgai));
6153 pgai = pga_instruction();
6154 switch (pnode->node_type) {
6155 case GA_NODE_VAL_TEST:
case GA_NODE_ELEMENTARY_VAL_TEST:
6156 if (rmi.base.count(mf) == 0 ||
6157 !if_hierarchy.is_compatible(rmi.base_hierarchy[mf])) {
6158 rmi.base_hierarchy[mf].push_back(if_hierarchy);
6159 pgai = std::make_shared<ga_instruction_val_base>
6160 (rmi.base[mf], gis.ctx, *mf, rmi.pfps[mf]);
6163 case GA_NODE_XFEM_PLUS_VAL_TEST:
6164 if (rmi.xfem_plus_base.count(mf) == 0 ||
6165 !if_hierarchy.is_compatible(rmi.xfem_plus_base_hierarchy[mf]))
6167 rmi.xfem_plus_base_hierarchy[mf].push_back(if_hierarchy);
6168 pgai = std::make_shared<ga_instruction_xfem_plus_val_base>
6169 (rmi.xfem_plus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
6172 case GA_NODE_XFEM_MINUS_VAL_TEST:
6173 if (rmi.xfem_minus_base.count(mf) == 0 ||
6174 !if_hierarchy.is_compatible(rmi.xfem_minus_base_hierarchy[mf]))
6176 rmi.xfem_minus_base_hierarchy[mf].push_back(if_hierarchy);
6177 pgai = std::make_shared<ga_instruction_xfem_minus_val_base>
6178 (rmi.xfem_minus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
6181 case GA_NODE_GRAD_TEST:
case GA_NODE_DIVERG_TEST:
6182 case GA_NODE_ELEMENTARY_GRAD_TEST:
6183 case GA_NODE_ELEMENTARY_DIVERG_TEST:
6184 if (rmi.grad.count(mf) == 0 ||
6185 !if_hierarchy.is_compatible(rmi.grad_hierarchy[mf])) {
6186 rmi.grad_hierarchy[mf].push_back(if_hierarchy);
6187 pgai = std::make_shared<ga_instruction_grad_base>
6188 (rmi.grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
6191 case GA_NODE_XFEM_PLUS_GRAD_TEST:
case GA_NODE_XFEM_PLUS_DIVERG_TEST:
6192 if (rmi.xfem_plus_grad.count(mf) == 0 ||
6193 !if_hierarchy.is_compatible(rmi.xfem_plus_grad_hierarchy[mf]))
6195 rmi.xfem_plus_grad_hierarchy[mf].push_back(if_hierarchy);
6196 pgai = std::make_shared<ga_instruction_xfem_plus_grad_base>
6197 (rmi.xfem_plus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
6200 case GA_NODE_XFEM_MINUS_GRAD_TEST:
6201 case GA_NODE_XFEM_MINUS_DIVERG_TEST:
6202 if (rmi.xfem_minus_grad.count(mf) == 0 ||
6203 !if_hierarchy.is_compatible(rmi.xfem_minus_grad_hierarchy[mf]))
6205 rmi.xfem_minus_grad_hierarchy[mf].push_back(if_hierarchy);
6206 pgai = std::make_shared<ga_instruction_xfem_minus_grad_base>
6207 (rmi.xfem_minus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
6210 case GA_NODE_HESS_TEST:
case GA_NODE_ELEMENTARY_HESS_TEST:
6211 if (rmi.hess.count(mf) == 0 ||
6212 !if_hierarchy.is_compatible(rmi.hess_hierarchy[mf])) {
6213 rmi.hess_hierarchy[mf].push_back(if_hierarchy);
6214 pgai = std::make_shared<ga_instruction_hess_base>
6215 (rmi.hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
6218 case GA_NODE_XFEM_PLUS_HESS_TEST:
6219 if (rmi.xfem_plus_hess.count(mf) == 0 ||
6220 !if_hierarchy.is_compatible(rmi.xfem_plus_hess_hierarchy[mf]))
6222 rmi.xfem_plus_hess_hierarchy[mf].push_back(if_hierarchy);
6223 pgai = std::make_shared<ga_instruction_xfem_plus_hess_base>
6224 (rmi.xfem_plus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
6227 case GA_NODE_XFEM_MINUS_HESS_TEST:
6228 if (rmi.xfem_minus_hess.count(mf) == 0 ||
6229 !if_hierarchy.is_compatible(rmi.xfem_minus_hess_hierarchy[mf]))
6231 rmi.xfem_minus_hess_hierarchy[mf].push_back(if_hierarchy);
6232 pgai = std::make_shared<ga_instruction_xfem_minus_hess_base>
6233 (rmi.xfem_minus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
6237 default : GMM_ASSERT1(
false,
"Internal error");
6239 if (pgai) rmi.instructions.push_back(std::move(pgai));
6242 switch(pnode->node_type) {
6243 case GA_NODE_VAL_TEST:
6245 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
6246 pnode->t.set_sparsity(1, mf->get_qdim());
6247 tensor_to_clear =
true;
6248 pgai = std::make_shared<ga_instruction_copy_vect_val_base>
6249 (pnode->tensor(), rmi.base[mf], mf->get_qdim());
6251 pgai = std::make_shared<ga_instruction_copy_val_base>
6252 (pnode->tensor(), rmi.base[mf], mf->get_qdim());
6255 case GA_NODE_GRAD_TEST:
6257 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
6258 pnode->t.set_sparsity(2, mf->get_qdim());
6259 tensor_to_clear =
true;
6260 pgai = std::make_shared<ga_instruction_copy_vect_grad_base>
6261 (pnode->tensor(), rmi.grad[mf], mf->get_qdim());
6263 pgai = std::make_shared<ga_instruction_copy_grad_base>
6264 (pnode->tensor(), rmi.grad[mf], mf->get_qdim());
6267 case GA_NODE_HESS_TEST:
6269 pgai = std::make_shared<ga_instruction_copy_hess_base>
6270 (pnode->tensor(), rmi.hess[mf], mf->get_qdim());
6271 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6272 pnode->t.set_sparsity(3, mf->get_qdim());
6274 case GA_NODE_DIVERG_TEST:
6276 pgai = std::make_shared<ga_instruction_copy_diverg_base>
6277 (pnode->tensor(), rmi.grad[mf], mf->get_qdim());
6279 case GA_NODE_XFEM_PLUS_VAL_TEST:
6281 pgai = std::make_shared<ga_instruction_copy_val_base>
6282 (pnode->tensor(), rmi.xfem_plus_base[mf], mf->get_qdim());
6283 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6284 pnode->t.set_sparsity(1, mf->get_qdim());
6286 case GA_NODE_XFEM_PLUS_GRAD_TEST:
6288 pgai = std::make_shared<ga_instruction_copy_grad_base>
6289 (pnode->tensor(), rmi.xfem_plus_grad[mf], mf->get_qdim());
6290 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6291 pnode->t.set_sparsity(2, mf->get_qdim());
6293 case GA_NODE_XFEM_PLUS_HESS_TEST:
6295 pgai = std::make_shared<ga_instruction_copy_hess_base>
6296 (pnode->tensor(), rmi.xfem_plus_hess[mf], mf->get_qdim());
6297 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6298 pnode->t.set_sparsity(3, mf->get_qdim());
6300 case GA_NODE_XFEM_PLUS_DIVERG_TEST:
6302 pgai = std::make_shared<ga_instruction_copy_diverg_base>
6303 (pnode->tensor(), rmi.xfem_plus_grad[mf], mf->get_qdim());
6305 case GA_NODE_XFEM_MINUS_VAL_TEST:
6307 pgai = std::make_shared<ga_instruction_copy_val_base>
6308 (pnode->tensor(), rmi.xfem_minus_base[mf], mf->get_qdim());
6309 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6310 pnode->t.set_sparsity(1, mf->get_qdim());
6312 case GA_NODE_XFEM_MINUS_GRAD_TEST:
6314 pgai = std::make_shared<ga_instruction_copy_grad_base>
6315 (pnode->tensor(), rmi.xfem_minus_grad[mf], mf->get_qdim());
6316 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6317 pnode->t.set_sparsity(2, mf->get_qdim());
6319 case GA_NODE_XFEM_MINUS_HESS_TEST:
6321 pgai = std::make_shared<ga_instruction_copy_hess_base>
6322 (pnode->tensor(), rmi.xfem_minus_hess[mf], mf->get_qdim());
6323 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6324 pnode->t.set_sparsity(3, mf->get_qdim());
6326 case GA_NODE_XFEM_MINUS_DIVERG_TEST:
6328 pgai = std::make_shared<ga_instruction_copy_diverg_base>
6329 (pnode->tensor(), rmi.xfem_minus_grad[mf], mf->get_qdim());
6331 case GA_NODE_ELEMENTARY_VAL_TEST:
6333 ga_instruction_set::elementary_trans_info &eti
6334 = rmi.elementary_trans_infos
6335 [std::make_tuple(pnode->elementary_name, mfo, mf)];
6337 std::make_shared<ga_instruction_elementary_trans_val_base>
6338 (pnode->tensor(), rmi.base[mf], mf->get_qdim(),
6339 workspace.elementary_transformation(pnode->elementary_name),
6340 *mfo, *mf, gis.ctx, eti.M, eti.icv);
6343 case GA_NODE_ELEMENTARY_GRAD_TEST:
6345 ga_instruction_set::elementary_trans_info &eti
6346 = rmi.elementary_trans_infos
6347 [std::make_tuple(pnode->elementary_name, mfo, mf)];
6349 std::make_shared<ga_instruction_elementary_trans_grad_base>
6350 (pnode->tensor(), rmi.grad[mf], mf->get_qdim(),
6351 workspace.elementary_transformation(pnode->elementary_name),
6352 *mfo, *mf, gis.ctx, eti.M, eti.icv);
6355 case GA_NODE_ELEMENTARY_HESS_TEST:
6357 ga_instruction_set::elementary_trans_info &eti
6358 = rmi.elementary_trans_infos
6359 [std::make_tuple(pnode->elementary_name, mfo, mf)];
6361 std::make_shared<ga_instruction_elementary_trans_hess_base>
6362 (pnode->tensor(), rmi.hess[mf], mf->get_qdim(),
6363 workspace.elementary_transformation(pnode->elementary_name),
6364 *mfo, *mf, gis.ctx, eti.M, eti.icv);
6367 case GA_NODE_ELEMENTARY_DIVERG_TEST:
6369 ga_instruction_set::elementary_trans_info &eti
6370 = rmi.elementary_trans_infos
6371 [std::make_tuple(pnode->elementary_name, mfo, mf)];
6373 std::make_shared<ga_instruction_elementary_trans_diverg_base>
6374 (pnode->tensor(), rmi.grad[mf], mf->get_qdim(),
6375 workspace.elementary_transformation(pnode->elementary_name),
6376 *mfo, *mf, gis.ctx, eti.M, eti.icv);
6381 if (pgai) rmi.instructions.push_back(std::move(pgai));
6383 workspace.add_temporary_interval_for_unreduced_variable(pnode->name);
6387 case GA_NODE_SECONDARY_DOMAIN_VAL_TEST:
6388 case GA_NODE_SECONDARY_DOMAIN_GRAD_TEST:
6389 case GA_NODE_SECONDARY_DOMAIN_HESS_TEST:
6390 case GA_NODE_SECONDARY_DOMAIN_DIVERG_TEST:
6392 GMM_ASSERT1(!function_case,
"internal error");
6393 const mesh_fem *mf = workspace.associated_mf(pnode->name);
6394 const std::string &intn = pnode->interpolate_name;
6395 auto &sdi = rmi.secondary_domain_infos;
6397 fem_interpolation_context *pctx = &(sdi.ctx);
6398 papprox_integration pai = sdi.pai;
6399 psecondary_domain psd = workspace.secondary_domain(intn);
6401 GMM_ASSERT1(&(mf->linked_mesh()) == &(psd->mim().linked_mesh()),
6402 "The finite element of variable " << pnode->name <<
6403 " and the applied integration method have to be"
6404 " defined on the same mesh for secondary domain");
6408 if (sdi.pfps.count(mf) == 0) {
6410 pgai = std::make_shared<ga_instruction_update_pfp>
6411 (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
6412 rmi.begin_instructions.push_back(std::move(pgai));
6414 }
else if (sdi.pfps.count(mf) == 0 ||
6415 !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
6416 rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
6418 pgai = std::make_shared<ga_instruction_update_pfp>
6419 (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
6420 rmi.instructions.push_back(std::move(pgai));
6424 pgai = pga_instruction();
6425 switch (pnode->node_type) {
6426 case GA_NODE_SECONDARY_DOMAIN_VAL_TEST:
6427 if (sdi.base.count(mf) == 0 ||
6428 !(if_hierarchy.is_compatible(rmi.base_hierarchy[mf]))) {
6429 rmi.base_hierarchy[mf].push_back(if_hierarchy);
6430 pgai = std::make_shared<ga_instruction_val_base>
6431 (sdi.base[mf], *pctx, *mf, sdi.pfps[mf]);
6434 case GA_NODE_SECONDARY_DOMAIN_GRAD_TEST:
6435 case GA_NODE_SECONDARY_DOMAIN_DIVERG_TEST:
6436 if (sdi.grad.count(mf) == 0 ||
6437 !(if_hierarchy.is_compatible(rmi.grad_hierarchy[mf]))) {
6438 rmi.grad_hierarchy[mf].push_back(if_hierarchy);
6439 pgai = std::make_shared<ga_instruction_grad_base>
6440 (sdi.grad[mf], *pctx, *mf, sdi.pfps[mf]);
6443 case GA_NODE_SECONDARY_DOMAIN_HESS_TEST:
6444 if (sdi.hess.count(mf) == 0 ||
6445 !(if_hierarchy.is_compatible(rmi.hess_hierarchy[mf]))) {
6446 rmi.hess_hierarchy[mf].push_back(if_hierarchy);
6447 pgai = std::make_shared<ga_instruction_hess_base>
6448 (sdi.hess[mf], *pctx, *mf, sdi.pfps[mf]);
6451 default : GMM_ASSERT1(
false,
"Internal error");
6453 if (pgai) rmi.instructions.push_back(std::move(pgai));
6456 switch(pnode->node_type) {
6457 case GA_NODE_SECONDARY_DOMAIN_VAL_TEST:
6459 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
6460 pnode->t.set_sparsity(1, mf->get_qdim());
6461 tensor_to_clear =
true;
6462 pgai = std::make_shared<ga_instruction_copy_vect_val_base>
6463 (pnode->tensor(), sdi.base[mf], mf->get_qdim());
6465 pgai = std::make_shared<ga_instruction_copy_val_base>
6466 (pnode->tensor(), sdi.base[mf], mf->get_qdim());
6469 case GA_NODE_SECONDARY_DOMAIN_GRAD_TEST:
6471 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
6472 pnode->t.set_sparsity(2, mf->get_qdim());
6473 tensor_to_clear =
true;
6474 pgai = std::make_shared<ga_instruction_copy_vect_grad_base>
6475 (pnode->tensor(), sdi.grad[mf], mf->get_qdim());
6477 pgai = std::make_shared<ga_instruction_copy_grad_base>
6478 (pnode->tensor(), sdi.grad[mf], mf->get_qdim());
6481 case GA_NODE_SECONDARY_DOMAIN_HESS_TEST:
6483 pgai = std::make_shared<ga_instruction_copy_hess_base>
6484 (pnode->tensor(), sdi.hess[mf], mf->get_qdim());
6485 if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6486 pnode->t.set_sparsity(3, mf->get_qdim());
6488 case GA_NODE_SECONDARY_DOMAIN_DIVERG_TEST:
6490 pgai = std::make_shared<ga_instruction_copy_diverg_base>
6491 (pnode->tensor(), sdi.grad[mf], mf->get_qdim());
6495 if (pgai) rmi.instructions.push_back(std::move(pgai));
6497 workspace.add_temporary_interval_for_unreduced_variable(pnode->name);
6501 case GA_NODE_INTERPOLATE_VAL_TEST:
case GA_NODE_INTERPOLATE_GRAD_TEST:
6502 case GA_NODE_INTERPOLATE_HESS_TEST:
case GA_NODE_INTERPOLATE_DIVERG_TEST:
6504 const mesh_fem *mfn = workspace.associated_mf(pnode->name), **mfg = 0;
6505 const std::string &intn = pnode->interpolate_name;
6506 const mesh **m2 = &(rmi.interpolate_infos[intn].m);
6507 if (workspace.variable_group_exists(pnode->name)) {
6508 ga_instruction_set::variable_group_info &vgi =
6509 rmi.interpolate_infos[intn].groups_info[pnode->name];
6510 mfg = &(vgi.mf); mfn = 0;
6513 if (pnode->node_type == GA_NODE_INTERPOLATE_VAL_TEST) {
6515 pgai = std::make_shared<ga_instruction_interpolate_val_base>
6516 (pnode->tensor(), m2, mfn, mfg, gis.ipt,
6517 workspace.qdim(pnode->name), rmi.interpolate_infos[intn],
6519 }
else if (pnode->node_type == GA_NODE_INTERPOLATE_GRAD_TEST) {
6521 pgai = std::make_shared<ga_instruction_interpolate_grad_base>
6522 (pnode->tensor(), m2, mfn, mfg, gis.ipt,
6523 workspace.qdim(pnode->name),
6524 rmi.interpolate_infos[intn], gis.fp_pool);
6525 }
else if (pnode->node_type == GA_NODE_INTERPOLATE_HESS_TEST) {
6527 pgai = std::make_shared<ga_instruction_interpolate_hess_base>
6528 (pnode->tensor(), m2, mfn, mfg, gis.ipt,
6529 workspace.qdim(pnode->name),
6530 rmi.interpolate_infos[intn], gis.fp_pool);
6533 pgai = std::make_shared<ga_instruction_interpolate_diverg_base>
6534 (pnode->tensor(), m2, mfn, mfg, gis.ipt,
6535 workspace.qdim(pnode->name),
6536 rmi.interpolate_infos[intn], gis.fp_pool);
6538 rmi.instructions.push_back(std::move(pgai));
6539 workspace.add_temporary_interval_for_unreduced_variable(pnode->name);
6544 switch(pnode->op_type) {
6547 if (pnode->tensor().size() == 1) {
6548 GA_DEBUG_ASSERT(child0->tensor().size() == 1,
6549 "Internal error: child0 not scalar");
6550 GA_DEBUG_ASSERT(child1->tensor().size() == 1,
6551 "Internal error: child1 not scalar");
6552 pgai = std::make_shared<ga_instruction_scalar_add>
6553 (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
6555 pgai = std::make_shared<ga_instruction_add>
6556 (pnode->tensor(), child0->tensor(), child1->tensor());
6558 if (child0->t.sparsity() == child1->t.sparsity()
6559 && child0->t.qdim() == child1->t.qdim())
6560 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6561 rmi.instructions.push_back(std::move(pgai));
6565 if (pnode->tensor().size() == 1) {
6566 GA_DEBUG_ASSERT(child0->tensor().size() == 1,
6567 "Internal error: child0 not scalar");
6568 GA_DEBUG_ASSERT(child1->tensor().size() == 1,
6569 "Internal error: child1 not scalar");
6570 pgai = std::make_shared<ga_instruction_scalar_sub>
6571 (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
6573 pgai = std::make_shared<ga_instruction_sub>
6574 (pnode->tensor(), child0->tensor(), child1->tensor());
6576 if (child0->t.sparsity() == child1->t.sparsity()
6577 && child0->t.qdim() == child1->t.qdim())
6578 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6579 rmi.instructions.push_back(std::move(pgai));
6582 case GA_UNARY_MINUS:
6583 if (pnode->tensor().size() == 1) {
6584 GA_DEBUG_ASSERT(child0->tensor().size() == 1,
"Internal error");
6585 pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
6586 (pnode->tensor()[0], child0->tensor()[0], minus);
6588 pgai = std::make_shared<ga_instruction_scalar_mult>
6589 (pnode->tensor(), child0->tensor(), minus);
6591 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6592 rmi.instructions.push_back(std::move(pgai));
6596 case GA_DOT:
case GA_COLON:
case GA_MULT:
6598 size_type tps0 = child0->tensor_proper_size();
6599 size_type tps1 = child1->tensor_proper_size();
6600 size_type s1 = (tps0 * tps1) / pnode->tensor_proper_size();
6603 pgai = pga_instruction();
6604 if ((pnode->op_type == GA_DOT && dim1 <= 1) ||
6605 (pnode->op_type == GA_COLON && dim1 <= 2) ||
6606 (pnode->op_type == GA_MULT && dim0 == 4) ||
6607 (pnode->op_type == GA_MULT && dim1 <= 1) ||
6608 child0->tensor().size() == 1 || tps1 == 1) {
6610 if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
6611 pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
6612 (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
6614 else if (child0->tensor().size() == 1) {
6615 pnode->t.set_sparsity(child1->t.sparsity(), child1->t.qdim());
6616 pgai = std::make_shared<ga_instruction_scalar_mult>
6617 (pnode->tensor(), child1->tensor(), child0->tensor()[0]);
6619 else if (child1->tensor().size() == 1) {
6620 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6621 pgai = std::make_shared<ga_instruction_scalar_mult>
6622 (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
6624 else if (pnode->test_function_type < 3) {
6627 pgai = ga_uniform_instruction_simple_tmult
6628 (pnode->tensor(), child0->tensor(), child1->tensor());
6630 pgai = std::make_shared<ga_instruction_simple_tmult>
6631 (pnode->tensor(), child0->tensor(), child1->tensor());
6635 pgai = ga_uniform_instruction_simple_tmult
6636 (pnode->tensor(), child1->tensor(), child0->tensor());
6638 pgai = std::make_shared<ga_instruction_simple_tmult>
6639 (pnode->tensor(), child1->tensor(), child0->tensor());
6640 }
else if (is_uniform)
6641 pgai = ga_uniform_instruction_contraction_switch
6642 (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
6644 pgai = ga_instruction_contraction_switch
6645 (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
6648 if (child1->test_function_type == 1 ||
6649 child1->test_function_type == 3) {
6650 if (child1->test_function_type == 3 ||
6651 child1->tensor_proper_size() <= s2) {
6654 pgai = ga_uniform_instruction_simple_tmult
6655 (pnode->tensor(), child1->tensor(), child0->tensor());
6657 pgai = std::make_shared<ga_instruction_simple_tmult>
6658 (pnode->tensor(), child1->tensor(), child0->tensor());
6659 }
else if (is_uniform)
6660 pgai = ga_uniform_instruction_contraction_switch
6661 (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
6663 pgai = ga_instruction_contraction_switch
6664 (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
6666 pgai = std::make_shared<ga_instruction_spec_contraction>
6667 (pnode->tensor(), child1->tensor(), child0->tensor(), s2);
6668 }
else if (child1->test_function_type == 0 ||
6669 (child0->tensor_proper_size() == s2 &&
6670 child1->tensor_proper_size() == s2)) {
6673 pgai = ga_uniform_instruction_simple_tmult
6674 (pnode->tensor(), child0->tensor(), child1->tensor());
6676 pgai = std::make_shared<ga_instruction_simple_tmult>
6677 (pnode->tensor(), child0->tensor(), child1->tensor());
6680 pgai = ga_uniform_instruction_contraction_switch
6681 (pnode->t, child1->t, child0->t, s2, tensor_to_clear);
6683 pgai = ga_instruction_contraction_switch
6684 (pnode->t, child1->t, child0->t, s2, tensor_to_clear);
6687 if (child0->tensor_proper_size() == s2)
6688 pgai = ga_uniform_instruction_contraction_switch
6689 (pnode->t, child1->t, child0->t, s2, tensor_to_clear);
6690 else if (child1->tensor_proper_size() == s2)
6691 pgai = std::make_shared<ga_instruction_spec_contraction>
6692 (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
6694 pgai = std::make_shared<ga_instruction_spec2_contraction>
6695 (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
6700 if (pnode->test_function_type < 3) {
6703 pgai = ga_uniform_instruction_simple_tmult
6704 (pnode->tensor(), child0->tensor(), child1->tensor());
6706 pgai = std::make_shared<ga_instruction_simple_tmult>
6707 (pnode->tensor(), child0->tensor(), child1->tensor());
6709 if (child1->test_function_type == 0)
6710 pgai = std::make_shared<ga_instruction_matrix_mult>
6711 (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
6713 pgai = std::make_shared<ga_instruction_matrix_mult_spec>
6714 (pnode->tensor(), child0->tensor(), child1->tensor(),
6715 s2, tps0/s2, tps1/s2);
6718 if (child0->tensor_proper_size() == 1) {
6719 if (child0->test_function_type == 0 ||
6720 child0->test_function_type == 1) {
6722 pgai = ga_uniform_instruction_simple_tmult
6723 (pnode->tensor(), child0->tensor(), child1->tensor());
6725 pgai = std::make_shared<ga_instruction_simple_tmult>
6726 (pnode->tensor(), child0->tensor(), child1->tensor());
6728 pgai = std::make_shared<ga_instruction_spec_tmult>
6729 (pnode->tensor(), child1->tensor(), child0->tensor(),
6732 if (child1->test_function_type == 0)
6733 pgai = std::make_shared<ga_instruction_matrix_mult>
6734 (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
6735 else if (child1->test_function_type == 2)
6736 pgai = std::make_shared<ga_instruction_matrix_mult_spec>
6737 (pnode->tensor(), child0->tensor(), child1->tensor(),
6738 s2, tps0/s2, tps1/s2);
6740 pgai = std::make_shared<ga_instruction_matrix_mult_spec2>
6741 (pnode->tensor(), child0->tensor(), child1->tensor(),
6742 s2, tps0/s2, tps1/s2);
6746 rmi.instructions.push_back(std::move(pgai));
6751 if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
6752 pgai = std::make_shared<ga_instruction_scalar_scalar_div>
6753 (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
6754 }
else if (child1->tensor().size() == 1) {
6755 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6756 pgai = std::make_shared<ga_instruction_scalar_div>
6757 (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
6758 }
else GMM_ASSERT1(
false,
"Internal error");
6759 rmi.instructions.push_back(std::move(pgai));
6763 pnode->t.set_to_copy(child0->t);
6764 pgai = std::make_shared<ga_instruction_print_tensor>
6765 (pnode->tensor(), child0, gis.ctx, gis.nbpt, gis.ipt);
6766 rmi.instructions.push_back(std::move(pgai));
6770 if (pnode->tensor_proper_size() > 1) {
6771 size_type n1 = child0->tensor_proper_size(0);
6772 size_type n2 = (child0->tensor_order() > 1) ?
6773 child0->tensor_proper_size(1) : 1;
6775 for (
size_type i = 2; i < child0->tensor_order(); ++i)
6776 nn *= child0->tensor_proper_size(i);
6777 if (child0->nb_test_functions() == 0)
6778 pgai = std::make_shared<ga_instruction_transpose_no_test>
6779 (pnode->tensor(), child0->tensor(), n1, n2, nn);
6781 pgai = std::make_shared<ga_instruction_transpose>
6782 (pnode->tensor(), child0->tensor(), n1, n2, nn);
6783 rmi.instructions.push_back(std::move(pgai));
6785 pnode->t.set_to_copy(child0->t);
6790 if (pnode->tensor_proper_size() != 1) {
6791 pgai = std::make_shared<ga_instruction_sym>
6792 (pnode->tensor(), child0->tensor());
6793 rmi.instructions.push_back(std::move(pgai));
6795 pnode->t.set_to_copy(child0->t);
6801 pgai = std::make_shared<ga_instruction_skew>
6802 (pnode->tensor(), child0->tensor());
6803 rmi.instructions.push_back(std::move(pgai));
6809 size_type N = (child0->tensor_proper_size() == 1) ? 1:size0.back();
6811 pnode->t.set_to_copy(child0->t);
6813 pgai = std::make_shared<ga_instruction_trace>
6814 (pnode->tensor(), child0->tensor(), N);
6815 rmi.instructions.push_back(std::move(pgai));
6822 size_type N = (child0->tensor_proper_size() == 1) ? 1:size0.back();
6823 pgai = std::make_shared<ga_instruction_deviator>
6824 (pnode->tensor(), child0->tensor(), N);
6825 rmi.instructions.push_back(std::move(pgai));
6831 if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
6832 pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
6833 (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
6834 }
else if (child0->tensor().size() == 1) {
6835 pnode->t.set_sparsity(child1->t.sparsity(), child1->t.qdim());
6836 pgai = std::make_shared<ga_instruction_scalar_mult>
6837 (pnode->tensor(), child1->tensor(), child0->tensor()[0]);
6839 else if (child1->tensor().size() == 1) {
6840 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6841 pgai = std::make_shared<ga_instruction_scalar_mult>
6842 (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
6844 else if (child1->test_function_type == 0)
6845 pgai = std::make_shared<ga_instruction_dotmult>
6846 (pnode->tensor(), child0->tensor(), child1->tensor());
6847 else if (child0->test_function_type == 0)
6848 pgai = std::make_shared<ga_instruction_dotmult>
6849 (pnode->tensor(), child1->tensor(), child0->tensor());
6850 else if (child0->test_function_type == 1)
6851 pgai = std::make_shared<ga_instruction_dotmult_spec>
6852 (pnode->tensor(), child0->tensor(), child1->tensor());
6854 pgai = std::make_shared<ga_instruction_dotmult_spec>
6855 (pnode->tensor(), child1->tensor(), child0->tensor());
6857 rmi.instructions.push_back(std::move(pgai));
6862 if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
6863 pgai = std::make_shared<ga_instruction_scalar_scalar_div>
6864 (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
6865 }
else if (child1->tensor().size() == 1) {
6866 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6867 pgai = std::make_shared<ga_instruction_scalar_div>
6868 (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
6869 }
else if (child1->test_function_type == 0) {
6870 pgai = std::make_shared<ga_instruction_dotdiv>
6871 (pnode->tensor(), child0->tensor(), child1->tensor());
6872 }
else GMM_ASSERT1(
false,
"Internal error");
6873 rmi.instructions.push_back(std::move(pgai));
6878 if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
6879 pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
6880 (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
6881 }
else if (child0->tensor().size() == 1) {
6882 pnode->t.set_sparsity(child1->t.sparsity(), child1->t.qdim());
6883 pgai = std::make_shared<ga_instruction_scalar_mult>
6884 (pnode->tensor(), child1->tensor(), child0->tensor()[0]);
6886 else if (child1->tensor().size() == 1) {
6887 pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6888 pgai = std::make_shared<ga_instruction_scalar_mult>
6889 (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
6891 else if (child1->test_function_type == 0) {
6893 pgai = ga_uniform_instruction_simple_tmult
6894 (pnode->tensor(), child0->tensor(), child1->tensor());
6896 pgai = std::make_shared<ga_instruction_simple_tmult>
6897 (pnode->tensor(), child0->tensor(), child1->tensor());
6898 }
else if (child1->tensor_proper_size() == 1)
6899 pgai = std::make_shared<ga_instruction_spec2_tmult>
6900 (pnode->tensor(), child0->tensor(), child1->tensor());
6902 pgai = std::make_shared<ga_instruction_spec_tmult>
6903 (pnode->tensor(), child0->tensor(), child1->tensor(),
6904 child0->tensor_proper_size(),
6905 child1->tensor_proper_size());
6907 rmi.instructions.push_back(std::move(pgai));
6910 default:GMM_ASSERT1(
false,
"Unexpected operation. Internal error.");
6914 case GA_NODE_C_MATRIX:
6916 if (pnode->test_function_type) {
6917 std::vector<const base_tensor *> components(pnode->children.size());
6918 for (
size_type i = 0; i < pnode->children.size(); ++i)
6919 components[i] = &(pnode->children[i]->tensor());
6920 pgai = std::make_shared<ga_instruction_c_matrix_with_tests>
6921 (pnode->tensor(), components);
6923 std::vector<scalar_type *> components(pnode->children.size());
6924 for (
size_type i = 0; i < pnode->children.size(); ++i)
6925 components[i] = &(pnode->children[i]->tensor()[0]);
6926 pgai = std::make_shared<ga_instruction_simple_c_matrix>
6927 (pnode->tensor(), components);
6929 rmi.instructions.push_back(std::move(pgai));
6933 case GA_NODE_PARAMS:
6934 if (child0->node_type == GA_NODE_RESHAPE) {
6935 pgai = std::make_shared<ga_instruction_copy_tensor>(pnode->tensor(),
6937 rmi.instructions.push_back(std::move(pgai));
6938 }
else if (child0->node_type == GA_NODE_CROSS_PRODUCT) {
6939 pga_tree_node child2 = pnode->children[2];
6940 if (child1->test_function_type==2 && child2->test_function_type==1)
6941 pgai = std::make_shared<ga_instruction_cross_product_tf>
6942 (pnode->tensor(), child2->tensor(), child1->tensor(),
true);
6943 else if (child1->test_function_type || child2->test_function_type)
6944 pgai = std::make_shared<ga_instruction_cross_product_tf>
6945 (pnode->tensor(), child1->tensor(), child2->tensor(),
false);
6947 pgai = std::make_shared<ga_instruction_cross_product>
6948 (pnode->tensor(), child1->tensor(), child2->tensor());
6949 rmi.instructions.push_back(std::move(pgai));
6950 }
else if (child0->node_type == GA_NODE_IND_MOVE_LAST) {
6952 ind =
size_type(round(pnode->children[2]->tensor()[0])-1);
6954 for (
size_type i = 0; i < child1->tensor_order(); ++i)
6955 if (i>ind) ii2 *= child1->tensor_proper_size(i);
6956 size_type nn = child1->tensor_proper_size(ind);
6957 pgai = std::make_shared<ga_instruction_index_move_last>
6958 (pnode->tensor(), child1->tensor(), nn, ii2);
6959 rmi.instructions.push_back(std::move(pgai));
6960 }
else if (child0->node_type == GA_NODE_SWAP_IND) {
6963 ind[i] =
size_type(round(pnode->children[i]->tensor()[0])-1);
6964 if (ind[2] > ind[3]) std::swap(ind[2], ind[3]);
6966 for (
size_type i = 0; i < child1->tensor_order(); ++i) {
6967 if (i>ind[2] && i<ind[3]) ii2 *= child1->tensor_proper_size(i);
6968 if (i>ind[3]) ii3 *= child1->tensor_proper_size(i);
6970 size_type nn1 = child1->tensor_proper_size(ind[2]);
6971 size_type nn2 = child1->tensor_proper_size(ind[3]);
6973 pgai = std::make_shared<ga_instruction_swap_indices>
6974 (pnode->tensor(), child1->tensor(), nn1, nn2, ii2, ii3);
6975 rmi.instructions.push_back(std::move(pgai));
6976 }
else if (child0->node_type == GA_NODE_CONTRACT) {
6977 std::vector<size_type> ind(2), indsize(2);
6978 pga_tree_node child2(0);
6979 if (pnode->children.size() == 4)
6980 { ind[0] = 2; ind[1] = 3; }
6981 else if (pnode->children.size() == 5)
6982 { ind[0] = 2; ind[1] = 4; child2 = pnode->children[3]; }
6983 else if (pnode->children.size() == 7) {
6984 ind.resize(4); indsize.resize(4);
6985 ind[0] = 2; ind[1] = 3; ind[2] = 5; ind[3] = 6;
6986 child2 = pnode->children[4];
6989 for (
size_type i = 1; i < pnode->children.size(); ++i) {
6991 ind[kk] =
size_type(round(pnode->children[i]->tensor()[0])-1);
6992 indsize[kk] = pnode->children[ll]->tensor_proper_size(ind[kk]);
6997 if (pnode->children.size() == 4) {
6999 if (i1 > i2) std::swap(i1, i2);
7001 for (
size_type i = 0; i < child1->tensor_order(); ++i) {
7002 if (i > i1 && i < i2) ii2 *= child1->tensor_proper_size(i);
7003 if (i > i2) ii3 *= child1->tensor_proper_size(i);
7005 pgai = std::make_shared<ga_instruction_contract_1_1>
7006 (pnode->tensor(), child1->tensor(), indsize[0], ii2, ii3);
7008 else if (pnode->children.size() == 5) {
7011 size_type ii1 = 1, ii2 = 1, ii3 = 1, ii4 = 1;
7012 for (
size_type i = 0; i < child1->tensor_order(); ++i) {
7013 if (i < i1) ii1 *= child1->tensor_proper_size(i);
7014 if (i > i1) ii2 *= child1->tensor_proper_size(i);
7016 for (
size_type i = 0; i < child2->tensor_order(); ++i) {
7017 if (i < i2) ii3 *= child2->tensor_proper_size(i);
7018 if (i > i2) ii4 *= child2->tensor_proper_size(i);
7020 if (child1->test_function_type==1 && child2->test_function_type==2)
7021 pgai = std::make_shared<ga_instruction_contract_2_1_rev>
7022 (pnode->tensor(), child1->tensor(), child2->tensor(),
7023 indsize[0], ii1, ii2, ii3, ii4);
7025 pgai = std::make_shared<ga_instruction_contract_2_1>
7026 (pnode->tensor(), child1->tensor(), child2->tensor(),
7027 indsize[0], ii1, ii2, ii3, ii4);
7029 else if (pnode->children.size() == 7) {
7031 size_type i1 = ind[0], i2 = ind[1], i3 = ind[2], i4 = ind[3];
7032 size_type nn1 = indsize[0], nn2 = indsize[1];
7033 size_type ii1 = 1, ii2 = 1, ii3 = 1, ii4 = 1, ii5 = 1, ii6 = 1;
7035 { std::swap(i1, i2); std::swap(i3, i4); std::swap(nn1, nn2); }
7036 for (
size_type i = 0; i < child1->tensor_order(); ++i) {
7037 if (i < i1) ii1 *= child1->tensor_proper_size(i);
7038 if (i > i1 && i < i2) ii2 *= child1->tensor_proper_size(i);
7039 if (i > i2) ii3 *= child1->tensor_proper_size(i);
7041 for (
size_type i = 0; i < child2->tensor_order(); ++i) {
7042 if (i < i3 && i < i4) ii4 *= child2->tensor_proper_size(i);
7043 if ((i > i3 && i < i4) || (i > i4 && i < i3))
7044 ii5 *= child2->tensor_proper_size(i);
7045 if (i > i3 && i > i4) ii6 *= child2->tensor_proper_size(i);
7047 if (child1->test_function_type==1 && child2->test_function_type==2)
7048 pgai = std::make_shared<ga_instruction_contract_2_2_rev>
7049 (pnode->tensor(), child1->tensor(), child2->tensor(),
7050 nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6, i4 < i3);
7052 pgai = std::make_shared<ga_instruction_contract_2_2>
7053 (pnode->tensor(), child1->tensor(), child2->tensor(),
7054 nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6, i4 < i3);
7056 rmi.instructions.push_back(std::move(pgai));
7057 }
else if (child0->node_type == GA_NODE_PREDEF_FUNC) {
7059 std::string name = child0->name;
7060 const ga_predef_function_tab &PREDEF_FUNCTIONS
7062 ga_predef_function_tab::const_iterator it = PREDEF_FUNCTIONS.find(name);
7063 const ga_predef_function &F = it->second;
7065 pga_tree_node child2 = (nbargs == 2) ? pnode->children[2] : child1;
7068 if (child1->tensor().size() == 1) {
7070 pgai = std::make_shared<ga_instruction_eval_func_1arg_1res>
7071 (pnode->tensor()[0], child1->tensor()[0], F.f1());
7073 pgai = std::make_shared<ga_instruction_eval_func_1arg_1res_expr>
7074 (pnode->tensor()[0], child1->tensor()[0], F);
7077 pgai = std::make_shared<ga_instruction_eval_func_1arg>
7078 (pnode->tensor(), child1->tensor(), F.f1());
7080 pgai = std::make_shared<ga_instruction_eval_func_1arg_expr>
7081 (pnode->tensor(), child1->tensor(), F);
7084 if (child1->tensor().size() == 1 && child2->tensor().size() == 1) {
7086 pgai = std::make_shared<ga_instruction_eval_func_2arg_1res>
7087 (pnode->tensor()[0], child1->tensor()[0], child2->tensor()[0],
7090 pgai = std::make_shared<ga_instruction_eval_func_2arg_1res_expr>
7091 (pnode->tensor()[0], child1->tensor()[0], child2->tensor()[0],
7093 }
else if (child1->tensor().size() == 1) {
7096 std::make_shared<ga_instruction_eval_func_2arg_first_scalar>
7097 (pnode->tensor(), child1->tensor(), child2->tensor(), F.f2());
7100 std::make_shared<ga_instruction_eval_func_2arg_first_scalar_expr>
7101 (pnode->tensor(), child1->tensor(), child2->tensor(), F);
7102 }
else if (child2->tensor().size() == 1) {
7105 std::make_shared<ga_instruction_eval_func_2arg_second_scalar>
7106 (pnode->tensor(), child1->tensor(), child2->tensor(), F.f2());
7109 std::make_shared<ga_instruction_eval_func_2arg_second_scalar_expr>
7110 (pnode->tensor(), child1->tensor(), child2->tensor(), F);
7113 pgai = std::make_shared<ga_instruction_eval_func_2arg>
7114 (pnode->tensor(), child1->tensor(), child2->tensor(), F.f2());
7116 pgai = std::make_shared<ga_instruction_eval_func_2arg_expr>
7117 (pnode->tensor(), child1->tensor(), child2->tensor(), F);
7120 rmi.instructions.push_back(std::move(pgai));
7122 }
else if (child0->node_type == GA_NODE_SPEC_FUNC) {
7124 GMM_ASSERT1(
false,
"Internal error");
7126 }
else if (child0->node_type == GA_NODE_OPERATOR) {
7128 ga_predef_operator_tab &PREDEF_OPERATORS
7130 ga_predef_operator_tab::T::iterator it
7131 = PREDEF_OPERATORS.tab.find(child0->name);
7132 const ga_nonlinear_operator &OP = *(it->second);
7133 ga_nonlinear_operator::arg_list args;
7134 for (
size_type i = 1; i < pnode->children.size(); ++i)
7135 args.push_back(&(pnode->children[i]->tensor()));
7137 if (child0->der1 && child0->der2 == 0) {
7138 pgai = std::make_shared<ga_instruction_eval_derivative_OP>
7139 (pnode->tensor(), OP, args, child0->der1);
7140 }
else if (child0->der1 && child0->der2) {
7141 pgai = std::make_shared<ga_instruction_eval_second_derivative_OP>
7142 (pnode->tensor(), OP, args, child0->der1, child0->der2);
7144 pgai = std::make_shared<ga_instruction_eval_OP>(pnode->tensor(),
7147 rmi.instructions.push_back(std::move(pgai));
7150 bgeot::multi_index mi1(size0.size()), indices;
7151 size_type nb_test = pnode->nb_test_functions();
7152 if (pnode->tensor().size() == 1) {
7153 for (
size_type i = 0; i < child0->tensor_order(); ++i)
7154 mi1[i+nb_test] =
size_type(round(pnode->children[i+1]->tensor()[0])-1);
7155 pgai = std::make_shared<ga_instruction_copy_scalar>
7156 (pnode->tensor()[0], child0->tensor()(mi1));
7158 for (
size_type i = 0; i < nb_test; ++i) indices.push_back(i);
7159 for (
size_type i = 0; i < child0->tensor_order(); ++i) {
7160 if (pnode->children[i+1]->node_type != GA_NODE_ALLINDICES)
7162 =
size_type(round(pnode->children[i+1]->tensor()[0])- 1);
7164 indices.push_back(i+nb_test);
7166 pgai = std::make_shared<ga_instruction_tensor_slice>
7167 (pnode->tensor(), child0->tensor(), mi1, indices);
7169 rmi.instructions.push_back(std::move(pgai));
7174 default:GMM_ASSERT1(
false,
"Unexpected node type " << pnode->node_type
7175 <<
" in compilation. Internal error.");
7177 if (tensor_to_clear) {
7180 pgai = std::make_shared<ga_instruction_clear_tensor>(pnode->tensor());
7181 rmi.elt_instructions.push_back(std::move(pgai));
7184 rmi.node_list[pnode->hash_value].push_back(pnode);
// Compile every tree of the workspace into an instruction stream for
// direct (function) evaluation: each tree root is compiled with
// ga_compile_node and a final add-to-coefficient instruction accumulates
// the root tensor into the workspace's assembled tensor.
// When `scalar` is true, asserts that each expression evaluates to a
// single scalar value.
// NOTE(review): this listing is a lossy extract — original lines 7191,
// 7202 and 7209+ (including closing braces) are not visible here; the
// tokens below are kept verbatim.
7187 void ga_compile_function(ga_workspace &workspace,
7188 ga_instruction_set &gis,
bool scalar) {
// Iterate over all assembly trees registered in the workspace.
7189 for (
size_type i = 0; i < workspace.nb_trees(); ++i) {
7190 const ga_workspace::tree_description &td = workspace.tree_info(i);
// Keep a private copy of the tree; instructions reference its nodes.
7192 gis.trees.push_back(*(td.ptree));
7193 pga_tree_node root = gis.trees.back().root;
// Enforce the scalar-result contract requested by the caller.
7195 GMM_ASSERT1(!scalar || (root->tensor().size() == 1),
7196 "The result of the given expression is not a scalar");
// One instruction list per (mim, region) pair; psd slot is null (0).
7197 ga_instruction_set::region_mim rm(td.mim, td.rg, 0);
7198 gis.all_instructions[rm].m = td.m;
7199 ga_if_hierarchy if_hierarchy;
7200 ga_compile_node(root, workspace, gis, gis.all_instructions[rm],
7201 *(td.m),
true, if_hierarchy);
// Accumulate root tensor into the assembled tensor with unit coeff.
7203 gis.coeff = scalar_type(1);
7204 pga_instruction pgai;
7205 workspace.assembled_tensor() = root->tensor();
7206 pgai = std::make_shared<ga_instruction_add_to_coeff>
7207 (workspace.assembled_tensor(), root->tensor(), gis.coeff);
7208 gis.all_instructions[rm].instructions.push_back(std::move(pgai));
// Recursively collect, for the subtree rooted at `pnode`, the names of
// the interpolate transformations it uses (`interpolates`, mapping
// transformation name -> set of variable-group names used through it)
// and the transformations whose derivative is needed
// (`interpolates_der`).  Returns (presumably) whether any interpolate
// node was found — the return statement is outside this lossy extract.
// NOTE(review): original lines 7217, 7226, 7235-7237, 7243 and 7247+
// are not visible here; tokens below are kept verbatim.
7213 static bool ga_node_used_interpolates
7214 (
const pga_tree_node pnode,
const ga_workspace &workspace,
7215 std::map<std::string, std::set<std::string> > &interpolates,
7216 std::set<std::string> &interpolates_der) {
// Is this node a direct interpolate value/grad/hess/diverg node?
7218 bool intrpl(pnode->node_type == GA_NODE_INTERPOLATE_VAL ||
7219 pnode->node_type == GA_NODE_INTERPOLATE_GRAD ||
7220 pnode->node_type == GA_NODE_INTERPOLATE_HESS ||
7221 pnode->node_type == GA_NODE_INTERPOLATE_DIVERG);
// ... or the test-function counterpart of one of those?
7222 bool intrpl_test(pnode->node_type == GA_NODE_INTERPOLATE_VAL_TEST ||
7223 pnode->node_type == GA_NODE_INTERPOLATE_GRAD_TEST ||
7224 pnode->node_type == GA_NODE_INTERPOLATE_HESS_TEST ||
7225 pnode->node_type == GA_NODE_INTERPOLATE_DIVERG_TEST);
7227 if (intrpl || intrpl_test ||
7228 pnode->node_type == GA_NODE_INTERPOLATE_FILTER ||
7229 pnode->node_type == GA_NODE_INTERPOLATE_X ||
7230 pnode->node_type == GA_NODE_INTERPOLATE_NORMAL) {
// .size() is used only for its side effect: default-construct the
// map entry so the transformation name is registered even with no
// associated variable group.
7231 interpolates[pnode->interpolate_name].size();
7232 if (intrpl || intrpl_test) {
7233 if (workspace.variable_group_exists(pnode->name))
7234 interpolates[pnode->interpolate_name].insert(pnode->name);
// Derivative of an interpolate transformation: record both the
// derivative request and the transformation itself.
7238 if (pnode->node_type == GA_NODE_INTERPOLATE_DERIVATIVE) {
7239 interpolates_der.insert(pnode->interpolate_name_der);
7240 interpolates[pnode->interpolate_name_der].size();
7241 if (workspace.variable_group_exists(pnode->name))
7242 interpolates[pnode->interpolate_name_der].insert(pnode->name);
// Recurse into all children, accumulating the result.
7244 for (
size_type i = 0; i < pnode->children.size(); ++i)
7245 found = ga_node_used_interpolates(pnode->children[i], workspace,
7246 interpolates, interpolates_der)
// For every interpolate transformation used in the subtree `pnode`,
// push (once per region_mim_instructions `rmi`) the instruction that
// performs the transformation call at each Gauss point, plus one
// group-info update instruction per variable group accessed through it.
// The neighbor-element transformation gets a dedicated instruction that
// also uses the Gauss-point correspondence cache.
// NOTE(review): lossy extract — original lines 7255-7256 (tail of the
// parameter list, likely `const mesh &m`), 7260-7261, 7277, 7282,
// 7284-7285 and 7294+ (closing braces) are not visible; tokens below
// are kept verbatim.
7252 static void ga_compile_interpolate_trans
7253 (
const pga_tree_node pnode,
const ga_workspace &workspace,
7254 ga_instruction_set &gis, ga_instruction_set::region_mim_instructions &rmi,
// Gather the transformations (and which need derivatives) used below.
7257 std::set<std::string> interpolates_der;
7258 std::map<std::string, std::set<std::string> > transformations;
7259 ga_node_used_interpolates(pnode, workspace, transformations,
7262 for (
const auto &transformation : transformations) {
7263 const std::string &transname = transformation.first;
7264 bool compute_der = (interpolates_der.count(transname) != 0);
// Emit the transformation-call instruction only the first time this
// transformation (or its derivative variant) is seen for this rmi.
7265 if (rmi.transformations.count(transname) == 0 ||
7266 (compute_der && rmi.transformations_der.count(transname) == 0)) {
// .size() only default-constructs the entry (registration side effect).
7267 rmi.transformations[transname].size();
7268 gis.transformations.insert(transname);
7269 if (compute_der) rmi.transformations_der.insert(transname);
7270 pga_instruction pgai;
// "neighbour_elt" is kept as a deprecated alias of "neighbor_element".
7271 if (transname.compare(
"neighbor_element") == 0 ||
7272 transname.compare(
"neighbour_elt") == 0) {
7273 pgai = std::make_shared<ga_instruction_neighbor_transformation_call>
7274 (workspace, rmi.interpolate_infos[transname],
7275 workspace.interpolate_transformation(transname), gis.ctx,
7276 m, gis.ipt, gis.pai, gis.gp_pool, gis.neighbor_corresp);
7278 pgai = std::make_shared<ga_instruction_transformation_call>
7279 (workspace, rmi.interpolate_infos[transname],
7280 workspace.interpolate_transformation(transname), gis.ctx,
7281 gis.Normal, m, compute_der);
7283 if (pgai) rmi.instructions.push_back(std::move(pgai));
// One group-info update per variable group used through this
// transformation, emitted only once per (transname, nodename) pair.
7286 for (
const std::string &nodename : transformation.second) {
7287 if (rmi.transformations[transname].count(nodename) == 0) {
7288 auto&& inin = rmi.interpolate_infos[transname];
7289 pga_instruction pgai =
7290 std::make_shared<ga_instruction_update_group_info>
7291 (workspace, gis, inin, nodename, inin.groups_info[nodename]);
7292 rmi.instructions.push_back(std::move(pgai));
7293 rmi.transformations[transname].insert(nodename);
// Compile the workspace trees used for interpolation (all trees whose
// operation is not ASSEMBLY): re-run semantic analysis on the copied
// tree, compile interpolate transformations and the node itself, then
// append an add-to instruction that stores the root tensor into the
// workspace's assembled tensor (no coefficient, unlike assembly).
// NOTE(review): lossy extract — original lines 7307-7308, 7314-7315,
// 7318-7320, 7324-7325 and 7330+ (closing braces) are not visible;
// tokens below are kept verbatim.
7299 void ga_compile_interpolation(ga_workspace &workspace,
7300 ga_instruction_set &gis) {
// Start from a clean instruction set.
7301 gis.transformations.clear();
7302 gis.all_instructions.clear();
7303 for (
size_type i = 0; i < workspace.nb_trees(); ++i) {
7304 const ga_workspace::tree_description &td = workspace.tree_info(i);
// Only non-assembly (interpolation/assignment) trees are handled here.
7305 if (td.operation != ga_workspace::ASSEMBLY) {
7306 gis.trees.push_back(*(td.ptree));
7309 const mesh *m = td.m;
7310 GMM_ASSERT1(m,
"Internal error");
// Re-analyze the copied tree in the context of this mesh/region.
7311 ga_semantic_analysis(gis.trees.back(), workspace, *m,
7312 ref_elt_dim_of_mesh(*m, *(td.rg)),
true,
false);
7313 pga_tree_node root = gis.trees.back().root;
// psd slot is null (0): no secondary domain for interpolation.
7316 ga_instruction_set::region_mim rm(td.mim, td.rg, 0);
7317 auto &rmi = gis.all_instructions[rm];
// Transformations first, then the expression itself (function_case
// flag is false here, in contrast with ga_compile_function).
7321 ga_compile_interpolate_trans(root, workspace, gis, rmi, *(td.m));
7322 ga_compile_node(root, workspace, gis,rmi, *(td.m),
false,
7323 rmi.current_hierarchy);
// Store the result straight into the assembled tensor.
7326 workspace.assembled_tensor() = root->tensor();
7327 pga_instruction pgai = std::make_shared<ga_instruction_add_to>
7328 (workspace.assembled_tensor(), root->tensor());
7329 rmi.instructions.push_back(std::move(pgai));
// Bidirectional name <-> id registry used by the condensation machinery:
// a map from variable name to a numeric id, with a reverse lookup that
// scans the map.  The non-const operator[] (partially visible here)
// looks the name up and — per the missing lines 7342-7347, presumably —
// inserts it with a fresh id when absent; TODO confirm against the full
// source.  Tokens below are kept verbatim from the lossy extract.
7336 struct var_set : std::map<std::string,size_type> {
// Name -> id lookup (inserting, per the non-const qualification).
7338 size_type operator[](
const std::string &name) {
7341 auto it = find(name);
// Reverse lookup: id -> name, by linear scan over the map; returns an
// empty string when the id is not registered.
7348 std::string operator[](
const size_type &
id)
const {
7349 for (
const auto &key_value : *
this)
7350 if (key_value.second ==
id)
7351 return key_value.first;
7352 return std::string(
"");
// Bookkeeping for static condensation of internal variables: variable
// registries (I/J/Q index sets), the clustering of condensed (Q)
// variables into coupled groups, and per-cluster storage of the
// sub-matrix/vector tensors involved in the condensation.
// NOTE(review): lossy extract — original lines 7359-7360, 7362-7366,
// 7368, 7370-7372 and 7374-7376 (further members, e.g. the trailing
// declarators of KQQ and RI) are not visible; tokens kept verbatim.
7357 struct condensation_description {
// Registries of row (I), column (J) and condensed (Q) variables.
7358 var_set Ivars, Jvars, Qvars;
// Clusters of mutually coupled Q variables and the J variables they touch.
7361 std::vector<std::set<size_type>> Qclusters, Jclusters;
// For each Q variable id, the index of the cluster containing it.
7367 std::vector<size_type> cluster_of_Qvar;
// Pointers to the tensors holding the QxQ coupling blocks (declarator
// list continues on missing lines).
7369 gmm::dense_matrix<base_tensor *> KQQ,
// Right-hand-side tensors per I variable (declarator list continues
// on missing lines).
7373 std::vector<base_tensor *> RI,
7377 void ga_compile(ga_workspace &workspace,
7378 ga_instruction_set &gis,
size_type order,
bool condensation) {
7379 gis.transformations.clear();
7380 gis.all_instructions.clear();
7381 gis.unreduced_terms.clear();
7382 workspace.clear_temporary_variable_intervals();
7384 std::map<const ga_instruction_set::region_mim, condensation_description>
7387 if (condensation && order == 2) {
7388 for (
size_type i = 0; i < workspace.nb_trees(); ++i) {
7389 ga_workspace::tree_description &td = workspace.tree_info(i);
7390 if (td.order != 2 && td.order !=
size_type(-1))
7392 ga_tree tree(*(td.ptree));
7393 ga_semantic_analysis(tree, workspace, td.mim->linked_mesh(),
7394 ref_elt_dim_of_mesh(td.mim->linked_mesh(),*(td.rg)),
7396 pga_tree_node root = tree.root;
7399 v1_is_intern = workspace.is_internal_variable(root->name_test1),
7400 v2_is_intern = workspace.is_internal_variable(root->name_test2);
7401 if (v1_is_intern || v2_is_intern) {
7402 GMM_ASSERT1(tree.secondary_domain.empty(),
7403 "Condensed variable cannot be used in secondary domain");
7405 for (
const auto &key_val : condensations) {
7406 const ga_instruction_set::region_mim rm0 = key_val.first;
7407 const condensation_description &CC0 = key_val.second;
7408 if (rm0.mim() == td.mim && rm0.region() != td.rg
7409 && (CC0.Qvars.count(root->name_test1) ||
7410 CC0.Qvars.count(root->name_test2))) {
7412 (*(rm0.region()), *(td.rg));
7413 GMM_ASSERT1(intrsct.is_empty(),
7414 "Cannot condense coupled variables between "
7415 "intersecting regions");
7418 const ga_instruction_set::region_mim rm(td.mim, td.rg,
nullptr);
7420 condensation_description &CC = condensations[rm];
7422 q1 = v1_is_intern ? CC.Qvars[root->name_test1] :
size_type(-1),
7423 q2 = v2_is_intern ? CC.Qvars[root->name_test2] :
size_type(-1);
7425 std::vector<size_type> selected_clusters;
7426 for (
size_type j=0; j < CC.Qclusters.size(); ++j)
7427 if (CC.Qclusters[j].count(q1) || CC.Qclusters[j].count(q2))
7428 selected_clusters.push_back(j);
7430 if (selected_clusters.empty()) {
7431 CC.Qclusters.push_back(std::set<size_type>());
7432 if (q1 !=
size_type(-1)) CC.Qclusters.back().insert(q1);
7433 if (q2 !=
size_type(-1)) CC.Qclusters.back().insert(q2);
7435 auto &target = CC.Qclusters[selected_clusters[0]];
7436 if (q1 !=
size_type(-1)) target.insert(q1);
7437 if (q2 !=
size_type(-1)) target.insert(q2);
7438 for (
size_type j=selected_clusters.size()-1; j > 1; --j) {
7439 auto &source = CC.Qclusters[selected_clusters[j]];
7440 target.insert(source.begin(), source.end());
7441 CC.Qclusters.erase(CC.Qclusters.begin() + selected_clusters[j]);
7448 for (
auto &key_value : condensations) {
7449 condensation_description &CC = key_value.second;
7458 CC.Jclusters.resize(CC.Qclusters.size());
7460 CC.cluster_of_Qvar.resize(Qsize);
7461 for (
size_type i=0; i < CC.Qclusters.size(); ++i)
7462 for (
const size_type &var : CC.Qclusters[i])
7463 CC.cluster_of_Qvar[var] = i;
7468 CC.KQQ.resize(Qsize, Qsize);
7469 CC.RQpr.resize(Qsize);
7471 bgeot::multi_index mi(1);
7472 mi[0] = workspace.associated_im_data(CC.Qvars[q]) ->nb_tensor_elem();
7473 gis.condensation_tensors.push_back
7474 (std::make_shared<base_tensor>(mi));
7475 CC.RQpr[q] = gis.condensation_tensors.back().get();
7480 std::array<ga_workspace::operation_type,3>
7481 phases{ga_workspace::PRE_ASSIGNMENT,
7482 ga_workspace::ASSEMBLY,
7483 ga_workspace::POST_ASSIGNMENT};
7484 for (
const auto &phase : phases) {
7486 for (
size_type i = 0; i < workspace.nb_trees(); ++i) {
7487 ga_workspace::tree_description &td = workspace.tree_info(i);
7488 if (td.operation != phase)
7491 if (td.order == order || td.order ==
size_type(-1)) {
7492 std::list<ga_tree> &trees = (phase == ga_workspace::ASSEMBLY)
7494 : gis.interpolation_trees;
7495 trees.push_back(*(td.ptree));
7497 ga_semantic_analysis(trees.back(), workspace, td.mim->linked_mesh(),
7498 ref_elt_dim_of_mesh(td.mim->linked_mesh(),*(td.rg)),
7500 pga_tree_node root = trees.back().root;
7505 psecondary_domain psd(0);
7506 if (trees.back().secondary_domain.size())
7507 psd = workspace.secondary_domain(trees.back().secondary_domain);
7508 ga_instruction_set::region_mim rm(td.mim, td.rg, psd);
7509 auto &rmi = gis.all_instructions[rm];
7513 ga_compile_interpolate_trans(root, workspace, gis, rmi, *(td.m));
7514 ga_compile_node(root, workspace, gis, rmi, *(td.m),
false,
7515 rmi.current_hierarchy);
7519 if (phase != ga_workspace::ASSEMBLY) {
7520 if (!td.varname_interpolation.empty()) {
7522 = workspace.associated_im_data(td.varname_interpolation);
7523 auto &V =
const_cast<model_real_plain_vector &
>
7524 (workspace.value(td.varname_interpolation));
7525 GMM_ASSERT1(imd,
"Internal error");
7526 auto pgai = std::make_shared<ga_instruction_assignment>
7527 (root->tensor(), V, gis.ctx, imd);
7528 rmi.instructions.push_back(std::move(pgai));
7531 pga_instruction pgai;
7534 workspace.assembled_tensor() = root->tensor();
7535 pgai = std::make_shared<ga_instruction_add_to_coeff>
7536 (workspace.assembled_tensor(), root->tensor(), gis.coeff);
7540 GMM_ASSERT1(root->tensor_proper_size() == 1,
7541 "Invalid vector or tensor quantity. An order 1 "
7542 "weak form has to be a scalar quantity");
7543 const mesh_fem *
const
7544 mf = workspace.associated_mf(root->name_test1);
7545 const im_data *
const
7546 imd = workspace.associated_im_data(root->name_test1);
7547 workspace.add_temporary_interval_for_unreduced_variable
7550 base_vector &Vu = workspace.unreduced_vector(),
7551 &Vr = workspace.assembled_vector();
7553 const std::string &intn1 = root->interpolate_name_test1;
7554 bool secondary = !intn1.empty() &&
7555 workspace.secondary_domain_exists(intn1);
7556 fem_interpolation_context
7557 &ctx = intn1.empty() ? gis.ctx
7558 : (secondary ? rmi.secondary_domain_infos.ctx
7559 : rmi.interpolate_infos[intn1].ctx);
7561 !(intn1.empty() || intn1 ==
"neighbor_element"
7562 || intn1 ==
"neighbour_elt" || secondary);
7564 if (intn1.size() && !secondary &&
7565 workspace.variable_group_exists(root->name_test1)) {
7566 ga_instruction_set::variable_group_info
7567 &vgi = rmi.interpolate_infos[intn1]
7568 .groups_info[root->name_test1];
7569 pgai = std::make_shared<ga_instruction_vector_assembly_mf>
7570 (root->tensor(), Vr, Vu, ctx,
7571 vgi.I, vgi.mf, vgi.reduced_mf,
7572 gis.coeff, gis.nbpt, gis.ipt, interpolate);
7573 for (
const std::string &name
7574 : workspace.variable_group(root->name_test1))
7575 gis.unreduced_terms.emplace(name,
"");
7577 base_vector &V = mf->is_reduced() ? Vu : Vr;
7578 const gmm::sub_interval
7579 &I = mf->is_reduced()
7580 ? workspace.temporary_interval_of_variable
7582 : workspace.interval_of_variable(root->name_test1);
7583 pgai = std::make_shared<ga_instruction_vector_assembly_mf>
7584 (root->tensor(), V, ctx, I, *mf,
7585 gis.coeff, gis.nbpt, gis.ipt, interpolate);
7586 if (mf->is_reduced())
7587 gis.unreduced_terms.emplace(root->name_test1,
"");
7590 GMM_ASSERT1(root->interpolate_name_test1.size() == 0,
7591 "Interpolate transformation on integration "
7593 if (!workspace.is_internal_variable(root->name_test1) ||
7595 pgai = std::make_shared<ga_instruction_vector_assembly_imd>
7596 (root->tensor(), Vr, gis.ctx,
7597 workspace.interval_of_variable(root->name_test1),
7598 *imd, gis.coeff, gis.ipt);
7601 pgai = std::make_shared<ga_instruction_vector_assembly>
7602 (root->tensor(), Vr,
7603 workspace.interval_of_variable(root->name_test1),
7609 GMM_ASSERT1(root->tensor_proper_size() == 1,
7610 "Invalid vector or tensor quantity. An order 2 "
7611 "weak form has to be a scalar quantity");
7612 const mesh_fem *mf1=workspace.associated_mf(root->name_test1),
7613 *mf2=workspace.associated_mf(root->name_test2);
7615 *imd1 = workspace.associated_im_data(root->name_test1),
7616 *imd2 = workspace.associated_im_data(root->name_test2);
7617 const std::string &intn1 = root->interpolate_name_test1,
7618 &intn2 = root->interpolate_name_test2;
7619 bool secondary1 = intn1.size() &&
7620 workspace.secondary_domain_exists(intn1);
7621 bool secondary2 = intn2.size() &&
7622 workspace.secondary_domain_exists(intn2);
7623 fem_interpolation_context
7624 &ctx1 = intn1.empty() ? gis.ctx
7625 : (secondary1 ? rmi.secondary_domain_infos.ctx
7626 : rmi.interpolate_infos[intn1].ctx),
7627 &ctx2 = intn2.empty() ? gis.ctx
7628 : (secondary2 ? rmi.secondary_domain_infos.ctx
7629 : rmi.interpolate_infos[intn2].ctx);
7630 bool interpolate = !(intn1.empty() || intn1 ==
"neighbor_element"
7631 || intn1 ==
"neighbour_elt"
7633 !(intn2.empty() || intn2 ==
"neighbor_element"
7634 || intn2 ==
"neighbour_elt"
7637 workspace.add_temporary_interval_for_unreduced_variable
7639 workspace.add_temporary_interval_for_unreduced_variable
7642 bool has_var_group1 = (!intn1.empty() && !secondary1 &&
7643 workspace.variable_group_exists
7644 (root->name_test1));
7645 bool has_var_group2 = (!intn2.empty() && !secondary2 &&
7646 workspace.variable_group_exists
7647 (root->name_test2));
7648 bool simple = !interpolate &&
7649 !has_var_group1 && !has_var_group2 &&
7650 mf1 && !(mf1->is_reduced()) &&
7651 mf2 && !(mf2->is_reduced());
7654 auto &Krr = workspace.assembled_matrix();
7655 auto &Kru = workspace.col_unreduced_matrix();
7656 auto &Kur = workspace.row_unreduced_matrix();
7657 auto &Kuu = workspace.row_col_unreduced_matrix();
7660 const gmm::sub_interval
7661 &I1 = workspace.interval_of_variable(root->name_test1),
7662 &I2 = workspace.interval_of_variable(root->name_test2);
7664 &alpha1 = workspace.factor_of_variable(root->name_test1),
7665 &alpha2 = workspace.factor_of_variable(root->name_test2);
7666 if (mf1->get_qdim() == 1 && mf2->get_qdim() == 1)
7667 pgai = std::make_shared
7668 <ga_instruction_matrix_assembly_standard_scalar>
7669 (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
7670 alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
7671 else if (root->sparsity() == 10 && root->t.qdim() == 2)
7672 pgai = std::make_shared
7673 <ga_instruction_matrix_assembly_standard_vector_opt10<2>>
7674 (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
7675 alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
7676 else if (root->sparsity() == 10 && root->t.qdim() == 3)
7677 pgai = std::make_shared
7678 <ga_instruction_matrix_assembly_standard_vector_opt10<3>>
7679 (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
7680 alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
7682 pgai = std::make_shared
7683 <ga_instruction_matrix_assembly_standard_vector>
7684 (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
7685 alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
7686 }
else if (condensation &&
7687 workspace.is_internal_variable(root->name_test1) &&
7688 workspace.is_internal_variable(root->name_test2)) {
7692 GMM_ASSERT1(imd1 && imd2,
"Internal error");
7693 GMM_ASSERT1(!interpolate,
"Internal error");
7697 condensation_description &CC = condensations[rm];
7698 GMM_ASSERT1(CC.Qvars.count(root->name_test1) > 0 &&
7699 CC.Qvars.count(root->name_test2) > 0,
7701 size_type q1 = CC.Qvars[root->name_test1],
7702 q2 = CC.Qvars[root->name_test2];
7703 if (!CC.KQQ(q1,q2)) {
7705 gis.condensation_tensors.push_back
7706 (std::make_shared<base_tensor>(s1,s2));
7707 CC.KQQ(q1,q2) = gis.condensation_tensors.back().get();
7708 pgai = std::make_shared<ga_instruction_copy_vect>
7709 (CC.KQQ(q1,q2)->as_vector(), root->tensor().as_vector());
7712 pgai = std::make_shared<ga_instruction_add_to>
7713 (*CC.KQQ(q1,q2), root->tensor());
7715 rmi.instructions.push_back(std::move(pgai));
7716 }
else if (condensation &&
7717 workspace.is_internal_variable(root->name_test1)) {
7721 GMM_ASSERT1(imd1,
"Internal error");
7722 GMM_ASSERT1(!interpolate,
"Internal error");
7725 condensation_description &CC = condensations[rm];
7726 GMM_ASSERT1(CC.Qvars.count(root->name_test1),
7728 size_type q1 = CC.Qvars[root->name_test1],
7729 j2 = CC.Jvars[root->name_test2];
7730 CC.Jclusters[CC.cluster_of_Qvar[q1]].insert(j2);
7731 if (q1 >= CC.KQJ.nrows() || j2 >= CC.KQJ.ncols())
7732 CC.KQJ.resize(std::max(CC.KQJ.nrows(), q1+1),
7733 std::max(CC.KQJ.ncols(), j2+1));
7734 if (!CC.KQJ(q1,j2)) {
7738 gis.condensation_tensors.push_back
7739 (std::make_shared<base_tensor>(root->tensor()));
7740 GMM_ASSERT1(root->tensor().size(0) == s1,
"Internal error");
7741 CC.KQJ(q1,j2) = gis.condensation_tensors.back().get();
7742 pgai = std::make_shared<ga_instruction_copy_vect>
7743 (CC.KQJ(q1,j2)->as_vector(), root->tensor().as_vector());
7747 pgai = std::make_shared<ga_instruction_add_to>
7748 (*CC.KQJ(q1,j2), root->tensor());
7750 rmi.instructions.push_back(std::move(pgai));
7751 }
else if (condensation &&
7752 workspace.is_internal_variable(root->name_test2)) {
7756 GMM_ASSERT1(imd2,
"Internal error");
7757 GMM_ASSERT1(!interpolate,
"Internal error");
7760 condensation_description &CC = condensations[rm];
7761 GMM_ASSERT1(CC.Qvars.count(root->name_test2),
7763 size_type i1 = CC.Ivars[root->name_test1],
7764 q2 = CC.Qvars[root->name_test2];
7765 if (i1 >= CC.KIQ.nrows() || q2 >= CC.KIQ.ncols())
7766 CC.KIQ.resize(std::max(CC.KIQ.nrows(), i1+1),
7767 std::max(CC.KIQ.ncols(), q2+1));
7768 if (!CC.KIQ(i1,q2)) {
7772 gis.condensation_tensors.push_back
7773 (std::make_shared<base_tensor>(root->tensor()));
7774 GMM_ASSERT1(root->tensor().size(1) == s2,
7776 CC.KIQ(i1,q2) = gis.condensation_tensors.back().get();
7777 pgai = std::make_shared<ga_instruction_copy_vect>
7778 (CC.KIQ(i1,q2)->as_vector(), root->tensor().as_vector());
7782 pgai = std::make_shared<ga_instruction_add_to>
7783 (*CC.KIQ(i1,q2), root->tensor());
7785 rmi.instructions.push_back(std::move(pgai));
7786 }
else if (!workspace.is_internal_variable(root->name_test1) &&
7787 !workspace.is_internal_variable(root->name_test2)) {
7789 if ((mf1 && mf1->is_reduced()) || (mf2 && mf2->is_reduced())
7790 || has_var_group1 || has_var_group2)
7791 gis.unreduced_terms.emplace(root->name_test1,
7794 auto &Kxu = (mf1 && mf1->is_reduced()) ? Kuu : Kru;
7795 auto &Kxr = (mf1 && mf1->is_reduced()) ? Kur : Krr;
7796 auto &Kux = (mf2 && mf2->is_reduced()) ? Kuu : Kur;
7797 auto &Krx = (mf2 && mf2->is_reduced()) ? Kru : Krr;
7798 auto &Kxx = (mf2 && mf2->is_reduced()) ? Kxu : Kxr;
7801 &alpha1 = workspace.factor_of_variable(root->name_test1),
7802 &alpha2 = workspace.factor_of_variable(root->name_test2);
7804 if (has_var_group1) {
7805 ga_instruction_set::variable_group_info
7806 &vgi1 = rmi.interpolate_infos[intn1]
7807 .groups_info[root->name_test1];
7808 if (has_var_group2) {
7809 ga_instruction_set::variable_group_info
7810 &vgi2 = rmi.interpolate_infos[intn2]
7811 .groups_info[root->name_test2];
7812 pgai = std::make_shared
7813 <ga_instruction_matrix_assembly_mf_mf>
7814 (root->tensor(), Krr, Kru, Kur, Kuu, ctx1, ctx2,
7816 gis.coeff, gis.nbpt, gis.ipt, interpolate);
7818 const gmm::sub_interval &I2 = mf2 && mf2->is_reduced()
7819 ? workspace.temporary_interval_of_variable
7821 : workspace.interval_of_variable(root->name_test2);
7823 pgai = std::make_shared
7824 <ga_instruction_matrix_assembly_mf_mf>
7825 (root->tensor(), Krx, Kux, ctx1, ctx2,
7826 vgi1, I2, *mf2, alpha2,
7827 gis.coeff, gis.nbpt, gis.ipt, interpolate);
7829 pgai = std::make_shared
7830 <ga_instruction_matrix_assembly_mf_imd>
7831 (root->tensor(), Krr, Kur, ctx1, ctx2,
7832 vgi1, I2, imd2, alpha2, gis.coeff, gis.ipt);
7835 const gmm::sub_interval &I1 = mf1 && mf1->is_reduced()
7836 ? workspace.temporary_interval_of_variable
7838 : workspace.interval_of_variable(root->name_test1);
7839 if (has_var_group2) {
7840 ga_instruction_set::variable_group_info
7841 &vgi2 = rmi.interpolate_infos[intn2]
7842 .groups_info[root->name_test2];
7844 pgai = std::make_shared
7845 <ga_instruction_matrix_assembly_mf_mf>
7846 (root->tensor(), Kxr, Kxu, ctx1, ctx2,
7847 I1, *mf1, alpha1, vgi2,
7848 gis.coeff, gis.nbpt, gis.ipt, interpolate);
7850 pgai = std::make_shared
7851 <ga_instruction_matrix_assembly_imd_mf>
7852 (root->tensor(), Krr, Kru, ctx1, ctx2,
7853 I1, imd1, alpha1, vgi2, gis.coeff, gis.ipt);
7855 const gmm::sub_interval &I2 = mf2 && mf2->is_reduced()
7856 ? workspace.temporary_interval_of_variable
7858 : workspace.interval_of_variable(root->name_test2);
7860 pgai = std::make_shared
7861 <ga_instruction_matrix_assembly_mf_mf>
7862 (root->tensor(), Kxx, ctx1, ctx2,
7863 I1, *mf1, alpha1, I2, *mf2, alpha2,
7864 gis.coeff, gis.nbpt, gis.ipt, interpolate);
7866 pgai = std::make_shared
7867 <ga_instruction_matrix_assembly_mf_imd>
7868 (root->tensor(), Kxr, ctx1, ctx2,
7869 I1, *mf1, alpha1, I2, imd2, alpha2,
7870 gis.coeff, gis.ipt);
7872 pgai = std::make_shared
7873 <ga_instruction_matrix_assembly_imd_mf>
7874 (root->tensor(), Krx, ctx1, ctx2,
7875 I1, imd1, alpha1, I2, *mf2, alpha2,
7876 gis.coeff, gis.ipt);
7878 pgai = std::make_shared
7879 <ga_instruction_matrix_assembly_imd_imd>
7880 (root->tensor(), Krr, ctx1, ctx2,
7881 I1, imd1, alpha1, I2, imd2, alpha2,
7882 gis.coeff, gis.ipt);
7890 rmi.instructions.push_back(std::move(pgai));
7896 if (condensation && order == 2 && phase == ga_workspace::ASSEMBLY) {
7898 auto &Krr = workspace.assembled_matrix();
7899 auto &Kru = workspace.col_unreduced_matrix();
7900 auto &Kur = workspace.row_unreduced_matrix();
7901 auto &Kuu = workspace.row_col_unreduced_matrix();
7903 for (
auto &&key_val : condensations) {
7904 const ga_instruction_set::region_mim rm = key_val.first;
7905 condensation_description &CC = key_val.second;
7906 auto &rmi = gis.all_instructions[rm];
7908 CC.KQJpr.resize(CC.KQJ.nrows(), CC.KQJ.ncols());
7909 for (
size_type k=0; k < CC.KQJpr.size(); ++k) {
7910 gis.condensation_tensors.push_back
7911 (std::make_shared<base_tensor>(2,2));
7912 CC.KQJpr[k] = gis.condensation_tensors.back().get();
7915 pga_instruction pgai;
7918 for (
size_type k=0; k < CC.Qclusters.size(); ++k) {
7922 std::string name_test1 = CC.Qvars[q1];
7923 const im_data *imd1 = workspace.associated_im_data(name_test1);
7924 const gmm::sub_interval
7925 &I1 = workspace.interval_of_variable(name_test1);
7927 std::make_shared<ga_instruction_extract_residual_on_imd_dofs>
7928 (*(CC.RQpr[q1]), workspace.cached_vector(),
7929 gis.ctx, I1, *imd1, gis.ipt);
7930 rmi.instructions.push_back(std::move(pgai));
7936 pgai = std::make_shared<ga_instruction_condensation_sub>
7937 (CC.KQJpr, CC.RQpr, CC.KQQ, CC.KQJ, CC.Qclusters[k], gis.coeff);
7938 rmi.instructions.push_back(std::move(pgai));
7943 std::string name_test1 = CC.Qvars[q1];
7944 const im_data *imd1 = workspace.associated_im_data(name_test1);
7947 const gmm::sub_interval
7948 &I1 = workspace.interval_of_variable(name_test1);
7949 GMM_ASSERT1(imd1,
"Internal error");
7951 std::string name_test2 = CC.Jvars[j2];
7952 const mesh_fem *mf2 = workspace.associated_mf(name_test2);
7953 const im_data *imd2 = workspace.associated_im_data(name_test2);
7960 const gmm::sub_interval
7961 &I2 = mf2 && mf2->is_reduced()
7962 ? workspace.temporary_interval_of_variable(name_test2)
7963 : workspace.interval_of_variable(name_test2);
7964 const base_tensor &Kq1j2pr = *(CC.KQJpr(q1,j2));
7965 model_real_sparse_matrix
7966 &KQJpr = mf2 && mf2->is_reduced()
7967 ? workspace.col_unreduced_matrix()
7968 : workspace.internal_coupling_matrix();
7971 std::make_shared<ga_instruction_matrix_assembly_imd_mf>
7972 (Kq1j2pr, KQJpr, gis.ctx, gis.ctx,
7973 I1, imd1, gis.ONE, I2, *mf2, gis.ONE, gis.ONE, gis.ipt);
7975 if (mf2->is_reduced())
7976 gis.unreduced_terms.emplace(name_test1, name_test2);
7979 std::make_shared<ga_instruction_matrix_assembly_imd_imd>
7980 (Kq1j2pr, KQJpr, gis.ctx, gis.ctx,
7981 I1, imd1, gis.ONE, I2, imd2, gis.ONE, gis.ONE, gis.ipt);
7982 rmi.instructions.push_back(std::move(pgai));
7984 const bool initialize =
true;
7985 pgai = std::make_shared<ga_instruction_vector_assembly_imd>
7986 (*(CC.RQpr[q1]), workspace.assembled_vector(),
7987 gis.ctx, I1, *imd1, gis.ONE, gis.ipt, initialize);
7988 rmi.instructions.push_back(std::move(pgai));
7993 for (
size_type i1=0; i1 < CC.Ivars.size(); ++i1) {
7995 std::string name_test1 = CC.Ivars[i1];
7996 const mesh_fem *mf1 = workspace.associated_mf(name_test1);
7997 const im_data *imd1 = workspace.associated_im_data(name_test1);
7999 &alpha1 = workspace.factor_of_variable(name_test1);
8000 const gmm::sub_interval
8001 &I1 = mf1 && mf1->is_reduced()
8002 ? workspace.temporary_interval_of_variable(name_test1)
8003 : workspace.interval_of_variable(name_test1);
8007 std::vector<std::set<size_type>> Q_of_J(CC.Jvars.size());
8008 for (
size_type q=0; q < CC.Qvars.size(); ++q)
8012 Q_of_J[j].insert(q);
8015 for (
size_type j2=0; j2 < CC.Jvars.size(); ++j2) {
8016 if (Q_of_J[j2].size()) {
8017 std::vector<base_tensor *> Ki1Q, KQj2;
8019 Ki1Q.push_back(CC.KIQ(i1,q));
8020 KQj2.push_back(CC.KQJpr(q,j2));
8023 gis.condensation_tensors.push_back
8024 (std::make_shared<base_tensor>());
8025 base_tensor &Kij = *gis.condensation_tensors.back();
8026 pgai = std::make_shared<ga_instruction_condensation_super_K>
8028 rmi.instructions.push_back(std::move(pgai));
8030 std::string name_test2 = CC.Jvars[j2];
8031 const mesh_fem *mf2 = workspace.associated_mf(name_test2);
8032 const im_data *imd2 = workspace.associated_im_data(name_test2);
8037 &alpha2 = workspace.factor_of_variable(name_test2);
8038 const gmm::sub_interval
8039 &I2 = mf2 && mf2->is_reduced()
8040 ? workspace.temporary_interval_of_variable(name_test2)
8041 : workspace.interval_of_variable(name_test2);
8043 auto &Kxu = (mf1 && mf1->is_reduced()) ? Kuu : Kru;
8044 auto &Kxr = (mf1 && mf1->is_reduced()) ? Kur : Krr;
8045 auto &Krx = (mf2 && mf2->is_reduced()) ? Kru : Krr;
8046 auto &Kxx = (mf2 && mf2->is_reduced()) ? Kxu : Kxr;
8048 if ((mf1 && mf1->is_reduced()) || (mf2 && mf2->is_reduced()))
8049 gis.unreduced_terms.emplace(name_test1, name_test2);
8052 pgai = std::make_shared
8053 <ga_instruction_matrix_assembly_mf_mf>
8054 (Kij, Kxx, gis.ctx, gis.ctx,
8055 I1, *mf1, alpha1, I2, *mf2, alpha2,
8056 gis.coeff, gis.nbpt, gis.ipt,
false);
8058 pgai = std::make_shared
8059 <ga_instruction_matrix_assembly_mf_imd>
8060 (Kij, Kxr, gis.ctx, gis.ctx,
8061 I1, *mf1, alpha1, I2, imd2, alpha2,
8062 gis.coeff, gis.ipt);
8064 pgai = std::make_shared
8065 <ga_instruction_matrix_assembly_imd_mf>
8066 (Kij, Krx, gis.ctx, gis.ctx,
8067 I1, imd1, alpha1, I2, *mf2, alpha2,
8068 gis.coeff, gis.ipt);
8070 pgai = std::make_shared
8071 <ga_instruction_matrix_assembly_imd_imd>
8072 (Kij, Krr, gis.ctx, gis.ctx,
8073 I1, imd1, alpha1, I2, imd2, alpha2,
8074 gis.coeff, gis.ipt);
8075 rmi.instructions.push_back(std::move(pgai));
8080 std::vector<base_tensor *> Ki1Q, RQpr;
8081 for (
size_type q=0; q < CC.Qvars.size(); ++q)
8083 Ki1Q.push_back(CC.KIQ(i1,q));
8084 RQpr.push_back(CC.RQpr[q]);
8086 gis.condensation_tensors.push_back
8087 (std::make_shared<base_tensor>());
8088 base_tensor &Ri = *gis.condensation_tensors.back();
8089 pgai = std::make_shared<ga_instruction_condensation_super_R>
8091 rmi.instructions.push_back(std::move(pgai));
8093 base_vector &R = mf1->is_reduced() ? workspace.unreduced_vector()
8094 : workspace.assembled_vector();
8096 pgai = std::make_shared<ga_instruction_vector_assembly_mf>
8097 (Ri, R, gis.ctx, I1, *mf1, gis.coeff, gis.nbpt, gis.ipt,
false);
8099 pgai = std::make_shared<ga_instruction_vector_assembly_imd>
8100 (Ri, R, gis.ctx, I1, *imd1, gis.coeff, gis.ipt);
8102 pgai = std::make_shared<ga_instruction_vector_assembly>
8103 (Ri, R, I1, gis.coeff);
8104 rmi.instructions.push_back(std::move(pgai));
// Execute every compiled instruction stream held in `gis`, with no mesh
// iteration and no geometric-context update — presumably used to evaluate a
// generic-assembly expression independently of integration points (confirm
// against the caller in ga_workspace).
// NOTE(review): extraction artifact — the numeric prefixes (8119, ...) are
// source line numbers fused into the text by the listing scrape, and the
// function's closing braces were dropped by the same scrape. Do not treat
// them as code.
8119 void ga_function_exec(ga_instruction_set &gis) {
8121 for (
auto &&instr : gis.all_instructions) {
// Only the main instruction list appears here; the begin/elt lists used by
// the other exec functions are not referenced in the visible lines.
8122 const auto &gil = instr.second.instructions;
// exec()'s return value is added to the loop index, so an instruction can
// make the loop skip the instructions that follow it (e.g. conditional
// groups) — same convention as in ga_interpolation_exec/ga_exec below.
8123 for (
size_type j = 0; j < gil.size(); ++j) j += gil[j]->exec();
// Run the compiled instruction streams for interpolation: for each element
// and each interpolation point supplied by `gic`, execute the instructions
// and hand the assembled tensor back to `gic` for storage.
// NOTE(review): this listing is a scrape with original line numbers (8127,
// ...) fused into the content and several lines elided (the element-visitor
// loop header, the `nup` norm computation, closing braces). Comments below
// describe only what the visible lines establish.
8127 void ga_interpolation_exec(ga_instruction_set &gis,
8128 ga_workspace &workspace,
8129 ga_interpolation_context &gic) {
// Scratch vectors for the reference-space (un) and real-space (up) normals.
8131 base_small_vector un, up;
// Let every interpolate transformation set itself up for this workspace.
8133 for (
const std::string &t : gis.transformations)
8134 workspace.interpolate_transformation(t)->init(workspace);
8136 for (
auto &&instr : gis.all_instructions) {
8139 const mesh_region &region = *(instr.first.region());
// The instruction stream's mesh must be the one `gic` interpolates on.
8141 GMM_ASSERT1(&m == &(gic.linked_mesh()),
8142 "Incompatibility of meshes in interpolation");
// Three instruction lists: once-per-element prologue (gilb), per-element
// (gile) and per-point (gil) — executed in that order further down.
8143 const auto &gilb = instr.second.begin_instructions;
8144 const auto &gile = instr.second.elt_instructions;
8145 const auto &gil = instr.second.instructions;
8148 std::vector<size_type> ind;
8149 auto pai_old = papprox_integration{};
8151 if (gic.use_mim()) {
// The interpolation context decides which points of the element (and
// optionally which face) are evaluated; `ind` receives their indices.
8158 bgeot::pstored_point_tab pspt
8159 = gic.ppoints_for_element(v.cv(), v.f(), ind);
// Skip elements with no interpolation points.
8161 if (pspt.get() && ind.size() && pspt->size()) {
8162 m.points_of_convex(v.cv(), G);
8164 up.resize(G.nrows());
8165 un.resize(pgt->dim());
// Reuse the precomputed geometric transformation (pgp) when the
// transformation and integration method did not change; otherwise fall
// back to a plain change, or fetch a pgp from the pool if allowed.
8167 if (gis.ctx.have_pgp() && gis.ctx.pgt() == pgt && pai_old == gis.pai) {
8168 gis.ctx.change(gis.ctx.pgp(), 0, 0, G, v.cv(), v.f());
8170 if (!(gic.use_pgp(v.cv()))) {
8171 gis.ctx.change(pgt, 0, (*pspt)[0], G, v.cv(), v.f());
8173 gis.ctx.change(gis.gp_pool(pgt, pspt), 0, 0, G, v.cv(), v.f());
// Element size is only computed when some instruction asked for it.
8178 if (gis.need_elt_size)
8182 gis.nbpt = pspt->size();
8183 for (
size_type ii = 0; ii < ind.size(); ++ii) {
8185 if (gis.ctx.have_pgp()) gis.ctx.set_ii(ind[ii]);
8186 else gis.ctx.set_xref((*pspt)[gis.ipt]);
// For nonlinear transformations the normal must be recomputed at every
// point; for linear ones the first point's normal is reused.
8188 if (ii == 0 || !(pgt->is_linear())) {
// Real-space outward normal: up = B * (reference normal of face f),
// then normalized (the `nup` computation is elided in this listing)
// and cleaned of round-off noise.
8191 const base_matrix& B = gis.ctx.B();
8192 gmm::copy(pgt->normals()[v.f()], un);
8193 gmm::mult(B, un, up);
8195 gmm::scale(up,1.0/nup);
8196 gmm::clean(up, 1e-13);
8198 }
else gis.Normal.resize(0);
// Fresh accumulation for this interpolation point.
8200 gmm::clear(workspace.assembled_tensor().as_vector());
// exec()'s return value is added to j, allowing an instruction to skip
// its successors.
8202 for (
size_type j = 0; j < gilb.size(); ++j) j += gilb[j]->exec();
8203 for (
size_type j = 0; j < gile.size(); ++j) j += gile[j]->exec();
8205 for (
size_type j = 0; j < gil.size(); ++j) j += gil[j]->exec();
// Deliver the point's result to the interpolation context.
8206 gic.store_result(v.cv(), ind[ii], workspace.assembled_tensor());
// Symmetric teardown of the interpolate transformations.
8211 for (
const std::string &t : gis.transformations)
8212 workspace.interpolate_transformation(t)->finalize();
// Main assembly driver: walk every region/integration-method pair, visit its
// elements and Gauss points, keep the geometric context(s) up to date, and
// execute the compiled instruction lists at each point. Two paths are
// visible: a single-domain path and a secondary-domain path (psd != 0) that
// iterates over pairs of elements with a tensorized point loop.
// NOTE(review): scraped listing — numeric prefixes (8217, ...) are original
// line numbers fused into the text; many lines (element-visitor headers,
// jacobian updates of J1/J2, `nup` computations, closing braces — including
// the function's final brace) were dropped. Comments only state what the
// visible lines establish.
8217 void ga_exec(ga_instruction_set &gis, ga_workspace &workspace) {
8219 base_small_vector un;
// J1/J2 hold integration jacobians; their updates are elided here but they
// scale gis.coeff below.
8220 scalar_type J1(0), J2(0);
8222 for (
const std::string &t : gis.transformations)
8223 workspace.interpolate_transformation(t)->init(workspace);
8225 for (
auto &instr : gis.all_instructions) {
// Non-null psd selects the secondary-domain path further down.
8227 psecondary_domain psd = instr.first.psd();
8229 GMM_ASSERT1(&m == &(mim.
linked_mesh()),
"Incompatibility of meshes");
// Prologue / per-element / per-point instruction lists.
8230 const auto &gilb = instr.second.begin_instructions;
8231 const auto &gile = instr.second.elt_instructions;
8232 const auto &gil = instr.second.instructions;
// ---- standard (single-domain) path ----
8246 const mesh_region &region = *(instr.first.region());
// Cached per-element state; refreshed only when the element changes.
8251 pintegration_method pim = 0;
8252 papprox_integration pai = 0;
8253 bgeot::pstored_point_tab pspt = 0, old_pspt = 0;
8254 bgeot::pgeotrans_precomp pgp = 0;
8255 bool first_gp =
true;
8259 if (v.cv() != old_cv) {
8260 pgt = m.trans_of_convex(v.cv());
8262 m.points_of_convex(v.cv(), G1);
// IM_NONE elements are simply skipped; only approximate methods are
// supported by the high-level assembly.
8264 if (pim->type() == IM_NONE)
continue;
8265 GMM_ASSERT1(pim->type() == IM_APPROX,
"Sorry, exact methods "
8266 "cannot be used in high level generic assembly");
8267 pai = pim->approx_method();
8268 pspt = pai->pintegration_points();
// Reuse the geotrans precomp when method and transformation are
// unchanged; on-the-fly methods get a plain context change instead.
8270 if (pgp && gis.pai == pai && pgt_old == pgt) {
8271 gis.ctx.change(pgp, 0, 0, G1, v.cv(), v.f());
8273 if (pai->is_built_on_the_fly()) {
8274 gis.ctx.change(pgt, 0, (*pspt)[0], G1, v.cv(), v.f());
8277 pgp = gis.gp_pool(pgt, pspt);
8278 gis.ctx.change(pgp, 0, 0, G1, v.cv(), v.f());
8280 pgt_old = pgt; gis.pai = pai;
8282 if (gis.need_elt_size)
8287 if (pim->type() == IM_NONE)
continue;
8288 gis.ctx.set_face_num(v.f());
// A new point table restarts the "first Gauss point" bookkeeping.
8290 if (pspt != old_pspt) { first_gp =
true; old_pspt = pspt; }
// Face integration uses the face's sub-range of points; volume
// integration uses all points of the convex.
8295 gis.nbpt = pai->nb_points_on_face(v.f());
8296 first_ind = pai->ind_first_point_on_face(v.f());
8298 gis.nbpt = pai->nb_points_on_convex();
8300 for (gis.ipt = 0; gis.ipt < gis.nbpt; ++(gis.ipt)) {
8301 if (pgp) gis.ctx.set_ii(first_ind+gis.ipt);
8302 else gis.ctx.set_xref((*pspt)[first_ind+gis.ipt]);
// Normal recomputed per point only for nonlinear transformations.
8303 if (gis.ipt == 0 || !(pgt->is_linear())) {
// gis.Normal = B * (reference face normal), normalized by `nup`
// (computation elided in this listing) and cleaned of noise.
8307 gis.Normal.resize(G1.nrows());
8308 un.resize(pgt->dim());
8309 gmm::copy(pgt->normals()[v.f()], un);
8310 gmm::mult(gis.ctx.B(), un, gis.Normal);
8313 gmm::scale(gis.Normal, 1.0/nup);
8314 gmm::clean(gis.Normal, 1e-13);
8315 }
else gis.Normal.resize(0);
// Point weight: jacobian times quadrature coefficient. Zero-weight
// points are normally disabled but can be forced by the workspace.
8317 auto ipt_coeff = pai->coeff(first_ind+gis.ipt);
8318 gis.coeff = J1 * ipt_coeff;
8319 bool enable_ipt = (gmm::abs(ipt_coeff) > 0.0 ||
8320 workspace.include_empty_int_points());
8321 if (!enable_ipt) gis.coeff = scalar_type(0);
// exec()'s return value advances j, letting an instruction skip its
// successors. The main list still runs at the first/last point even
// when disabled — presumably so state-carrying instructions stay
// consistent (confirm against instruction implementations).
8323 for (
size_type j=0; j < gilb.size(); ++j) j+=gilb[j]->exec();
8327 for (
size_type j=0; j < gile.size(); ++j) j+=gile[j]->exec();
8329 if (enable_ipt || gis.ipt == 0 || gis.ipt == gis.nbpt-1) {
8330 for (
size_type j=0; j < gil.size(); ++j) j+=gil[j]->exec();
8337 GA_DEBUG_INFO(
"-----------------------------");
// ---- secondary-domain path: pairs of elements (m x m2) ----
8341 auto &sdi = instr.second.secondary_domain_infos;
8342 const mesh_region &region1 = *(instr.first.region());
8348 pintegration_method pim1 = 0, pim2 = 0;
8349 papprox_integration pai1 = 0, pai2 = 0;
8350 bgeot::pstored_point_tab pspt1=0, old_pspt1=0, pspt2=0, old_pspt2=0;
8351 bgeot::pgeotrans_precomp pgp1 = 0, pgp2 = 0;
8352 bool first_gp =
true;
// Primary-domain element setup: mirrors the single-domain path above.
8356 if (v1.cv() != old_cv1) {
8357 pgt1 = m.trans_of_convex(v1.cv());
8359 m.points_of_convex(v1.cv(), G1);
8361 if (pim1->type() == IM_NONE)
continue;
8362 GMM_ASSERT1(pim1->type() == IM_APPROX,
"Sorry, exact methods "
8363 "cannot be used in high level generic assembly");
8364 pai1 = pim1->approx_method();
8365 pspt1 = pai1->pintegration_points();
8366 if (pspt1->size()) {
8367 if (pgp1 && gis.pai == pai1 && pgt1_old == pgt1) {
8368 gis.ctx.change(pgp1, 0, 0, G1, v1.cv(), v1.f());
8370 if (pai1->is_built_on_the_fly()) {
8371 gis.ctx.change(pgt1, 0, (*pspt1)[0], G1, v1.cv(), v1.f());
8374 pgp1 = gis.gp_pool(pgt1, pspt1);
8375 gis.ctx.change(pgp1, 0, 0, G1, v1.cv(), v1.f());
8377 pgt1_old = pgt1; gis.pai = pai1;
8379 if (gis.need_elt_size)
8384 if (pim1->type() == IM_NONE)
continue;
8385 gis.ctx.set_face_num(v1.f());
8387 if (pspt1 != old_pspt1) { first_gp =
true; old_pspt1 = pspt1; }
8388 if (pspt1->size()) {
8392 nbpt1 = pai1->nb_points_on_face(v1.f());
8393 first_ind1 = pai1->ind_first_point_on_face(v1.f());
8395 nbpt1 = pai1->nb_points_on_convex();
// The secondary domain maps each primary element/face to a region of
// its own mesh m2; iterate over those paired elements.
8398 const mesh &m2 = psd->mim().linked_mesh();
8399 const mesh_region &region2 = psd->give_region(m, v1.cv(), v1.f());
8401 !v2.finished(); ++v2) {
8402 if (v2.cv() != old_cv2) {
8403 pgt2 = m2.trans_of_convex(v2.cv());
8404 pim2 = psd->mim().int_method_of_element(v2.cv());
8405 m2.points_of_convex(v2.cv(), G2);
8407 if (pim2->type() == IM_NONE)
continue;
8408 GMM_ASSERT1(pim2->type() == IM_APPROX,
"Sorry, exact methods "
8409 "cannot be used in high level generic assembly");
8410 pai2 = pim2->approx_method();
8411 pspt2 = pai2->pintegration_points();
8412 if (pspt2->size()) {
// Secondary context lives in sdi, with the same pgp-reuse logic;
// note the precomp pool is still the primary gis.gp_pool.
8413 if (pgp2 && sdi.pai == pai2 && pgt2_old == pgt2) {
8414 sdi.ctx.change(pgp2, 0, 0, G2, v2.cv(), v2.f());
8416 if (pai2->is_built_on_the_fly()) {
8417 sdi.ctx.change(pgt2, 0, (*pspt2)[0], G2,v2.cv(),v2.f());
8420 pgp2 = gis.gp_pool(pgt2, pspt2);
8421 sdi.ctx.change(pgp2, 0, 0, G2, v2.cv(), v2.f());
8423 pgt2_old = pgt2; sdi.pai = pai2;
8428 if (pim2->type() == IM_NONE)
continue;
8429 sdi.ctx.set_face_num(v2.f());
8431 if (pspt2 != old_pspt2) { first_gp =
true; old_pspt2 = pspt2; }
8432 if (pspt2->size()) {
8436 nbpt2 = pai2->nb_points_on_face(v2.f());
8437 first_ind2 = pai2->ind_first_point_on_face(v2.f());
8439 nbpt2 = gis.nbpt = pai2->nb_points_on_convex();
// Tensor-product point loop: nbpt1 * nbpt2 point pairs per
// element pair.
8441 gis.nbpt = nbpt1 * nbpt2;
8443 for (
size_type ipt1=0; ipt1 < nbpt1; ++ipt1) {
8444 for (
size_type ipt2=0; ipt2 < nbpt2; ++ipt2, ++(gis.ipt)) {
8446 if (pgp1) gis.ctx.set_ii(first_ind1+ipt1);
8447 else gis.ctx.set_xref((*pspt1)[first_ind1+ipt1]);
8448 if (pgp2) sdi.ctx.set_ii(first_ind2+ipt2);
8449 else sdi.ctx.set_xref((*pspt2)[first_ind2+ipt2]);
// Primary-domain normal (same scheme as the single-domain path).
8451 if (gis.ipt == 0 || !(pgt1->is_linear())) {
8454 gis.Normal.resize(G1.nrows());
8455 un.resize(pgt1->dim());
8456 gmm::copy(pgt1->normals()[v1.f()], un);
8457 gmm::mult(gis.ctx.B(), un, gis.Normal);
8460 gmm::scale(gis.Normal, 1.0/nup);
8461 gmm::clean(gis.Normal, 1e-13);
8462 }
else gis.Normal.resize(0);
// Secondary-domain normal, stored separately in sdi.
8465 if (gis.ipt == 0 || !(pgt2->is_linear())) {
8468 sdi.Normal.resize(G2.nrows());
8469 un.resize(pgt2->dim());
8470 gmm::copy(pgt2->normals()[v2.f()], un);
8471 gmm::mult(sdi.ctx.B(), un, sdi.Normal);
8474 gmm::scale(sdi.Normal, 1.0/nup);
8475 gmm::clean(sdi.Normal, 1e-13);
8476 }
else sdi.Normal.resize(0);
// Combined weight: both jacobians times both quadrature
// coefficients.
8479 auto ipt_coeff = pai1->coeff(first_ind1+ipt1)
8480 * pai2->coeff(first_ind2+ipt2);
8481 gis.coeff = J1 * J2 * ipt_coeff;
8482 bool enable_ipt = (gmm::abs(ipt_coeff) > 0.0 ||
8483 workspace.include_empty_int_points());
8484 if (!enable_ipt) gis.coeff = scalar_type(0);
8487 for (
size_type j=0; j < gilb.size(); ++j)
8492 for (
size_type j=0; j < gile.size(); ++j)
8495 if (enable_ipt || gis.ipt == 0 || gis.ipt == gis.nbpt-1) {
8496 for (
size_type j=0; j < gil.size(); ++j)
8507 GA_DEBUG_INFO(
"-----------------------------");
// Teardown of all interpolate transformations.
8512 for (
const std::string &t : gis.transformations)
8513 workspace.interpolate_transformation(t)->finalize();