GetFEM 5.4.2
getfem_generic_assembly_compile_and_exec.cc
1 /*===========================================================================
2 
3  Copyright (C) 2013-2020 Yves Renard
4 
5  This file is a part of GetFEM
6 
7  GetFEM is free software; you can redistribute it and/or modify it
8  under the terms of the GNU Lesser General Public License as published
9  by the Free Software Foundation; either version 3 of the License, or
10  (at your option) any later version along with the GCC Runtime Library
11  Exception either version 3.1 or (at your option) any later version.
12  This program is distributed in the hope that it will be useful, but
13  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14  or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15  License and GCC Runtime Library Exception for more details.
16  You should have received a copy of the GNU Lesser General Public License
17  along with this program; if not, write to the Free Software Foundation,
18  Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 
20 ===========================================================================*/
21 
25 #include "getfem/getfem_generic_assembly_compile_and_exec.h"
26 #include "getfem/getfem_generic_assembly_functions_and_operators.h"
27 
28 // #define GA_USES_BLAS // not so interesting, at least for debian blas
29 
30 // #define GA_DEBUG_INFO(a) { cout << a << endl; }
31 #define GA_DEBUG_INFO(a)
32 
33 
34 
35 namespace getfem {
36 
37 
38  template <class VEC1, class VEC2>
39  inline void copy_scaled_4(const VEC1 &v1, const scalar_type a, VEC2 &v2) {
40  auto it1 = v1.begin();
41  auto it2 = v2.begin(), it2e = v2.end();
42  size_type nd = v1.size() >> 2;
43  for (size_type i = 0; i < nd; ++i) {
44  *it2++ = (*it1++) * a;
45  *it2++ = (*it1++) * a;
46  *it2++ = (*it1++) * a;
47  *it2++ = (*it1++) * a;
48  }
49  for (; it2 != it2e;)
50  *it2++ = (*it1++) * a;
51  }
52 
53  template <class VEC1, class VEC2>
54  inline void add_scaled_4(const VEC1 &v1, const scalar_type a, VEC2 &v2) {
55  auto it1 = v1.begin();
56  auto it2 = v2.begin(), it2e = v2.end();
57  size_type nd = v1.size() >> 2;
58  for (size_type i = 0; i < nd; ++i) {
59  *it2++ += (*it1++) * a;
60  *it2++ += (*it1++) * a;
61  *it2++ += (*it1++) * a;
62  *it2++ += (*it1++) * a;
63  }
64  for (; it2 != it2e;)
65  *it2++ += (*it1++) * a;
66  }
67 
68  template <class VEC1, class VEC2>
69  inline void copy_scaled_8(const VEC1 &v1, const scalar_type a, VEC2 &v2) {
70  auto it1 = v1.begin();
71  auto it2 = v2.begin(), it2e = v2.end();
72  size_type nd = v1.size() >> 3;
73  for (size_type i = 0; i < nd; ++i) {
74  *it2++ = (*it1++) * a;
75  *it2++ = (*it1++) * a;
76  *it2++ = (*it1++) * a;
77  *it2++ = (*it1++) * a;
78  *it2++ = (*it1++) * a;
79  *it2++ = (*it1++) * a;
80  *it2++ = (*it1++) * a;
81  *it2++ = (*it1++) * a;
82  }
83  for (; it2 != it2e;)
84  *it2++ = (*it1++) * a;
85  }
86 
87  template <class VEC1, class VEC2>
88  inline void add_scaled_8(const VEC1 &v1, const scalar_type a, VEC2 &v2) {
89  auto it1 = v1.begin();
90  auto it2 = v2.begin(), it2e = v2.end();
91  size_type nd = v1.size() >> 3;
92  for (size_type i = 0; i < nd; ++i) {
93  *it2++ += (*it1++) * a;
94  *it2++ += (*it1++) * a;
95  *it2++ += (*it1++) * a;
96  *it2++ += (*it1++) * a;
97  *it2++ += (*it1++) * a;
98  *it2++ += (*it1++) * a;
99  *it2++ += (*it1++) * a;
100  *it2++ += (*it1++) * a;
101  }
102  for (; it2 != it2e;)
103  *it2++ += (*it1++) * a;
104  }
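  // The four helpers above implement v2 = a*v1 (copy_scaled_*) and
  // v2 += a*v1 (add_scaled_*) with manual 4-way / 8-way loop unrolling.
  // As a worked illustration with hypothetical sizes: for v1.size() == 11
  // the 4-way variants compute nd = 11 >> 2 = 2, so the unrolled loop
  // processes 8 entries and the scalar tail loop the remaining 3.
  // A minimal usage sketch, assuming v1 and v2 have the same length:
  //   std::vector<scalar_type> v1(11, 1.0), v2(11, 0.0);
  //   copy_scaled_4(v1, scalar_type(2), v2);   // v2  = 2.0 * v1
  //   add_scaled_8(v1, scalar_type(0.5), v2);  // v2 += 0.5 * v1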
105 
106  bool operator <(const gauss_pt_corresp &gpc1,
107  const gauss_pt_corresp &gpc2) {
108  if (gpc1.pai != gpc2.pai)
109  return (gpc1.pai < gpc2.pai );
110  if (gpc1.nodes.size() != gpc2.nodes.size())
111  return (gpc1.nodes.size() < gpc2.nodes.size());
112  for (size_type i = 0; i < gpc1.nodes.size(); ++i)
113  if (gpc1.nodes[i] != gpc2.nodes[i])
114  return (gpc1.nodes[i] < gpc2.nodes[i]);
115  if (gpc1.pgt1 != gpc2.pgt1)
116  return (gpc1.pgt1 < gpc2.pgt1);
117  if (gpc1.pgt2 != gpc2.pgt2)
118  return (gpc1.pgt2 < gpc2.pgt2);
119  return false;
120  }
121 
122  bool operator <(const ga_instruction_set::region_mim &rm1,
123  const ga_instruction_set::region_mim &rm2) {
124  if (rm1.mim() != rm2.mim()) return (rm1.mim() < rm2.mim());
125  if (rm1.region() != rm2.region()) return (rm1.region() < rm2.region());
126  return (rm1.psd() < rm2.psd());
127  }
128 
129  //=========================================================================
130  // Instructions for compilation: basic optimized operations on tensors
131  //=========================================================================
132 
133  struct ga_instruction_extract_local_im_data : public ga_instruction {
134  base_tensor &t;
135  const im_data &imd;
136  papprox_integration &pai;
137  const base_vector &U;
138  const fem_interpolation_context &ctx;
139  size_type qdim, cv_old;
140  virtual int exec() {
141  GA_DEBUG_INFO("Instruction: extract local im data");
142  size_type cv = ctx.convex_num();
143  if (cv != cv_old) {
144  cv_old = cv;
145  GMM_ASSERT1(imd.linked_mesh_im().int_method_of_element(cv)
146  ->approx_method() == pai, "Im data have to be used only "
147  "on their original integration method.");
148  }
149  size_type ipt = imd.filtered_index_of_point(cv, ctx.ii());
150  GMM_ASSERT1(ipt != size_type(-1),
151  "No im data stored for the current integration point.");
152  auto it = U.begin()+ipt*qdim;
153  std::copy(it, it+qdim, t.begin());
154  return 0;
155  }
156  ga_instruction_extract_local_im_data
157  (base_tensor &t_, const im_data &imd_, const base_vector &U_,
158  papprox_integration &pai_, const fem_interpolation_context &ctx_,
159  size_type qdim_)
160  : t(t_), imd(imd_), pai(pai_), U(U_), ctx(ctx_), qdim(qdim_),
161  cv_old(-1)
162  {}
163  };
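  // In the instruction above, the im_data vector U is assumed to store the
  // qdim components of every (filtered) integration point contiguously, so
  // the values attached to point ipt occupy U[ipt*qdim .. ipt*qdim+qdim-1].
  // For instance, with qdim == 3 and ipt == 2 the copied slice is
  // U[6], U[7], U[8].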
164 
165  struct ga_instruction_slice_local_dofs : public ga_instruction {
166  const mesh_fem &mf;
167  const base_vector &U;
168  const fem_interpolation_context &ctx;
169  base_vector &coeff;
170  size_type qmult1, qmult2;
171  virtual int exec() {
172  GA_DEBUG_INFO("Instruction: Slice local dofs");
173  GMM_ASSERT1(qmult1 != 0 && qmult2 != 0, "Internal error");
174  slice_vector_on_basic_dof_of_element(mf, U, ctx.convex_num(),
175  coeff, qmult1, qmult2);
176  return 0;
177  }
178  ga_instruction_slice_local_dofs(const mesh_fem &mf_, const base_vector &U_,
179  const fem_interpolation_context &ctx_,
180  base_vector &coeff_,
181  size_type qmult1_, size_type qmult2_)
182  : mf(mf_), U(U_), ctx(ctx_), coeff(coeff_),
183  qmult1(qmult1_), qmult2(qmult2_) {}
184  };
185 
186  struct ga_instruction_update_pfp : public ga_instruction {
187  const mesh_fem &mf;
188  const fem_interpolation_context &ctx;
189  fem_precomp_pool &fp_pool;
190  pfem_precomp &pfp;
191 
192  virtual int exec() {
193  GA_DEBUG_INFO("Instruction: Pfp update");
194  if (ctx.have_pgp()) {
195  size_type cv = ctx.is_convex_num_valid()
196  ? ctx.convex_num() : mf.convex_index().first_true();
197  pfem pf = mf.fem_of_element(cv);
198  if (!pfp || pf != pfp->get_pfem() ||
199  ctx.pgp()->get_ppoint_tab() != pfp->get_ppoint_tab()) {
200  pfp = fp_pool(pf, ctx.pgp()->get_ppoint_tab());
201  }
202  } else {
203  pfp = 0;
204  }
205  return 0;
206  }
207 
208  ga_instruction_update_pfp(const mesh_fem &mf_, pfem_precomp &pfp_,
209  const fem_interpolation_context &ctx_,
210  fem_precomp_pool &fp_pool_)
211  : mf(mf_), ctx(ctx_), fp_pool(fp_pool_), pfp(pfp_) {}
212  };
213 
214  struct ga_instruction_first_ind_tensor : public ga_instruction {
215  base_tensor &t;
216  const fem_interpolation_context &ctx;
217  size_type qdim;
218  const mesh_fem *mfn, **mfg;
219 
220  virtual int exec() {
221  GA_DEBUG_INFO("Instruction: adapt first index of tensor");
222  const mesh_fem &mf = *(mfg ? *mfg : mfn);
223  GA_DEBUG_ASSERT(mfg ? *mfg : mfn, "Internal error");
224  size_type cv_1 = ctx.is_convex_num_valid()
225  ? ctx.convex_num() : mf.convex_index().first_true();
226  pfem pf = mf.fem_of_element(cv_1);
227  GMM_ASSERT1(pf, "An element without finite element method defined");
228  size_type Qmult = qdim / pf->target_dim();
229  size_type s = pf->nb_dof(cv_1) * Qmult;
230  if (t.sizes()[0] != s)
231  { bgeot::multi_index mi = t.sizes(); mi[0] = s; t.adjust_sizes(mi); }
232  return 0;
233  }
234 
235  ga_instruction_first_ind_tensor(base_tensor &t_,
236  const fem_interpolation_context &ctx_,
237  size_type qdim_, const mesh_fem *mfn_,
238  const mesh_fem **mfg_)
239  : t(t_), ctx(ctx_), qdim(qdim_), mfn(mfn_), mfg(mfg_) {}
240  };
241 
242  struct ga_instruction_second_ind_tensor
243  : public ga_instruction_first_ind_tensor {
244 
245  virtual int exec() {
246  GA_DEBUG_INFO("Instruction: adapt second index of tensor");
247  const mesh_fem &mf = *(mfg ? *mfg : mfn);
248  size_type cv_1 = ctx.is_convex_num_valid()
249  ? ctx.convex_num() : mf.convex_index().first_true();
250  pfem pf = mf.fem_of_element(cv_1);
251  GMM_ASSERT1(pf, "An element without finite element method defined");
252  size_type Qmult = qdim / pf->target_dim();
253  size_type s = pf->nb_dof(cv_1) * Qmult;
254  if (t.sizes()[1] != s)
255  { bgeot::multi_index mi = t.sizes(); mi[1] = s; t.adjust_sizes(mi); }
256  return 0;
257  }
258 
259  ga_instruction_second_ind_tensor(base_tensor &t_,
260  fem_interpolation_context &ctx_,
261  size_type qdim_, const mesh_fem *mfn_,
262  const mesh_fem **mfg_)
263  : ga_instruction_first_ind_tensor(t_, ctx_, qdim_, mfn_, mfg_) {}
264 
265  };
266 
267  struct ga_instruction_two_first_ind_tensor : public ga_instruction {
268  base_tensor &t;
269  const fem_interpolation_context &ctx1, &ctx2;
270  size_type qdim1;
271  const mesh_fem *mfn1, **mfg1;
272  size_type qdim2;
273  const mesh_fem *mfn2, **mfg2;
274 
275  virtual int exec() {
276  GA_DEBUG_INFO("Instruction: adapt two first indices of tensor");
277  const mesh_fem &mf1 = *(mfg1 ? *mfg1 : mfn1);
278  const mesh_fem &mf2 = *(mfg2 ? *mfg2 : mfn2);
279  size_type cv_1 = ctx1.is_convex_num_valid()
280  ? ctx1.convex_num() : mf1.convex_index().first_true();
281  size_type cv_2 = ctx2.is_convex_num_valid()
282  ? ctx2.convex_num() : mf2.convex_index().first_true();
283  pfem pf1 = mf1.fem_of_element(cv_1);
284  GMM_ASSERT1(pf1, "An element without finite element method defined");
285  pfem pf2 = mf2.fem_of_element(cv_2);
286  GMM_ASSERT1(pf2, "An element without finite element method defined");
287  size_type Qmult1 = qdim1 / pf1->target_dim();
288  size_type s1 = pf1->nb_dof(cv_1) * Qmult1;
289  size_type Qmult2 = qdim2 / pf2->target_dim();
290  size_type s2 = pf2->nb_dof(cv_2) * Qmult2;
291  GMM_ASSERT1(s1 > 0 && s2 > 0, "Element without degrees of freedom");
292  if (t.sizes()[0] != s1 || t.sizes()[1] != s2) {
293  bgeot::multi_index mi = t.sizes();
294  mi[0] = s1; mi[1] = s2;
295  t.adjust_sizes(mi);
296  }
297  return 0;
298  }
299 
300  ga_instruction_two_first_ind_tensor
301  (base_tensor &t_, const fem_interpolation_context &ctx1_,
302  const fem_interpolation_context &ctx2_,
303  size_type qdim1_, const mesh_fem *mfn1_, const mesh_fem **mfg1_,
304  size_type qdim2_, const mesh_fem *mfn2_, const mesh_fem **mfg2_)
305  : t(t_), ctx1(ctx1_), ctx2(ctx2_), qdim1(qdim1_), mfn1(mfn1_),
306  mfg1(mfg1_), qdim2(qdim2_), mfn2(mfn2_), mfg2(mfg2_) {}
307  };
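  // The index-adapting instructions above resize the leading dimension(s) of
  // the result tensor to the number of local degrees of freedom of the
  // current element, s = nb_dof(cv) * Qmult with Qmult = qdim / target_dim.
  // As a small worked example (hypothetical values): a vector field with
  // qdim == 3 discretized with a scalar element (target_dim == 1) having
  // 4 local dofs gives Qmult == 3 and s == 12.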
308 
309 
310  struct ga_instruction_X_component : public ga_instruction {
311  scalar_type &t;
312  const fem_interpolation_context &ctx;
313  size_type n;
314 
315  virtual int exec() {
316  GA_DEBUG_INFO("Instruction: X component");
317  t = ctx.xreal()[n];
318  return 0;
319  }
320 
321  ga_instruction_X_component
322  (scalar_type &t_, const fem_interpolation_context &ctx_, size_type n_)
323  : t(t_), ctx(ctx_), n(n_) {}
324  };
325 
326  struct ga_instruction_X : public ga_instruction {
327  base_tensor &t;
328  const fem_interpolation_context &ctx;
329 
330  virtual int exec() {
331  GA_DEBUG_INFO("Instruction: X");
332  GA_DEBUG_ASSERT(t.size() == ctx.xreal().size(), "dimensions mismatch");
333  gmm::copy(ctx.xreal(), t.as_vector());
334  return 0;
335  }
336 
337  ga_instruction_X(base_tensor &t_, const fem_interpolation_context &ctx_)
338  : t(t_), ctx(ctx_) {}
339  };
340 
341  struct ga_instruction_copy_small_vect : public ga_instruction {
342  base_tensor &t;
343  const base_small_vector &vec;
344 
345  virtual int exec() {
346  GA_DEBUG_INFO("Instruction: copy small vector");
347  GMM_ASSERT1(t.size() == vec.size(), "Invalid vector size.");
348  gmm::copy(vec, t.as_vector());
349  return 0;
350  }
351  ga_instruction_copy_small_vect(base_tensor &t_,
352  const base_small_vector &vec_)
353  : t(t_), vec(vec_) {}
354  };
355 
356  struct ga_instruction_copy_Normal : public ga_instruction_copy_small_vect {
357 
358  virtual int exec() {
359  GA_DEBUG_INFO("Instruction: unit normal vector");
360  GMM_ASSERT1(t.size() == vec.size(), "Invalid outward unit normal "
361  "vector. Possible reasons: not on boundary or "
362  "transformation failed.");
363  gmm::copy(vec, t.as_vector());
364  return 0;
365  }
366  ga_instruction_copy_Normal(base_tensor &t_,
367  const base_small_vector &Normal_)
368  : ga_instruction_copy_small_vect(t_, Normal_) {}
369  };
370 
371  struct ga_instruction_level_set_normal_vector : public ga_instruction {
372  base_tensor &t;
373  const mesh_im_level_set *mimls;
374  const fem_interpolation_context &ctx;
375  base_small_vector vec;
376 
377  virtual int exec() {
378  GA_DEBUG_INFO("Instruction: unit normal vector to a level-set");
379  mimls->compute_normal_vector(ctx, vec);
380  GMM_ASSERT1(t.size() == vec.size(), "Invalid outward unit normal "
381  "vector. Possible reasons: not on boundary or "
382  "transformation failed.");
383  gmm::copy(vec, t.as_vector());
384  return 0;
385  }
386  ga_instruction_level_set_normal_vector
387  (base_tensor &t_, const mesh_im_level_set *mimls_,
388  const fem_interpolation_context &ctx_)
389  : t(t_), mimls(mimls_), ctx(ctx_), vec(t.size()) {}
390  };
391 
392  struct ga_instruction_element_size : public ga_instruction {
393  base_tensor &t;
394  scalar_type &es;
395 
396  virtual int exec() {
397  GA_DEBUG_INFO("Instruction: element_size");
398  GMM_ASSERT1(t.size() == 1, "Invalid element size.");
399  t[0] = es;
400  return 0;
401  }
402  ga_instruction_element_size(base_tensor &t_, scalar_type &es_)
403  : t(t_), es(es_) {}
404  };
405 
406  struct ga_instruction_element_K : public ga_instruction {
407  base_tensor &t;
408  const fem_interpolation_context &ctx;
409 
410  virtual int exec() {
411  GA_DEBUG_INFO("Instruction: element_K");
412  GMM_ASSERT1(t.size() == (ctx.K()).size(), "Invalid tensor size.");
413  gmm::copy(ctx.K().as_vector(), t.as_vector());
414  return 0;
415  }
416  ga_instruction_element_K(base_tensor &t_,
417  const fem_interpolation_context &ct)
418  : t(t_), ctx(ct) {}
419  };
420 
421  struct ga_instruction_element_B : public ga_instruction {
422  base_tensor &t;
423  const fem_interpolation_context &ctx;
424 
425  virtual int exec() {
426  GA_DEBUG_INFO("Instruction: element_B");
427  GMM_ASSERT1(t.size() == (ctx.B()).size(), "Invalid tensor size.");
428  gmm::copy(ctx.B().as_vector(), t.as_vector());
429  return 0;
430  }
431  ga_instruction_element_B(base_tensor &t_,
432  const fem_interpolation_context &ct)
433  : t(t_), ctx(ct) {}
434  };
435 
436  struct ga_instruction_val_base : public ga_instruction {
437  base_tensor &t;
438  fem_interpolation_context &ctx;
439  const mesh_fem &mf;
440  const pfem_precomp &pfp;
441 
442  virtual int exec() { // --> t(ndof,target_dim)
443  GA_DEBUG_INFO("Instruction: compute value of base functions");
444  // if (ctx.have_pgp()) ctx.set_pfp(pfp);
445  // else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
446  // GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
447  // ctx.base_value(t);
448  if (ctx.have_pgp()) ctx.pfp_base_value(t, pfp);
449  else {
450  ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
451  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
452  ctx.base_value(t);
453  }
454  return 0;
455  }
456 
457  ga_instruction_val_base(base_tensor &tt, fem_interpolation_context &ct,
458  const mesh_fem &mf_, const pfem_precomp &pfp_)
459  : t(tt), ctx(ct), mf(mf_), pfp(pfp_) {}
460  };
461 
462  struct ga_instruction_xfem_plus_val_base : public ga_instruction {
463  base_tensor &t;
464  fem_interpolation_context &ctx;
465  const mesh_fem &mf;
466  pfem_precomp &pfp;
467 
468  virtual int exec() { // --> t(ndof,target_dim)
469  GA_DEBUG_INFO("Instruction: compute value of base functions");
470  if (ctx.have_pgp()) ctx.set_pfp(pfp);
471  else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
472  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
473  int old_xfem_side = ctx.xfem_side();
474  ctx.set_xfem_side(1);
475  ctx.base_value(t);
476  ctx.set_xfem_side(old_xfem_side);
477  return 0;
478  }
479 
480  ga_instruction_xfem_plus_val_base(base_tensor &tt,
481  fem_interpolation_context &ct,
482  const mesh_fem &mf_, pfem_precomp &pfp_)
483  : t(tt), ctx(ct), mf(mf_), pfp(pfp_) {}
484  };
485 
486  struct ga_instruction_xfem_minus_val_base : public ga_instruction {
487  base_tensor &t;
488  fem_interpolation_context &ctx;
489  const mesh_fem &mf;
490  pfem_precomp &pfp;
491 
492  virtual int exec() { // --> t(ndof,target_dim)
493  GA_DEBUG_INFO("Instruction: compute value of base functions");
494  if (ctx.have_pgp()) ctx.set_pfp(pfp);
495  else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
496  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
497  int old_xfem_side = ctx.xfem_side();
498  ctx.set_xfem_side(-1);
499  ctx.base_value(t);
500  ctx.set_xfem_side(old_xfem_side);
501  return 0;
502  }
503 
504  ga_instruction_xfem_minus_val_base
505  (base_tensor &tt, fem_interpolation_context &ct,
506  const mesh_fem &mf_, pfem_precomp &pfp_)
507  : t(tt), ctx(ct), mf(mf_), pfp(pfp_) {}
508  };
509 
510  struct ga_instruction_grad_base : public ga_instruction_val_base {
511 
512  virtual int exec() { // --> t(ndof,target_dim,N)
513  GA_DEBUG_INFO("Instruction: compute gradient of base functions");
514  // if (ctx.have_pgp()) ctx.set_pfp(pfp);
515  // else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
516  // GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
517  // ctx.grad_base_value(t);
518  if (ctx.have_pgp()) ctx.pfp_grad_base_value(t, pfp);
519  else {
520  ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
521  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
522  ctx.grad_base_value(t);
523  }
524  return 0;
525  }
526 
527  ga_instruction_grad_base(base_tensor &tt, fem_interpolation_context &ct,
528  const mesh_fem &mf_, pfem_precomp &pfp_)
529  : ga_instruction_val_base(tt, ct, mf_, pfp_)
530  {}
531  };
532 
533  struct ga_instruction_xfem_plus_grad_base : public ga_instruction_val_base {
534 
535  virtual int exec() { // --> t(ndof,target_dim,N)
536  GA_DEBUG_INFO("Instruction: compute gradient of base functions");
537  if (ctx.have_pgp()) ctx.set_pfp(pfp);
538  else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
539  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
540  int old_xfem_side = ctx.xfem_side();
541  ctx.set_xfem_side(1);
542  ctx.grad_base_value(t);
543  ctx.set_xfem_side(old_xfem_side);
544  return 0;
545  }
546 
547  ga_instruction_xfem_plus_grad_base
548  (base_tensor &tt, fem_interpolation_context &ct,
549  const mesh_fem &mf_, pfem_precomp &pfp_)
550  : ga_instruction_val_base(tt, ct, mf_, pfp_)
551  {}
552  };
553 
554  struct ga_instruction_xfem_minus_grad_base : public ga_instruction_val_base {
555 
556  virtual int exec() { // --> t(ndof,target_dim,N)
557  GA_DEBUG_INFO("Instruction: compute gradient of base functions");
558  if (ctx.have_pgp()) ctx.set_pfp(pfp);
559  else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
560  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
561  int old_xfem_side = ctx.xfem_side();
562  ctx.set_xfem_side(-1);
563  ctx.grad_base_value(t);
564  ctx.set_xfem_side(old_xfem_side);
565  return 0;
566  }
567 
568  ga_instruction_xfem_minus_grad_base
569  (base_tensor &tt, fem_interpolation_context &ct,
570  const mesh_fem &mf_, pfem_precomp &pfp_)
571  : ga_instruction_val_base(tt, ct, mf_, pfp_)
572  {}
573  };
574 
575 
576  struct ga_instruction_hess_base : public ga_instruction_val_base {
577 
578  virtual int exec() { // --> t(ndof,target_dim,N*N)
579  GA_DEBUG_INFO("Instruction: compute Hessian of base functions");
580  if (ctx.have_pgp()) ctx.set_pfp(pfp);
581  else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
582  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
583  ctx.hess_base_value(t);
584  return 0;
585  }
586 
587  ga_instruction_hess_base(base_tensor &tt, fem_interpolation_context &ct,
588  const mesh_fem &mf_, pfem_precomp &pfp_)
589  : ga_instruction_val_base(tt, ct, mf_, pfp_)
590  {}
591  };
592 
593  struct ga_instruction_xfem_plus_hess_base : public ga_instruction_val_base {
594 
595  virtual int exec() { // --> t(ndof,target_dim,N*N)
596  GA_DEBUG_INFO("Instruction: compute Hessian of base functions");
597  if (ctx.have_pgp()) ctx.set_pfp(pfp);
598  else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
599  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
600  int old_xfem_side = ctx.xfem_side();
601  ctx.set_xfem_side(1);
602  ctx.hess_base_value(t);
603  ctx.set_xfem_side(old_xfem_side);
604  return 0;
605  }
606 
607  ga_instruction_xfem_plus_hess_base
608  (base_tensor &tt, fem_interpolation_context &ct,
609  const mesh_fem &mf_, pfem_precomp &pfp_)
610  : ga_instruction_val_base(tt, ct, mf_, pfp_)
611  {}
612  };
613 
614  struct ga_instruction_xfem_minus_hess_base : public ga_instruction_val_base {
615 
616  virtual int exec() { // --> t(ndof,target_dim,N*N)
617  GA_DEBUG_INFO("Instruction: compute Hessian of base functions");
618  if (ctx.have_pgp()) ctx.set_pfp(pfp);
619  else ctx.set_pf(mf.fem_of_element(ctx.convex_num()));
620  GMM_ASSERT1(ctx.pf(), "Undefined finite element method");
621  int old_xfem_side = ctx.xfem_side();
622  ctx.set_xfem_side(-1);
623  ctx.hess_base_value(t);
624  ctx.set_xfem_side(old_xfem_side);
625  return 0;
626  }
627 
628  ga_instruction_xfem_minus_hess_base
629  (base_tensor &tt, fem_interpolation_context &ct,
630  const mesh_fem &mf_, pfem_precomp &pfp_)
631  : ga_instruction_val_base(tt, ct, mf_, pfp_)
632  {}
633  };
634 
635  struct ga_instruction_val : public ga_instruction {
636  scalar_type &a;
637  base_tensor &t;
638  const base_tensor &Z;
639  const base_vector &coeff;
640  size_type qdim;
641  // Z(ndof,target_dim), coeff(Qmult,ndof) --> t(target_dim*Qmult)
642  virtual int exec() {
643  GA_DEBUG_INFO("Instruction: variable value");
644  size_type ndof = Z.sizes()[0];
645  if (!ndof) { gmm::clear(t.as_vector()); return 0; }
646  GA_DEBUG_ASSERT(t.size() == qdim, "dimensions mismatch");
647 
648  if (qdim == 1) {
649  GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof,
650  "Wrong size for coeff vector");
651  auto itc = coeff.begin(); auto itZ = Z.begin();
652  a = (*itc++) * (*itZ++);
653  while (itc != coeff.end()) a += (*itc++) * (*itZ++);
654  } else {
655  size_type target_dim = Z.sizes()[1];
656  if (target_dim == 1) {
657  GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*qdim,
658  "Wrong size for coeff vector");
659  auto itc = coeff.begin(); auto itZ = Z.begin();
660  for (auto it = t.begin(); it != t.end(); ++it)
661  *it = (*itc++) * (*itZ);
662  ++itZ;
663  for (size_type j = 1; j < ndof; ++j, ++itZ) {
664  for (auto it = t.begin(); it != t.end(); ++it)
665  *it += (*itc++) * (*itZ);
666  }
667  } else {
668  size_type Qmult = qdim / target_dim;
669  GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*Qmult,
670  "Wrong size for coeff vector");
671 
672  gmm::clear(t.as_vector());
673  auto itc = coeff.begin();
674  for (size_type j = 0; j < ndof; ++j) {
675  auto it = t.begin();
676  for (size_type q = 0; q < Qmult; ++q, ++itc) {
677  for (size_type r = 0; r < target_dim; ++r)
678  *it++ += (*itc) * Z[j + r*ndof];
679  }
680  }
681  }
682  }
683  return 0;
684  }
685 
686  ga_instruction_val(base_tensor &tt, const base_tensor &Z_,
687  const base_vector &co, size_type q)
688  : a(tt[0]), t(tt), Z(Z_), coeff(co), qdim(q) {}
689  };
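  // ga_instruction_val contracts the base function values Z(ndof,target_dim)
  // with the local coefficients. In the scalar case (qdim == 1) this is
  // simply a = sum_j coeff[j]*Z[j]. In the vectorized case (target_dim == 1,
  // Qmult == qdim) the coefficients are interleaved per dof; for the
  // hypothetical sizes qdim == 2, ndof == 3 the loops above produce
  //   t[0] = coeff[0]*Z[0] + coeff[2]*Z[1] + coeff[4]*Z[2],
  //   t[1] = coeff[1]*Z[0] + coeff[3]*Z[1] + coeff[5]*Z[2].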
690 
691  struct ga_instruction_grad : public ga_instruction_val {
692  // Z(ndof,target_dim,N), coeff(Qmult,ndof) --> t(target_dim*Qmult,N)
693  virtual int exec() {
694  GA_DEBUG_INFO("Instruction: gradient");
695  size_type ndof = Z.sizes()[0];
696  if (!ndof) { gmm::clear(t.as_vector()); return 0; }
697  size_type N = Z.sizes()[2];
698  if (qdim == 1) {
699  GA_DEBUG_ASSERT(t.size() == N, "dimensions mismatch");
700  GA_DEBUG_ASSERT(coeff.size() == ndof, "Wrong size for coeff vector");
701  auto itZ = Z.begin();
702  for (auto it = t.begin(); it != t.end(); ++it) {
703  auto itc = coeff.begin();
704  *it = (*itc++) * (*itZ++);
705  while (itc != coeff.end()) *it += (*itc++) * (*itZ++);
706  }
707  } else {
708  size_type target_dim = Z.sizes()[1];
709  if (target_dim == 1) {
710  GA_DEBUG_ASSERT(t.size() == N*qdim, "dimensions mismatch");
711  GA_DEBUG_ASSERT(coeff.size() == ndof*qdim,
712  "Wrong size for coeff vector");
713  for (size_type q = 0; q < qdim; ++q) {
714  auto itZ = Z.begin(); auto it = t.begin() + q;
715  for (size_type k = 0; k < N; ++k) {
716  if (k) it += qdim;
717  auto itc = coeff.begin() + q;
718  *it = (*itc) * (*itZ++);
719  for (size_type j = 1; j < ndof; ++j)
720  { itc += qdim; *it += (*itc) * (*itZ++); }
721  }
722  }
723  } else {
724  size_type Qmult = qdim / target_dim;
725  GA_DEBUG_ASSERT(t.size() == N*qdim, "dimensions mismatch");
726  GA_DEBUG_ASSERT(coeff.size() == ndof*Qmult,
727  "Wrong size for coeff vector");
728  gmm::clear(t.as_vector());
729  for (size_type q = 0; q < Qmult; ++q) {
730  auto itZ = Z.begin();
731  for (size_type k = 0; k < N; ++k)
732  for (size_type r = 0; r < target_dim; ++r)
733  for (size_type j = 0; j < ndof; ++j)
734  t[r + q*target_dim + k*qdim] += coeff[j*Qmult+q] * (*itZ++);
735  }
736  }
737  }
738  return 0;
739  }
740 
741  ga_instruction_grad(base_tensor &tt, const base_tensor &Z_,
742  const base_vector &co, size_type q)
743  : ga_instruction_val(tt, Z_, co, q)
744  {}
745 
746  };
747 
748  struct ga_instruction_hess : public ga_instruction_val {
749  // Z(ndof,target_dim,N*N), coeff(Qmult,ndof) --> t(target_dim*Qmult,N,N)
750  virtual int exec() {
751  GA_DEBUG_INFO("Instruction: Hessian");
752  size_type ndof = Z.sizes()[0];
753  if (!ndof) { gmm::clear(t.as_vector()); return 0; }
754  size_type NN = gmm::sqr(t.sizes().back());
755  GA_DEBUG_ASSERT(NN == Z.sizes()[2], "Internal error");
756  if (qdim == 1) {
757  GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof,
758  "Wrong size for coeff vector");
759  auto it = Z.begin(); auto itt = t.begin();
760  for (size_type kl = 0; kl < NN; ++kl, ++itt) {
761  *itt = scalar_type(0);
762  for (auto itc = coeff.begin(); itc != coeff.end(); ++itc, ++it)
763  *itt += (*itc) * (*it);
764  }
765  GMM_ASSERT1(itt == t.end(), "dimensions mismatch");
766  } else {
767  size_type target_dim = Z.sizes()[1];
768  if (target_dim == 1) {
769  GA_DEBUG_ASSERT(t.size() == NN*qdim, "dimensions mismatch");
770  GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*qdim,
771  "Wrong size for coeff vector");
772  gmm::clear(t.as_vector());
773  for (size_type q = 0; q < qdim; ++q) {
774  base_tensor::const_iterator it = Z.begin();
775  for (size_type kl = 0; kl < NN; ++kl)
776  for (size_type j = 0; j < ndof; ++j, ++it)
777  t[q + kl*qdim] += coeff[j*qdim+q] * (*it);
778  }
779  } else {
780  size_type Qmult = qdim / target_dim;
781  GA_DEBUG_ASSERT(t.size() == NN*qdim, "dimensions mismatch");
782  GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*Qmult,
783  "Wrong size for coeff vector");
784  gmm::clear(t.as_vector());
785  for (size_type q = 0; q < Qmult; ++q) {
786  base_tensor::const_iterator it = Z.begin();
787  for (size_type kl = 0; kl < NN; ++kl)
788  for (size_type r = 0; r < target_dim; ++r)
789  for (size_type j = 0; j < ndof; ++j, ++it)
790  t[r + q*target_dim + kl*qdim] += coeff[j*Qmult+q] * (*it);
791  }
792  }
793  }
794  return 0;
795  }
796 
797  ga_instruction_hess(base_tensor &tt, const base_tensor &Z_,
798  const base_vector &co, size_type q)
799  : ga_instruction_val(tt, Z_, co, q)
800  {}
801  };
802 
803  struct ga_instruction_diverg : public ga_instruction_val {
804  // Z(ndof,target_dim,N), coeff(Qmult,ndof) --> t(1)
805  virtual int exec() {
806  GA_DEBUG_INFO("Instruction: divergence");
807  size_type ndof = Z.sizes()[0];
808  if (!ndof) { gmm::clear(t.as_vector()); return 0; }
809  size_type target_dim = Z.sizes()[1];
810  size_type N = Z.sizes()[2];
811  size_type Qmult = qdim / target_dim;
812  GA_DEBUG_ASSERT(Qmult*target_dim == N && (Qmult == 1 || target_dim == 1),
813  "Dimensions mismatch for divergence operator");
814  GA_DEBUG_ASSERT(gmm::vect_size(coeff) == ndof*Qmult,
815  "Wrong size for coeff vector");
816 
817  t[0] = scalar_type(0);
818  base_tensor::const_iterator it = Z.begin();
819  if (Qmult == 1)
820  for (size_type k = 0; k < N; ++k) {
821  if (k) it += (N*ndof + 1);
822  for (size_type j = 0; j < ndof; ++j) {
823  if (j) ++it;
824  t[0] += coeff[j] * (*it);
825  }
826  }
827  else // if (target_dim == 1)
828  for (size_type k = 0; k < N; ++k) {
829  if (k) ++it;
830  for (size_type j = 0; j < ndof; ++j) {
831  if (j) ++it;
832  t[0] += coeff[j*N+k] * (*it);
833  }
834  }
835  return 0;
836  }
837 
838  ga_instruction_diverg(base_tensor &tt, const base_tensor &Z_,
839  const base_vector &co, size_type q)
840  : ga_instruction_val(tt, Z_, co, q)
841  {}
842  };
843 
844  struct ga_instruction_copy_val_base : public ga_instruction {
845  base_tensor &t;
846  const base_tensor &Z;
847  size_type qdim;
848  // Z(ndof,target_dim) --> t(Qmult*ndof,Qmult*target_dim)
849  virtual int exec() {
850  GA_DEBUG_INFO("Instruction: value of test functions");
851  if (qdim == 1) {
852  GA_DEBUG_ASSERT(t.size() == Z.size(), "Wrong size for base vector");
853  std::copy(Z.begin(), Z.end(), t.begin());
854  } else {
855  size_type target_dim = Z.sizes()[1];
856  size_type Qmult = qdim / target_dim;
857  if (Qmult == 1) {
858  std::copy(Z.begin(), Z.end(), t.begin());
859  } else {
860  if (target_dim == 1) {
861  size_type ndof = Z.sizes()[0];
862  GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
863  "Wrong size for base vector");
864  std::fill(t.begin(), t.end(), scalar_type(0));
865  auto itZ = Z.begin();
866  size_type s = t.sizes()[0], sss = s+1;
867 
868  // Performs t(i*Qmult+j, k*Qmult + j) = Z(i,k);
869  auto it = t.begin();
870  for (size_type i = 0; i < ndof; ++i, ++itZ) {
871  if (i) it += Qmult;
872  auto it2 = it;
873  *it2 = *itZ;
874  for (size_type j = 1; j < Qmult; ++j) { it2 += sss; *it2 = *itZ; }
875  }
876  } else {
877  size_type ndof = Z.sizes()[0];
878  GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
879  "Wrong size for base vector");
880  std::fill(t.begin(), t.end(), scalar_type(0));
881  auto itZ = Z.begin();
882  size_type s = t.sizes()[0], ss = s * Qmult, sss = s+1;
883 
884  // Performs t(i*Qmult+j, k*Qmult + j) = Z(i,k);
885  for (size_type k = 0; k < target_dim; ++k) {
886  auto it = t.begin() + (ss * k);
887  for (size_type i = 0; i < ndof; ++i, ++itZ) {
888  if (i) it += Qmult;
889  auto it2 = it;
890  *it2 = *itZ;
891  for (size_type j = 1; j < Qmult; ++j)
892  { it2 += sss; *it2 = *itZ; }
893  }
894  }
895  }
896  }
897  }
898  return 0;
899  }
900 
901  ga_instruction_copy_val_base(base_tensor &tt, const base_tensor &Z_,
902  size_type q) : t(tt), Z(Z_), qdim(q) {}
903  };
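  // ga_instruction_copy_val_base builds the vectorized test function tensor:
  // each scalar shape value Z(i,k) is replicated on the diagonal blocks,
  // t(i*Qmult+j, k*Qmult+j) = Z(i,k) for j = 0..Qmult-1, every other entry
  // staying zero. For the hypothetical sizes ndof == 2, target_dim == 1,
  // Qmult == 2 the resulting 4x2 tensor is
  //   [ Z(0)    0  ]
  //   [   0   Z(0) ]
  //   [ Z(1)    0  ]
  //   [   0   Z(1) ]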
904 
905  struct ga_instruction_copy_grad_base : public ga_instruction_copy_val_base {
906  // Z(ndof,target_dim,N) --> t(Qmult*ndof,Qmult*target_dim,N)
907  virtual int exec() {
908  GA_DEBUG_INFO("Instruction: gradient of test functions");
909  if (qdim == 1) {
910  std::copy(Z.begin(), Z.end(), t.begin());
911  } else {
912  size_type target_dim = Z.sizes()[1];
913  size_type Qmult = qdim / target_dim;
914  if (Qmult == 1) {
915  std::copy(Z.begin(), Z.end(), t.begin());
916  } else {
917  if (target_dim == 1) {
918  size_type ndof = Z.sizes()[0];
919  size_type N = Z.sizes()[2];
920  GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
921  "Wrong size for gradient vector");
922  std::fill(t.begin(), t.end(), scalar_type(0));
923  base_tensor::const_iterator itZ = Z.begin();
924  size_type s = t.sizes()[0], sss = s+1, ssss = s*target_dim*Qmult;
925 
926  // Performs t(i*Qmult+j, k*Qmult + j, l) = Z(i,k,l);
927  for (size_type l = 0; l < N; ++l) {
928  base_tensor::iterator it = t.begin() + (ssss*l);
929  for (size_type i = 0; i < ndof; ++i, ++itZ) {
930  if (i) it += Qmult;
931  base_tensor::iterator it2 = it;
932  *it2 = *itZ;
933  for (size_type j = 1; j < Qmult; ++j) { it2+=sss; *it2=*itZ; }
934  }
935  }
936  } else {
937  size_type ndof = Z.sizes()[0];
938  size_type N = Z.sizes()[2];
939  GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
940  "Wrong size for gradient vector");
941  std::fill(t.begin(), t.end(), scalar_type(0));
942  base_tensor::const_iterator itZ = Z.begin();
943  size_type s = t.sizes()[0], ss = s * Qmult, sss = s+1;
944  size_type ssss = ss*target_dim;
945 
946  // Performs t(i*Qmult+j, k*Qmult + j, l) = Z(i,k,l);
947  for (size_type l = 0; l < N; ++l)
948  for (size_type k = 0; k < target_dim; ++k) {
949  base_tensor::iterator it = t.begin() + (ss * k + ssss*l);
950  for (size_type i = 0; i < ndof; ++i, ++itZ) {
951  if (i) it += Qmult;
952  base_tensor::iterator it2 = it;
953  *it2 = *itZ;
954  for (size_type j = 1; j < Qmult; ++j) { it2+=sss; *it2=*itZ; }
955  }
956  }
957  }
958  }
959  }
960  return 0;
961  }
962 
963  ga_instruction_copy_grad_base(base_tensor &tt, const base_tensor &Z_,
964  size_type q)
965  : ga_instruction_copy_val_base(tt,Z_,q) {}
966  };
967 
968  struct ga_instruction_copy_vect_val_base : public ga_instruction {
969  base_tensor &t;
970  const base_tensor &Z;
971  size_type qdim;
972  // Z(ndof) --> t(qdim*ndof,qdim*target_dim)
973  virtual int exec() {
974  GA_DEBUG_INFO("Instruction: vectorized value of test functions");
975 
976  size_type ndof = Z.sizes()[0];
977  GA_DEBUG_ASSERT(t.size() == Z.size() * qdim * qdim,
978  "Wrong size for base vector");
979  // std::fill(t.begin(), t.end(), scalar_type(0)); // Factorized
980  auto itZ = Z.begin();
981  size_type s = t.sizes()[0], sss = s+1;
982 
983  // Performs t(i*qdim+j, k*qdim + j) = Z(i,k);
984  auto it = t.begin();
985  for (size_type i = 0; i < ndof; ++i, ++itZ) {
986  if (i) it += qdim;
987  auto it2 = it;
988  *it2 = *itZ;
989  for (size_type j = 1; j < qdim; ++j) { it2 += sss; *it2 = *itZ; }
990  }
991  return 0;
992  }
993 
994  ga_instruction_copy_vect_val_base(base_tensor &tt, const base_tensor &Z_,
995  size_type q) : t(tt), Z(Z_), qdim(q) {}
996  };
997 
998  struct ga_instruction_copy_vect_grad_base
999  : public ga_instruction_copy_vect_val_base {
1000  // Z(ndof,N) --> t(qdim*ndof,qdim,N)
1001  virtual int exec() {
1002  GA_DEBUG_INFO("Instruction: vectorized gradient of test functions");
1003  size_type ndof = Z.sizes()[0];
1004  size_type N = Z.sizes()[2];
1005  GA_DEBUG_ASSERT(t.size() == Z.size() * qdim * qdim,
1006  "Wrong size for gradient vector");
1007  // std::fill(t.begin(), t.end(), scalar_type(0)); // Factorized
1008  base_tensor::const_iterator itZ = Z.begin();
1009  size_type s = t.sizes()[0], sss = s+1, ssss = s*qdim;
1010 
1011  // Performs t(i*qdim+j, k*qdim + j, l) = Z(i,k,l);
1012  for (size_type l = 0; l < N; ++l) {
1013  base_tensor::iterator it = t.begin() + (ssss*l);
1014  for (size_type i = 0; i < ndof; ++i, ++itZ) {
1015  if (i) it += qdim;
1016  base_tensor::iterator it2 = it;
1017  *it2 = *itZ;
1018  for (size_type j = 1; j < qdim; ++j) { it2+=sss; *it2=*itZ; }
1019  }
1020  }
1021  return 0;
1022  }
1023 
1024  ga_instruction_copy_vect_grad_base(base_tensor &tt, const base_tensor &Z_,
1025  size_type q)
1026  : ga_instruction_copy_vect_val_base(tt,Z_,q) {}
1027  };
1028 
1029  struct ga_instruction_copy_hess_base : public ga_instruction_copy_val_base {
1030  // Z(ndof,target_dim,N*N) --> t(Qmult*ndof,Qmult*target_dim,N,N)
1031  virtual int exec() {
1032  GA_DEBUG_INFO("Instruction: Hessian of test functions");
1033  size_type target_dim = Z.sizes()[1];
1034  size_type Qmult = qdim / target_dim;
1035  if (Qmult == 1) {
1036  gmm::copy(Z.as_vector(), t.as_vector());
1037  } else {
1038  size_type ndof = Z.sizes()[0];
1039  GA_DEBUG_ASSERT(t.size() == Z.size() * Qmult * Qmult,
1040  "Wrong size for Hessian vector");
1041  gmm::clear(t.as_vector());
1042  base_tensor::const_iterator itZ = Z.begin();
1043  size_type s = t.sizes()[0], ss = s * Qmult, sss = s+1;
1044 
1045  // Performs t(i*Qmult+j, k*Qmult + j, l, m) = Z(i,k,l*N+m)
1046  size_type NNdim = Z.sizes()[2]*target_dim;
1047  for (size_type klm = 0; klm < NNdim; ++klm) {
1048  base_tensor::iterator it = t.begin() + (ss * klm);
1049  for (size_type i = 0; i < ndof; ++i, ++itZ) {
1050  if (i) it += Qmult;
1051  base_tensor::iterator it2 = it;
1052  *it2 = *itZ;
1053  for (size_type j = 1; j < Qmult; ++j) { it2 += sss; *it2 = *itZ; }
1054  }
1055  }
1056  }
1057  return 0;
1058  }
1059 
1060  ga_instruction_copy_hess_base(base_tensor &tt, const base_tensor &Z_,
1061  size_type q)
1062  : ga_instruction_copy_val_base(tt, Z_, q) {}
1063  };
1064 
1065  struct ga_instruction_copy_diverg_base : public ga_instruction_copy_val_base {
1066  // Z(ndof,target_dim,N) --> t(Qmult*ndof)
1067  virtual int exec() {
1068  GA_DEBUG_INFO("Instruction: divergence of test functions");
1069  size_type ndof = Z.sizes()[0];
1070  size_type target_dim = Z.sizes()[1];
1071  size_type N = Z.sizes()[2];
1072  size_type Qmult = qdim / target_dim;
1073  GA_DEBUG_ASSERT(Qmult*target_dim == N && (Qmult == 1 || target_dim == 1),
1074  "Dimensions mismatch for divergence operator");
1075  GA_DEBUG_ASSERT(t.size() == ndof * Qmult,
1076  "Wrong size for divergence vector");
1077  gmm::clear(t.as_vector());
1078  base_tensor::const_iterator itZ = Z.begin();
1079  if (Qmult == 1) { // target_dim == N
1080  // Performs t(i) = Trace(Z(i,:,:))
1081  for (size_type l = 0; l < N; ++l) {
1082  base_tensor::iterator it = t.begin();
1083  if (l) itZ += target_dim*ndof+1;
1084  for (size_type i = 0; i < ndof; ++i) {
1085  if (i) { ++it; ++itZ; }
1086  *it += *itZ;
1087  }
1088  }
1089  } else { // Qmult == N
1090  // Performs t(i*Qmult+j) = Z(i,1,j)
1091  for (size_type j = 0; j < N; ++j) {
1092  base_tensor::iterator it = t.begin() + j;
1093  if (j) ++itZ;
1094  for (size_type i = 0; i < ndof; ++i) {
1095  if (i) { it += Qmult; ++itZ; }
1096  *it += *itZ;
1097  }
1098  }
1099  }
1100  return 0;
1101  }
1102 
1103  ga_instruction_copy_diverg_base(base_tensor &tt, const base_tensor &Z_,
1104  size_type q)
1105  : ga_instruction_copy_val_base(tt, Z_, q) {}
1106  };
1107 
1108  struct ga_instruction_elementary_trans {
1109  const base_vector &coeff_in;
1110  base_vector coeff_out;
1111  pelementary_transformation elemtrans;
1112  const mesh_fem &mf1, &mf2;
1113  const fem_interpolation_context &ctx;
1114  base_matrix &M;
1115  size_type &icv;
1116 
1117  void do_transformation(size_type n, size_type m) {
1118  if (icv != ctx.convex_num() || M.size() == 0) {
1119  M.base_resize(m, n);
1120  icv = ctx.convex_num();
1121  elemtrans->give_transformation(mf1, mf2, icv, M);
1122  }
1123  coeff_out.resize(gmm::mat_nrows(M));
1124  gmm::mult(M, coeff_in, coeff_out); // remember: coeff == coeff_out
1125  }
1126 
1127  ga_instruction_elementary_trans
1128  (const base_vector &co, pelementary_transformation e,
1129  const mesh_fem &mf1_, const mesh_fem &mf2_,
1130  const fem_interpolation_context &ctx_, base_matrix &M_,
1131  size_type &icv_)
1132  : coeff_in(co), elemtrans(e), mf1(mf1_), mf2(mf2_), ctx(ctx_),
1133  M(M_), icv(icv_) {}
1134  ~ga_instruction_elementary_trans() {}
1135  };
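  // do_transformation() above maps the local coefficients through the
  // element-dependent matrix M supplied by the elementary transformation,
  // coeff_out = M * coeff_in, recomputing M (cached through icv) only when
  // the current convex changes. The derived val/grad/hess/diverg
  // instructions below evaluate the field from coeff_out instead of
  // coeff_in.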
1136 
1137  struct ga_instruction_elementary_trans_val
1138  : public ga_instruction_val, ga_instruction_elementary_trans {
1139  // Z(ndof,target_dim), coeff_in(Qmult,ndof) --> t(target_dim*Qmult)
1140  virtual int exec() {
1141  GA_DEBUG_INFO("Instruction: variable value with elementary "
1142  "transformation");
1143  size_type ndof = Z.sizes()[0];
1144  size_type Qmult = qdim / Z.sizes()[1];
1145  do_transformation(coeff_in.size(), ndof*Qmult);
1146  return ga_instruction_val::exec();
1147  }
1148 
1149  ga_instruction_elementary_trans_val
1150  (base_tensor &tt, const base_tensor &Z_, const base_vector &co, size_type q,
1151  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1152  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1153  : ga_instruction_val(tt, Z_, coeff_out, q),
1154  ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
1155  };
1156 
1157  struct ga_instruction_elementary_trans_grad
1158  : public ga_instruction_grad, ga_instruction_elementary_trans {
1159  // Z(ndof,target_dim,N), coeff_in(Qmult,ndof) --> t(target_dim*Qmult,N)
1160  virtual int exec() {
1161  GA_DEBUG_INFO("Instruction: gradient with elementary transformation");
1162  size_type ndof = Z.sizes()[0];
1163  size_type Qmult = qdim / Z.sizes()[1];
1164  do_transformation(coeff_in.size(), ndof*Qmult);
1165  return ga_instruction_grad::exec();
1166  }
1167 
1168  ga_instruction_elementary_trans_grad
1169  (base_tensor &tt, const base_tensor &Z_, const base_vector &co, size_type q,
1170  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1171  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1172  : ga_instruction_grad(tt, Z_, coeff_out, q),
1173  ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
1174  };
1175 
1176  struct ga_instruction_elementary_trans_hess
1177  : public ga_instruction_hess, ga_instruction_elementary_trans {
1178  // Z(ndof,target_dim,N,N), coeff_in(Qmult,ndof) --> t(target_dim*Qmult,N,N)
1179  virtual int exec() {
1180  GA_DEBUG_INFO("Instruction: Hessian with elementary transformation");
1181  size_type ndof = Z.sizes()[0];
1182  size_type Qmult = qdim / Z.sizes()[1];
1183  do_transformation(coeff_in.size(), ndof*Qmult);
1184  return ga_instruction_hess::exec();
1185  }
1186 
1187  ga_instruction_elementary_trans_hess
1188  (base_tensor &tt, const base_tensor &Z_, const base_vector &co, size_type q,
1189  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1190  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1191  : ga_instruction_hess(tt, Z_, coeff_out, q),
1192  ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
1193  };
1194 
1195  struct ga_instruction_elementary_trans_diverg
1196  : public ga_instruction_diverg, ga_instruction_elementary_trans {
1197  // Z(ndof,target_dim,N), coeff_in(Qmult,ndof) --> t(1)
1198  virtual int exec() {
1199  GA_DEBUG_INFO("Instruction: divergence with elementary transformation");
1200  size_type ndof = Z.sizes()[0];
1201  size_type Qmult = qdim / Z.sizes()[1];
1202  do_transformation(coeff_in.size(), ndof*Qmult);
1203  return ga_instruction_diverg::exec();
1204  }
1205 
1206  ga_instruction_elementary_trans_diverg
1207  (base_tensor &tt, const base_tensor &Z_, const base_vector &co, size_type q,
1208  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1209  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1210  : ga_instruction_diverg(tt, Z_, coeff_out, q),
1211  ga_instruction_elementary_trans(co, e, mf1_, mf2_, ctx_, M_, icv_) {}
1212  };
1213 
1214  struct ga_instruction_update_group_info : public ga_instruction {
1215  const ga_workspace &workspace;
1216  const ga_instruction_set &gis;
1217  const ga_instruction_set::interpolate_info &inin;
1218  const std::string gname;
1219  ga_instruction_set::variable_group_info &vgi;
1220 
1221  virtual int exec() {
1222  GA_DEBUG_INFO("Instruction: Update group info for "+gname);
1223  if (vgi.cached_mesh && vgi.cached_mesh == inin.m)
1224  return 0;
1225 
1226  vgi.cached_mesh = inin.m;
1227  const std::string &varname
1228  = inin.m ? workspace.variable_in_group(gname, *(inin.m))
1229  : workspace.first_variable_of_group(gname);
1230  vgi.varname = &varname;
1231  vgi.mf = workspace.associated_mf(varname);
1232  GA_DEBUG_ASSERT(vgi.mf, "Group variable should always have a mesh_fem");
1233  vgi.reduced_mf = vgi.mf->is_reduced();
1234  if (vgi.reduced_mf) {
1235  const auto it = gis.really_extended_vars.find(varname);
1236  GA_DEBUG_ASSERT(it != gis.really_extended_vars.end(),
1237  "Variable " << varname << " not in extended variables");
1238  vgi.U = &(it->second);
1239  vgi.I = &(workspace.temporary_interval_of_variable(varname));
1240  } else {
1241  vgi.U = &(workspace.value(varname));
1242  vgi.I = &(workspace.interval_of_variable(varname));
1243  }
1244  vgi.alpha = workspace.factor_of_variable(varname);
1245  return 0;
1246  }
1247 
1248  ga_instruction_update_group_info
1249  (const ga_workspace &workspace_, const ga_instruction_set &gis_,
1250  const ga_instruction_set::interpolate_info &inin_,
1251  const std::string &gname_, ga_instruction_set::variable_group_info &vgi_)
1252  : workspace(workspace_), gis(gis_), inin(inin_), gname(gname_), vgi(vgi_)
1253  {}
1254  };
1255 
1256  struct ga_instruction_interpolate_filter : public ga_instruction {
1257  base_tensor &t;
1258  const ga_instruction_set::interpolate_info &inin;
1259  const size_type pt_type;
1260  const int nb;
1261 
1262  virtual int exec() {
1263  GA_DEBUG_INFO("Instruction: interpolated filter");
1264  if ((pt_type == size_type(-1) && inin.pt_type) ||
1265  (pt_type != size_type(-1) && inin.pt_type == pt_type)) {
1266  GA_DEBUG_INFO("Instruction: interpolated filter: pass");
1267  return 0;
1268  }
1269  else {
1270  GA_DEBUG_INFO("Instruction: interpolated filter: filtered");
1271  gmm::clear(t.as_vector());
1272  return nb;
1273  }
1274  return 0;
1275  }
1276 
1277  ga_instruction_interpolate_filter
1278  (base_tensor &t_, const ga_instruction_set::interpolate_info &inin_,
1279  size_type ind_, int nb_)
1280  : t(t_), inin(inin_), pt_type(ind_), nb(nb_) {}
1281  };
1282 
1283  struct ga_instruction_copy_interpolated_small_vect : public ga_instruction {
1284  base_tensor &t;
1285  const base_small_vector &vec;
1286  const ga_instruction_set::interpolate_info &inin;
1287 
1288  virtual int exec() {
1289  GA_DEBUG_INFO("Instruction: copy interpolated small vector");
1290  GMM_ASSERT1(!(inin.has_ctx) || inin.ctx.is_convex_num_valid(),
1291  "Invalid element, probably transformation failed");
1292  GMM_ASSERT1(t.size() == vec.size(), "Invalid vector size.");
1293  gmm::copy(vec, t.as_vector());
1294  return 0;
1295  }
1296  ga_instruction_copy_interpolated_small_vect
1297  (base_tensor &t_, const base_small_vector &vec_,
1298  const ga_instruction_set::interpolate_info &inin_)
1299  : t(t_), vec(vec_), inin(inin_) {}
1300  };
1301 
1302  struct ga_instruction_interpolate : public ga_instruction {
1303  base_tensor &t;
1304  const mesh **m;
1305  const mesh_fem *mfn, **mfg;
1306  const base_vector *Un, **Ug;
1307  fem_interpolation_context &ctx;
1308  base_vector coeff;
1309  size_type qdim;
1310  const size_type &ipt;
1311  fem_precomp_pool &fp_pool;
1312  ga_instruction_set::interpolate_info &inin;
1313 
1314  virtual int exec() {
1315  GMM_ASSERT1(ctx.is_convex_num_valid(), "No valid element for the "
1316  "transformation. Probably transformation failed");
1317  const mesh_fem &mf = *(mfg ? *mfg : mfn);
1318  const base_vector &U = *(Ug ? *Ug : Un);
1319  GMM_ASSERT1(&(mf.linked_mesh()) == *m, "Interpolation of a variable "
1320  "on another mesh than the one it is defined on");
1321  slice_vector_on_basic_dof_of_element(mf, U, ctx.convex_num(), coeff);
1322  pfem pf = mf.fem_of_element(ctx.convex_num());
1323  GMM_ASSERT1(pf, "Undefined finite element method");
1324  if (ctx.have_pgp()) {
1325  if (ipt == 0)
1326  inin.pfps[&mf] = fp_pool(pf, ctx.pgp()->get_ppoint_tab());
1327  ctx.set_pfp(inin.pfps[&mf]);
1328  } else {
1329  ctx.set_pf(pf);
1330  }
1331  return 0;
1332  }
1333 
1334  ga_instruction_interpolate
1335  (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
1336  const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
1337  fem_interpolation_context &ctx_, size_type q, const size_type &ipt_,
1338  fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
1339  : t(tt), m(m_), mfn(mfn_), mfg(mfg_), Un(Un_), Ug(Ug_),
1340  ctx(ctx_), qdim(q), ipt(ipt_), fp_pool(fp_pool_), inin(inin_) {}
1341  };
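  // Note on the caching above: when a geometric precomputation is available
  // (ctx.have_pgp()), the fem_precomp for the interpolated mesh_fem is
  // fetched from fp_pool and stored in inin.pfps only on the first
  // integration point of the element (ipt == 0); the remaining points of the
  // same element reuse it through ctx.set_pfp(). Otherwise the finite
  // element is set directly with ctx.set_pf(pf).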
1342 
1343  struct ga_instruction_interpolate_val : public ga_instruction_interpolate {
1344  // --> t(target_dim*Qmult)
1345  virtual int exec() {
1346  GA_DEBUG_INFO("Instruction: interpolated variable value");
1347  ga_instruction_interpolate::exec();
1348  ctx.pf()->interpolation(ctx, coeff, t.as_vector(), dim_type(qdim));
1349  // cout << "interpolate " << &U << " result : " << t.as_vector() << endl;
1350  return 0;
1351  }
1352 
1353  ga_instruction_interpolate_val
1354  (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
1355  const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
1356  fem_interpolation_context &ctx_, size_type q, size_type &ipt_,
1357  fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
1358  : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_,ctx_, q, ipt_,
1359  fp_pool_, inin_)
1360  {}
1361  };
1362 
1363  struct ga_instruction_interpolate_grad : public ga_instruction_interpolate {
1364  // --> t(target_dim*Qmult,N)
1365  virtual int exec() {
1366  GA_DEBUG_INFO("Instruction: interpolated variable grad");
1367  ga_instruction_interpolate::exec();
1368  base_matrix v(qdim, ctx.N());
1369  ctx.pf()->interpolation_grad(ctx, coeff, v, dim_type(qdim));
1370  gmm::copy(v.as_vector(), t.as_vector());
1371  return 0;
1372  }
1373 
1374  ga_instruction_interpolate_grad
1375  (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
1376  const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
1377  fem_interpolation_context &ctx_, size_type q, size_type &ipt_,
1378  fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
1379  : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
1380  fp_pool_, inin_)
1381  {}
1382  };
1383 
1384  struct ga_instruction_interpolate_hess : public ga_instruction_interpolate {
1385  // --> t(target_dim*Qmult,N,N)
1386  virtual int exec() {
1387  GA_DEBUG_INFO("Instruction: interpolated variable hessian");
1388  ga_instruction_interpolate::exec();
1389  base_matrix v(qdim, ctx.N()*ctx.N()); // To be optimized
1390  ctx.pf()->interpolation_hess(ctx, coeff, v, dim_type(qdim));
1391  gmm::copy(v.as_vector(), t.as_vector());
1392  return 0;
1393  }
1394 
1395  ga_instruction_interpolate_hess
1396  (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
1397  const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
1398  fem_interpolation_context &ctx_, size_type q, size_type &ipt_,
1399  fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
1400  : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
1401  fp_pool_, inin_)
1402  {}
1403  };
1404 
1405  struct ga_instruction_interpolate_diverg : public ga_instruction_interpolate {
1406  // --> t(1)
1407  virtual int exec() {
1408  GA_DEBUG_INFO("Instruction: interpolated variable divergence");
1409  ga_instruction_interpolate::exec();
1410  ctx.pf()->interpolation_diverg(ctx, coeff, t[0]);
1411  return 0;
1412  }
1413 
1414  ga_instruction_interpolate_diverg
1415  (base_tensor &tt, const mesh **m_, const mesh_fem *mfn_,
1416  const mesh_fem **mfg_, const base_vector *Un_, const base_vector **Ug_,
1417  fem_interpolation_context &ctx_, size_type q, size_type &ipt_,
1418  fem_precomp_pool &fp_pool_, ga_instruction_set::interpolate_info &inin_)
1419  : ga_instruction_interpolate(tt, m_, mfn_, mfg_, Un_, Ug_, ctx_, q, ipt_,
1420  fp_pool_, inin_)
1421  {}
1422  };
1423 
1424  struct ga_instruction_interpolate_base {
1425  base_tensor ZZ;
1426  const mesh **m;
1427  const mesh_fem *mfn, **mfg;
1428  const size_type &ipt;
1429  ga_instruction_set::interpolate_info &inin;
1430  fem_precomp_pool &fp_pool;
1431 
1432  virtual int exec() {
1433  GMM_ASSERT1(inin.ctx.is_convex_num_valid(), "No valid element for "
1434  "the transformation. Probably transformation failed");
1435  const mesh_fem &mf = *(mfg ? *mfg : mfn);
1436  GMM_ASSERT1(&(mf.linked_mesh()) == *m, "Interpolation of a variable "
1437  "on another mesh than the one it is defined on");
1438 
1439  pfem pf = mf.fem_of_element(inin.ctx.convex_num());
1440  GMM_ASSERT1(pf, "Undefined finite element method");
1441 
1442  if (inin.ctx.have_pgp()) {
1443  if (ipt == 0)
1444  inin.pfps[&mf] = fp_pool(pf, inin.ctx.pgp()->get_ppoint_tab());
1445  inin.ctx.set_pfp(inin.pfps[&mf]);
1446  } else {
1447  inin.ctx.set_pf(pf);
1448  }
1449  return 0;
1450  }
1451 
1452  ga_instruction_interpolate_base
1453  (const mesh **m_, const mesh_fem *mfn_, const mesh_fem **mfg_,
1454  const size_type &ipt_, ga_instruction_set::interpolate_info &inin_,
1455  fem_precomp_pool &fp_pool_)
1456  : m(m_), mfn(mfn_), mfg(mfg_), ipt(ipt_), inin(inin_),
1457  fp_pool(fp_pool_) {}
1458  };
1459 
1460  struct ga_instruction_interpolate_val_base
1461  : public ga_instruction_copy_val_base, ga_instruction_interpolate_base {
1462  // ctx --> Z(ndof,target_dim) --> t(Qmult*ndof,Qmult*target_dim)
1463  virtual int exec() {
1464  GA_DEBUG_INFO("Instruction: interpolated base value");
1465  ga_instruction_interpolate_base::exec();
1466  inin.ctx.pf()->real_base_value(inin.ctx, ZZ); // remember Z == ZZ
1467  return ga_instruction_copy_val_base::exec();
1468  }
1469 
1470  ga_instruction_interpolate_val_base
1471  (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
1472  const mesh_fem **mfg_, const size_type &ipt_, size_type q,
1473  ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
1474  : ga_instruction_copy_val_base(t_, ZZ, q),
1475  ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
1476  inin_, fp_pool_) {}
1477  };
1478 
1479  struct ga_instruction_interpolate_grad_base
1480  : public ga_instruction_copy_grad_base, ga_instruction_interpolate_base {
1481  // ctx --> Z(ndof,target_dim,N) --> t(Qmult*ndof,Qmult*target_dim,N)
1482  virtual int exec() {
1483  GA_DEBUG_INFO("Instruction: interpolated base grad");
1484  ga_instruction_interpolate_base::exec();
1485  inin.ctx.pf()->real_grad_base_value(inin.ctx, ZZ); // remember Z == ZZ
1486  return ga_instruction_copy_grad_base::exec();
1487  }
1488 
1489  ga_instruction_interpolate_grad_base
1490  (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
1491  const mesh_fem **mfg_, const size_type &ipt_, size_type q,
1492  ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
1493  : ga_instruction_copy_grad_base(t_, ZZ, q),
1494  ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
1495  inin_, fp_pool_) {}
1496  };
1497 
1498  struct ga_instruction_interpolate_hess_base
1499  : public ga_instruction_copy_hess_base, ga_instruction_interpolate_base {
1500  // ctx --> Z(ndof,target_dim,N*N) --> t(Qmult*ndof,Qmult*target_dim,N,N)
1501  virtual int exec() {
1502  GA_DEBUG_INFO("Instruction: interpolated base hessian");
1503  ga_instruction_interpolate_base::exec();
1504  inin.ctx.pf()->real_hess_base_value(inin.ctx, ZZ); // remember Z == ZZ
1505  return ga_instruction_copy_hess_base::exec();
1506  }
1507 
1508  ga_instruction_interpolate_hess_base
1509  (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
1510  const mesh_fem **mfg_, const size_type &ipt_, size_type q,
1511  ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
1512  : ga_instruction_copy_hess_base(t_, ZZ, q),
1513  ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
1514  inin_, fp_pool_) {}
1515  };
1516 
1517  struct ga_instruction_interpolate_diverg_base
1518  : public ga_instruction_copy_diverg_base, ga_instruction_interpolate_base {
1519  // ctx --> Z(ndof,target_dim,N*N) --> t(Qmult*ndof)
1520  virtual int exec() {
1521  GA_DEBUG_INFO("Instruction: interpolated base divergence");
1522  ga_instruction_interpolate_base::exec();
1523  inin.ctx.pf()->real_grad_base_value(inin.ctx, ZZ); // remember Z == ZZ
1524  return ga_instruction_copy_diverg_base::exec();
1525  }
1526 
1527  ga_instruction_interpolate_diverg_base
1528  (base_tensor &t_, const mesh **m_, const mesh_fem *mfn_,
1529  const mesh_fem **mfg_, const size_type &ipt_, size_type q,
1530  ga_instruction_set::interpolate_info &inin_, fem_precomp_pool &fp_pool_)
1531  : ga_instruction_copy_diverg_base(t_, ZZ, q),
1532  ga_instruction_interpolate_base(m_, mfn_, mfg_, ipt_,
1533  inin_, fp_pool_) {}
1534  };
1535 
1536 
1537  struct ga_instruction_elementary_trans_base {
1538  base_tensor t_in;
1539  base_tensor &t_out;
1540  pelementary_transformation elemtrans;
1541  const mesh_fem &mf1, &mf2;
1542  const fem_interpolation_context &ctx;
1543  base_matrix &M;
1544  size_type &icv;
1545 
1546  void do_transformation(size_type n, size_type m) {
1547  if (icv != ctx.convex_num() || M.size() == 0) {
1548  M.base_resize(m, n);
1549  icv = ctx.convex_num();
1550  elemtrans->give_transformation(mf1, mf2, icv, M);
1551  }
1552  t_out.mat_reduction(t_in, M, 0);
1553  }
1554 
1555  ga_instruction_elementary_trans_base
1556  (base_tensor &t_, pelementary_transformation e, const mesh_fem &mf1_,
1557  const mesh_fem &mf2_,
1558  const fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1559  : t_out(t_), elemtrans(e), mf1(mf1_), mf2(mf2_), ctx(ctx_),
1560  M(M_), icv(icv_) {}
1561  };
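  // Illustrative sketch (editor's addition; the helper below is hypothetical
  // and not part of the assembly): do_transformation() above rebuilds M only
  // when the current convex changes and then reduces the first index of t_in
  // against M. For a two-index tensor this amounts to mapping every column
  // through M; the exact row/column convention is that of
  // base_tensor::mat_reduction, the loop below assumes
  // t_out(i,k) = sum_j M(j,i) * t_in(j,k).
  inline void sketch_reduce_first_index(const base_tensor &t_in,
                                        const base_matrix &M,
                                        base_tensor &t_out) {
    size_type nj = gmm::mat_nrows(M), ni = gmm::mat_ncols(M);
    size_type nk = t_in.size() / nj;   // product of the remaining indices
    GMM_ASSERT1(t_out.size() == ni*nk, "Wrong sizes");
    for (size_type k = 0; k < nk; ++k)
      for (size_type i = 0; i < ni; ++i) {
        scalar_type s(0);
        for (size_type j = 0; j < nj; ++j) s += M(j, i) * t_in[j + nj*k];
        t_out[i + ni*k] = s;
      }
  }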
1562 
1563  struct ga_instruction_elementary_trans_val_base
1564  : public ga_instruction_copy_val_base,
1565  ga_instruction_elementary_trans_base {
1566  // Z(ndof,target_dim) --> t_in --> t_out(Qmult*ndof,Qmult*target_dim)
1567  virtual int exec() {
1568  GA_DEBUG_INFO("Instruction: value of test functions with elementary "
1569  "transformation");
1570  size_type ndof = Z.sizes()[0];
1571  size_type Qmult = qdim / Z.sizes()[1];
1572  t_in.adjust_sizes(Qmult*ndof, Qmult*Z.sizes()[1]);
1573  ga_instruction_copy_val_base::exec();
1574  do_transformation(t_out.sizes()[0], ndof*Qmult);
1575  return 0;
1576  }
1577 
1578  ga_instruction_elementary_trans_val_base
1579  (base_tensor &t_, const base_tensor &Z_, size_type q,
1580  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1581  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1582  : ga_instruction_copy_val_base(t_in, Z_, q),
1583  ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
1584  M_, icv_) {}
1585  };
1586 
1587  struct ga_instruction_elementary_trans_grad_base
1588  : public ga_instruction_copy_grad_base,
1589  ga_instruction_elementary_trans_base {
1590  // Z(ndof,target_dim,N) --> t_in --> t_out(Qmult*ndof,Qmult*target_dim,N)
1591  virtual int exec() {
1592  GA_DEBUG_INFO("Instruction: gradient of test functions with elementary "
1593  "transformation");
1594  size_type ndof = Z.sizes()[0];
1595  size_type Qmult = qdim / Z.sizes()[1];
1596  t_in.adjust_sizes(Qmult*ndof, Qmult*Z.sizes()[1], Z.sizes()[2]);
1597  ga_instruction_copy_grad_base::exec();
1598  do_transformation(t_out.sizes()[0], ndof*Qmult);
1599  return 0;
1600  }
1601 
1602  ga_instruction_elementary_trans_grad_base
1603  (base_tensor &t_, const base_tensor &Z_, size_type q,
1604  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1605  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1606  : ga_instruction_copy_grad_base(t_in, Z_, q),
1607  ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
1608  M_, icv_) {}
1609  };
1610 
1611  struct ga_instruction_elementary_trans_hess_base
1612  : public ga_instruction_copy_hess_base,
1613  ga_instruction_elementary_trans_base {
1614  // Z(ndof,target_dim,N*N) --> t_out(Qmult*ndof,Qmult*target_dim,N,N)
1615  virtual int exec() {
1616  GA_DEBUG_INFO("Instruction: Hessian of test functions with elementary "
1617  "transformation");
1618  size_type ndof = Z.sizes()[0];
1619  size_type Qmult = qdim / Z.sizes()[1];
1620  t_in.adjust_sizes(Qmult*ndof, Qmult*Z.sizes()[1], Z.sizes()[2]);
1621  ga_instruction_copy_hess_base::exec();
1622  do_transformation(t_out.sizes()[0], ndof*Qmult);
1623  return 0;
1624  }
1625 
1626  ga_instruction_elementary_trans_hess_base
1627  (base_tensor &t_, const base_tensor &Z_, size_type q,
1628  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1629  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1630  : ga_instruction_copy_hess_base(t_in, Z_, q),
1631  ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
1632  M_, icv_) {}
1633  };
1634 
1635  struct ga_instruction_elementary_trans_diverg_base
1636  : public ga_instruction_copy_diverg_base,
1637  ga_instruction_elementary_trans_base {
1638  // Z(ndof,target_dim,N) --> t_out(Qmult*ndof)
1639  virtual int exec() {
1640  GA_DEBUG_INFO("Instruction: divergence of test functions with elementary "
1641  "transformation");
1642  size_type ndof = Z.sizes()[0];
1643  size_type Qmult = qdim / Z.sizes()[1];
1644  t_in.adjust_sizes(Qmult*ndof);
1645  ga_instruction_copy_diverg_base::exec();
1646  do_transformation(t_out.sizes()[0], ndof*Qmult);
1647  return 0;
1648  }
1649 
1650  ga_instruction_elementary_trans_diverg_base
1651  (base_tensor &t_, const base_tensor &Z_, size_type q,
1652  pelementary_transformation e, const mesh_fem &mf1_, const mesh_fem &mf2_,
1653  fem_interpolation_context &ctx_, base_matrix &M_, size_type &icv_)
1654  : ga_instruction_copy_diverg_base(t_in, Z_, q),
1655  ga_instruction_elementary_trans_base(t_, e, mf1_, mf2_, ctx_,
1656  M_, icv_) {}
1657  };
1658 
1659 
1660  struct ga_instruction_add : public ga_instruction {
1661  base_tensor &t;
1662  const base_tensor &tc1, &tc2;
1663  virtual int exec() {
1664  GA_DEBUG_INFO("Instruction: addition");
1665  GA_DEBUG_ASSERT(t.size() == tc1.size(),
1666  "internal error " << t.size() << " != " << tc1.size());
1667  GA_DEBUG_ASSERT(t.size() == tc2.size(),
1668  "internal error " << t.size() << " != " << tc2.size());
1669  gmm::add(tc1.as_vector(), tc2.as_vector(), t.as_vector());
1670  return 0;
1671  }
1672  ga_instruction_add(base_tensor &t_,
1673  const base_tensor &tc1_, const base_tensor &tc2_)
1674  : t(t_), tc1(tc1_), tc2(tc2_) {}
1675  };
1676 
1677  struct ga_instruction_add_to : public ga_instruction {
1678  base_tensor &t;
1679  const base_tensor &tc1;
1680  virtual int exec() {
1681  GA_DEBUG_INFO("Instruction: addition");
1682  GA_DEBUG_ASSERT(t.size() == tc1.size(), "internal error " << t.size()
1683  << " incompatible with " << tc1.size());
1684  gmm::add(tc1.as_vector(), t.as_vector());
1685  return 0;
1686  }
1687  ga_instruction_add_to(base_tensor &t_, const base_tensor &tc1_)
1688  : t(t_), tc1(tc1_) {}
1689  };
1690 
1691  struct ga_instruction_add_to_coeff : public ga_instruction {
1692  base_tensor &t;
1693  const base_tensor &tc1;
1694  scalar_type &coeff;
1695  virtual int exec() {
1696  GA_DEBUG_INFO("Instruction: addition with scale");
1697  GA_DEBUG_ASSERT(t.size() == tc1.size(), "internal error " << t.size()
1698  << " incompatible with " << tc1.size());
1699  gmm::add(gmm::scaled(tc1.as_vector(), coeff), t.as_vector());
1700  return 0;
1701  }
1702  ga_instruction_add_to_coeff(base_tensor &t_, const base_tensor &tc1_,
1703  scalar_type &coeff_)
1704  : t(t_), tc1(tc1_), coeff(coeff_) {}
1705  };
1706 
1707  struct ga_instruction_sub : public ga_instruction {
1708  base_tensor &t;
1709  const base_tensor &tc1, &tc2;
1710  virtual int exec() {
1711  GA_DEBUG_INFO("Instruction: subtraction");
1712  GA_DEBUG_ASSERT(t.size() == tc1.size() && t.size() == tc2.size(),
1713  "internal error");
1714  gmm::add(tc1.as_vector(), gmm::scaled(tc2.as_vector(), scalar_type(-1)),
1715  t.as_vector());
1716  return 0;
1717  }
1718  ga_instruction_sub(base_tensor &t_,
1719  const base_tensor &tc1_, const base_tensor &tc2_)
1720  : t(t_), tc1(tc1_), tc2(tc2_) {}
1721  };
1722 
1723  struct ga_instruction_opposite : public ga_instruction {
1724  base_tensor &t;
1725  virtual int exec() {
1726  GA_DEBUG_INFO("Instruction: multiplication with -1");
1727  gmm::scale(t.as_vector(), scalar_type(-1));
1728  return 0;
1729  }
1730  ga_instruction_opposite(base_tensor &t_) : t(t_) {}
1731  };
1732 
1733  struct ga_instruction_print_tensor : public ga_instruction {
1734  base_tensor &t;
1735  pga_tree_node pnode;
1736  const fem_interpolation_context &ctx;
1737  size_type &nbpt, &ipt;
1738  virtual int exec() {
1739  GA_DEBUG_INFO("Instruction: tensor print");
1740  cout << "Print term "; ga_print_node(pnode, cout);
1741  cout << " on Gauss point " << ipt << "/" << nbpt << " of element "
1742  << ctx.convex_num() << ": " << t << endl;
1743  return 0;
1744  }
1745  ga_instruction_print_tensor(base_tensor &t_, pga_tree_node pnode_,
1746  const fem_interpolation_context &ctx_,
1747  size_type &nbpt_, size_type &ipt_)
1748  : t(t_), pnode(pnode_), ctx(ctx_), nbpt(nbpt_), ipt(ipt_) {}
1749  };
1750 
1751  struct ga_instruction_copy_tensor : public ga_instruction {
1752  base_tensor &t;
1753  const base_tensor &tc1;
1754  virtual int exec() {
1755  GA_DEBUG_INFO("Instruction: tensor copy");
1756  std::copy(tc1.begin(), tc1.end(), t.begin());
1757  // gmm::copy(tc1.as_vector(), t.as_vector());
1758  return 0;
1759  }
1760  ga_instruction_copy_tensor(base_tensor &t_, const base_tensor &tc1_)
1761  : t(t_), tc1(tc1_) {}
1762  };
1763 
1764  struct ga_instruction_clear_tensor : public ga_instruction {
1765  base_tensor &t;
1766  virtual int exec() {
1767  GA_DEBUG_INFO("Instruction: clear tensor");
1768  std::fill(t.begin(), t.end(), scalar_type(0));
1769  return 0;
1770  }
1771  ga_instruction_clear_tensor(base_tensor &t_) : t(t_) {}
1772  };
1773 
1774  struct ga_instruction_copy_tensor_possibly_void : public ga_instruction {
1775  base_tensor &t;
1776  const base_tensor &tc1;
1777  virtual int exec() {
1778  GA_DEBUG_INFO("Instruction: tensor copy possibly void");
1779  if (tc1.size())
1780  gmm::copy(tc1.as_vector(), t.as_vector());
1781  else
1782  gmm::clear(t.as_vector());
1783  return 0;
1784  }
1785  ga_instruction_copy_tensor_possibly_void(base_tensor &t_,
1786  const base_tensor &tc1_)
1787  : t(t_), tc1(tc1_) {}
1788  };
1789 
1790  struct ga_instruction_copy_scalar : public ga_instruction {
1791  scalar_type &t; const scalar_type &t1;
1792  virtual int exec() {
1793  GA_DEBUG_INFO("Instruction: scalar copy");
1794  t = t1;
1795  return 0;
1796  }
1797  ga_instruction_copy_scalar(scalar_type &t_, const scalar_type &t1_)
1798  : t(t_), t1(t1_) {}
1799  };
1800 
1801  struct ga_instruction_copy_vect : public ga_instruction {
1802  base_vector &t;
1803  const base_vector &t1;
1804  virtual int exec() {
1805  GA_DEBUG_INFO("Instruction: fixed size tensor copy");
1806  gmm::copy(t1, t);
1807  return 0;
1808  }
1809  ga_instruction_copy_vect(base_vector &t_, const base_vector &t1_)
1810  : t(t_), t1(t1_) {}
1811  };
1812 
1813  struct ga_instruction_trace : public ga_instruction {
1814  base_tensor &t;
1815  const base_tensor &tc1;
1816  size_type n;
1817  // tc1(:,:,...,n,n) --> t(:,:,...)
1818  virtual int exec() {
1819  GA_DEBUG_INFO("Instruction: Trace");
1820  GA_DEBUG_ASSERT(t.size()*n*n == tc1.size(), "Wrong sizes");
1821  size_type s = t.size() * (n+1);
1822  auto it = t.begin();
1823  auto it1 = tc1.begin();
1824  for (; it != t.end(); ++it, ++it1) {
1825  auto it2 = it1;
1826  *it = *it2;
1827  for (size_type i = 1; i < n; ++i) { it2 += s; *it += *it2; }
1828  }
1829  return 0;
1830  }
1831 
1832  ga_instruction_trace(base_tensor &t_, const base_tensor &tc1_, size_type n_)
1833  : t(t_), tc1(tc1_), n(n_) {}
1834  };
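  // Illustrative sketch (editor's addition; hypothetical helper): the trace
  // instruction above contracts the two trailing indices of tc1 (both of
  // size n); the stride s = t.size()*(n+1) walks the diagonal of each
  // trailing n x n block. For a single n x n matrix stored column-major in a
  // base_tensor this reduces to summing the diagonal entries:
  inline scalar_type sketch_matrix_trace(const base_tensor &A, size_type n) {
    GMM_ASSERT1(A.size() == n*n, "Wrong sizes");
    scalar_type tr(0);
    for (size_type i = 0; i < n; ++i) tr += A[i*(n+1)]; // A(i,i)
    return tr;
  }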
1835 
1836  struct ga_instruction_deviator : public ga_instruction {
1837  base_tensor &t;
1838  const base_tensor &tc1;
1839  size_type n;
1840  // tc1(:,:,...,n,n) --> t(:,:,...,n,n)
1841  virtual int exec() {
1842  GA_DEBUG_INFO("Instruction: Deviator");
1843  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1844 
1845  gmm::copy(tc1.as_vector(), t.as_vector());
1846 
1847  size_type nb = t.size()/(n*n);
1848  size_type s = nb * (n+1), j = 0;
1849  base_tensor::iterator it = t.begin();
1850  base_tensor::const_iterator it1 = tc1.begin();
1851  for (; j < nb; ++it, ++it1, ++j) {
1852  scalar_type tr(0);
1853  base_tensor::const_iterator it2 = it1;
1854  tr += *it2;
1855  for (size_type i = 1; i < n; ++i) { it2 += s; tr += *it2; }
1856  tr /= scalar_type(n);
1857 
1858  base_tensor::iterator it3 = it;
1859  *it3 -= tr;
1860  for (size_type i = 1; i < n; ++i) { it3 += s; *it3 -= tr; }
1861  }
1862  return 0;
1863  }
1864 
1865  ga_instruction_deviator(base_tensor &t_, const base_tensor &tc1_,
1866  size_type n_)
1867  : t(t_), tc1(tc1_), n(n_) {}
1868  };
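  // Illustrative sketch (editor's addition; hypothetical helper): for a
  // single n x n matrix the instruction above computes the deviatoric part
  //   Dev(A) = A - (tr(A)/n) * Id,
  // i.e. tr(A)/n is subtracted from each diagonal entry while off-diagonal
  // entries are copied unchanged.
  inline void sketch_deviator(const base_tensor &A, size_type n,
                              base_tensor &D) {
    GMM_ASSERT1(A.size() == n*n && D.size() == n*n, "Wrong sizes");
    gmm::copy(A.as_vector(), D.as_vector());
    scalar_type tr(0);
    for (size_type i = 0; i < n; ++i) tr += A[i*(n+1)];
    tr /= scalar_type(n);
    for (size_type i = 0; i < n; ++i) D[i*(n+1)] -= tr;
  }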
1869 
1870  struct ga_instruction_transpose : public ga_instruction { // To be optimized
1871  base_tensor &t;
1872  const base_tensor &tc1;
1873  size_type n1, n2, nn;
1874  virtual int exec() {
1875  GA_DEBUG_INFO("Instruction: transpose");
1876  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1877 
1878  size_type n0 = tc1.size() / (n1*n2*nn);
1879  auto it = t.begin();
1880  for (size_type i = 0; i < nn; ++i) {
1881  size_type s1 = i*n1*n2*n0;
1882  for (size_type j = 0; j < n1; ++j) {
1883  size_type s2 = s1 + j*n0;
1884  for (size_type k = 0; k < n2; ++k) {
1885  size_type s3 = s2 + k*n1*n0;
1886  for (size_type l = 0; l < n0; ++l, ++it)
1887  *it = tc1[s3+l];
1888  }
1889  }
1890  }
1891  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
1892  return 0;
1893  }
1894  ga_instruction_transpose(base_tensor &t_, const base_tensor &tc1_,
1895  size_type n1_, size_type n2_, size_type nn_)
1896  : t(t_), tc1(tc1_), n1(n1_), n2(n2_), nn(nn_) {}
1897  };
1898 
1899  struct ga_instruction_swap_indices : public ga_instruction {// To be optimized
1900  base_tensor &t;
1901  const base_tensor &tc1;
1902  size_type nn1, nn2, ii2, ii3;
1903  virtual int exec() {
1904  GA_DEBUG_INFO("Instruction: swap indices");
1905  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1906  size_type ii1 = t.size() / (nn1*nn2*ii2*ii3);
1907 
1908  auto it = t.begin();
1909  for (size_type i = 0; i < ii3; ++i)
1910  for (size_type j = 0; j < nn1; ++j)
1911  for (size_type k = 0; k < ii2; ++k)
1912  for (size_type l = 0; l < nn2; ++l) {
1913  size_type ind = j*ii1+k*ii1*nn1+l*ii1*nn1*ii2+i*ii1*nn1*ii2*nn2;
1914  for (size_type m = 0; m < ii1; ++m, ++it)
1915  *it = tc1[m+ind];
1916  }
1917  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
1918  return 0;
1919  }
1920  ga_instruction_swap_indices(base_tensor &t_, const base_tensor &tc1_,
1921  size_type n1_, size_type n2_,
1922  size_type i2_, size_type i3_)
1923  : t(t_), tc1(tc1_), nn1(n1_), nn2(n2_), ii2(i2_), ii3(i3_) {}
1924  };
1925 
1926  struct ga_instruction_index_move_last : public ga_instruction {// To be optimized
1927  base_tensor &t;
1928  const base_tensor &tc1;
1929  size_type nn, ii2;
1930  virtual int exec() {
1931      GA_DEBUG_INFO("Instruction: index move last");
1932  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1933  size_type ii1 = t.size() / (nn*ii2);
1934 
1935  auto it = t.begin();
1936  for (size_type i = 0; i < nn; ++i)
1937  for (size_type j = 0; j < ii2; ++j) {
1938  size_type ind = i*ii1+j*ii1*nn;
1939  for (size_type k = 0; k < ii1; ++k, ++it)
1940  *it = tc1[k+ind];
1941  }
1942  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
1943  return 0;
1944  }
1945  ga_instruction_index_move_last(base_tensor &t_, const base_tensor &tc1_,
1946  size_type n_, size_type i2_)
1947  : t(t_), tc1(tc1_), nn(n_), ii2(i2_) {}
1948  };
1949 
1950  struct ga_instruction_transpose_no_test : public ga_instruction {
1951  base_tensor &t;
1952  const base_tensor &tc1;
1953  size_type n1, n2, nn;
1954  virtual int exec() {
1955  GA_DEBUG_INFO("Instruction: transpose");
1956  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1957 
1958  auto it = t.begin();
1959  for (size_type i = 0; i < nn; ++i) {
1960  size_type s1 = i*n1*n2;
1961  for (size_type j = 0; j < n1; ++j) {
1962  size_type s2 = s1 + j;
1963  for (size_type k = 0; k < n2; ++k, ++it)
1964  *it = tc1[s2 + k*n1];
1965  }
1966  }
1967  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
1968  return 0;
1969  }
1970  ga_instruction_transpose_no_test(base_tensor &t_, const base_tensor &tc1_,
1971  size_type n1_, size_type n2_,
1972  size_type nn_)
1973  : t(t_), tc1(tc1_), n1(n1_), n2(n2_), nn(nn_) {}
1974  };
1975 
1976  struct ga_instruction_transpose_test : public ga_instruction {
1977  base_tensor &t;
1978  const base_tensor &tc1;
1979  virtual int exec() {
1980  GA_DEBUG_INFO("Instruction: copy tensor and transpose test functions");
1981  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
1982  GA_DEBUG_ASSERT(t.sizes().size() >= 2, "Wrong sizes");
1983 
1984  size_type s1 = t.sizes()[0], s2 = t.sizes()[1], s3 = s1*s2;
1985  size_type s = t.size() / s3;
1986  base_tensor::iterator it = t.begin();
1987  for (size_type k = 0; k < s; ++k)
1988  for (size_type j = 0; j < s2; ++j)
1989  for (size_type i = 0; i < s1; ++i, ++it)
1990  *it = tc1[j+s2*i+k*s3];
1991  return 0;
1992  }
1993  ga_instruction_transpose_test(base_tensor &t_, const base_tensor &tc1_)
1994  : t(t_), tc1(tc1_) {}
1995  };
1996 
1997  struct ga_instruction_sym : public ga_instruction {
1998  base_tensor &t;
1999  const base_tensor &tc1;
2000  virtual int exec() {
2001  GA_DEBUG_INFO("Instruction: symmetric part");
2002  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
2003  size_type order = t.sizes().size();
2004  size_type s1 = t.sizes()[order-2], s2 = t.sizes()[order-1];
2005  size_type s = t.size() / (s1*s2);
2006  for (size_type i = 0; i < s1; ++i)
2007  for (size_type j = 0; j < s2; ++j) {
2008  base_tensor::iterator it = t.begin() + s*(i + s1*j);
2009  base_tensor::const_iterator it1 = tc1.begin() + s*(i + s1*j),
2010  it1T = tc1.begin() + s*(j + s2*i);
2011  for (size_type k = 0; k < s; ++k) *it++ = 0.5*(*it1++ + *it1T++);
2012  }
2013  return 0;
2014  }
2015  ga_instruction_sym(base_tensor &t_, const base_tensor &tc1_)
2016  : t(t_), tc1(tc1_) {}
2017  };
2018 
2019  struct ga_instruction_skew : public ga_instruction {
2020  base_tensor &t;
2021  const base_tensor &tc1;
2022  virtual int exec() {
2023  GA_DEBUG_INFO("Instruction: skew-symmetric part");
2024  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
2025  size_type order = t.sizes().size();
2026  size_type s1 = t.sizes()[order-2], s2 = t.sizes()[order-1];
2027  size_type s = t.size() / (s1*s2);
2028  for (size_type i = 0; i < s1; ++i)
2029  for (size_type j = 0; j < s2; ++j) {
2030  base_tensor::iterator it = t.begin() + s*(i + s1*j);
2031  base_tensor::const_iterator it1 = tc1.begin() + s*(i + s1*j),
2032  it1T = tc1.begin() + s*(j + s2*i);
2033  for (size_type k = 0; k < s; ++k) *it++ = 0.5*(*it1++ - *it1T++);
2034  }
2035  return 0;
2036  }
2037  ga_instruction_skew(base_tensor &t_, const base_tensor &tc1_)
2038  : t(t_), tc1(tc1_) {}
2039  };
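  // Editorial note: slice by slice over the leading (test function) indices,
  // the two instructions above compute the symmetric and skew-symmetric parts
  // of the trailing square matrix,
  //   Sym(A)(i,j)  = 0.5*(A(i,j) + A(j,i)),
  //   Skew(A)(i,j) = 0.5*(A(i,j) - A(j,i)),
  // so that A = Sym(A) + Skew(A) for any square A.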
2040 
2041  struct ga_instruction_scalar_add : public ga_instruction {
2042  scalar_type &t;
2043  const scalar_type &c, &d;
2044  virtual int exec() {
2045  GA_DEBUG_INFO("Instruction: scalar addition");
2046  t = c + d;
2047  return 0;
2048  }
2049  ga_instruction_scalar_add(scalar_type &t_, const scalar_type &c_,
2050  const scalar_type &d_)
2051  : t(t_), c(c_), d(d_) {}
2052  };
2053 
2054  struct ga_instruction_scalar_sub : public ga_instruction {
2055  scalar_type &t;
2056  const scalar_type &c, &d;
2057  virtual int exec() {
2058  GA_DEBUG_INFO("Instruction: scalar subtraction");
2059  t = c - d;
2060  return 0;
2061  }
2062  ga_instruction_scalar_sub(scalar_type &t_, const scalar_type &c_,
2063  const scalar_type &d_)
2064  : t(t_), c(c_), d(d_) {}
2065  };
2066 
2067  struct ga_instruction_scalar_scalar_mult : public ga_instruction {
2068  scalar_type &t;
2069  const scalar_type &c, &d;
2070  virtual int exec() {
2071  GA_DEBUG_INFO("Instruction: scalar multiplication");
2072  t = c * d;
2073  return 0;
2074  }
2075  ga_instruction_scalar_scalar_mult(scalar_type &t_, const scalar_type &c_,
2076  const scalar_type &d_)
2077  : t(t_), c(c_), d(d_) {}
2078  };
2079 
2080  struct ga_instruction_scalar_scalar_div : public ga_instruction {
2081  scalar_type &t;
2082  const scalar_type &c, &d;
2083  virtual int exec() {
2084  GA_DEBUG_INFO("Instruction: scalar division");
2085  t = c / d;
2086  return 0;
2087  }
2088  ga_instruction_scalar_scalar_div(scalar_type &t_, const scalar_type &c_,
2089  const scalar_type &d_)
2090  : t(t_), c(c_), d(d_) {}
2091  };
2092 
2093  struct ga_instruction_scalar_mult : public ga_instruction {
2094  base_tensor &t, &tc1;
2095  const scalar_type &c;
2096  virtual int exec() {
2097  GA_DEBUG_INFO("Instruction: multiplication of a tensor by a scalar " << c);
2098  gmm::copy(gmm::scaled(tc1.as_vector(), c), t.as_vector());
2099  return 0;
2100  }
2101  ga_instruction_scalar_mult(base_tensor &t_, base_tensor &tc1_,
2102  const scalar_type &c_)
2103  : t(t_), tc1(tc1_), c(c_) {}
2104  };
2105 
2106  struct ga_instruction_scalar_div : public ga_instruction {
2107  base_tensor &t, &tc1;
2108  const scalar_type &c;
2109  virtual int exec() {
2110  GA_DEBUG_INFO("Instruction: division of a tensor by a scalar");
2111  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
2112 
2113  base_tensor::iterator it = t.begin(), it1 = tc1.begin();
2114  for (; it != t.end(); ++it, ++it1) *it = *it1/c;
2115  return 0;
2116  }
2117  ga_instruction_scalar_div(base_tensor &t_, base_tensor &tc1_,
2118  const scalar_type &c_)
2119  : t(t_), tc1(tc1_), c(c_) {}
2120  };
2121 
2122  // Performs Cross product in the presence of test functions
2123  struct ga_instruction_cross_product_tf : public ga_instruction {
2124  base_tensor &t, &tc1, &tc2;
2125  bool inv;
2126  virtual int exec() {
2127  GA_DEBUG_INFO("Instruction: Cross product with test functions");
2128 
2129  size_type n1 = tc1.size() / 3, n2 = tc2.size() / 3, nn=n1*n2;
2130  GA_DEBUG_ASSERT(t.size() == nn*3, "Bad tensor size for cross product");
2131  size_type mm=2*nn, n1_2 = 2*n1, n2_2 = 2*n2;
2132  base_tensor::iterator it = t.begin(), it2 = tc2.begin();
2133 
2134  if (inv) {
2135  for (size_type i = 0; i < n2; ++i, ++it2) {
2136  base_tensor::iterator it1 = tc1.begin();
2137  for (size_type j = 0; j < n1; ++j, ++it, ++it1) {
2138  *it = - it1[n1] *it2[n2_2] + it1[n1_2]*it2[n2];
2139  it[nn] = - it1[n1_2]*it2[0] + it1[0] *it2[n2_2];
2140  it[mm] = - it1[0] *it2[n2] + it1[n1] *it2[0];
2141  }
2142  }
2143  } else {
2144  for (size_type i = 0; i < n2; ++i, ++it2) {
2145  base_tensor::iterator it1 = tc1.begin();
2146  for (size_type j = 0; j < n1; ++j, ++it, ++it1) {
2147  *it = it1[n1] *it2[n2_2] - it1[n1_2]*it2[n2];
2148  it[nn] = it1[n1_2]*it2[0] - it1[0] *it2[n2_2];
2149  it[mm] = it1[0] *it2[n2] - it1[n1] *it2[0];
2150  }
2151  }
2152  }
2153  return 0;
2154  }
2155  ga_instruction_cross_product_tf(base_tensor &t_, base_tensor &tc1_,
2156  base_tensor &tc2_, bool inv_)
2157  : t(t_), tc1(tc1_), tc2(tc2_), inv(inv_) {}
2158  };
2159 
2160  // Performs Cross product in the absence of test functions
2161  struct ga_instruction_cross_product : public ga_instruction {
2162  base_tensor &t, &tc1, &tc2;
2163  virtual int exec() {
2164      GA_DEBUG_INFO("Instruction: Cross product without test functions");
2165  GA_DEBUG_ASSERT(t.size() == 3 && tc1.size() == 3 && tc2.size() == 3,
2166  "Bad tensor size for cross product");
2167  t[0] = tc1[1]*tc2[2] - tc1[2]*tc2[1];
2168  t[1] = tc1[2]*tc2[0] - tc1[0]*tc2[2];
2169  t[2] = tc1[0]*tc2[1] - tc1[1]*tc2[0];
2170  return 0;
2171  }
2172  ga_instruction_cross_product(base_tensor &t_, base_tensor &tc1_,
2173  base_tensor &tc2_)
2174  : t(t_), tc1(tc1_), tc2(tc2_) {}
2175  };
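  // Usage sketch (editor's addition; the check below is hypothetical and not
  // part of the assembly): the plain cross product instruction can be
  // verified on the canonical basis, e1 x e2 = e3.
  inline void sketch_check_cross_product() {
    base_tensor a, b, c;
    a.adjust_sizes(3); b.adjust_sizes(3); c.adjust_sizes(3);
    std::fill(a.begin(), a.end(), scalar_type(0));
    std::fill(b.begin(), b.end(), scalar_type(0));
    std::fill(c.begin(), c.end(), scalar_type(0));
    a[0] = scalar_type(1);                               // e1
    b[1] = scalar_type(1);                               // e2
    ga_instruction_cross_product(c, a, b).exec();
    GMM_ASSERT1(c[0] == scalar_type(0) && c[1] == scalar_type(0) &&
                c[2] == scalar_type(1), "e1 x e2 should give e3");
  }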
2176 
2177 
2178 
2179 
2180  struct ga_instruction_dotmult : public ga_instruction {
2181  base_tensor &t, &tc1, &tc2;
2182  virtual int exec() {
2183  GA_DEBUG_INFO("Instruction: componentwise multiplication");
2184  size_type s2 = tc2.size(), s1_1 = tc1.size() / s2;
2185  GA_DEBUG_ASSERT(t.size() == s1_1*s2, "Wrong sizes");
2186 
2187  base_tensor::iterator it = t.begin();
2188  for (size_type i = 0; i < s2; ++i)
2189  for (size_type m = 0; m < s1_1; ++m, ++it)
2190  *it = tc1[m+s1_1*i] * tc2[i];
2191  return 0;
2192  }
2193  ga_instruction_dotmult(base_tensor &t_, base_tensor &tc1_,
2194  base_tensor &tc2_)
2195  : t(t_), tc1(tc1_), tc2(tc2_) {}
2196  };
2197 
2198  struct ga_instruction_dotdiv : public ga_instruction {
2199  base_tensor &t, &tc1, &tc2;
2200  virtual int exec() {
2201  GA_DEBUG_INFO("Instruction: componentwise division");
2202  size_type s2 = tc2.size(), s1_1 = tc1.size() / s2;
2203  GA_DEBUG_ASSERT(t.size() == s1_1*s2, "Wrong sizes");
2204 
2205  base_tensor::iterator it = t.begin();
2206  for (size_type i = 0; i < s2; ++i)
2207  for (size_type m = 0; m < s1_1; ++m, ++it)
2208  *it = tc1[m+s1_1*i] / tc2[i];
2209  return 0;
2210  }
2211  ga_instruction_dotdiv(base_tensor &t_, base_tensor &tc1_,
2212  base_tensor &tc2_)
2213  : t(t_), tc1(tc1_), tc2(tc2_) {}
2214  };
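  // Illustrative sketch (editor's addition; hypothetical helper): in the two
  // instructions above tc1 is read as an (s1_1 x s2) array whose first index
  // runs over test function components; each column tc1(.,i) is multiplied
  // (resp. divided) by the scalar tc2[i]. Without a leading test index
  // (s1_1 == 1) this is the plain componentwise (Hadamard) product:
  inline void sketch_hadamard(const base_tensor &a, const base_tensor &b,
                              base_tensor &c) {
    GMM_ASSERT1(a.size() == b.size() && c.size() == a.size(), "Wrong sizes");
    for (size_type i = 0; i < a.size(); ++i) c[i] = a[i] * b[i];
  }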
2215 
2216  // Performs Ami Bni -> Cmni
2217  struct ga_instruction_dotmult_spec : public ga_instruction {
2218  base_tensor &t, &tc1, &tc2;
2219  virtual int exec() {
2220  GA_DEBUG_INFO("Instruction: specific componentwise multiplication");
2221  size_type s2_1 = tc2.sizes()[0], s2_2 = tc2.size() / s2_1;
2222  size_type s1_1 = tc1.size() / s2_2;
2223 
2224  base_tensor::iterator it = t.begin();
2225  for (size_type i = 0; i < s2_2; ++i)
2226  for (size_type n = 0; n < s2_1; ++n)
2227  for (size_type m = 0; m < s1_1; ++m, ++it)
2228  *it = tc1[m+s1_1*i] * tc2[n+s2_1*i];
2229  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2230  return 0;
2231  }
2232  ga_instruction_dotmult_spec(base_tensor &t_, base_tensor &tc1_,
2233  base_tensor &tc2_)
2234  : t(t_), tc1(tc1_), tc2(tc2_) {}
2235  };
2236 
2237  // Performs Amijik -> Cmjk. To be optimized
2238  struct ga_instruction_contract_1_1 : public ga_instruction {
2239  base_tensor &t, &tc1;
2240  size_type nn, ii2, ii3;
2241  virtual int exec() {
2242  GA_DEBUG_INFO("Instruction: single contraction on a single tensor");
2243 
2244  size_type ii1 = tc1.size() / (nn*nn*ii2*ii3);
2245 
2246  base_tensor::iterator it = t.begin();
2247  for (size_type i = 0; i < ii3; ++i)
2248  for (size_type j = 0; j < ii2; ++j)
2249  for (size_type k = 0; k < ii1; ++k, ++it) {
2250  *it = scalar_type(0);
2251  size_type pre_ind = k+j*ii1*nn+i*ii1*nn*ii2*nn;
2252  for (size_type n = 0; n < nn; ++n)
2253  *it += tc1[pre_ind+n*ii1+n*ii1*nn*ii2];
2254  }
2255 
2256  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2257  return 0;
2258  }
2259  ga_instruction_contract_1_1(base_tensor &t_, base_tensor &tc1_,
2260  size_type n_, size_type i2_, size_type i3_)
2261  : t(t_), tc1(tc1_), nn(n_), ii2(i2_), ii3(i3_) {}
2262  };
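  // Worked index formula (editor's addition): with tc1 viewed as a tensor of
  // shape (ii1, nn, ii2, nn, ii3), the instruction above computes
  //   t(k, j, i) = sum_n tc1(k, n, j, n, i),
  // i.e. the two occurrences of the contracted index (both of size nn) are
  // summed while the remaining indices keep their relative order.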
2263 
2264  // Performs Amijk Bnljp -> Cmniklp. To be optimized
2265  struct ga_instruction_contract_2_1 : public ga_instruction {
2266  base_tensor &t, &tc1, &tc2;
2267  size_type nn, ii1, ii2, ii3, ii4;
2268  virtual int exec() {
2269  GA_DEBUG_INFO("Instruction: single contraction on two tensors");
2270 
2271  size_type ift1 = tc1.size() / (nn*ii1*ii2);
2272  size_type ift2 = tc2.size() / (nn*ii3*ii4);
2273 
2274  base_tensor::iterator it = t.begin();
2275  for (size_type i = 0; i < ii4; ++i)
2276  for (size_type j = 0; j < ii3; ++j)
2277  for (size_type k = 0; k < ii2; ++k)
2278  for (size_type l = 0; l < ii1; ++l)
2279  for (size_type p = 0; p < ift2; ++p)
2280  for (size_type q = 0; q < ift1; ++q, ++it) {
2281  *it = scalar_type(0);
2282  size_type ind1 = q+l*ift1+k*ift1*ii1*nn;
2283  size_type ind2 = p+j*ift2+i*ift2*ii3*nn;
2284  for (size_type n = 0; n < nn; ++n)
2285  *it += tc1[ind1+n*ift1*ii1] * tc2[ind2+n*ift2*ii3];
2286  }
2287 
2288  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2289  return 0;
2290  }
2291  ga_instruction_contract_2_1(base_tensor &t_, base_tensor &tc1_,
2292  base_tensor &tc2_,
2293  size_type n_, size_type i1_, size_type i2_,
2294  size_type i3_, size_type i4_)
2295  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_),
2296  ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_) {}
2297  };
2298 
2299  // Performs Amijk Bnljp -> Cnmiklp. To be optimized
2300  struct ga_instruction_contract_2_1_rev : public ga_instruction {
2301  base_tensor &t, &tc1, &tc2;
2302  size_type nn, ii1, ii2, ii3, ii4;
2303  virtual int exec() {
2304  GA_DEBUG_INFO("Instruction: single contraction on two tensors");
2305 
2306  size_type ift1 = tc1.size() / (nn*ii1*ii2);
2307  size_type ift2 = tc2.size() / (nn*ii3*ii4);
2308 
2309  base_tensor::iterator it = t.begin();
2310  for (size_type i = 0; i < ii4; ++i)
2311  for (size_type j = 0; j < ii3; ++j)
2312  for (size_type k = 0; k < ii2; ++k)
2313  for (size_type l = 0; l < ii1; ++l)
2314  for (size_type q = 0; q < ift1; ++q)
2315  for (size_type p = 0; p < ift2; ++p, ++it) {
2316  *it = scalar_type(0);
2317  size_type ind1 = q+l*ift1+k*ift1*ii1*nn;
2318  size_type ind2 = p+j*ift2+i*ift2*ii3*nn;
2319  for (size_type n = 0; n < nn; ++n)
2320  *it += tc1[ind1+n*ift1*ii1] * tc2[ind2+n*ift2*ii3];
2321  }
2322 
2323  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2324  return 0;
2325  }
2326  ga_instruction_contract_2_1_rev(base_tensor &t_, base_tensor &tc1_,
2327  base_tensor &tc2_,
2328  size_type n_, size_type i1_, size_type i2_,
2329  size_type i3_, size_type i4_)
2330  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_),
2331  ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_) {}
2332  };
2333 
2334  // Performs Amijklp Bnqjrls -> Cmnikpqrs. To be optimized
2335  struct ga_instruction_contract_2_2 : public ga_instruction {
2336  base_tensor &t, &tc1, &tc2;
2337  size_type nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6;
2338  bool inv_tc2;
2339  virtual int exec() {
2340      GA_DEBUG_INFO("Instruction: double contraction on two tensors");
2341 
2342  size_type ift1 = tc1.size() / (nn1*nn2*ii1*ii2*ii3);
2343  size_type ift2 = tc2.size() / (nn1*nn2*ii3*ii4*ii5);
2344 
2345  size_type sn1 = ift2*ii4, sn2 = ift2*ii4*nn1*ii5;
2346  if (inv_tc2) std::swap(sn1, sn2);
2347 
2348  base_tensor::iterator it = t.begin();
2349  for (size_type i = 0; i < ii6; ++i)
2350  for (size_type j = 0; j < ii5; ++j)
2351  for (size_type k = 0; k < ii4; ++k)
2352  for (size_type l = 0; l < ii3; ++l)
2353  for (size_type p = 0; p < ii2; ++p)
2354  for (size_type q = 0; q < ii1; ++q)
2355  for (size_type r = 0; r < ift2; ++r)
2356  for (size_type s = 0; s < ift1; ++s, ++it) {
2357  *it = scalar_type(0);
2358  size_type ind1
2359  = s+q*ift1+p*ift1*ii1*nn1+l*ift1*ii1*nn1*ii2*nn2;
2360  size_type ind2
2361  = r+k*ift2+j*ift2*ii4*nn1+i*ift2*ii4*nn1*ii5*nn2;
2362  for (size_type n1 = 0; n1 < nn1; ++n1)
2363  for (size_type n2 = 0; n2 < nn2; ++n2)
2364  *it += tc1[ind1+n1*ift1*ii1+n2*ift1*ii1*nn1*ii2]
2365  * tc2[ind2+n1*sn1+n2*sn2];
2366  }
2367 
2368  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2369  return 0;
2370  }
2371  ga_instruction_contract_2_2(base_tensor &t_, base_tensor &tc1_,
2372  base_tensor &tc2_,
2373  size_type n1_, size_type n2_,
2374  size_type i1_, size_type i2_, size_type i3_,
2375  size_type i4_, size_type i5_, size_type i6_,
2376  bool intc2)
2377  : t(t_), tc1(tc1_), tc2(tc2_), nn1(n1_), nn2(n2_),
2378  ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_), ii5(i5_), ii6(i6_),
2379  inv_tc2(intc2) {}
2380  };
2381 
2382  // Performs Amijklp Bnqjrls -> Cnmikpqrs. To be optimized
2383  struct ga_instruction_contract_2_2_rev : public ga_instruction {
2384  base_tensor &t, &tc1, &tc2;
2385  size_type nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6;
2386  bool inv_tc2;
2387  virtual int exec() {
2388      GA_DEBUG_INFO("Instruction: double contraction on two tensors");
2389 
2390  size_type ift1 = tc1.size() / (nn1*nn2*ii1*ii2*ii3);
2391  size_type ift2 = tc2.size() / (nn1*nn2*ii3*ii4*ii5);
2392 
2393  size_type sn1 = ift2*ii4, sn2 = ift2*ii4*nn1*ii5;
2394  if (inv_tc2) std::swap(sn1, sn2);
2395 
2396  base_tensor::iterator it = t.begin();
2397  for (size_type i = 0; i < ii6; ++i)
2398  for (size_type j = 0; j < ii5; ++j)
2399  for (size_type k = 0; k < ii4; ++k)
2400  for (size_type l = 0; l < ii3; ++l)
2401  for (size_type p = 0; p < ii2; ++p)
2402  for (size_type q = 0; q < ii1; ++q)
2403  for (size_type s = 0; s < ift1; ++s)
2404  for (size_type r = 0; r < ift2; ++r, ++it) {
2405  *it = scalar_type(0);
2406  size_type ind1
2407  = s+q*ift1+p*ift1*ii1*nn1+l*ift1*ii1*nn1*ii2*nn2;
2408  size_type ind2
2409  = r+k*ift2+j*ift2*ii4*nn1+i*ift2*ii4*nn1*ii5*nn2;
2410  for (size_type n1 = 0; n1 < nn1; ++n1)
2411  for (size_type n2 = 0; n2 < nn2; ++n2)
2412  *it += tc1[ind1+n1*ift1*ii1+n2*ift1*ii1*nn1*ii2]
2413  * tc2[ind2+n1*sn1+n2*sn2];
2414  }
2415 
2416  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2417  return 0;
2418  }
2419  ga_instruction_contract_2_2_rev(base_tensor &t_, base_tensor &tc1_,
2420  base_tensor &tc2_,
2421  size_type n1_, size_type n2_,
2422  size_type i1_, size_type i2_, size_type i3_,
2423  size_type i4_, size_type i5_, size_type i6_,
2424  bool intc2)
2425  : t(t_), tc1(tc1_), tc2(tc2_), nn1(n1_), nn2(n2_),
2426  ii1(i1_), ii2(i2_), ii3(i3_), ii4(i4_), ii5(i5_), ii6(i6_),
2427  inv_tc2(intc2) {}
2428  };
2429 
2430 
2431  // Performs Amj Bjk -> Cmk. To be optimized
2432  struct ga_instruction_matrix_mult : public ga_instruction {
2433  base_tensor &t, &tc1, &tc2;
2434  size_type n;
2435  virtual int exec() {
2436  GA_DEBUG_INFO("Instruction: order one contraction "
2437  "(dot product or matrix multiplication)");
2438 
2439  size_type s1 = tc1.size() / n;
2440  size_type s2 = tc2.size() / n;
2441 
2442  base_tensor::iterator it = t.begin();
2443  for (size_type k = 0; k < s2; ++k)
2444  for (size_type i = 0; i < s1; ++i, ++it) {
2445  *it = scalar_type(0);
2446  for (size_type j = 0; j < n; ++j)
2447  *it += tc1[i+j*s1] * tc2[j+k*n];
2448  }
2449  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2450  return 0;
2451  }
2452  ga_instruction_matrix_mult(base_tensor &t_, base_tensor &tc1_,
2453  base_tensor &tc2_, size_type n_)
2454  : t(t_), tc1(tc1_), tc2(tc2_), n(n_) {}
2455  };
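  // Illustrative sketch (editor's addition; hypothetical helper): the loop
  // above is a plain column-major matrix product, with tc1 read as (s1 x n),
  // tc2 as (n x s2) and t as (s1 x s2), first index fastest:
  inline void sketch_colmajor_matmul(const base_tensor &A, const base_tensor &B,
                                     base_tensor &C, size_type s1,
                                     size_type n, size_type s2) {
    GMM_ASSERT1(A.size() == s1*n && B.size() == n*s2 && C.size() == s1*s2,
                "Wrong sizes");
    for (size_type k = 0; k < s2; ++k)
      for (size_type i = 0; i < s1; ++i) {
        scalar_type s(0);
        for (size_type j = 0; j < n; ++j) s += A[i + j*s1] * B[j + k*n];
        C[i + k*s1] = s;
      }
  }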
2456 
2457  // Performs Amij Bnjk -> Cmnik. To be optimized
2458  struct ga_instruction_matrix_mult_spec : public ga_instruction {
2459  base_tensor &t, &tc1, &tc2;
2460  size_type n, m, p; // tc1 of size q*m*n, tc2 of size l*n*p
2461  // t of size q*l*m*p
2462  virtual int exec() {
2463  GA_DEBUG_INFO("Instruction: specific order one contraction "
2464  "(dot product or matrix multiplication)");
2465  size_type q = tc1.size() / (m * n);
2466  size_type l = tc2.size() / (p * n);
2467 
2468  base_tensor::iterator it = t.begin();
2469  for (size_type r = 0; r < p; ++r)
2470  for (size_type k = 0; k < m; ++k)
2471  for (size_type j = 0; j < l; ++j)
2472  for (size_type i = 0; i < q; ++i, ++it) {
2473  *it = scalar_type(0);
2474  for (size_type s = 0; s < n; ++s)
2475  *it += tc1[i+k*q+s*q*m] * tc2[j+s*l+r*l*n];
2476  }
2477  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2478  return 0;
2479  }
2480  ga_instruction_matrix_mult_spec(base_tensor &t_, base_tensor &tc1_,
2481  base_tensor &tc2_, size_type n_,
2482  size_type m_, size_type p_)
2483  : t(t_), tc1(tc1_), tc2(tc2_), n(n_), m(m_), p(p_) {}
2484  };
2485 
2486  // Performs Amij Bnjk -> Cnmik. To be optimized
2487  struct ga_instruction_matrix_mult_spec2 : public ga_instruction {
2488  base_tensor &t, &tc1, &tc2;
2489  size_type n, m, p; // tc1 of size q*m*n, tc2 of size l*n*p
2490  // t of size l*q*m*p
2491  virtual int exec() {
2492  GA_DEBUG_INFO("Instruction: specific order one contraction "
2493  "(dot product or matrix multiplication)");
2494  size_type q = tc1.size() / (m * n);
2495  size_type l = tc2.size() / (p * n);
2496 
2497  base_tensor::iterator it = t.begin();
2498  for (size_type r = 0; r < p; ++r)
2499  for (size_type k = 0; k < m; ++k)
2500  for (size_type i = 0; i < q; ++i)
2501  for (size_type j = 0; j < l; ++j, ++it) {
2502  *it = scalar_type(0);
2503  for (size_type s = 0; s < n; ++s)
2504  *it += tc1[i+k*q+s*q*m] * tc2[j+s*l+r*l*n];
2505  }
2506  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
2507  return 0;
2508  }
2509  ga_instruction_matrix_mult_spec2(base_tensor &t_, base_tensor &tc1_,
2510  base_tensor &tc2_, size_type n_,
2511  size_type m_, size_type p_)
2512  : t(t_), tc1(tc1_), tc2(tc2_), n(n_), m(m_), p(p_) {}
2513  };
2514 
2515  // Performs Ani Bmi -> Cmn
2516  struct ga_instruction_contraction : public ga_instruction {
2517  base_tensor &t, &tc1, &tc2;
2518  size_type nn;
2519  virtual int exec() {
2520  GA_DEBUG_INFO("Instruction: contraction operation of size " << nn);
2521 #if GA_USES_BLAS
2522  long m = int(tc1.size()/nn), k = int(nn), n = int(tc2.size()/nn);
2523  long lda = m, ldb = n, ldc = m;
2524  char T = 'T', N = 'N';
2525  scalar_type alpha(1), beta(0);
2526  gmm::dgemm_(&N, &T, &m, &n, &k, &alpha, &(tc1[0]), &lda, &(tc2[0]), &ldb,
2527  &beta, &(t[0]), &ldc);
2528 #else
2529  size_type s1 = tc1.size()/nn, s2 = tc2.size()/nn;
2530  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
2531 
2532  auto it1=tc1.begin(), it2=tc2.begin(), it2end=it2 + s2;
2533  for (auto it = t.begin(); it != t.end(); ++it) {
2534  auto it11 = it1, it22 = it2;
2535  scalar_type a = (*it11) * (*it22);
2536  for (size_type i = 1; i < nn; ++i)
2537  { it11 += s1; it22 += s2; a += (*it11) * (*it22); }
2538  *it = a;
2539  ++it2; if (it2 == it2end) { it2 = tc2.begin(), ++it1; }
2540  }
2541  // auto it = t.begin(); // Unoptimized version.
2542  // for (size_type i = 0; i < s1; ++i)
2543  // for (size_type j = 0; j < s2; ++j, ++it) {
2544  // *it = scalar_type(0);
2545  // for (size_type k = 0; k < nn; ++k)
2546  // *it += tc1[i+k*s1] * tc2[j+k*s2];
2547  // }
2548 #endif
2549  return 0;
2550  }
2551  ga_instruction_contraction(base_tensor &t_, base_tensor &tc1_,
2552  base_tensor &tc2_, size_type n_)
2553  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
2554  };
2555 
2556  // Performs Ani Bmi -> Cmn
2557  struct ga_instruction_contraction_opt0_2 : public ga_instruction {
2558  base_tensor &t, &tc1, &tc2;
2559  size_type n, q;
2560  virtual int exec() {
2561  GA_DEBUG_INFO("Instruction: contraction operation of size " << n*q <<
2562  " optimized for vectorized second tensor of type 2");
2563  size_type nn = n*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn, s2_q = s2/q;
2564  size_type s1_qq = s1*q, s2_qq = s2*q;
2565  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
2566 
2567  auto it = t.begin(), it1 = tc1.begin();
2568  for (size_type i = 0; i < s1; ++i, ++it1) {
2569  auto it2 = tc2.begin();
2570  for (size_type j = 0; j < s2_q; ++j) {
2571  if (j) it2+=q;
2572  auto itt1 = it1;
2573  for (size_type l = 0; l < q; ++l, ++it) {
2574  if (l) itt1 += s1;
2575  auto ittt1 = itt1, ittt2 = it2;
2576  *it = *ittt1 * (*ittt2);
2577  for (size_type m = 1; m < n; ++m) {
2578  ittt1 += s1_qq, ittt2 += s2_qq; *it += *ittt1 * (*ittt2);
2579  }
2580  }
2581  }
2582  }
2583  // base_tensor u = t;
2584  // ga_instruction_contraction toto(t, tc1, tc2, n*q);
2585  // toto.exec();
2586  // GMM_ASSERT1(gmm::vect_dist2(t.as_vector(), u.as_vector()) < 1E-9, "Erroneous");
2587  return 0;
2588  }
2589  ga_instruction_contraction_opt0_2(base_tensor &t_, base_tensor &tc1_,
2590  base_tensor &tc2_, size_type n_,
2591  size_type q_)
2592  : t(t_), tc1(tc1_), tc2(tc2_), n(n_), q(q_) {}
2593  };
2594 
2595  // Performs Ani Bmi -> Cmn
2596  template <int N>
2597  struct ga_instruction_contraction_opt0_2_unrolled : public ga_instruction {
2598  base_tensor &t, &tc1, &tc2;
2599  size_type q;
2600  virtual int exec() {
2601  GA_DEBUG_INFO("Instruction: unrolled contraction of size " << N*q <<
2602  " optimized for vectorized second tensor of type 2");
2603  size_type nn = N*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn, s2_q = s2/q;
2604  size_type s1_qq = s1*q, s2_qq = s2*q;
2605  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
2606 
2607  auto it = t.begin(), it1 = tc1.begin();
2608  for (size_type i = 0; i < s1; ++i, ++it1) {
2609  auto it2 = tc2.begin();
2610  for (size_type j = 0; j < s2_q; ++j) {
2611  if (j) it2+=q;
2612  auto itt1 = it1;
2613  for (size_type l = 0; l < q; ++l, ++it) {
2614  if (l) itt1 += s1;
2615  auto ittt1 = itt1, ittt2 = it2;
2616  *it = *ittt1 * (*ittt2);
2617  for (size_type m = 1; m < N; ++m) {
2618  ittt1 += s1_qq, ittt2 += s2_qq; *it += *ittt1 * (*ittt2);
2619  }
2620  }
2621  }
2622  }
2623  return 0;
2624  }
2625  ga_instruction_contraction_opt0_2_unrolled(base_tensor &t_, base_tensor &tc1_,
2626  base_tensor &tc2_, size_type q_)
2627  : t(t_), tc1(tc1_), tc2(tc2_), q(q_) {}
2628  };
2629 
2630  // Performs Ani Bmi -> Cmn
2631  template <int N, int Q>
2632  struct ga_instruction_contraction_opt0_2_dunrolled : public ga_instruction {
2633  base_tensor &t, &tc1, &tc2;
2634  virtual int exec() {
2635  GA_DEBUG_INFO("Instruction: unrolled contraction of size " << N*Q
2636  << " optimized for vectorized second tensor of type 2");
2637  size_type s1 = tc1.size()/(N*Q), s2 = tc2.size()/(N*Q), s2_q = s2/Q;
2638  size_type s1_qq = s1*Q, s2_qq = s2*Q;
2639  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
2640 
2641  auto it = t.begin(), it1 = tc1.begin();
2642  for (size_type i = 0; i < s1; ++i, ++it1) {
2643  auto it2 = tc2.begin();
2644  for (size_type j = 0; j < s2_q; ++j) {
2645  if (j) it2+=Q;
2646  auto itt1 = it1;
2647  for (size_type l = 0; l < Q; ++l, ++it) {
2648  if (l) itt1 += s1;
2649  auto ittt1 = itt1, ittt2 = it2;
2650  *it = *ittt1 * (*ittt2);
2651  for (size_type m = 1; m < N; ++m) {
2652  ittt1 += s1_qq, ittt2 += s2_qq; *it += *ittt1 * (*ittt2);
2653  }
2654  }
2655  }
2656  }
2657  return 0;
2658  }
2659  ga_instruction_contraction_opt0_2_dunrolled
2660  (base_tensor &t_, base_tensor &tc1_, base_tensor &tc2_)
2661  : t(t_), tc1(tc1_), tc2(tc2_) {}
2662  };
2663 
2664  // Performs Ani Bmi -> Cmn
2665  struct ga_instruction_contraction_opt2_0 : public ga_instruction {
2666  base_tensor &t, &tc1, &tc2;
2667  size_type n, q;
2668  virtual int exec() {
2669  GA_DEBUG_INFO("Instruction: contraction operation of size " << n*q <<
2670                    " optimized for vectorized first tensor of type 2");
2671  size_type nn = n*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn;
2672  size_type s1_q = s1/q, s1_qq = s1*q, s2_qq = s2*q;
2673  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
2674 
2675  auto it = t.begin();
2676  for (size_type i = 0; i < s1_q; ++i) {
2677  auto it1 = tc1.begin() + i*q;
2678  for (size_type l = 0; l < q; ++l) {
2679  auto it2 = tc2.begin() + l*s2;
2680  for (size_type j = 0; j < s2; ++j, ++it, ++it2) {
2681  auto itt1 = it1, itt2 = it2;
2682  *it = *itt1 * (*itt2);
2683  for (size_type m = 1; m < n; ++m) {
2684  itt1 += s1_qq, itt2 += s2_qq; *it += *itt1 * (*itt2);
2685  }
2686  }
2687  }
2688  }
2689  return 0;
2690  }
2691  ga_instruction_contraction_opt2_0(base_tensor &t_, base_tensor &tc1_,
2692  base_tensor &tc2_, size_type n_,
2693  size_type q_)
2694  : t(t_), tc1(tc1_), tc2(tc2_), n(n_), q(q_) { }
2695  };
2696 
2697  // Performs Ani Bmi -> Cmn
2698  template <int N>
2699  struct ga_instruction_contraction_opt2_0_unrolled : public ga_instruction {
2700  base_tensor &t, &tc1, &tc2;
2701  size_type q;
2702  virtual int exec() {
2703  GA_DEBUG_INFO("Instruction: unrolled contraction of size " << N*q
2704                    << " optimized for vectorized first tensor of type 2");
2705  size_type nn = N*q, s1 = tc1.size()/nn, s2 = tc2.size()/nn;
2706  size_type s1_q = s1/q, s1_qq = s1*q, s2_qq = s2*q;
2707  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
2708 
2709  auto it = t.begin(), it1 = tc1.begin();
2710  for (size_type i = 0; i < s1_q; ++i, it1 += q) {
2711  for (size_type l = 0; l < q; ++l) {
2712  auto it2 = tc2.begin() + l*s2;
2713  for (size_type j = 0; j < s2; ++j, ++it, ++it2) {
2714  auto itt1 = it1, itt2 = it2;
2715  *it = *itt1 * (*itt2);
2716  for (size_type m = 1; m < N; ++m) {
2717  itt1 += s1_qq, itt2 += s2_qq; *it += *itt1 * (*itt2);
2718  }
2719  }
2720  }
2721  }
2722  return 0;
2723  }
2724  ga_instruction_contraction_opt2_0_unrolled(base_tensor &t_, base_tensor &tc1_,
2725  base_tensor &tc2_, size_type q_)
2726  : t(t_), tc1(tc1_), tc2(tc2_), q(q_) {}
2727  };
2728 
2729  // Performs Ani Bmi -> Cmn
2730  template <int N, int Q>
2731  struct ga_instruction_contraction_opt2_0_dunrolled : public ga_instruction {
2732  base_tensor &t, &tc1, &tc2;
2733  virtual int exec() {
2734  GA_DEBUG_INFO("Instruction: unrolled contraction of size " << N*Q
2735                    << " optimized for vectorized first tensor of type 2");
2736  size_type s1 = tc1.size()/(N*Q), s2 = tc2.size()/(N*Q);
2737  size_type s1_q = s1/Q, s1_qq = s1*Q, s2_qq = s2*Q;
2738  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error");
2739 
2740  auto it = t.begin(), it1 = tc1.begin();
2741  for (size_type i = 0; i < s1_q; ++i, it1 += Q) {
2742  for (size_type l = 0; l < Q; ++l) {
2743  auto it2 = tc2.begin() + l*s2;
2744  for (size_type j = 0; j < s2; ++j, ++it, ++it2) {
2745  auto itt1 = it1, itt2 = it2;
2746  *it = *itt1 * (*itt2);
2747  for (size_type m = 1; m < N; ++m) {
2748  itt1 += s1_qq, itt2 += s2_qq; *it += *itt1 * (*itt2);
2749  }
2750  }
2751  }
2752  }
2753  return 0;
2754  }
2755  ga_instruction_contraction_opt2_0_dunrolled
2756  (base_tensor &t_, base_tensor &tc1_, base_tensor &tc2_)
2757  : t(t_), tc1(tc1_), tc2(tc2_) {}
2758  };
2759 
2760  // Performs Ani Bmi -> Cmn
2761  struct ga_instruction_contraction_opt0_1 : public ga_instruction {
2762  base_tensor &t, &tc1, &tc2;
2763  size_type nn;
2764  virtual int exec() {
2765  GA_DEBUG_INFO("Instruction: contraction operation of size " << nn <<
2766  " optimized for vectorized second tensor of type 1");
2767  size_type ss1=tc1.size(), s1 = ss1/nn, s2=tc2.size()/nn, s2_n=s2/nn;
2768 
2769  auto it = t.begin(), it1 = tc1.begin();
2770  for (size_type i = 0; i < s1; ++i, ++it1) {
2771  auto it2 = tc2.begin();
2772  for (size_type j = 0; j < s2_n; ++j) {
2773  if (j) it2 += nn;
2774  auto itt1 = it1;
2775  *it++ = (*itt1) * (*it2);
2776  for (size_type k = 1; k < nn; ++k)
2777  { itt1 += s1; *it++ = (*itt1) * (*it2); }
2778  }
2779  }
2780  return 0;
2781  }
2782  ga_instruction_contraction_opt0_1(base_tensor &t_, base_tensor &tc1_,
2783  base_tensor &tc2_, size_type n_)
2784  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
2785  };
2786 
2787  template<int N> inline void reduc_elem_unrolled_opt1_
2788  (const base_vector::iterator &it, const base_vector::iterator &it1,
2789  scalar_type a, size_type s1) {
2790  it[N-1] = it1[(N-1)*s1] * a;
2791  reduc_elem_unrolled_opt1_<N-1>(it, it1, a, s1);
2792  }
2793  template<> inline void reduc_elem_unrolled_opt1_<1>
2794  (const base_vector::iterator &it, const base_vector::iterator &it1,
2795  scalar_type a, size_type /* s1 */)
2796  { *it = (*it1) * a; }
2797 
2798  // Performs Ani Bmi -> Cmn
2799  template <int N>
2800  struct ga_instruction_contraction_opt0_1_unrolled : public ga_instruction {
2801  base_tensor &t, &tc1, &tc2;
2802  virtual int exec() {
2803  GA_DEBUG_INFO("Instruction: unrolled contraction operation of size " << N
2804  << " optimized for vectorized second tensor of type 1");
2805  size_type s1 = tc1.size()/N, s2 = tc2.size()/N;
2806  auto it = t.begin(), it1 = tc1.begin();
2807  for (size_type i = 0; i < s1; ++i, ++it1) {
2808  auto it2 = tc2.begin(), it2e = it2 + s2;
2809  for (; it2 != it2e; it2 += N, it += N)
2810  reduc_elem_unrolled_opt1_<N>(it, it1, *it2, s1);
2811  }
2812  return 0;
2813  }
2814  ga_instruction_contraction_opt0_1_unrolled(base_tensor &t_, base_tensor &tc1_,
2815  base_tensor &tc2_)
2816  : t(t_), tc1(tc1_), tc2(tc2_) {}
2817  };
2818 
2819  // Performs Ani Bmi -> Cmn
2820  struct ga_instruction_contraction_opt1_1 : public ga_instruction {
2821  base_tensor &t, &tc1, &tc2;
2822  size_type nn;
2823  virtual int exec() {
2824  GA_DEBUG_INFO("Instruction: contraction operation of size " << nn <<
2825                    " optimized for both vectorized tensors of type 1");
2826  size_type s1 = tc1.size()/nn, s2 = tc2.size()/nn, s2_1 = s2+1;
2827  GA_DEBUG_ASSERT(t.size() == s2*s1, "Internal error");
2828  size_type ss1 = s1/nn, ss2 = s2/nn;
2829 
2830  // std::fill(t.begin(), t.end(), scalar_type(0)); // Factorized
2831  auto it2 = tc2.begin();
2832  for (size_type j = 0; j < ss2; ++j) {
2833  if (j) it2 += nn;
2834  auto it1 = tc1.begin(), it = t.begin() + j*nn;
2835  for (size_type i = 0; i < ss1; ++i) {
2836  if (i) { it1 += nn, it += s2*nn; }
2837  scalar_type a = (*it1) * (*it2);
2838  auto itt = it;
2839  *itt = a; itt += s2_1; *itt = a;
2840  for (size_type k = 2; k < nn; ++k) { itt += s2_1; *itt = a; }
2841  }
2842  }
2843  return 0;
2844  }
2845  ga_instruction_contraction_opt1_1(base_tensor &t_, base_tensor &tc1_,
2846  base_tensor &tc2_, size_type n_)
2847  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
2848  };
2849 
2850 
2851 
2852  template<int N> inline scalar_type reduc_elem_unrolled__
2853  (base_tensor::iterator &it1, base_tensor::iterator &it2,
2854  size_type s1, size_type s2) {
2855  return (it1[(N-1)*s1])*(it2[(N-1)*s2])
2856  + reduc_elem_unrolled__<N-1>(it1, it2, s1, s2);
2857  }
2858  template<> inline scalar_type reduc_elem_unrolled__<1>
2859  (base_tensor::iterator &it1, base_tensor::iterator &it2,
2860  size_type /*s1*/, size_type /*s2*/)
2861  { return (*it1)*(*it2); }
2862 
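  // Editorial note: reduc_elem_unrolled__<N> above is a compile-time
  // recursion; after inlining, N = 3 for instance expands to
  //   it1[2*s1]*it2[2*s2] + it1[s1]*it2[s2] + (*it1)*(*it2),
  // i.e. the dot product of two strided columns of length 3 with no run-time
  // loop counter. The <1> specialization terminates the recursion.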
2863  // Performs Ani Bmi -> Cmn. Unrolled operation.
2864  template<int N> struct ga_instruction_contraction_unrolled
2865  : public ga_instruction {
2866  base_tensor &t, &tc1, &tc2;
2867  virtual int exec() {
2868  GA_DEBUG_INFO("Instruction: unrolled contraction operation of size " << N);
2869  size_type s1 = tc1.size()/N, s2 = tc2.size()/N;
2870  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error, " << t.size()
2871  << " != " << s1 << "*" << s2);
2872  base_tensor::iterator it1=tc1.begin(), it2=tc2.begin(), it2end=it2 + s2;
2873  for (base_tensor::iterator it = t.begin(); it != t.end(); ++it) {
2874  *it = reduc_elem_unrolled__<N>(it1, it2, s1, s2);
2875  ++it2; if (it2 == it2end) { it2 = tc2.begin(), ++it1; }
2876  }
2877  return 0;
2878  }
2879  ga_instruction_contraction_unrolled(base_tensor &t_, base_tensor &tc1_,
2880  base_tensor &tc2_)
2881  : t(t_), tc1(tc1_), tc2(tc2_) {}
2882  };
2883 
2884  template<int N, int S2> inline void reduc_elem_d_unrolled__
2885  (base_tensor::iterator &it, base_tensor::iterator &it1,
2886  base_tensor::iterator &it2, size_type s1, size_type s2) {
2887  *it++ = reduc_elem_unrolled__<N>(it1, it2, s1, s2);
2888  reduc_elem_d_unrolled__<N, S2-1>(it, it1, ++it2, s1, s2);
2889  }
2890  // Repeated explicit specializations follow because partial specialization
2891  // of function templates is not allowed in C++.
2892  // The gain in assembly time is small compared to the simply unrolled version.
2893  template<> inline void reduc_elem_d_unrolled__<1, 0>
2894  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2895  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2896  template<> inline void reduc_elem_d_unrolled__<2, 0>
2897  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2898  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2899  template<> inline void reduc_elem_d_unrolled__<3, 0>
2900  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2901  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2902  template<> inline void reduc_elem_d_unrolled__<4, 0>
2903  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2904  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2905  template<> inline void reduc_elem_d_unrolled__<5, 0>
2906  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2907  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2908  template<> inline void reduc_elem_d_unrolled__<6, 0>
2909  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2910  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2911  template<> inline void reduc_elem_d_unrolled__<7, 0>
2912  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2913  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2914  template<> inline void reduc_elem_d_unrolled__<8, 0>
2915  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2916  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2917  template<> inline void reduc_elem_d_unrolled__<9, 0>
2918  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2919  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2920  template<> inline void reduc_elem_d_unrolled__<10, 0>
2921  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2922  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2923  template<> inline void reduc_elem_d_unrolled__<11, 0>
2924  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2925  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2926  template<> inline void reduc_elem_d_unrolled__<12, 0>
2927  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2928  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2929  template<> inline void reduc_elem_d_unrolled__<13, 0>
2930  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2931  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2932  template<> inline void reduc_elem_d_unrolled__<14, 0>
2933  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2934  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2935  template<> inline void reduc_elem_d_unrolled__<15, 0>
2936  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2937  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2938  template<> inline void reduc_elem_d_unrolled__<16, 0>
2939  (base_tensor::iterator &/* it */, base_tensor::iterator &/* it1 */,
2940  base_tensor::iterator &/* it2 */, size_type /* s1 */, size_type /* s2 */) { }
2941 
2942  // Performs Ani Bmi -> Cmn. Automatically doubly unrolled operation
2943  // (for uniform meshes).
2944  template<int N, int S2> struct ga_ins_red_d_unrolled
2945  : public ga_instruction {
2946  base_tensor &t, &tc1, &tc2;
2947  virtual int exec() {
2948  GA_DEBUG_INFO("Instruction: doubly unrolled contraction operation of size "
2949  << S2 << "x" << N);
2950  size_type s1 = tc1.size()/N, s2 = tc2.size()/N;
2951  GA_DEBUG_ASSERT(s2 == S2, "Internal error");
2952  GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error, " << t.size()
2953  << " != " << s1 << "*" << s2);
2954  base_tensor::iterator it = t.begin(), it1 = tc1.begin();
2955  for (size_type ii = 0; ii < s1; ++ii, ++it1) {
2956  base_tensor::iterator it2 = tc2.begin();
2957  reduc_elem_d_unrolled__<N, S2>(it, it1, it2, s1, s2);
2958  }
2959  GA_DEBUG_ASSERT(it == t.end(), "Internal error");
2960  return 0;
2961  }
2962  ga_ins_red_d_unrolled(base_tensor &t_, base_tensor &tc1_, base_tensor &tc2_)
2963  : t(t_), tc1(tc1_), tc2(tc2_) {}
2964  };
2965 
2966 
2967  pga_instruction ga_instruction_contraction_switch
2968  (assembly_tensor &t_, assembly_tensor &tc1_, assembly_tensor &tc2_,
2969  size_type n, bool &to_clear) {
2970  base_tensor &t = t_.tensor(), &tc1 = tc1_.tensor(), &tc2 = tc2_.tensor();
2971 
2972  if (tc1_.sparsity() == 1 && tc2_.sparsity() == 1 &&
2973  tc1_.qdim() == n && tc2_.qdim() == n) {
2974  to_clear = true;
2975  t_.set_sparsity(10, tc1_.qdim());
2976  return std::make_shared<ga_instruction_contraction_opt1_1>(t, tc1, tc2, n);
2977  }
2978 
2979  if (tc2_.sparsity() == 1) {
2980  switch(n) {
2981  case 2:
2982  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<2>>
2983  (t, tc1, tc2);
2984  case 3:
2985  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<3>>
2986  (t, tc1, tc2);
2987  case 4:
2988  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<4>>
2989  (t, tc1, tc2);
2990  case 5:
2991  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<5>>
2992  (t, tc1, tc2);
2993  default:
2994  return std::make_shared<ga_instruction_contraction_opt0_1>(t,tc1,tc2, n);
2995  }
2996  }
2997  if (tc2_.sparsity() == 2) {
2998  size_type q2 = tc2.sizes()[1];
2999  size_type n2 = (tc2.sizes().size() > 2) ? tc2.sizes()[1] : 1;
3000  if (n2*q2 == n) {
3001  switch (n2) {
3002  case 1:
3003  switch (q2) {
3004  case 2:
3005  return
3006  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,2>>
3007  (t, tc1, tc2);
3008  case 3:
3009  return
3010  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,3>>
3011  (t, tc1, tc2);
3012  case 4:
3013  return
3014  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,4>>
3015  (t, tc1, tc2);
3016  default :
3017  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<1>>
3018  (t, tc1, tc2, q2);
3019  }
3020  case 2:
3021  switch (q2) {
3022  case 2:
3023  return
3024  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,2>>
3025  (t, tc1, tc2);
3026  case 3:
3027  return
3028  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,3>>
3029  (t, tc1, tc2);
3030  case 4:
3031  return
3032  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,4>>
3033  (t, tc1, tc2);
3034  default :
3035  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<2>>
3036  (t, tc1, tc2, q2);
3037  }
3038  case 3:
3039  switch (q2) {
3040  case 2:
3041  return
3042  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,2>>
3043  (t, tc1, tc2);
3044  case 3:
3045  return
3046  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,3>>
3047  (t, tc1, tc2);
3048  case 4:
3049  return
3050  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,4>>
3051  (t, tc1, tc2);
3052  default :
3053  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<3>>
3054  (t, tc1, tc2, q2);
3055  }
3056  case 4:
3057  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<4>>
3058  (t, tc1, tc2, q2);
3059  case 5:
3060  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<5>>
3061  (t, tc1, tc2, q2);
3062  default:
3063  return std::make_shared<ga_instruction_contraction_opt0_2>
3064  (t,tc1,tc2,n2,q2);
3065  }
3066  }
3067  }
3068  if (tc1_.sparsity() == 2) {
3069  size_type q1 = tc1.sizes()[1];
3070  size_type n1 = (tc1.sizes().size() > 2) ? tc1.sizes()[2] : 1;
3071  if (n1*q1 == n) {
3072  switch (n1) {
3073  case 1:
3074  switch (q1) {
3075  case 2:
3076  return
3077  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,2>>
3078  (t, tc1, tc2);
3079  case 3:
3080  return
3081  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,3>>
3082  (t, tc1, tc2);
3083  case 4:
3084  return
3085  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,4>>
3086  (t, tc1, tc2);
3087  default :
3088  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<1>>
3089  (t, tc1, tc2, q1);
3090  }
3091  case 2:
3092  switch (q1) {
3093  case 2:
3094  return
3095  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,2>>
3096  (t, tc1, tc2);
3097  case 3:
3098  return
3099  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,3>>
3100  (t, tc1, tc2);
3101  case 4:
3102  return
3103  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,4>>
3104  (t, tc1, tc2);
3105  default :
3106  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<2>>
3107  (t, tc1, tc2, q1);
3108  }
3109  case 3:
3110  switch (q1) {
3111  case 2:
3112  return
3113  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,2>>
3114  (t, tc1, tc2);
3115  case 3:
3116  return
3117  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,3>>
3118  (t, tc1, tc2);
3119  case 4:
3120  return
3121  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,4>>
3122  (t, tc1, tc2);
3123  default :
3124  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<3>>
3125  (t, tc1, tc2, q1);
3126  }
3129  case 4:
3130  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<4>>
3131  (t, tc1, tc2, q1);
3132  case 5:
3133  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<5>>
3134  (t, tc1, tc2, q1);
3135  default:
3136  return std::make_shared<ga_instruction_contraction_opt2_0>
3137  (t,tc1,tc2, n1, q1);
3138  }
3139  }
3140  }
3141 
3142  switch(n) {
3143  case 2 : return std::make_shared<ga_instruction_contraction_unrolled< 2>>
3144  (t, tc1, tc2);
3145  case 3 : return std::make_shared<ga_instruction_contraction_unrolled< 3>>
3146  (t, tc1, tc2);
3147  case 4 : return std::make_shared<ga_instruction_contraction_unrolled< 4>>
3148  (t, tc1, tc2);
3149  case 5 : return std::make_shared<ga_instruction_contraction_unrolled< 5>>
3150  (t, tc1, tc2);
3151  case 6 : return std::make_shared<ga_instruction_contraction_unrolled< 6>>
3152  (t, tc1, tc2);
3153  case 7 : return std::make_shared<ga_instruction_contraction_unrolled< 7>>
3154  (t, tc1, tc2);
3155  case 8 : return std::make_shared<ga_instruction_contraction_unrolled< 8>>
3156  (t, tc1, tc2);
3157  case 9 : return std::make_shared<ga_instruction_contraction_unrolled< 9>>
3158  (t, tc1, tc2);
3159  case 10 : return std::make_shared<ga_instruction_contraction_unrolled<10>>
3160  (t, tc1, tc2);
3161  case 11 : return std::make_shared<ga_instruction_contraction_unrolled<11>>
3162  (t, tc1, tc2);
3163  case 12 : return std::make_shared<ga_instruction_contraction_unrolled<12>>
3164  (t, tc1, tc2);
3165  case 13 : return std::make_shared<ga_instruction_contraction_unrolled<13>>
3166  (t, tc1, tc2);
3167  case 14 : return std::make_shared<ga_instruction_contraction_unrolled<14>>
3168  (t, tc1, tc2);
3169  case 15 : return std::make_shared<ga_instruction_contraction_unrolled<15>>
3170  (t, tc1, tc2);
3171  case 16 : return std::make_shared<ga_instruction_contraction_unrolled<16>>
3172  (t, tc1, tc2);
3173  default : return std::make_shared<ga_instruction_contraction>
3174  (t, tc1, tc2, n);
3175  }
3176  }
3177 
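  // Same dispatching for the uniform case (all elements share the same
  // sizes): in addition to the cases above, when s2 = tc2.size()/n and n are
  // both small, a doubly unrolled instruction with both sizes fixed at
  // compile time is selected.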
3178  pga_instruction ga_uniform_instruction_contraction_switch
3179  (assembly_tensor &t_, assembly_tensor &tc1_, assembly_tensor &tc2_,
3180  size_type n, bool &to_clear) {
3181  base_tensor &t = t_.tensor(), &tc1 = tc1_.tensor(), &tc2 = tc2_.tensor();
3182 
3183  if (tc1_.sparsity() == 1 && tc2_.sparsity() == 1 &&
3184  tc1_.qdim() == n && tc2_.qdim() == n) {
3185  to_clear = true;
3186  t_.set_sparsity(10, tc1_.qdim());
3187  return std::make_shared<ga_instruction_contraction_opt1_1>(t,tc1,tc2,n);
3188  }
3189  if (tc2_.sparsity() == 1) {
3190  switch(n) {
3191  case 2:
3192  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<2>>
3193  (t, tc1, tc2);
3194  case 3:
3195  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<3>>
3196  (t, tc1, tc2);
3197  case 4:
3198  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<4>>
3199  (t, tc1, tc2);
3200  case 5:
3201  return std::make_shared<ga_instruction_contraction_opt0_1_unrolled<5>>
3202  (t, tc1, tc2);
3203  default:
3204  return std::make_shared<ga_instruction_contraction_opt0_1>(t,tc1,tc2, n);
3205  }
3206  }
3207  if (tc2_.sparsity() == 2) {
3208  size_type q2 = tc2.sizes()[1];
3209  size_type n2 = (tc2.sizes().size() > 2) ? tc2.sizes()[2] : 1;
3210  if (n2*q2 == n) {
3211  switch (n2) {
3212  case 1:
3213  switch (q2) {
3214  case 2:
3215  return
3216  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,2>>
3217  (t, tc1, tc2);
3218  case 3:
3219  return
3220  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,3>>
3221  (t, tc1, tc2);
3222  case 4:
3223  return
3224  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<1,4>>
3225  (t, tc1, tc2);
3226  default :
3227  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<1>>
3228  (t, tc1, tc2, q2);
3229  }
3230  case 2:
3231  switch (q2) {
3232  case 2:
3233  return
3234  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,2>>
3235  (t, tc1, tc2);
3236  case 3:
3237  return
3238  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,3>>
3239  (t, tc1, tc2);
3240  case 4:
3241  return
3242  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<2,4>>
3243  (t, tc1, tc2);
3244  default :
3245  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<2>>
3246  (t, tc1, tc2, q2);
3247  }
3248  case 3:
3249  switch (q2) {
3250  case 2:
3251  return
3252  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,2>>
3253  (t, tc1, tc2);
3254  case 3:
3255  return
3256  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,3>>
3257  (t, tc1, tc2);
3258  case 4:
3259  return
3260  std::make_shared<ga_instruction_contraction_opt0_2_dunrolled<3,4>>
3261  (t, tc1, tc2);
3262  default :
3263  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<3>>
3264  (t, tc1, tc2, q2);
3265  }
3266  case 4:
3267  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<4>>
3268  (t, tc1, tc2, q2);
3269  case 5:
3270  return std::make_shared<ga_instruction_contraction_opt0_2_unrolled<5>>
3271  (t, tc1, tc2, q2);
3272  default:
3273  return std::make_shared<ga_instruction_contraction_opt0_2>
3274  (t,tc1,tc2,n2,q2);
3275  }
3276  }
3277  }
3278  if (tc1_.sparsity() == 2) {
3279  size_type q1 = tc1.sizes()[1];
3280  size_type n1 = (tc1.sizes().size() > 2) ? tc1.sizes()[2] : 1;
3281  if (n1*q1 == n) {
3282  switch (n1) {
3283  case 1:
3284  switch (q1) {
3285  case 2:
3286  return
3287  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,2>>
3288  (t, tc1, tc2);
3289  case 3:
3290  return
3291  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,3>>
3292  (t, tc1, tc2);
3293  case 4:
3294  return
3295  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<1,4>>
3296  (t, tc1, tc2);
3297  default :
3298  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<1>>
3299  (t, tc1, tc2, q1);
3300  }
3301  case 2:
3302  switch (q1) {
3303  case 2:
3304  return
3305  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,2>>
3306  (t, tc1, tc2);
3307  case 3:
3308  return
3309  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,3>>
3310  (t, tc1, tc2);
3311  case 4:
3312  return
3313  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<2,4>>
3314  (t, tc1, tc2);
3315  default :
3316  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<2>>
3317  (t, tc1, tc2, q1);
3318  }
3319  case 3:
3320  switch (q1) {
3321  case 2:
3322  return
3323  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,2>>
3324  (t, tc1, tc2);
3325  case 3:
3326  return
3327  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,3>>
3328  (t, tc1, tc2);
3329  case 4:
3330  return
3331  std::make_shared<ga_instruction_contraction_opt2_0_dunrolled<3,4>>
3332  (t, tc1, tc2);
3333  default :
3334  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<3>>
3335  (t, tc1, tc2, q1);
3336  }
3339  case 4:
3340  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<4>>
3341  (t, tc1, tc2, q1);
3342  case 5:
3343  return std::make_shared<ga_instruction_contraction_opt2_0_unrolled<5>>
3344  (t, tc1, tc2, q1);
3345  default:
3346  return std::make_shared<ga_instruction_contraction_opt2_0>
3347  (t,tc1,tc2, n1, q1);
3348  }
3349  }
3350  }
3351 
3352  // Doubly unrolled instructions are only specialized for small values of s2 and n
3353  size_type s2 = tc2.size()/n;
3354  switch(s2) {
3355  case 1 :
3356  switch(n) {
3357  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,1>>(t, tc1, tc2);
3358  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,1>>(t, tc1, tc2);
3359  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,1>>(t, tc1, tc2);
3360  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3361  }
3362  case 2 :
3363  switch(n) {
3364  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,2>>(t, tc1, tc2);
3365  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,2>>(t, tc1, tc2);
3366  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,2>>(t, tc1, tc2);
3367  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3368  }
3369  case 3 :
3370  switch(n) {
3371  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,3>>(t, tc1, tc2);
3372  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,3>>(t, tc1, tc2);
3373  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,3>>(t, tc1, tc2);
3374  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3375  }
3376  case 4 :
3377  switch(n) {
3378  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,4>>(t, tc1, tc2);
3379  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,4>>(t, tc1, tc2);
3380  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,4>>(t, tc1, tc2);
3381  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3382  }
3383  case 5 :
3384  switch(n) {
3385  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,5>>(t, tc1, tc2);
3386  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,5>>(t, tc1, tc2);
3387  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,5>>(t, tc1, tc2);
3388  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3389  }
3390  case 6 :
3391  switch(n) {
3392  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,6>>(t, tc1, tc2);
3393  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,6>>(t, tc1, tc2);
3394  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,6>>(t, tc1, tc2);
3395  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3396  }
3397  case 7 :
3398  switch(n) {
3399  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,7>>(t, tc1, tc2);
3400  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,7>>(t, tc1, tc2);
3401  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,7>>(t, tc1, tc2);
3402  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3403  }
3404  case 8 :
3405  switch(n) {
3406  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,8>>(t, tc1, tc2);
3407  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,8>>(t, tc1, tc2);
3408  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,8>>(t, tc1, tc2);
3409  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3410  }
3411  case 9 :
3412  switch(n) {
3413  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,9>>(t, tc1, tc2);
3414  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,9>>(t, tc1, tc2);
3415  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,9>>(t, tc1, tc2);
3416  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3417  }
3418  case 10:
3419  switch(n) {
3420  case 2: return std::make_shared<ga_ins_red_d_unrolled<2,10>>(t, tc1, tc2);
3421  case 3: return std::make_shared<ga_ins_red_d_unrolled<3,10>>(t, tc1, tc2);
3422  case 4: return std::make_shared<ga_ins_red_d_unrolled<4,10>>(t, tc1, tc2);
3423  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3424  }
3425  default: return ga_instruction_contraction_switch(t_,tc1_,tc2_,n,to_clear);
3426  }
3427  }
3428 
3429 
3430  // Performs Amij Bnj -> Cmni. To be optimized.
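  // i.e. C(m,n,i) = sum_j A(m,i,j) B(n,j), assuming the usual
  // first-index-varying-fastest storage of base_tensor.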
3431  struct ga_instruction_spec_contraction : public ga_instruction {
3432  base_tensor &t, &tc1, &tc2;
3433  size_type nn;
3434  virtual int exec() {
3435  GA_DEBUG_INFO("Instruction: specific contraction operation of "
3436  "size " << nn);
3437  size_type s1 = tc1.sizes()[0], s11 = tc1.size() / (s1*nn), s111 = s1*s11;
3438  size_type s2 = tc2.sizes()[0];
3439  base_tensor::iterator it = t.begin();
3440  for (size_type i = 0; i < s11; ++i)
3441  for (size_type n = 0; n < s2; ++n)
3442  for (size_type m = 0; m < s1; ++m, ++it) {
3443  *it = scalar_type(0);
3444  for (size_type j = 0; j < nn; ++j)
3445  *it += tc1[m+i*s1+j*s111] * tc2[n+j*s2];
3446  }
3447  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
3448  return 0;
3449  }
3450  ga_instruction_spec_contraction(base_tensor &t_, base_tensor &tc1_,
3451  base_tensor &tc2_, size_type n_)
3452  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
3453  };
3454 
3455  // Performs Amik Bnjk -> Cmnij. To be optimized.
3456  struct ga_instruction_spec2_contraction : public ga_instruction {
3457  base_tensor &t, &tc1, &tc2;
3458  size_type nn;
3459  virtual int exec() {
3460  GA_DEBUG_INFO("Instruction: second specific contraction operation of "
3461  "size " << nn);
3462  size_type s1 = tc1.sizes()[0], s11 = tc1.size() / (s1*nn), s111 = s1*s11;
3463  size_type s2 = tc2.sizes()[0], s22 = tc2.size() / (s2*nn), s222 = s2*s22;
3464  base_tensor::iterator it = t.begin();
3465  for (size_type j = 0; j < s22; ++j)
3466  for (size_type i = 0; i < s11; ++i)
3467  for (size_type m = 0; m < s1; ++m)
3468  for (size_type n = 0; n < s2; ++n, ++it) {
3469  *it = scalar_type(0);
3470  for (size_type k = 0; k < nn; ++k)
3471  *it += tc1[m+i*s1+k*s111] * tc2[n+j*s2+k*s222];
3472  }
3473  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
3474  return 0;
3475  }
3476  ga_instruction_spec2_contraction(base_tensor &t_, base_tensor &tc1_,
3477  base_tensor &tc2_, size_type n_)
3478  : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
3479  };
3480 
3481  // Performs Aij Bkl -> Cijkl
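  // i.e. the outer product t[p + q*s1] = tc1[p] * tc2[q], the index of the
  // first operand varying fastest in the result.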
3482  struct ga_instruction_simple_tmult : public ga_instruction {
3483  base_tensor &t, &tc1, &tc2;
3484  virtual int exec() {
3485  GA_DEBUG_INFO("Instruction: simple tensor product");
3486  size_type s1 = tc1.size();
3487  GA_DEBUG_ASSERT(t.size() == s1 * tc2.size(), "Wrong sizes");
3488  base_tensor::iterator it2=tc2.begin(), it1=tc1.begin(), it1end=it1 + s1;
3489  for (base_tensor::iterator it = t.begin(); it != t.end(); ++it) {
3490  *it = *(it2) * (*it1);
3491  ++it1; if (it1 == it1end) { it1 = tc1.begin(), ++it2; }
3492  }
3493  return 0;
3494  }
3495  ga_instruction_simple_tmult(base_tensor &t_, base_tensor &tc1_,
3496  base_tensor &tc2_)
3497  : t(t_), tc1(tc1_), tc2(tc2_) {}
3498  };
3499 
3500  template<int S1> inline void tmult_elem_unrolled__
3501  (base_tensor::iterator &it, base_tensor::iterator &it1,
3502  base_tensor::iterator &it2) {
3503  *it++ = (*it1++)*(*it2);
3504  tmult_elem_unrolled__<S1-1>(it, it1, it2);
3505  }
3506  template<> inline void tmult_elem_unrolled__<0>
3507  (base_tensor::iterator &/*it*/, base_tensor::iterator &/*it1*/,
3508  base_tensor::iterator &/*it2*/) { }
3509 
3510  // Performs Aij Bkl -> Cijkl, partially unrolled version
3511  template<int S1> struct ga_instruction_simple_tmult_unrolled
3512  : public ga_instruction {
3513  base_tensor &t, &tc1, &tc2;
3514  virtual int exec() {
3515  size_type s2 = tc2.size();
3516  GA_DEBUG_ASSERT(tc1.size() == S1,
3517  "Wrong sizes " << tc1.size() << " != " << S1);
3518  GA_DEBUG_INFO("Instruction: simple tensor product, unrolled with "
3519  << S1 << " operations");
3520  GA_DEBUG_ASSERT(t.size() == S1 * s2,
3521  "Wrong sizes " << t.size() << " != " << S1 << "*" << s2);
3522  base_tensor::iterator it = t.begin(), it2 = tc2.begin();
3523  for (size_type ii = 0; ii < s2; ++ii, ++it2) {
3524  base_tensor::iterator it1 = tc1.begin();
3525  tmult_elem_unrolled__<S1>(it, it1, it2);
3526  }
3527  GA_DEBUG_ASSERT(it == t.end(), "Internal error");
3528  return 0;
3529  }
3530  ga_instruction_simple_tmult_unrolled(base_tensor &t_, base_tensor &tc1_,
3531  base_tensor &tc2_)
3532  : t(t_), tc1(tc1_), tc2(tc2_) {}
3533  };
3534 
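  // Dispatch on the (uniform) size of the first operand: fully unrolled
  // tensor product for sizes 2 to 16, generic version otherwise.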
3535  pga_instruction ga_uniform_instruction_simple_tmult
3536  (base_tensor &t, base_tensor &tc1, base_tensor &tc2) {
3537  switch(tc1.size()) {
3538  case 2 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 2>>
3539  (t, tc1, tc2);
3540  case 3 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 3>>
3541  (t, tc1, tc2);
3542  case 4 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 4>>
3543  (t, tc1, tc2);
3544  case 5 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 5>>
3545  (t, tc1, tc2);
3546  case 6 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 6>>
3547  (t, tc1, tc2);
3548  case 7 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 7>>
3549  (t, tc1, tc2);
3550  case 8 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 8>>
3551  (t, tc1, tc2);
3552  case 9 : return std::make_shared<ga_instruction_simple_tmult_unrolled< 9>>
3553  (t, tc1, tc2);
3554  case 10 : return std::make_shared<ga_instruction_simple_tmult_unrolled<10>>
3555  (t, tc1, tc2);
3556  case 11 : return std::make_shared<ga_instruction_simple_tmult_unrolled<11>>
3557  (t, tc1, tc2);
3558  case 12 : return std::make_shared<ga_instruction_simple_tmult_unrolled<12>>
3559  (t, tc1, tc2);
3560  case 13 : return std::make_shared<ga_instruction_simple_tmult_unrolled<13>>
3561  (t, tc1, tc2);
3562  case 14 : return std::make_shared<ga_instruction_simple_tmult_unrolled<14>>
3563  (t, tc1, tc2);
3564  case 15 : return std::make_shared<ga_instruction_simple_tmult_unrolled<15>>
3565  (t, tc1, tc2);
3566  case 16 : return std::make_shared<ga_instruction_simple_tmult_unrolled<16>>
3567  (t, tc1, tc2);
3568  default : return std::make_shared<ga_instruction_simple_tmult>
3569  (t, tc1, tc2);
3570  }
3571  }
3572 
3573 
3574  // Performs Ami Bnj -> Cmnij. To be optimized.
3575  struct ga_instruction_spec_tmult : public ga_instruction {
3576  base_tensor &t, &tc1, &tc2;
3577  size_type s1_2, s2_2;
3578  virtual int exec() {
3579  GA_DEBUG_INFO("Instruction: specific tensor product");
3580  GA_DEBUG_ASSERT(t.size() == tc1.size() * tc2.size(), "Wrong sizes");
3581  size_type s1_1 = tc1.size() / s1_2;
3582  size_type s2_1 = tc2.size() / s2_2;
3583 
3584  base_tensor::iterator it = t.begin();
3585  for (size_type j = 0; j < s2_2; ++j)
3586  for (size_type i = 0; i < s1_2; ++i)
3587  for (size_type n = 0; n < s2_1; ++n)
3588  for (size_type m = 0; m < s1_1; ++m, ++it)
3589  *it = tc1[m+i*s1_1] * tc2[n+j*s2_1];
3590  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
3591  return 0;
3592  }
3593  ga_instruction_spec_tmult(base_tensor &t_, base_tensor &tc1_,
3594  base_tensor &tc2_, size_type s1_2_,
3595  size_type s2_2_)
3596  : t(t_), tc1(tc1_), tc2(tc2_), s1_2(s1_2_), s2_2(s2_2_) {}
3597  };
3598 
3599  // Performs Ai Bmj -> Cmij. To be optimized.
3600  struct ga_instruction_spec2_tmult : public ga_instruction {
3601  base_tensor &t, &tc1, &tc2;
3602  virtual int exec() {
3603  GA_DEBUG_INFO("Instruction: second specific tensor product");
3604  GA_DEBUG_ASSERT(t.size() == tc1.size() * tc2.size(), "Wrong sizes");
3605  size_type s1 = tc1.size();
3606  size_type s2_1 = tc2.sizes()[0], s2_2 = tc2.size() / s2_1;
3607 
3608  base_tensor::iterator it = t.begin();
3609  for (size_type j = 0; j < s2_2; ++j)
3610  for (size_type i = 0; i < s1; ++i)
3611  for (size_type m = 0; m < s2_1; ++m, ++it)
3612  *it = tc1[i] * tc2[m+j*s2_1];
3613  GA_DEBUG_ASSERT(it == t.end(), "Wrong sizes");
3614  return 0;
3615  }
3616  ga_instruction_spec2_tmult(base_tensor &t_, base_tensor &tc1_,
3617  base_tensor &tc2_)
3618  : t(t_), tc1(tc1_), tc2(tc2_) {}
3619  };
3620 
3621 
3622 
3623  struct ga_instruction_simple_c_matrix : public ga_instruction {
3624  base_tensor &t;
3625  std::vector<scalar_type *> components;
3626  virtual int exec() {
3627  GA_DEBUG_INFO("Instruction: gathering components for explicit "
3628  "matrix");
3629  GA_DEBUG_ASSERT(t.size() == components.size(), "Wrong sizes");
3630  for (size_type i = 0; i < components.size(); ++i)
3631  t[i] = *(components[i]);
3632  return 0;
3633  }
3634  ga_instruction_simple_c_matrix(base_tensor &t_,
3635  std::vector<scalar_type *> &components_)
3636  : t(t_), components(components_) {}
3637  };
3638 
3639  struct ga_instruction_c_matrix_with_tests : public ga_instruction {
3640  base_tensor &t;
3641  const std::vector<const base_tensor *> components;
3642  virtual int exec() {
3643  GA_DEBUG_INFO("Instruction: gathering components for explicit "
3644  "matrix with tests functions");
3645  size_type s = t.size() / components.size();
3646  GA_DEBUG_ASSERT(s, "Wrong sizes");
3647  base_tensor::iterator it = t.begin();
3648  for (size_type i = 0; i < components.size(); ++i) {
3649  const base_tensor &t1 = *(components[i]);
3650  if (t1.size() > 1) {
3651  GA_DEBUG_ASSERT(t1.size() == s, "Wrong sizes, " << t1.size()
3652  << " != " << s);
3653  for (size_type j = 0; j < s; ++j) *it++ = t1[j];
3654  } else {
3655  for (size_type j = 0; j < s; ++j) *it++ = t1[0];
3656  }
3657  }
3658  return 0;
3659  }
3660  ga_instruction_c_matrix_with_tests
3661  (base_tensor &t_, const std::vector<const base_tensor *> &components_)
3662  : t(t_), components(components_) {}
3663  };
3664 
3665  struct ga_instruction_eval_func_1arg_1res : public ga_instruction {
3666  scalar_type &t;
3667  const scalar_type &c;
3668  pscalar_func_onearg f1;
3669  virtual int exec() {
3670  GA_DEBUG_INFO("Instruction: evaluation of a one argument "
3671  "predefined function on a scalar");
3672  t = (*f1)(c);
3673  return 0;
3674  }
3675  ga_instruction_eval_func_1arg_1res(scalar_type &t_, const scalar_type &c_,
3676  pscalar_func_onearg f1_)
3677  : t(t_), c(c_), f1(f1_) {}
3678  };
3679 
3680  struct ga_instruction_eval_func_1arg_1res_expr : public ga_instruction {
3681  scalar_type &t;
3682  const scalar_type &c;
3683  const ga_predef_function &F;
3684  virtual int exec() {
3685  GA_DEBUG_INFO("Instruction: evaluation of a one argument "
3686  "predefined function on a scalar");
3687  t = F(c);
3688  return 0;
3689  }
3690  ga_instruction_eval_func_1arg_1res_expr(scalar_type &t_,
3691  const scalar_type &c_,
3692  const ga_predef_function &F_)
3693  : t(t_), c(c_), F(F_) {}
3694  };
3695 
3696  struct ga_instruction_eval_func_1arg : public ga_instruction {
3697  base_tensor &t, &tc1;
3698  pscalar_func_onearg f1;
3699  virtual int exec() {
3700  GA_DEBUG_INFO("Instruction: evaluation of a one argument "
3701  "predefined function on tensor");
3702  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
3703  for (size_type i = 0; i < t.size(); ++i) t[i] = (*f1)(tc1[i]);
3704  return 0;
3705  }
3706  ga_instruction_eval_func_1arg(base_tensor &t_, base_tensor &c_,
3707  pscalar_func_onearg f1_)
3708  : t(t_), tc1(c_), f1(f1_) {}
3709  };
3710 
3711  struct ga_instruction_eval_func_1arg_expr : public ga_instruction {
3712  base_tensor &t, &tc1;
3713  const ga_predef_function &F;
3714  virtual int exec() {
3715  GA_DEBUG_INFO("Instruction: evaluation of a one argument "
3716  "predefined function on tensor");
3717  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
3718  for (size_type i = 0; i < t.size(); ++i) t[i] = F(tc1[i]);
3719  return 0;
3720  }
3721  ga_instruction_eval_func_1arg_expr(base_tensor &t_, base_tensor &c_,
3722  const ga_predef_function &F_)
3723  : t(t_), tc1(c_), F(F_) {}
3724  };
3725 
3726  struct ga_instruction_eval_func_2arg_1res : public ga_instruction {
3727  scalar_type &t;
3728  const scalar_type &c, &d;
3729  pscalar_func_twoargs f2;
3730  virtual int exec() {
3731  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
3732  "predefined function on two scalar");
3733  t = (*f2)(c, d);
3734  return 0;
3735  }
3736  ga_instruction_eval_func_2arg_1res(scalar_type &t_, const scalar_type &c_,
3737  const scalar_type &d_,
3738  pscalar_func_twoargs f2_)
3739  : t(t_), c(c_), d(d_), f2(f2_) {}
3740  };
3741 
3742  struct ga_instruction_eval_func_2arg_1res_expr : public ga_instruction {
3743  scalar_type &t;
3744  const scalar_type &c, &d;
3745  const ga_predef_function &F;
3746  virtual int exec() {
3747  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
3748  "predefined function on two scalar");
3749  t = F(c, d);
3750  return 0;
3751  }
3752  ga_instruction_eval_func_2arg_1res_expr(scalar_type &t_,
3753  const scalar_type &c_,
3754  const scalar_type &d_,
3755  const ga_predef_function &F_)
3756  : t(t_), c(c_), d(d_), F(F_) {}
3757  };
3758 
3759  struct ga_instruction_eval_func_2arg_first_scalar : public ga_instruction {
3760  base_tensor &t, &tc1, &tc2;
3761  pscalar_func_twoargs f2;
3762  virtual int exec() {
3763  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
3764  "predefined function on one scalar and one tensor");
3765  GA_DEBUG_ASSERT(t.size() == tc2.size(), "Wrong sizes");
3766  for (size_type i = 0; i < t.size(); ++i) t[i] = (*f2)(tc1[0], tc2[i]);
3767  return 0;
3768  }
3769  ga_instruction_eval_func_2arg_first_scalar
3770  (base_tensor &t_, base_tensor &c_, base_tensor &d_,
3771  pscalar_func_twoargs f2_)
3772  : t(t_), tc1(c_), tc2(d_), f2(f2_) {}
3773  };
3774 
3775  struct ga_instruction_eval_func_2arg_first_scalar_expr
3776  : public ga_instruction {
3777  base_tensor &t, &tc1, &tc2;
3778  const ga_predef_function &F;
3779  virtual int exec() {
3780  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
3781  "predefined function on one scalar and one tensor");
3782  GA_DEBUG_ASSERT(t.size() == tc2.size(), "Wrong sizes");
3783  for (size_type i = 0; i < t.size(); ++i) t[i] = F(tc1[0], tc2[i]);
3784  return 0;
3785  }
3786  ga_instruction_eval_func_2arg_first_scalar_expr
3787  (base_tensor &t_, base_tensor &c_, base_tensor &d_,
3788  const ga_predef_function &F_)
3789  : t(t_), tc1(c_), tc2(d_), F(F_) {}
3790  };
3791 
3792  struct ga_instruction_eval_func_2arg_second_scalar : public ga_instruction {
3793  base_tensor &t, &tc1, &tc2;
3794  pscalar_func_twoargs f2;
3795  virtual int exec() {
3796  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
3797  "predefined function on one tensor and one scalar");
3798  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
3799  for (size_type i = 0; i < t.size(); ++i) t[i] = (*f2)(tc1[i], tc2[0]);
3800  return 0;
3801  }
3802  ga_instruction_eval_func_2arg_second_scalar(base_tensor &t_,
3803  base_tensor &c_,
3804  base_tensor &d_,
3805  pscalar_func_twoargs f2_)
3806  : t(t_), tc1(c_), tc2(d_), f2(f2_) {}
3807  };
3808 
3809  struct ga_instruction_eval_func_2arg_second_scalar_expr
3810  : public ga_instruction {
3811  base_tensor &t, &tc1, &tc2;
3812  const ga_predef_function &F;
3813  virtual int exec() {
3814  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
3815  "predefined function on one tensor and one scalar");
3816  GA_DEBUG_ASSERT(t.size() == tc1.size(), "Wrong sizes");
3817  for (size_type i = 0; i < t.size(); ++i) t[i] = F(tc1[i], tc2[0]);
3818  return 0;
3819  }
3820  ga_instruction_eval_func_2arg_second_scalar_expr
3821  (base_tensor &t_, base_tensor &c_, base_tensor &d_,
3822  const ga_predef_function &F_)
3823  : t(t_), tc1(c_), tc2(d_), F(F_) {}
3824  };
3825 
3826  struct ga_instruction_eval_func_2arg : public ga_instruction {
3827  base_tensor &t, &tc1, &tc2;
3828  pscalar_func_twoargs f2;
3829  virtual int exec() {
3830  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
3831  "predefined function on two tensors");
3832  GA_DEBUG_ASSERT(t.size() == tc1.size() && t.size() == tc2.size(),
3833  "Wrong sizes");
3834 
3835  for (size_type i = 0; i < t.size(); ++i) t[i] = (*f2)(tc1[i], tc2[i]);
3836  return 0;
3837  }
3838  ga_instruction_eval_func_2arg(base_tensor &t_, base_tensor &c_,
3839  base_tensor &d_, pscalar_func_twoargs f2_)
3840  : t(t_), tc1(c_), tc2(d_), f2(f2_) {}
3841  };
3842 
3843  struct ga_instruction_eval_func_2arg_expr : public ga_instruction {
3844  base_tensor &t, &tc1, &tc2;
3845  const ga_predef_function &F;
3846  virtual int exec() {
3847  GA_DEBUG_INFO("Instruction: evaluation of a two arguments "
3848  "predefined function on two tensors");
3849  GA_DEBUG_ASSERT(t.size() == tc1.size() && t.size() == tc2.size(),
3850  "Wrong sizes");
3851 
3852  for (size_type i = 0; i < t.size(); ++i) t[i] = F(tc1[i], tc2[i]);
3853  return 0;
3854  }
3855  ga_instruction_eval_func_2arg_expr(base_tensor &t_, base_tensor &c_,
3856  base_tensor &d_,
3857  const ga_predef_function &F_)
3858  : t(t_), tc1(c_), tc2(d_), F(F_) {}
3859  };
3860 
3861  struct ga_instruction_eval_OP : public ga_instruction {
3862  base_tensor &t;
3863  const ga_nonlinear_operator &OP;
3864  ga_nonlinear_operator::arg_list args;
3865  virtual int exec() {
3866  GA_DEBUG_INFO("Instruction: operator evaluation");
3867  OP.value(args, t);
3868  return 0;
3869  }
3870  ga_instruction_eval_OP(base_tensor &t_, const ga_nonlinear_operator &OP_,
3871  ga_nonlinear_operator::arg_list &args_)
3872  : t(t_), OP(OP_), args(args_) {}
3873  };
3874 
3875  struct ga_instruction_eval_derivative_OP : public ga_instruction {
3876  base_tensor &t;
3877  const ga_nonlinear_operator &OP;
3878  ga_nonlinear_operator::arg_list args;
3879  size_type der1;
3880  virtual int exec() {
3881  GA_DEBUG_INFO("Instruction: operator derivative evaluation");
3882  OP.derivative(args, der1, t);
3883  return 0;
3884  }
3885  ga_instruction_eval_derivative_OP(base_tensor &t_,
3886  const ga_nonlinear_operator &OP_,
3887  ga_nonlinear_operator::arg_list &args_,
3888  size_type der1_)
3889  : t(t_), OP(OP_), args(args_), der1(der1_) {}
3890  };
3891 
3892  struct ga_instruction_eval_second_derivative_OP : public ga_instruction {
3893  base_tensor &t;
3894  const ga_nonlinear_operator &OP;
3895  ga_nonlinear_operator::arg_list args;
3896  size_type der1, der2;
3897  virtual int exec() {
3898  GA_DEBUG_INFO("Instruction: operator second derivative evaluation");
3899  OP.second_derivative(args, der1, der2, t);
3900  return 0;
3901  }
3902  ga_instruction_eval_second_derivative_OP
3903  (base_tensor &t_, const ga_nonlinear_operator &OP_,
3904  ga_nonlinear_operator::arg_list &args_, size_type der1_, size_type der2_)
3905  : t(t_), OP(OP_), args(args_), der1(der1_), der2(der2_) {}
3906  };
3907 
3908  struct ga_instruction_tensor_slice : public ga_instruction {
3909  base_tensor &t, &tc1;
3910  bgeot::multi_index mi, indices;
3911  virtual int exec() {
3912  GA_DEBUG_INFO("Instruction: tensor slice");
3913  size_type order = t.sizes().size();
3914  for (bgeot::multi_index mi3(order); !mi3.finished(t.sizes());
3915  mi3.incrementation(t.sizes())) {
3916  for (size_type j = 0; j < order; ++j)
3917  mi[indices[j]] = mi3[j];
3918  t(mi3) = tc1(mi);
3919  }
3920  return 0;
3921  }
3922  ga_instruction_tensor_slice(base_tensor &t_, base_tensor &tc1_,
3923  bgeot::multi_index &mi_,
3924  bgeot::multi_index &indices_)
3925  : t(t_), tc1(tc1_), mi(mi_), indices(indices_) {}
3926  };
3927 
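  // Evaluation of an interpolate transformation at the current point: fills
  // the interpolate_info structure with the target mesh, the geometric
  // context on the element where the transformed point was located, the
  // transformed point itself and, on a face, the unit normal. The context is
  // invalidated when the transformation fails or locates no element.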
3928  struct ga_instruction_transformation_call : public ga_instruction {
3929  const ga_workspace &workspace;
3930  ga_instruction_set::interpolate_info &inin;
3931  pinterpolate_transformation trans;
3932  fem_interpolation_context &ctx;
3933  const base_small_vector &Normal;
3934  const mesh &m;
3935  bool compute_der;
3936 
3937  virtual int exec() {
3938  GA_DEBUG_INFO("Instruction: call interpolate transformation");
3939  base_node P_ref;
3940  size_type cv;
3941  short_type face_num;
3942  inin.pt_type = trans->transform(workspace, m, ctx, Normal, &(inin.m), cv,
3943  face_num, P_ref, inin.Normal,
3944  inin.derivatives, compute_der);
3945  if (inin.pt_type) {
3946  if (cv != size_type(-1)) {
3947  inin.m->points_of_convex(cv, inin.G);
3948  inin.ctx.change((inin.m)->trans_of_convex(cv),
3949  0, P_ref, inin.G, cv, face_num);
3950  inin.has_ctx = true;
3951  if (face_num != short_type(-1)) {
3952  inin.Normal = bgeot::compute_normal(inin.ctx, face_num);
3953  gmm::scale(inin.Normal, 1.0/gmm::vect_norm2(inin.Normal));
3954  } else
3955  inin.Normal.resize(0);
3956  inin.pt_y = inin.ctx.xreal();
3957  } else {
3958  inin.ctx.invalid_convex_num();
3959  inin.Normal.resize(0);
3960  inin.pt_y = P_ref;
3961  inin.has_ctx = false;
3962  }
3963  } else {
3964  inin.ctx.invalid_convex_num();
3965  inin.Normal.resize(0);
3966  inin.pt_y.resize(0);
3967  inin.has_ctx = false;
3968  }
3969  GA_DEBUG_INFO("Instruction: end of call interpolate transformation");
3970  return 0;
3971  }
3972  ga_instruction_transformation_call
3973  (const ga_workspace &w, ga_instruction_set::interpolate_info &i,
3974  pinterpolate_transformation t, fem_interpolation_context &ctxx,
3975  const base_small_vector &No, const mesh &mm, bool compute_der_)
3976  : workspace(w), inin(i), trans(t), ctx(ctxx), Normal(No), m(mm),
3977  compute_der(compute_der_) {}
3978  };
3979 
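  // Same kind of call for the neighbor element transformation on an
  // interior face. When the integration method and geometric transformation
  // are precomputed, the correspondence between the Gauss points on the two
  // sides of the face is built once (by inverting the geometric
  // transformation of the adjacent element) and cached in neighbor_corresp,
  // keyed by the gauss_pt_corresp structure; otherwise the generic
  // transformation call above is used.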
3980  struct ga_instruction_neighbor_transformation_call : public ga_instruction {
3981  const ga_workspace &workspace;
3982  ga_instruction_set::interpolate_info &inin;
3983  pinterpolate_transformation trans;
3984  fem_interpolation_context &ctx;
3985  base_small_vector dummy_normal;
3986  const mesh &m;
3987  size_type &ipt;
3988  papprox_integration &pai;
3989  bgeot::geotrans_precomp_pool &gp_pool;
3990  std::map<gauss_pt_corresp, bgeot::pstored_point_tab> &neighbor_corresp;
3991 
3992  virtual int exec() {
3993  bool cancel_optimization = false;
3994  GA_DEBUG_INFO("Instruction: call interpolate neighbor transformation");
3995  if (ipt == 0) {
3996  if (!(ctx.have_pgp()) || !pai || pai->is_built_on_the_fly()
3997  || cancel_optimization) {
3998  inin.ctx.invalid_convex_num();
3999  } else {
4000  // Test if the situation has already been encountered
4001  size_type cv = ctx.convex_num();
4002  short_type f = ctx.face_num();
4003  auto adj_face = m.adjacent_face(cv, f);
4004  if (adj_face.cv == size_type(-1)) {
4005  GMM_WARNING2("Adjacent face not found, "
4006  "probably an non-interior face");
4007  inin.ctx.invalid_convex_num();
4008  } else {
4009  gauss_pt_corresp gpc;
4010  gpc.pgt1 = m.trans_of_convex(cv);
4011  gpc.pgt2 = m.trans_of_convex(adj_face.cv);
4012  gpc.pai = pai;
4013  auto inds_pt1 = m.ind_points_of_face_of_convex(cv, f);
4014  auto inds_pt2 = m.ind_points_of_face_of_convex(adj_face.cv,
4015  adj_face.f);
4016  auto str1 = gpc.pgt1->structure();
4017  auto str2 = gpc.pgt2->structure();
4018  size_type nbptf1 = str1->nb_points_of_face(f);
4019  size_type nbptf2 = str2->nb_points_of_face(adj_face.f);
4020  gpc.nodes.resize(nbptf1*2);
4021  for (size_type i = 0; i < nbptf1; ++i) {
4022  gpc.nodes[2*i] = str1->ind_points_of_face(f)[i];
4023  bool found = false;
4024  for (size_type j = 0; j < nbptf2; ++j) {
4025  if (inds_pt2[j] == inds_pt1[i]) {
4026  gpc.nodes[2*i+1] = str2->ind_points_of_face(adj_face.f)[j];
4027  found = true;
4028  break;
4029  }
4030  }
4031  GMM_ASSERT1(found, "Internal error");
4032  }
4033  bgeot::pstored_point_tab pspt = 0;
4034  auto itm = neighbor_corresp.find(gpc);
4035  if (itm != neighbor_corresp.end()) {
4036  pspt = itm->second;
4037  } else {
4038  size_type nbpt = pai->nb_points_on_face(f);
4039  bgeot::geotrans_inv_convex gic;
4040  gic.init(m.points_of_convex(adj_face.cv), gpc.pgt2);
4041  size_type first_ind = pai->ind_first_point_on_face(f);
4042  const bgeot::stored_point_tab
4043  &spt = *(pai->pintegration_points());
4044  base_matrix G;
4045  m.points_of_convex(cv, G);
4046  fem_interpolation_context ctx_x(gpc.pgt1, 0, spt[0], G, cv, f);
4047  std::vector<base_node> P_ref(nbpt);
4048 
4049  for (size_type i = 0; i < nbpt; ++i) {
4050  ctx_x.set_xref(spt[first_ind+i]);
4051  bool converged = true;
4052  gic.invert(ctx_x.xreal(), P_ref[i], converged);
4053  bool is_in = (gpc.pgt2->convex_ref()->is_in(P_ref[i]) < 1E-4);
4054  GMM_ASSERT1(is_in && converged,"Geometric transformation "
4055  "inversion has failed in neighbor transformation");
4056  }
4057  pspt = store_point_tab(P_ref);
4058  neighbor_corresp[gpc] = pspt;
4059  }
4060  m.points_of_convex(adj_face.cv, inin.G);
4061  bgeot::pgeotrans_precomp pgp = gp_pool(gpc.pgt2, pspt);
4062  inin.ctx.change(pgp, 0, 0, inin.G, adj_face.cv, adj_face.f);
4063  }
4064  }
4065  }
4066 
4067  if (inin.ctx.have_pgp() && inin.ctx.is_convex_num_valid()) {
4068  inin.ctx.set_ii(ipt);
4069  inin.pt_type = 1;
4070  inin.has_ctx = true;
4071  inin.pt_y = inin.ctx.xreal();
4072  inin.Normal = bgeot::compute_normal(inin.ctx, inin.ctx.face_num());
4073  gmm::scale(inin.Normal, 1.0/gmm::vect_norm2(inin.Normal));
4074  inin.m = &m;
4075  } else {
4076  base_node P_ref;
4077  size_type cv;
4078  short_type face_num;
4079  gmm::clear(inin.Normal);
4080  inin.pt_type = trans->transform(workspace, m, ctx, dummy_normal,
4081  &(inin.m), cv, face_num, P_ref,
4082  dummy_normal, inin.derivatives,
4083  false);
4084  if (inin.pt_type) {
4085  if (cv != size_type(-1)) {
4086  inin.m->points_of_convex(cv, inin.G);
4087  inin.ctx.change((inin.m)->trans_of_convex(cv),
4088  0, P_ref, inin.G, cv, face_num);
4089  inin.has_ctx = true;
4090  if (face_num != short_type(-1)) {
4091  inin.Normal = bgeot::compute_normal(inin.ctx, face_num);
4092  gmm::scale(inin.Normal, 1.0/gmm::vect_norm2(inin.Normal));
4093  } else
4094  inin.Normal.resize(0);
4095  inin.pt_y = inin.ctx.xreal();
4096  } else {
4097  inin.ctx.invalid_convex_num();
4098  inin.pt_y = P_ref;
4099  inin.has_ctx = false;
4100  }
4101  } else {
4102  inin.ctx.invalid_convex_num();
4103  inin.Normal.resize(0);
4104  inin.pt_y.resize(0);
4105  inin.has_ctx = false;
4106  }
4107  }
4108  GA_DEBUG_INFO("Instruction: end of call neighbor interpolate "
4109  "transformation");
4110  return 0;
4111  }
4112  ga_instruction_neighbor_transformation_call
4113  (const ga_workspace &w, ga_instruction_set::interpolate_info &i,
4114  pinterpolate_transformation t, fem_interpolation_context &ctxx,
4115  const mesh &mm, size_type &ipt_, papprox_integration &pai_,
4116  bgeot::geotrans_precomp_pool &gp_pool_,
4117  std::map<gauss_pt_corresp, bgeot::pstored_point_tab> &neighbor_corresp_)
4118  : workspace(w), inin(i), trans(t), ctx(ctxx), m(mm),
4119  ipt(ipt_), pai(pai_), gp_pool(gp_pool_),
4120  neighbor_corresp(neighbor_corresp_) {}
4121  };
4122 
4123 
4124  struct ga_instruction_scalar_assembly : public ga_instruction {
4125  const base_tensor &t;
4126  scalar_type &E, &coeff;
4127  virtual int exec() {
4128  GA_DEBUG_INFO("Instruction: scalar term assembly");
4129  E += t[0] * coeff;
4130  return 0;
4131  }
4132  ga_instruction_scalar_assembly(base_tensor &t_, scalar_type &E_,
4133  scalar_type &coeff_)
4134  : t(t_), E(E_), coeff(coeff_) {}
4135  };
4136 
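  // Assembly of an elementary vector for a variable defined on a mesh_fem:
  // the weighted tensor is accumulated in 'elem' over the integration points
  // of the element and scattered, at the last point, into the selected
  // global vector (VI, or Vi when the mesh_fem is reduced).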
4137  struct ga_instruction_vector_assembly_mf : public ga_instruction
4138  {
4139  const base_tensor &t;
4140  base_vector &VI, &Vi;
4141  const fem_interpolation_context &ctx;
4142  const gmm::sub_interval *const&I, *const I__;
4143  const mesh_fem *const&mf, *const mf__;
4144  const bool &reduced_mf;
4145  const scalar_type &coeff;
4146  const size_type &nbpt, &ipt;
4147  base_vector elem;
4148  const bool interpolate;
4149  virtual int exec() {
4150  GA_DEBUG_INFO("Instruction: vector term assembly for fem variable");
4151  bool empty_weight = (coeff == scalar_type(0));
4152  if (ipt == 0 || interpolate) {
4153  if (empty_weight) elem.resize(0);
4154  elem.resize(t.size());
4155  if (!empty_weight)
4156  copy_scaled_4(t, coeff, elem);
4157  } else if (!empty_weight)
4158  // gmm::add(gmm::scaled(t.as_vector(), coeff), elem);
4159  add_scaled_4(t, coeff, elem);
4160 
4161  if (ipt == nbpt-1 || interpolate) { // finalize
4162  GA_DEBUG_ASSERT(mf, "Internal error");
4163  if (!ctx.is_convex_num_valid()) return 0;
4164  size_type cv_1 = ctx.convex_num();
4165  size_type qmult = mf->get_qdim();
4166  if (qmult > 1) qmult /= mf->fem_of_element(cv_1)->target_dim();
4167  base_vector &V = reduced_mf ? Vi : VI;
4168  GA_DEBUG_ASSERT(V.size() >= I->first() + mf->nb_basic_dof(),
4169  "Bad assembly vector size " << V.size() << ">=" <<
4170  I->first() << "+"<< mf->nb_basic_dof());
4171  auto itr = elem.cbegin();
4172  auto itw = V.begin() + I->first();
4173  for (const auto &dof : mf->ind_scalar_basic_dof_of_element(cv_1))
4174  for (size_type q = 0; q < qmult; ++q)
4175  *(itw+dof+q) += *itr++;
4176  GMM_ASSERT1(itr == elem.end(), "Internal error");
4177  }
4178  return 0;
4179  }
4180 
4181  ga_instruction_vector_assembly_mf
4182  (const base_tensor &t_, base_vector &VI_, base_vector &Vi_,
4183  const fem_interpolation_context &ctx_,
4184  const gmm::sub_interval *&I_, const mesh_fem *&mf_,
4185  const bool &reduced_mf_,
4186  const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
4187  bool interpolate_)
4188  : t(t_), VI(VI_), Vi(Vi_), ctx(ctx_),
4189  I(I_), I__(nullptr), mf(mf_), mf__(nullptr), reduced_mf(reduced_mf_),
4190  coeff(coeff_), nbpt(nbpt_), ipt(ipt_), interpolate(interpolate_) {}
4191 
4192  ga_instruction_vector_assembly_mf
4193  (const base_tensor &t_, base_vector &V_,
4194  const fem_interpolation_context &ctx_,
4195  const gmm::sub_interval &I_, const mesh_fem &mf_,
4196  const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
4197  bool interpolate_)
4198  : t(t_), VI(V_), Vi(V_), ctx(ctx_),
4199  I(I__), I__(&I_), mf(mf__), mf__(&mf_), reduced_mf(false_),
4200  coeff(coeff_), nbpt(nbpt_), ipt(ipt_), interpolate(interpolate_) {}
4201  protected:
4202  const bool false_=false;
4203  };
4204 
4205  struct ga_instruction_vector_assembly_imd : public ga_instruction {
4206  const base_tensor &t;
4207  base_vector &V;
4208  const fem_interpolation_context &ctx;
4209  const gmm::sub_interval &I;
4210  const im_data &imd;
4211  scalar_type &coeff;
4212  const size_type &ipt;
4213  const bool initialize;
4214  virtual int exec() {
4215  GA_DEBUG_INFO("Instruction: vector term assembly for im_data variable");
4216  size_type cv = ctx.convex_num();
4217  size_type i = t.size() * imd.filtered_index_of_point(cv, ipt);
4218  GMM_ASSERT1(i+t.size() <= I.size(),
4219  "Internal error "<<i<<"+"<<t.size()<<" <= "<<I.size());
4220  auto itw = V.begin() + I.first() + i;
4221  if (initialize)
4222  for (const auto &val : t.as_vector())
4223  *itw++ = coeff*val;
4224  else
4225  for (const auto &val : t.as_vector())
4226  *itw++ += coeff*val;
4227  return 0;
4228  }
4229  ga_instruction_vector_assembly_imd
4230  (const base_tensor &t_, base_vector &V_,
4231  const fem_interpolation_context &ctx_, const gmm::sub_interval &I_,
4232  const im_data &imd_, scalar_type &coeff_, const size_type &ipt_,
4233  bool initialize_=false)
4234  : t(t_), V(V_), ctx(ctx_), I(I_), imd(imd_), coeff(coeff_), ipt(ipt_),
4235  initialize(initialize_)
4236  {}
4237  };
4238 
4239  struct ga_instruction_vector_assembly : public ga_instruction {
4240  const base_tensor &t;
4241  base_vector &V;
4242  const gmm::sub_interval &I;
4243  scalar_type &coeff;
4244  virtual int exec() {
4245  GA_DEBUG_INFO("Instruction: vector term assembly for "
4246  "fixed size variable");
4247  gmm::add(gmm::scaled(t.as_vector(), coeff), gmm::sub_vector(V, I));
4248  return 0;
4249  }
4250  ga_instruction_vector_assembly(const base_tensor &t_, base_vector &V_,
4251  const gmm::sub_interval &I_,
4252  scalar_type &coeff_)
4253  : t(t_), V(V_), I(I_), coeff(coeff_) {}
4254  };
4255 
4256  struct ga_instruction_assignment : public ga_instruction {
4257  const base_tensor &t;
4258  base_vector &V;
4259  const fem_interpolation_context &ctx;
4260  const im_data *imd;
4261  virtual int exec() {
4262  GA_DEBUG_INFO("Instruction: Assignment to im_data");
4263  imd->set_tensor(V, ctx.convex_num(), ctx.ii(), t);
4264  return 0;
4265  }
4266  ga_instruction_assignment(const base_tensor &t_, base_vector &V_,
4267  const fem_interpolation_context &ctx_,
4268  const im_data *imd_)
4269  : t(t_), V(V_), ctx(ctx_), imd(imd_) {}
4270  };
4271 
4272  struct ga_instruction_extract_residual_on_imd_dofs : public ga_instruction {
4273  base_tensor &t;
4274  const base_vector &V;
4275  const fem_interpolation_context &ctx;
4276  const gmm::sub_interval &I;
4277  const im_data &imd;
4278  const size_type &ipt;
4279  virtual int exec() {
4280  GA_DEBUG_INFO("Instruction: extract residual for im_data variable");
4281  size_type ifirst = I.first();
4282  size_type cv = ctx.convex_num();
4283  size_type i = t.size() * imd.filtered_index_of_point(cv, ipt);
4284  GMM_ASSERT1(i+t.size() <= I.size(),
4285  "Internal error "<<i<<"+"<<t.size()<<" <= "<<I.size());
4286  for (auto &&val : t.as_vector())
4287  val = V[ifirst+(i++)];
4288  return 0;
4289  }
4290  ga_instruction_extract_residual_on_imd_dofs
4291  (base_tensor &t_, const base_vector &V_,
4292  const fem_interpolation_context &ctx_, const gmm::sub_interval &I_,
4293  const im_data &imd_, const size_type &ipt_)
4294  : t(t_), V(V_), ctx(ctx_), I(I_), imd(imd_), ipt(ipt_)
4295  {}
4296  };
4297 
4298 
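  // Accumulates the elementary matrix 'elem' (stored column-wise, the dofs1
  // index varying fastest) into the global matrix K, dropping entries whose
  // magnitude does not exceed 'threshold'. Generic version for any matrix
  // type.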
4299  template <class MAT>
4300  inline void add_elem_matrix
4301  (MAT &K, const std::vector<size_type> &dofs1,
4302  const std::vector<size_type> &dofs2, std::vector<size_type> &/*dofs1_sort*/,
4303  const base_vector &elem, scalar_type threshold, size_type /* N */) {
4304 
4305  base_vector::const_iterator it = elem.cbegin();
4306  for (const size_type &dof2 : dofs2)
4307  for (const size_type &dof1 : dofs1) {
4308  if (gmm::abs(*it) > threshold)
4309  K(dof1, dof2) += *it;
4310  ++it;
4311  }
4312  }
4313 
4314  // static const std::vector<size_type> *the_indto_sort;
4315  // int compare_my_indices(const void *a, const void *b) {
4316  // size_type aa = *((const size_type *)(a));
4317  // size_type bb = *((const size_type *)(b));
4318  // return int((*the_indto_sort)[aa]) - int((*the_indto_sort)[bb]);
4319  // }
4320 
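  // Specialized version for gmm::col_matrix<gmm::rsvector<>>, the format of
  // model_real_sparse_matrix: each column is stored sorted by row index, so
  // the local row indices are sorted once (insertion sort into dofs1_sort)
  // and each entry is then merged into the column using a binary search for
  // its insertion position.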
4321  inline void add_elem_matrix
4322  (gmm::col_matrix<gmm::rsvector<scalar_type>> &K,
4323  const std::vector<size_type> &dofs1, const std::vector<size_type> &dofs2,
4324  std::vector<size_type> &dofs1_sort,
4325  const base_vector &elem, scalar_type threshold, size_type N) {
4326 
4327  size_type s1 = dofs1.size();
4328 
4329  dofs1_sort.resize(s1);
4330  for (size_type i = 0; i < s1; ++i) { // insertion sort
4331  size_type j = i, k = j-1;
4332  while (j > 0 && dofs1[i] < dofs1[dofs1_sort[k]])
4333  { dofs1_sort[j] = dofs1_sort[k]; j--; k--; }
4334  dofs1_sort[j] = i;
4335  }
4336 
4337  // dofs1_sort.resize(s1); // test with qsort: not faster in the tested cases
4338  // for (size_type i = 0; i < s1; ++i) dofs1_sort[i] = i;
4339  // the_indto_sort = &dofs1;
4340  // qsort(&(dofs1_sort[0]), s1, sizeof(size_type), compare_my_indices);
4341 
4342  gmm::elt_rsvector_<scalar_type> ev;
4343 
4344  size_type maxest = (N+1) * s1;
4345  base_vector::const_iterator it = elem.cbegin();
4346  bool first(true);
4347  for (const size_type &dof2 : dofs2) { // Iteration on columns
4348  if (first) first = false;
4349  else it += s1;
4350  std::vector<gmm::elt_rsvector_<scalar_type>> &col = K[dof2];
4351  size_type nb = col.size();
4352 
4353  if (nb == 0) {
4354  col.reserve(maxest);
4355  for (size_type k : dofs1_sort) {
4356  ev.e = *(it+k);
4357  if (gmm::abs(ev.e) > threshold) {
4358  ev.c=dofs1[k];
4359  col.push_back(ev);
4360  }
4361  }
4362  } else { // column merge
4363  size_type ind = 0;
4364  for (size_type k : dofs1_sort) {
4365  ev.e = *(it+k);
4366  if (gmm::abs(ev.e) > threshold) {
4367  ev.c = dofs1[k];
4368 
4369  size_type count = nb - ind, step, l;
4370  while (count > 0) {
4371  step = count / 2;
4372  l = ind + step;
4373  if (col[l].c < ev.c) {
4374  ind = ++l;
4375  count -= step + 1;
4376  }
4377  else
4378  count = step;
4379  }
4380 
4381  auto itc = col.begin() + ind;
4382  if (ind != nb && itc->c == ev.c)
4383  itc->e += ev.e;
4384  else {
4385  if (nb - ind > 1300)
4386  GMM_WARNING2("Inefficient addition of element in rsvector with "
4387  << col.size() - ind << " non-zero entries");
4388  col.push_back(ev);
4389  if (ind != nb) {
4390  itc = col.begin() + ind;
4391  auto ite = col.end();
4392  --ite;
4393  auto itee = ite;
4394  for (; ite != itc; --ite) { --itee; *ite = *itee; }
4395  *itc = ev;
4396  }
4397  ++nb;
4398  }
4399  ++ind;
4400  }
4401  }
4402  }
4403  }
4404  }
4405 
4406 
4407  inline void add_elem_matrix_contiguous_rows
4408  (gmm::col_matrix<gmm::rsvector<scalar_type>> &K,
4409  const size_type &i1, const size_type &s1,
4410  const std::vector<size_type> &dofs2,
4411  const base_vector &elem, scalar_type threshold) {
4412 
4413  gmm::elt_rsvector_<scalar_type> ev;
4414 
4415  base_vector::const_iterator it = elem.cbegin();
4416  bool first(true);
4417  for (const size_type &dof2 : dofs2) { // Iteration on columns
4418  if (first) first = false;
4419  else it += s1;
4420  std::vector<gmm::elt_rsvector_<scalar_type>> &col = K[dof2];
4421  size_type nb = col.size();
4422 
4423  if (nb == 0) {
4424  col.reserve(s1);
4425  for (size_type i = 0; i < s1; ++i) {
4426  ev.e = *(it+i);
4427  if (gmm::abs(ev.e) > threshold) {
4428  ev.c = i1 + i;
4429  col.push_back(ev);
4430  }
4431  }
4432  } else { // column merge (can be optimized for a contiguous range)
4433  size_type ind = 0;
4434  for (size_type i = 0; i < s1; ++i) {
4435  ev.e = *(it+i);
4436  if (gmm::abs(ev.e) > threshold) {
4437  ev.c = i1 + i;
4438 
4439  size_type count = nb - ind, step, l;
4440  while (count > 0) {
4441  step = count / 2;
4442  l = ind + step;
4443  if (col[l].c < ev.c) {
4444  ind = ++l;
4445  count -= step + 1;
4446  }
4447  else
4448  count = step;
4449  }
4450 
4451  auto itc = col.begin() + ind;
4452  if (ind != nb && itc->c == ev.c)
4453  itc->e += ev.e;
4454  else {
4455  if (nb - ind > 1300)
4456  GMM_WARNING2("Inefficient addition of element in rsvector with "
4457  << col.size() - ind << " non-zero entries");
4458  col.push_back(ev);
4459  if (ind != nb) {
4460  itc = col.begin() + ind;
4461  auto ite = col.end();
4462  --ite;
4463  auto itee = ite;
4464  for (; ite != itc; --ite) { --itee; *ite = *itee; }
4465  *itc = ev;
4466  }
4467  ++nb;
4468  }
4469  ++ind;
4470  }
4471  }
4472  }
4473  }
4474  }
4475 
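  // Helpers filling the vector of global dof indices of an element: each
  // entry is ifirst plus a basic dof index (plus the component number for a
  // field with multiplicity qmult > 1).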
4476  inline void populate_dofs_vector
4477  (std::vector<size_type> &dofs,
4478  const size_type &size, const size_type &ifirst, const size_type &qmult,
4479  const getfem::mesh::ind_set &mfdofs)
4480  {
4481  dofs.assign(size, ifirst);
4482  auto itd = dofs.begin();
4483  if (qmult == 1)
4484  for (const auto &dof : mfdofs) *itd++ += dof;
4485  else
4486  for (const auto &dof : mfdofs)
4487  for (size_type q = 0; q < qmult; ++q) *itd++ += dof + q;
4488  }
4489 
4490  inline void populate_dofs_vector // special case for qmult == 1
4491  (std::vector<size_type> &dofs, const size_type &size, const size_type &ifirst,
4492  const getfem::mesh::ind_set &mfdofs)
4493  {
4494  dofs.assign(size, ifirst);
4495  auto itd = dofs.begin();
4496  for (const auto &dof : mfdofs) *itd++ += dof;
4497  }
4498 
4499 
4500  inline void populate_contiguous_dofs_vector
4501  (std::vector<size_type> &dofs, const size_type &size, const size_type &ifirst)
4502  {
4503  dofs.assign(size, ifirst);
4504  for (size_type i=0; i < size; ++i) dofs[i] += i;
4505  }
4506 
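  // Common base for the matrix assembly instructions: at each integration
  // point the tensor t, weighted by coeff*alpha1*alpha2, is accumulated into
  // the elementary matrix 'elem'; derived instructions scatter 'elem' into
  // the global sparse matrix at the last integration point of the element
  // (or at every point when an interpolate transformation is involved).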
4507  struct ga_instruction_matrix_assembly_base : public ga_instruction {
4508  const base_tensor &t;
4509  const fem_interpolation_context &ctx1, &ctx2;
4510  const scalar_type &alpha1, &alpha2, &coeff;
4511  const size_type &nbpt, &ipt;
4512  base_vector elem;
4513  bool interpolate;
4514  std::vector<size_type> dofs1, dofs2, dofs1_sort;
4515  void add_tensor_to_element_matrix(bool initialize, bool empty_weight) {
4516  if (initialize) {
4517  if (empty_weight) elem.resize(0);
4518  elem.resize(t.size());
4519  if (!empty_weight)
4520  copy_scaled_4(t, coeff*alpha1*alpha2, elem);
4521  } else if (!empty_weight)
4522  // gmm::add(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
4523  // Faster than a daxpy blas call on my config
4524  add_scaled_4(t, coeff*alpha1*alpha2, elem);
4525  }
4526  ga_instruction_matrix_assembly_base
4527  (const base_tensor &t_,
4528  const fem_interpolation_context &ctx1_,
4529  const fem_interpolation_context &ctx2_,
4530  const scalar_type &a1, const scalar_type &a2, const scalar_type &coeff_,
4531  const size_type &nbpt_, const size_type &ipt_, bool interpolate_)
4532  : t(t_), ctx1(ctx1_), ctx2(ctx2_), alpha1(a1), alpha2(a2),
4533  coeff(coeff_), nbpt(nbpt_), ipt(ipt_), interpolate(interpolate_),
4534  dofs1(0), dofs2(0), dofs1_sort(0)
4535  {}
4536  protected:
4537  const bool false_=false;
4538  const size_type zero_=0;
4539  };
4540 
4541 
4542  struct ga_instruction_matrix_assembly_mf_mf
4543  : public ga_instruction_matrix_assembly_base
4544  {
4545  model_real_sparse_matrix &Krr, &Kru, &Kur, &Kuu;
4546  const gmm::sub_interval *const&I1, *const&I2, *const I1__, *const I2__;
4547  const mesh_fem *const&mf1, *const&mf2, *const mf1__, *const mf2__;
4548  const bool &reduced_mf1, &reduced_mf2; // refs to mf1/2->is_reduced()
4549  virtual int exec() {
4550  GA_DEBUG_INFO("Instruction: matrix term assembly mf-mf");
4551  if (!ctx1.is_convex_num_valid() || !ctx2.is_convex_num_valid()) return 0;
4552 
4553  bool initialize = (ipt == 0 || interpolate);
4554  bool empty_weight = (coeff == scalar_type(0));
4555  add_tensor_to_element_matrix(initialize, empty_weight); // t --> elem
4556 
4557  if (ipt == nbpt-1 || interpolate) { // finalize
4558  model_real_sparse_matrix &K = reduced_mf1 ? (reduced_mf2 ? Kuu : Kur)
4559  : (reduced_mf2 ? Kru : Krr);
4560  GA_DEBUG_ASSERT(I1->size() && I2->size(), "Internal error");
4561 
4562  scalar_type ninf = gmm::vect_norminf(elem);
4563  if (ninf == scalar_type(0)) return 0;
4564 
4565  size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
4566  size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
4567  size_type ifirst1 = I1->first(), ifirst2 = I2->first();
4568 
4569  size_type N = ctx1.N();
4570  size_type qmult1 = mf1->get_qdim();
4571  if (qmult1 > 1) qmult1 /= mf1->fem_of_element(cv1)->target_dim();
4572  populate_dofs_vector(dofs1, s1, ifirst1, qmult1, // --> dofs1
4573  mf1->ind_scalar_basic_dof_of_element(cv1));
4574  if (mf1 == mf2 && cv1 == cv2) {
4575  if (ifirst1 == ifirst2) {
4576  add_elem_matrix(K, dofs1, dofs1, dofs1_sort, elem, ninf*1E-14, N);
4577  } else {
4578  populate_dofs_vector(dofs2, dofs1.size(), ifirst2 - ifirst1, dofs1);
4579  add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
4580  }
4581  } else {
4582  N = std::max(N, ctx2.N());
4583  size_type qmult2 = mf2->get_qdim();
4584  if (qmult2 > 1) qmult2 /= mf2->fem_of_element(cv2)->target_dim();
4585  populate_dofs_vector(dofs2, s2, ifirst2, qmult2, // --> dofs2
4586  mf2->ind_scalar_basic_dof_of_element(cv2));
4587  add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
4588  }
4589  }
4590  return 0;
4591  }
4592 
4593  ga_instruction_matrix_assembly_mf_mf
4594  (const base_tensor &t_,
4595  model_real_sparse_matrix &Krr_, model_real_sparse_matrix &Kru_,
4596  model_real_sparse_matrix &Kur_, model_real_sparse_matrix &Kuu_,
4597  const fem_interpolation_context &ctx1_,
4598  const fem_interpolation_context &ctx2_,
4599  const ga_instruction_set::variable_group_info &vgi1,
4600  const ga_instruction_set::variable_group_info &vgi2,
4601  const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
4602  bool interpolate_)
4603  : ga_instruction_matrix_assembly_base
4604  (t_, ctx1_, ctx2_, vgi1.alpha, vgi2.alpha, coeff_, nbpt_, ipt_,
4605  interpolate_),
4606  Krr(Krr_), Kru(Kru_), Kur(Kur_), Kuu(Kuu_),
4607  I1(vgi1.I), I2(vgi2.I), I1__(nullptr), I2__(nullptr),
4608  mf1(vgi1.mf), mf2(vgi2.mf), mf1__(nullptr), mf2__(nullptr),
4609  reduced_mf1(vgi1.reduced_mf), reduced_mf2(vgi2.reduced_mf) {}
4610 
4611  ga_instruction_matrix_assembly_mf_mf
4612  (const base_tensor &t_,
4613  model_real_sparse_matrix &Kxr_, model_real_sparse_matrix &Kxu_,
4614  const fem_interpolation_context &ctx1_,
4615  const fem_interpolation_context &ctx2_,
4616  const gmm::sub_interval &I1_, const mesh_fem &mf1_, const scalar_type &a1,
4617  const ga_instruction_set::variable_group_info &vgi2,
4618  const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
4619  bool interpolate_)
4620  : ga_instruction_matrix_assembly_base
4621  (t_, ctx1_, ctx2_, a1, vgi2.alpha, coeff_, nbpt_, ipt_, interpolate_),
4622  Krr(Kxr_), Kru(Kxu_), Kur(Kxr_), Kuu(Kxu_),
4623  I1(I1__), I2(vgi2.I), I1__(&I1_), I2__(nullptr),
4624  mf1(mf1__), mf2(vgi2.mf), mf1__(&mf1_), mf2__(nullptr),
4625  reduced_mf1(false_), reduced_mf2(vgi2.reduced_mf) {}
4626 
4627  ga_instruction_matrix_assembly_mf_mf
4628  (const base_tensor &t_,
4629  model_real_sparse_matrix &Krx_, model_real_sparse_matrix &Kux_,
4630  const fem_interpolation_context &ctx1_,
4631  const fem_interpolation_context &ctx2_,
4632  const ga_instruction_set::variable_group_info &vgi1,
4633  const gmm::sub_interval &I2_, const mesh_fem &mf2_, const scalar_type &a2,
4634  const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
4635  bool interpolate_)
4636  : ga_instruction_matrix_assembly_base
4637  (t_, ctx1_, ctx2_, vgi1.alpha, a2, coeff_, nbpt_, ipt_, interpolate_),
4638  Krr(Krx_), Kru(Krx_), Kur(Kux_), Kuu(Kux_),
4639  I1(vgi1.I), I2(I2__), I1__(nullptr), I2__(&I2_),
4640  mf1(vgi1.mf), mf2(mf2__), mf1__(nullptr), mf2__(&mf2_),
4641  reduced_mf1(vgi1.reduced_mf), reduced_mf2(false_) {}
4642 
4643  ga_instruction_matrix_assembly_mf_mf
4644  (const base_tensor &t_, model_real_sparse_matrix &K_,
4645  const fem_interpolation_context &ctx1_,
4646  const fem_interpolation_context &ctx2_,
4647  const gmm::sub_interval &I1_, const mesh_fem &mf1_, const scalar_type &a1,
4648  const gmm::sub_interval &I2_, const mesh_fem &mf2_, const scalar_type &a2,
4649  const scalar_type &coeff_, const size_type &nbpt_, const size_type &ipt_,
4650  bool interpolate_)
4651  : ga_instruction_matrix_assembly_base
4652  (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, interpolate_),
4653  Krr(K_), Kru(K_), Kur(K_), Kuu(K_),
4654  I1(I1__), I2(I2__), I1__(&I1_), I2__(&I2_),
4655  mf1(mf1__), mf2(mf2__), mf1__(&mf1_), mf2__(&mf2_),
4656  reduced_mf1(false_), reduced_mf2(false_) {}
4657  };
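  // In ga_instruction_matrix_assembly_mf_mf above, the four target matrices
  // Krr, Kru, Kur and Kuu allow the treatment of reduced mesh_fems to be
  // postponed: a term whose two variables live on non-reduced mesh_fems is
  // assembled directly into Krr, while terms involving reduced mesh_fems are
  // assembled on the basic (unreduced) dofs into Kru, Kur or Kuu and are
  // expected to be mapped back to the reduced dofs at the end of the
  // assembly (schematically, a contribution of the form E1^T * Kuu * E2,
  // with E the extension matrices of the reduced mesh_fems).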
4658 
4659 
4660  struct ga_instruction_matrix_assembly_imd_mf
4661  : public ga_instruction_matrix_assembly_base
4662  {
4663  model_real_sparse_matrix &Kxr, &Kxu;
4664  const gmm::sub_interval *I1, *I2__, * const &I2;
4665  const im_data *imd1;
4666  const mesh_fem * const mf2__, * const &mf2;
4667  const bool &reduced_mf2; // ref to mf2->is_reduced()
4668  virtual int exec() {
4669  GA_DEBUG_INFO("Instruction: matrix term assembly "
4670  "(imdata or fixed size)-mf");
4671  if (!ctx1.is_convex_num_valid() || !ctx2.is_convex_num_valid()) return 0;
4672 
4673  bool empty_weight = (coeff == scalar_type(0));
4674  add_tensor_to_element_matrix(true, empty_weight); // t --> elem
4675 
4676  scalar_type ninf = gmm::vect_norminf(elem);
4677  if (ninf == scalar_type(0)) return 0;
4678 
4679  model_real_sparse_matrix &K = reduced_mf2 ? Kxu : Kxr;
4680  GA_DEBUG_ASSERT(I1->size() && I2->size(), "Internal error");
4681  size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
4682  size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
4683  size_type ifirst1 = I1->first(), ifirst2 = I2->first();
4684  if (imd1) ifirst1 += s1 * imd1->filtered_index_of_point(cv1, ipt);
4685 
4686  populate_contiguous_dofs_vector(dofs1, s1, ifirst1); // --> dofs1
4687  size_type qmult2 = mf2->get_qdim();
4688  if (qmult2 > 1) qmult2 /= mf2->fem_of_element(cv2)->target_dim();
4689  populate_dofs_vector(dofs2, s2, ifirst2, qmult2, // --> dofs2
4690  mf2->ind_scalar_basic_dof_of_element(cv2));
4691  add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, ctx2.N());
4692  return 0;
4693  }
4694 
4695  ga_instruction_matrix_assembly_imd_mf
4696  (const base_tensor &t_,
4697  model_real_sparse_matrix &Kxr_, model_real_sparse_matrix &Kxu_,
4698  const fem_interpolation_context &ctx1_,
4699  const fem_interpolation_context &ctx2_,
4700  const gmm::sub_interval &I1_, const im_data *imd1_, const scalar_type &a1,
4701  const ga_instruction_set::variable_group_info &vgi2,
4702  const scalar_type &coeff_, const size_type &ipt_)
4703  : ga_instruction_matrix_assembly_base
4704  (t_, ctx1_, ctx2_, a1, vgi2.alpha, coeff_, zero_, ipt_, false),
4705  Kxr(Kxr_), Kxu(Kxu_), I1(&I1_), I2__(nullptr), I2(vgi2.I),
4706  imd1(imd1_), mf2__(nullptr), mf2(vgi2.mf), reduced_mf2(vgi2.reduced_mf)
4707  {}
4708 
4709  ga_instruction_matrix_assembly_imd_mf
4710  (const base_tensor &t_, model_real_sparse_matrix &K_,
4711  const fem_interpolation_context &ctx1_,
4712  const fem_interpolation_context &ctx2_,
4713  const gmm::sub_interval &I1_, const im_data *imd1_, const scalar_type &a1,
4714  const gmm::sub_interval &I2_, const mesh_fem &mf2_, const scalar_type &a2,
4715  const scalar_type &coeff_, const size_type &ipt_)
4716  : ga_instruction_matrix_assembly_base
4717  (t_, ctx1_, ctx2_, a1, a2, coeff_, zero_, ipt_, false),
4718  Kxr(K_), Kxu(K_), I1(&I1_), I2__(&I2_), I2(I2__),
4719  imd1(imd1_), mf2__(&mf2_), mf2(mf2__), reduced_mf2(false_) {}
4720  };
4721 
4722  struct ga_instruction_matrix_assembly_mf_imd
4723  : public ga_instruction_matrix_assembly_base
4724  {
4725  model_real_sparse_matrix &Krx, &Kux;
4726  const gmm::sub_interval * const &I1, *const I1__, *I2;
4727  const mesh_fem * const &mf1, *const mf1__;
4728  const bool &reduced_mf1; // ref to mf1->is_reduced()
4729  const im_data *imd2;
4730  virtual int exec() {
4731  GA_DEBUG_INFO("Instruction: matrix term assembly "
4732  "mf-(imdata or fixed size)");
4733  if (!ctx1.is_convex_num_valid() || !ctx2.is_convex_num_valid()) return 0;
4734 
4735  bool empty_weight = (coeff == scalar_type(0));
4736  add_tensor_to_element_matrix(true, empty_weight); // t --> elem
4737 
4738  scalar_type ninf = gmm::vect_norminf(elem);
4739  if (ninf == scalar_type(0)) return 0;
4740 
4741  model_real_sparse_matrix &K = reduced_mf1 ? Kux : Krx;
4742  GA_DEBUG_ASSERT(I1->size() && I2->size(), "Internal error");
4743  size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
4744  size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
4745  size_type ifirst1 = I1->first(), ifirst2 = I2->first();
4746  if (imd2) ifirst2 += s2 * imd2->filtered_index_of_point(cv2, ipt);
4747 
4748  size_type qmult1 = mf1->get_qdim();
4749  if (qmult1 > 1) qmult1 /= mf1->fem_of_element(cv1)->target_dim();
4750  populate_dofs_vector(dofs1, s1, ifirst1, qmult1, // --> dofs1
4751  mf1->ind_scalar_basic_dof_of_element(cv1));
4752  populate_contiguous_dofs_vector(dofs2, s2, ifirst2); // --> dofs2
4753  add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, ctx1.N());
4754  return 0;
4755  }
4756 
4757  ga_instruction_matrix_assembly_mf_imd
4758  (const base_tensor &t_,
4759  model_real_sparse_matrix &Krx_, model_real_sparse_matrix &Kux_,
4760  const fem_interpolation_context &ctx1_,
4761  const fem_interpolation_context &ctx2_,
4762  const ga_instruction_set::variable_group_info &vgi1,
4763  const gmm::sub_interval &I2_, const im_data *imd2_, const scalar_type &a2,
4764  const scalar_type &coeff_, const size_type &ipt_)
4765  : ga_instruction_matrix_assembly_base
4766  (t_, ctx1_, ctx2_, vgi1.alpha, a2, coeff_, zero_, ipt_, false),
4767  Krx(Krx_), Kux(Kux_), I1(vgi1.I), I1__(nullptr), I2(&I2_),
4768  mf1(vgi1.mf), mf1__(nullptr), reduced_mf1(vgi1.reduced_mf), imd2(imd2_)
4769  {}
4770 
4771  ga_instruction_matrix_assembly_mf_imd
4772  (const base_tensor &t_, model_real_sparse_matrix &K_,
4773  const fem_interpolation_context &ctx1_,
4774  const fem_interpolation_context &ctx2_,
4775  const gmm::sub_interval &I1_, const mesh_fem &mf1_, const scalar_type &a1,
4776  const gmm::sub_interval &I2_, const im_data *imd2_, const scalar_type &a2,
4777  const scalar_type &coeff_, const size_type &ipt_)
4778  : ga_instruction_matrix_assembly_base
4779  (t_, ctx1_, ctx2_, a1, a2, coeff_, zero_, ipt_, false),
4780  Krx(K_), Kux(K_), I1(I1__), I1__(&I1_), I2(&I2_),
4781  mf1(mf1__), mf1__(&mf1_), reduced_mf1(false_), imd2(imd2_) {}
4782  };
4783 
4784 
4785 
4786  struct ga_instruction_matrix_assembly_imd_imd
4787  : public ga_instruction_matrix_assembly_base
4788  {
4789  model_real_sparse_matrix &K;
4790  const gmm::sub_interval &I1, &I2;
4791  const im_data *imd1, *imd2;
4792  virtual int exec() {
4793  GA_DEBUG_INFO("Instruction: matrix term assembly "
4794  "(imdata or fixed size)-(imdata or fixed size)");
4795  GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");
4796 
4797  bool empty_weight = (coeff == scalar_type(0));
4798  add_tensor_to_element_matrix(true, empty_weight); // t --> elem
4799 
4800  scalar_type ninf = gmm::vect_norminf(elem);
4801  if (ninf == scalar_type(0)) return 0;
4802 
4803  size_type s1 = t.sizes()[0], s2 = t.sizes()[1];
4804  size_type ifirst1 = I1.first(), ifirst2 = I2.first();
4805  if (imd1)
4806  ifirst1 += s1 * imd1->filtered_index_of_point(ctx1.convex_num(), ipt);
4807  if (imd2)
4808  ifirst2 += s2 * imd2->filtered_index_of_point(ctx2.convex_num(), ipt);
4809 
4810  populate_contiguous_dofs_vector(dofs2, s2, ifirst2);
4811  add_elem_matrix_contiguous_rows(K, ifirst1, s1, dofs2, elem, ninf*1E-14);
4812  return 0;
4813  }
4814  ga_instruction_matrix_assembly_imd_imd
4815  (const base_tensor &t_, model_real_sparse_matrix &K_,
4816  const fem_interpolation_context &ctx1_,
4817  const fem_interpolation_context &ctx2_,
4818  const gmm::sub_interval &I1_, const im_data *imd1_, const scalar_type &a1,
4819  const gmm::sub_interval &I2_, const im_data *imd2_, const scalar_type &a2,
4820  const scalar_type &coeff_, const size_type &ipt_)
4821  : ga_instruction_matrix_assembly_base
4822  (t_, ctx1_, ctx2_, a1, a2, coeff_, zero_, ipt_, false),
4823  K(K_), I1(I1_), I2(I2_), imd1(imd1_), imd2(imd2_) {}
4824  };
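  // Note on the im_data handling above: im_data variables carry no finite
  // element basis, their dofs being stored contiguously with one block of
  // size s per (filtered) integration point. Hence the offset
  //     ifirst += s * imd->filtered_index_of_point(cv, ipt)
  // and the use of populate_contiguous_dofs_vector instead of the
  // per-element scalar dof lists of a mesh_fem.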
4825 
4826 
4827  struct ga_instruction_matrix_assembly_standard_scalar
4828  : public ga_instruction_matrix_assembly_base
4829  {
4830  model_real_sparse_matrix &K;
4831  const gmm::sub_interval &I1, &I2;
4832  const mesh_fem *pmf1, *pmf2;
4833  virtual int exec() {
4834  GA_DEBUG_INFO("Instruction: matrix term assembly for standard "
4835  "scalar fems");
4836  if (ipt == 0) {
4837  elem.resize(t.size());
4838  // gmm::copy(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
4839  copy_scaled_4(t, coeff*alpha1*alpha2, elem);
4840  } else
4841  // gmm::add(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
4842  // Faster than a daxpy blas call on my config
4843  add_scaled_4(t, coeff*alpha1*alpha2, elem);
4844 
4845  if (ipt == nbpt-1) { // finalize
4846  GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");
4847 
4848  scalar_type ninf = gmm::vect_norminf(elem);
4849  if (ninf == scalar_type(0)) return 0;
4850 
4851  size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num(), N=ctx1.N();
4852  if (cv1 == size_type(-1)) return 0;
4853  auto &ct1 = pmf1->ind_scalar_basic_dof_of_element(cv1);
4854  GA_DEBUG_ASSERT(ct1.size() == t.sizes()[0], "Internal error");
4855  populate_dofs_vector(dofs1, ct1.size(), I1.first(), ct1);
4856 
4857  if (pmf2 == pmf1 && cv1 == cv2) {
4858  if (I1.first() == I2.first()) {
4859  add_elem_matrix(K, dofs1, dofs1, dofs1_sort, elem, ninf*1E-14, N);
4860  } else {
4861  populate_dofs_vector(dofs2, dofs1.size(), I2.first() - I1.first(),
4862  dofs1);
4863  add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
4864  }
4865  } else {
4866  if (cv2 == size_type(-1)) return 0;
4867  auto &ct2 = pmf2->ind_scalar_basic_dof_of_element(cv2);
4868  GA_DEBUG_ASSERT(ct2.size() == t.sizes()[1], "Internal error");
4869  populate_dofs_vector(dofs2, ct2.size(), I2.first(), ct2);
4870  add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
4871  }
4872  }
4873  return 0;
4874  }
4875  ga_instruction_matrix_assembly_standard_scalar
4876  (const base_tensor &t_, model_real_sparse_matrix &K_,
4877  const fem_interpolation_context &ctx1_,
4878  const fem_interpolation_context &ctx2_,
4879  const gmm::sub_interval &I1_, const gmm::sub_interval &I2_,
4880  const mesh_fem *mfn1_, const mesh_fem *mfn2_,
4881  const scalar_type &a1, const scalar_type &a2, const scalar_type &coeff_,
4882  const size_type &nbpt_, const size_type &ipt_)
4883  : ga_instruction_matrix_assembly_base
4884  (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, false),
4885  K(K_), I1(I1_), I2(I2_), pmf1(mfn1_), pmf2(mfn2_) {}
4886  };
4887 
4888  struct ga_instruction_matrix_assembly_standard_vector
4889  : public ga_instruction_matrix_assembly_base
4890  {
4891  model_real_sparse_matrix &K;
4892  const gmm::sub_interval &I1, &I2;
4893  const mesh_fem *pmf1, *pmf2;
4894  virtual int exec() {
4895  GA_DEBUG_INFO("Instruction: matrix term assembly for standard "
4896  "vector fems");
4897  if (ipt == 0) {
4898  elem.resize(t.size());
4899  copy_scaled_8(t, coeff*alpha1*alpha2, elem);
4900  // gmm::copy(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
4901  } else
4902  // gmm::add(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
4903  // (Far) faster than a daxpy blas call on my config.
4904  add_scaled_8(t, coeff*alpha1*alpha2, elem);
4905 
4906  if (ipt == nbpt-1) { // finalize
4907  GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");
4908 
4909  scalar_type ninf = gmm::vect_norminf(elem);
4910  if (ninf == scalar_type(0)) return 0;
4911  size_type s1 = t.sizes()[0], s2 = t.sizes()[1], N = ctx1.N();
4912 
4913  size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
4914  if (cv1 == size_type(-1)) return 0;
4915  size_type qmult1 = pmf1->get_qdim();
4916  if (qmult1 > 1) qmult1 /= pmf1->fem_of_element(cv1)->target_dim();
4917  populate_dofs_vector(dofs1, s1, I1.first(), qmult1, // --> dofs1
4918  pmf1->ind_scalar_basic_dof_of_element(cv1));
4919 
4920  if (pmf2 == pmf1 && cv1 == cv2 && I1.first() == I2.first()) {
4921  add_elem_matrix(K, dofs1, dofs1, dofs1_sort, elem, ninf*1E-14, N);
4922  } else {
4923  if (pmf2 == pmf1 && cv1 == cv2) {
4924  populate_dofs_vector(dofs2, dofs1.size(), I2.first() - I1.first(),
4925  dofs1);
4926  } else {
4927  if (cv2 == size_type(-1)) return 0;
4928  size_type qmult2 = pmf2->get_qdim();
4929  if (qmult2 > 1) qmult2 /= pmf2->fem_of_element(cv2)->target_dim();
4930  populate_dofs_vector(dofs2, s2, I2.first(), qmult2, // --> dofs2
4931  pmf2->ind_scalar_basic_dof_of_element(cv2));
4932  }
4933  add_elem_matrix(K, dofs1, dofs2, dofs1_sort, elem, ninf*1E-14, N);
4934  }
4935  }
4936  return 0;
4937  }
4938  ga_instruction_matrix_assembly_standard_vector
4939  (const base_tensor &t_, model_real_sparse_matrix &K_,
4940  const fem_interpolation_context &ctx1_,
4941  const fem_interpolation_context &ctx2_,
4942  const gmm::sub_interval &I1_, const gmm::sub_interval &I2_,
4943  const mesh_fem *mfn1_, const mesh_fem *mfn2_,
4944  const scalar_type &a1, const scalar_type &a2, const scalar_type &coeff_,
4945  const size_type &nbpt_, const size_type &ipt_)
4946  : ga_instruction_matrix_assembly_base
4947  (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, false),
4948  K(K_), I1(I1_), I2(I2_), pmf1(mfn1_), pmf2(mfn2_) {}
4949  };
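  // ga_instruction_matrix_assembly_standard_scalar/_vector above are the
  // plain fast paths which assemble directly into a single matrix K using
  // the basic dof numbering of the two mesh_fems (no variable groups, no
  // interpolate transformation): the scalar variant assumes the tensor
  // dimensions match the scalar basic dofs of the element, while the vector
  // variant additionally accounts for a vectorization factor qmult when
  // building dofs1/dofs2.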
4950 
4951  template<int QQ>
4952  struct ga_instruction_matrix_assembly_standard_vector_opt10
4953  : public ga_instruction_matrix_assembly_base
4954  {
4955  model_real_sparse_matrix &K;
4956  const gmm::sub_interval &I1, &I2;
4957  const mesh_fem *pmf1, *pmf2;
4958  virtual int exec() {
4959  GA_DEBUG_INFO("Instruction: matrix term assembly for standard "
4960  "vector fems optimized for format 10 qdim " << QQ);
4961  size_type s1_q = QQ*t.sizes()[0];
4962  size_type ss1 = t.sizes()[0]/QQ, ss2 = t.sizes()[1]/QQ;
4963  scalar_type e = coeff*alpha1*alpha2;
4964  if (ipt == 0) {
4965  elem.resize(ss1*ss2);
4966  auto itel = elem.begin();
4967  for (size_type j = 0; j < ss2; ++j) {
4968  auto it = t.begin() + j*s1_q;
4969  for (size_type i = 0; i < ss1; ++i, it += QQ)
4970  *itel++ = (*it) * e;
4971  }
4972  } else {
4973  auto itel = elem.begin();
4974  for (size_type j = 0; j < ss2; ++j) {
4975  auto it = t.begin() + j*s1_q;
4976  for (size_type i = 0; i < ss1; ++i, it += QQ)
4977  *itel++ += (*it) * e;
4978  }
4979  }
4980  if (ipt == nbpt-1) { // finalize
4981  GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");
4982 
4983  scalar_type ninf = gmm::vect_norminf(elem) * 1E-14;
4984  if (ninf == scalar_type(0)) return 0;
4985  size_type N = ctx1.N();
4986  size_type cv1 = ctx1.convex_num(), cv2 = ctx2.convex_num();
4987  size_type i1 = I1.first(), i2 = I2.first();
4988  if (cv1 == size_type(-1)) return 0;
4989  populate_dofs_vector(dofs1, ss1, i1,
4990  pmf1->ind_scalar_basic_dof_of_element(cv1));
4991  bool same_dofs(pmf2 == pmf1 && cv1 == cv2 && i1 == i2);
4992 
4993  if (!same_dofs) {
4994  if (cv2 == size_type(-1)) return 0;
4995  populate_dofs_vector(dofs2, ss2, i2,
4996  pmf2->ind_scalar_basic_dof_of_element(cv2));
4997  }
4998  std::vector<size_type> &dofs2_ = same_dofs ? dofs1 : dofs2;
4999  add_elem_matrix(K, dofs1, dofs2_, dofs1_sort, elem, ninf, N);
5000  for (size_type i = 0; i < ss1; ++i) (dofs1[i])++;
5001  if (!same_dofs) for (size_type i = 0; i < ss2; ++i) (dofs2[i])++;
5002  add_elem_matrix(K, dofs1, dofs2_, dofs1_sort, elem, ninf, N);
5003  if (QQ >= 3) {
5004  for (size_type i = 0; i < ss1; ++i) (dofs1[i])++;
5005  if (!same_dofs) for (size_type i = 0; i < ss2; ++i) (dofs2[i])++;
5006  add_elem_matrix(K, dofs1, dofs2_, dofs1_sort, elem, ninf, N);
5007  }
5008  }
5009  return 0;
5010  }
5011 
5012  ga_instruction_matrix_assembly_standard_vector_opt10
5013  (const base_tensor &t_, model_real_sparse_matrix &Kn_,
5014  const fem_interpolation_context &ctx1_,
5015  const fem_interpolation_context &ctx2_,
5016  const gmm::sub_interval &In1_, const gmm::sub_interval &In2_,
5017  const mesh_fem *mfn1_, const mesh_fem *mfn2_,
5018  const scalar_type &a1, const scalar_type &a2, const scalar_type &coeff_,
5019  const size_type &nbpt_, const size_type &ipt_)
5020  : ga_instruction_matrix_assembly_base
5021  (t_, ctx1_, ctx2_, a1, a2, coeff_, nbpt_, ipt_, false),
5022  K(Kn_), I1(In1_), I2(In2_), pmf1(mfn1_), pmf2(mfn2_)
5023  {
5024  static_assert(QQ >= 2 && QQ <=3,
5025  "Template implemented only for QQ=2 and QQ=3");
5026  }
5027  };
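  // The template above is an optimized variant for uniformly vectorized fems
  // of dimension QQ whose local tensor is block diagonal with QQ identical
  // scalar blocks (assembly "format 10"): only the scalar block is
  // accumulated into elem, by reading every QQ-th entry of every QQ-th
  // column of t, and that block is then added QQ times to K, shifting all
  // dof indices by one component between insertions.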
5028 
5029 
5030  struct ga_instruction_condensation_sub : public ga_instruction {
5031  // one such instruction is used for every cluster of intercoupled
5032  // condensed variables
5033  gmm::dense_matrix<base_tensor *> KQJprime;
5034  std::vector<base_tensor *> RQprime;
5035  gmm::dense_matrix<base_tensor const *> KQQloc, KQJloc;
5036  base_tensor invKqqqq, Kqqjj;
5037  base_vector Rqq;
5038  std::vector<std::array<size_type,3>> partQ, partJ;
5039  const scalar_type &coeff; // &alpha1, &alpha2 ?
5040  virtual int exec() { // implementation can be optimized
5041  GA_DEBUG_INFO("Instruction: variable cluster subdiagonal condensation");
5042  // copy from KQQ to invKqqqq
5043  for (const auto &qqq1 : partQ) {
5044  size_type q1 = qqq1[0], qq1start = qqq1[1], qq1end = qqq1[2];
5045  for (const auto &qqq2 : partQ) {
5046  size_type q2 = qqq2[0], qq2start = qqq2[1], qq2end = qqq2[2];
5047  if (KQQloc(q1,q2)) {
5048  auto itr = KQQloc(q1,q2)->cbegin();
5049  GMM_ASSERT1(KQQloc(q1,q2)->size()
5050  == (qq1end-qq1start)*(qq2end-qq2start),
5051  "Internal error");
5052  for (size_type qq2=qq2start; qq2 < qq2end; ++qq2)
5053  for (size_type qq1=qq1start; qq1 < qq1end; ++qq1)
5054  invKqqqq(qq1,qq2) = *itr++;
5055  }
5056  }
5057  }
5058  // calculate inverse matrix invKqqqq
5059  bgeot::lu_inverse(&(invKqqqq[0]), invKqqqq.size(0));
5060 
5061  // Resize Kqqjj as primary variable sizes may change dynamically
5062  size_type prev_j(0);
5063  for (auto &&jjj : partJ) {
5064  size_type j=jjj[0];
5065  size_type new_j(0);
5066  for (const auto &qqq : partQ) {
5067  size_type q=qqq[0];
5068  if (KQJloc(q,j)) {
5069  if (new_j) {
5070  GMM_ASSERT1(new_j == KQJloc(q,j)->size(1), "Internal error");
5071  } else
5072  new_j = KQJloc(q,j)->size(1);
5073  }
5074  }
5075  // Resize KQJprime submatrices to match KQJloc sizes
5076  for (const auto &qqq : partQ) {
5077  size_type q=qqq[0];
5078  KQJprime(q,j)->adjust_sizes(qqq[2]-qqq[1], new_j);
5079  }
5080  jjj[1] = prev_j;
5081  prev_j += new_j;
5082  jjj[2] = prev_j;
5083  }
5084 
5085  Kqqjj.adjust_sizes(partQ.back()[2], partJ.back()[2]);
5086  gmm::clear(Kqqjj.as_vector());
5087  gmm::clear(Rqq);
5088 
5089  // multiply invKqqqq with all submatrices in KQJloc and RQprime and store
5090  // the results in Kqqjj and Rqq
5091  for (const auto &jjj : partJ) {
5092  size_type j = jjj[0], jjstart = jjj[1], jjend = jjj[2];
5093  for (const auto &qqq2 : partQ) {
5094  size_type q2 = qqq2[0], qq2start = qqq2[1], qq2end = qqq2[2];
5095  if (KQJloc(q2,j)) {
5096  auto itr = KQJloc(q2,j)->begin(); // auto &mat = KQJloc(q2,j);
5097  for (size_type jj=jjstart; jj < jjend; ++jj) {
5098  for (size_type qq2=qq2start; qq2 < qq2end; ++qq2, ++itr) {
5099  for (size_type qq1=0; qq1 < partQ.back()[2]; ++qq1) {
5100  Kqqjj(qq1,jj) += invKqqqq(qq1,qq2)*(*itr);
5101  // Kqqjj(qq1,jj) += invKqq(qq1,qq2)*mat(qq2-qqstart,jj-jjstart);
5102  } // for qq1
5103  } // for qq2
5104  } // for jj
5105  GMM_ASSERT1(itr == KQJloc(q2,j)->cend(), "Internal error");
5106  }
5107  } // in partQ
5108  } // in partJ
5109  for (const auto &qqq2 : partQ) {
5110  size_type q2 = qqq2[0], qq2start = qqq2[1], qq2end = qqq2[2];
5111  if (RQprime[q2]) {
5112  auto itr = RQprime[q2]->cbegin();
5113  for (size_type qq2=qq2start; qq2 < qq2end; ++qq2, ++itr) {
5114  for (size_type qq1=0; qq1 < invKqqqq.size(0); ++qq1)
5115  Rqq[qq1] += invKqqqq(qq1,qq2)*(*itr);
5116  } // for qq2
5117  GMM_ASSERT1(itr == RQprime[q2]->cend(), "Internal error");
5118  }
5119  } // in partQ
5120 
5121  // distribute the results from Kqqjj/Rqq to KQJprime/RQprime
5122  // submatrices/subvectors
5123  for (const auto &qqq1 : partQ) {
5124  size_type q1 = qqq1[0], qq1start = qqq1[1], qq1end = qqq1[2];
5125  { // writing into RQprime
5126  auto itw = RQprime[q1]->begin();
5127  for (size_type qq1=qq1start; qq1 < qq1end; ++qq1)
5128  *itw++ = Rqq[qq1]/coeff;
5129  }
5130  for (const auto &jjj2 : partJ) {
5131  size_type j2 = jjj2[0], jj2start = jjj2[1], jj2end = jjj2[2];
5132  auto itw = KQJprime(q1,j2)->begin();
5133  for (size_type jj2=jj2start; jj2 < jj2end; ++jj2)
5134  for (size_type qq1=qq1start; qq1 < qq1end; ++qq1)
5135  *itw++ = Kqqjj(qq1,jj2);
5136  }
5137  }
5138  return 0;
5139  }
5140 
5141  ga_instruction_condensation_sub(gmm::dense_matrix<base_tensor *> &KQJpr,
5142  std::vector<base_tensor *> &RQpr, // input/output
5143  const gmm::dense_matrix<base_tensor *> &KQQ,
5144  const gmm::dense_matrix<base_tensor *> &KQJ,
5145  const std::set<size_type> &Qset,
5146  const scalar_type &coeff_)
5147  : KQJprime(KQJpr), RQprime(RQpr), coeff(coeff_)
5148  {
5149  // copy pointers, converting base_tensor * to base_tensor const *
5150  KQQloc.resize(KQQ.nrows(), KQQ.ncols());
5151  KQJloc.resize(KQJ.nrows(), KQJ.ncols());
5152  for (size_type i=0; i < KQQ.as_vector().size(); ++i) KQQloc[i] = KQQ[i];
5153  for (size_type i=0; i < KQJ.as_vector().size(); ++i) KQJloc[i] = KQJ[i];
5154 
5155  for (size_type j=0; j < KQJ.ncols(); ++j)
5156  for (const size_type &q : Qset)
5157  if (KQJ(q,j)) {
5158  partJ.push_back(std::array<size_type,3>{j,0,0});
5159  break;
5160  }
5161 
5162  partQ.resize(0);
5163  for (const size_type &q : Qset)
5164  partQ.push_back(std::array<size_type,3>{q,0,0});
5165  size_type prev_q(0);
5166  for (auto &qqq1 : partQ) {
5167  size_type q1 = qqq1[0];
5168  size_type new_q(0);
5169  for (const size_type &q2 : Qset)
5170  if (new_q) {
5171  GMM_ASSERT1(new_q == KQQ(q1,q2)->size(0) &&
5172  new_q == KQQ(q2,q1)->size(1), "Internal error");
5173  } else
5174  new_q = KQQ(q1,q2)->size(0);
5175  qqq1[1] = prev_q;
5176  prev_q += new_q;
5177  qqq1[2] = prev_q;
5178  }
5179  invKqqqq.adjust_sizes(partQ.back()[2], partQ.back()[2]);
5180  Rqq.resize(partQ.back()[2]);
5181  // Kqqjj will be resized dynamically due to possible changes in j interval
5182  }
5183  };
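  // Schematically, in ga_instruction_condensation_sub above, for a cluster
  // of condensed variables q coupled to kept (primary) variables j, the
  // local linear system is
  //     [ Kqq  Kqj ] [ dq ]   [ Rq ]
  //     [ Kjq  Kjj ] [ dj ] = [ Rj ]
  // and dq is eliminated. The instruction computes and stores
  //     KQJprime = inv(Kqq) * Kqj    and    RQprime = inv(Kqq) * Rq / coeff,
  // the Schur complement updates  Kjj - Kjq*KQJprime  and  Rj - Kjq*RQprime
  // being performed afterwards by ga_instruction_condensation_super_K/_R.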
5184 
5185 
5186  struct ga_instruction_condensation_super_K : public ga_instruction {
5187  base_tensor &Kij;
5188  std::vector<base_tensor *> KiQ, KQj; // indexed wrt q in Q
5189  size_type Qsize;
5190 
5191  virtual int exec() {
5192  GA_DEBUG_INFO("Instruction: contribution of condensation to kept part");
5193 
5194  size_type m = KiQ[0]->size(0);
5195  size_type n = KQj[0]->size(1);
5196  Kij.adjust_sizes(m,n);
5197  gmm::clear(Kij.as_vector());
5198  for (size_type k=0; k < Qsize; ++k) {
5199  const base_tensor &K1 = *KiQ[k], &K2 = *KQj[k];
5200  size_type qqsize = K1.size(1);
5201  GMM_ASSERT1(K1.size(0) == m && K2.size(1) == n && K2.size(0) == qqsize,
5202  "Internal error");
5203 
5204  base_tensor::iterator it = Kij.begin();
5205  for (size_type jj = 0; jj < n; ++jj)
5206  for (size_type ii = 0; ii < m; ++ii, ++it)
5207  for (size_type qq = 0; qq < qqsize; ++qq)
5208  *it -= K1[ii+qq*m] * K2[qq+jj*qqsize];
5209  GA_DEBUG_ASSERT(it == Kij.end(), "Wrong sizes");
5210  }
5211  return 0;
5212  }
5213  ga_instruction_condensation_super_K(base_tensor &Kij_,
5214  const std::vector<base_tensor *> KiQ_,
5215  const std::vector<base_tensor *> KQj_)
5216  : Kij(Kij_), KiQ(KiQ_), KQj(KQj_)
5217  {
5218  Qsize = KiQ.size();
5219  GMM_ASSERT1(KiQ.size() == KQj.size(), "Internal error");
5220  }
5221  };
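  // (base_tensor entries are stored with the first index varying fastest, so
  // in the triple loop above K1[ii+qq*m] is K1(ii,qq) and K2[qq+jj*qqsize]
  // is K2(qq,jj); Kij thus receives -KiQ[k]*KQj[k] summed over the cluster.)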
5222 
5223  struct ga_instruction_condensation_super_R : public ga_instruction {
5224  base_tensor &Ri;
5225  std::vector<base_tensor *> KiQ, RQpr; // indexed wrt q in Q
5226  size_type Qsize;
5227 
5228  virtual int exec() {
5229  GA_DEBUG_INFO("Instruction: contribution of condensation to primary rhs");
5230 
5231  size_type m = KiQ[0]->size(0);
5232  Ri.adjust_sizes(m);
5233  gmm::clear(Ri.as_vector());
5234  for (size_type k=0; k < Qsize; ++k) {
5235  const base_tensor &K1 = *KiQ[k], &R2 = *RQpr[k];
5236  size_type qqsize = K1.size(1);
5237  GMM_ASSERT1(K1.size(0) == m && R2.size(0) == qqsize, "Internal error");
5238  base_tensor::iterator it = Ri.begin();
5239  for (size_type ii = 0; ii < m; ++ii, ++it)
5240  for (size_type qq = 0; qq < qqsize; ++qq)
5241  *it -= K1[ii+qq*m] * R2[qq];
5242  GA_DEBUG_ASSERT(it == Ri.end(), "Wrong sizes");
5243  }
5244  return 0;
5245  }
5246  ga_instruction_condensation_super_R(base_tensor &Ri_,
5247  const std::vector<base_tensor *> KiQ_,
5248  const std::vector<base_tensor *> RQpr_)
5249  : Ri(Ri_), KiQ(KiQ_), RQpr(RQpr_)
5250  {
5251  Qsize = KiQ.size();
5252  GMM_ASSERT1(KiQ.size() == RQpr.size(), "Internal error");
5253  }
5254  };
5255 
5256  //=========================================================================
5257  // Compilation of assembly trees into a list of basic instructions
5258  //=========================================================================
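  // For illustration: compiling a weak-form term such as
  //     "Grad_u . Grad_Test_u"
  // on a given (mesh, integration method, region) produces a list of basic
  // instructions of the kinds defined above, roughly: update of the
  // fem_precomp (pfp), evaluation of the base/gradient values, extraction of
  // the local dofs of "u", evaluation of Grad_u, a tensor contraction with
  // the test function gradients, and finally a vector assembly instruction;
  // ga_exec then runs this list at every Gauss point of the region.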
5259 
5260  static void extend_variable_in_gis(const ga_workspace &workspace,
5261  const std::string &varname,
5262  ga_instruction_set &gis) {
5263  if (workspace.variable_group_exists(varname)) {
5264  for (const std::string &v : workspace.variable_group(varname))
5265  extend_variable_in_gis(workspace, v, gis);
5266  } else if (gis.extended_vars.count(varname) == 0) {
5267  const mesh_fem *mf = workspace.associated_mf(varname);
5268  if (mf->is_reduced()) {
5269  auto n = (mf->get_qdim() == 1) ? workspace.qdim(varname) : 1;
5270  base_vector &U = gis.really_extended_vars[varname];
5271  gmm::resize(U, mf->nb_basic_dof() * n);
5272  mf->extend_vector(workspace.value(varname), U);
5273  gis.extended_vars[varname] = &(gis.really_extended_vars[varname]);
5274  } else {
5275  gis.extended_vars[varname] = &(workspace.value(varname));
5276  }
5277  }
5278  }
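  // In extend_variable_in_gis above, a variable attached to a reduced
  // mesh_fem is extended once to the basic dof numbering (mf->extend_vector
  // into really_extended_vars) so that the local dof slicing instructions
  // can address all variables uniformly; non-reduced variables are simply
  // referenced in place.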
5279 
5280  static void ga_clear_node_list
5281  (pga_tree_node pnode, std::map<scalar_type,
5282  std::list<pga_tree_node> > &node_list) {
5283  std::list<pga_tree_node> &loc_node_list = node_list[pnode->hash_value];
5284  for (std::list<pga_tree_node>::iterator it = loc_node_list.begin();
5285  it != loc_node_list.end(); ) {
5286  if (*it == pnode) it = loc_node_list.erase(it); else ++it;
5287  }
5288  for (size_type i = 0; i < pnode->children.size(); ++i)
5289  ga_clear_node_list(pnode->children[i], node_list);
5290  }
5291 
5292  // The workspace argument is not const because temporary unreduced
5293  // variables may be declared in it.
5294  static void ga_compile_node(const pga_tree_node pnode,
5295  ga_workspace &workspace,
5296  ga_instruction_set &gis,
5297  ga_instruction_set::region_mim_instructions &rmi,
5298  const mesh &m, bool function_case,
5299  ga_if_hierarchy &if_hierarchy) {
5300 
5301  if (pnode->node_type == GA_NODE_PREDEF_FUNC ||
5302  pnode->node_type == GA_NODE_OPERATOR ||
5303  pnode->node_type == GA_NODE_SPEC_FUNC ||
5304  pnode->node_type == GA_NODE_CONSTANT ||
5305  pnode->node_type == GA_NODE_ALLINDICES ||
5306  pnode->node_type == GA_NODE_RESHAPE ||
5307  pnode->node_type == GA_NODE_SWAP_IND ||
5308  pnode->node_type == GA_NODE_IND_MOVE_LAST ||
5309  pnode->node_type == GA_NODE_CONTRACT) return;
5310 
5311  // cout << "compiling "; ga_print_node(pnode, cout); cout << endl;
5312 
5313  pga_instruction pgai;
5314  ga_if_hierarchy *pif_hierarchy = &if_hierarchy;
5315  ga_if_hierarchy new_if_hierarchy;
5316 
5317  const mesh_fem *mf1 = 0, *mf2 = 0;
5318  const mesh_fem **mfg1 = 0, **mfg2 = 0;
5319  fem_interpolation_context *pctx1 = 0, *pctx2 = 0;
5320  bool tensor_to_clear = false;
5321  bool tensor_to_adapt = false;
5322 
5323  if (pnode->test_function_type) {
5324  if (pnode->name_test1.size())
5325  mf1 = workspace.associated_mf(pnode->name_test1);
5326  if (mf1) {
5327  pctx1 = &(gis.ctx);
5328  const std::string &intn1 = pnode->interpolate_name_test1;
5329  if (intn1.size()) {
5330  if (workspace.secondary_domain_exists(intn1)) {
5331  pctx1 = &(rmi.secondary_domain_infos.ctx);
5332  } else {
5333  tensor_to_adapt = true;
5334  pctx1 = &(rmi.interpolate_infos[intn1].ctx);
5335  if (workspace.variable_group_exists(pnode->name_test1)) {
5336  ga_instruction_set::variable_group_info &vgi =
5337  rmi.interpolate_infos[intn1].groups_info[pnode->name_test1];
5338  mfg1 = &(vgi.mf);
5339  mf1 = 0;
5340  }
5341  }
5342  }
5343  }
5344  if (pnode->name_test2.size())
5345  mf2 = workspace.associated_mf(pnode->name_test2);
5346  if (mf2) {
5347  pctx2 = &(gis.ctx);
5348  const std::string &intn2 = pnode->interpolate_name_test2;
5349  if (intn2.size()) {
5350  if (workspace.secondary_domain_exists(intn2)) {
5351  pctx2 = &(rmi.secondary_domain_infos.ctx);
5352  } else {
5353  tensor_to_adapt = true;
5354  pctx2 = &(rmi.interpolate_infos[intn2].ctx);
5355  if (workspace.variable_group_exists(pnode->name_test2)) {
5356  ga_instruction_set::variable_group_info &vgi =
5357  rmi.interpolate_infos[intn2].groups_info[pnode->name_test2];
5358  mfg2 = &(vgi.mf);
5359  mf2 = 0;
5360  }
5361  }
5362  }
5363  }
5364  }
5365 
5366  // Produce a resize instruction which is stored if no equivalent node is
5367  // detected and if the mesh is not uniform.
5368  pnode->t.set_to_original(); pnode->t.set_sparsity(0, 0);
5369  bool is_uniform = false;
5370  if (pnode->test_function_type == 1) {
5371  if (mf1 || mfg1)
5372  pgai = std::make_shared<ga_instruction_first_ind_tensor>
5373  (pnode->tensor(), *pctx1, pnode->qdim1, mf1, mfg1);
5374  if (mf1 && mf1->is_uniform())
5375  { is_uniform = true; pctx1->invalid_convex_num(); }
5376  } else if (pnode->test_function_type == 2) {
5377  if (mf2 || mfg2)
5378  pgai = std::make_shared<ga_instruction_first_ind_tensor>
5379  (pnode->tensor(), *pctx2, pnode->qdim2, mf2, mfg2);
5380  if (mf2 && mf2->is_uniform())
5381  { is_uniform = true; pctx2->invalid_convex_num(); }
5382  } else if (pnode->test_function_type == 3) {
5383  if ((mf1 || mfg1) && (mf2 || mfg2)) {
5384  pgai = std::make_shared<ga_instruction_two_first_ind_tensor>
5385  (pnode->tensor(), *pctx1, *pctx2, pnode->qdim1, mf1, mfg1,
5386  pnode->qdim2, mf2, mfg2);
5387  if (mf1 && mf1->is_uniform() && mf2 && mf2->is_uniform()) {
5388  is_uniform = true;
5389  pctx1->invalid_convex_num();
5390  pctx2->invalid_convex_num();
5391  }
5392  } else if (mf1 || mfg1) {
5393  pgai = std::make_shared<ga_instruction_first_ind_tensor>
5394  (pnode->tensor(), *pctx1, pnode->qdim1, mf1, mfg1);
5395  if (mf1 && mf1->is_uniform())
5396  { is_uniform = true; pctx1->invalid_convex_num(); }
5397  } else if (mf2 || mfg2) {
5398  pgai = std::make_shared<ga_instruction_second_ind_tensor>
5399  (pnode->tensor(), *pctx2, pnode->qdim2, mf2, mfg2);
5400  if (mf2 && mf2->is_uniform())
5401  { is_uniform = true; pctx2->invalid_convex_num(); }
5402  }
5403  }
5404 
5405  // Optimization: detects if an equivalent node has already been compiled
5406  pnode->t.set_to_original();
5407  if (rmi.node_list.count(pnode->hash_value) != 0) {
5408  for (pga_tree_node &pnode1 : rmi.node_list[pnode->hash_value]) {
5409  // cout << "found potential equivalent nodes ";
5410  // ga_print_node(pnode, cout);
5411  // cout << " and "; ga_print_node(pnode1, cout); cout << endl;
5412  if (sub_tree_are_equal(pnode, pnode1, workspace, 1)) {
5413  pnode->t.set_to_copy(pnode1->t);
5414  return;
5415  }
5416  if (sub_tree_are_equal(pnode, pnode1, workspace, 2)) {
5417  // cout << "confirmed with transpose" << endl;
5418  if (pnode->nb_test_functions() == 2) {
5419  if (pgai) { // resize instruction if needed
5420  if (is_uniform)
5421  { pgai->exec(); }
5422  else { rmi.instructions.push_back(std::move(pgai)); }
5423  }
5424  pgai = std::make_shared<ga_instruction_transpose_test>
5425  (pnode->tensor(), pnode1->tensor());
5426  rmi.instructions.push_back(std::move(pgai));
5427  } else {
5428  pnode->t.set_to_copy(pnode1->t);
5429  }
5430  return;
5431  }
5432  // cout << "sub_tree_are_equal = " << int(sub_tree_are_equal(pnode, pnode1, workspace, 1)) << endl;
5433  std::stringstream ss;
5434  ss << "Detected wrong equivalent nodes:" << endl;
5435  ga_print_node(pnode, ss);
5436  ss << endl << " and " << endl;
5437  ga_print_node(pnode1, ss);
5438  ss << endl << "This is harmless, but hash values could be adapted." << endl;
5439  GMM_TRACE2(ss.str());
5440  }
5441  }
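  // In other words, sub-trees are hashed and an equivalent, already compiled
  // node makes the current node reuse its tensor (set_to_copy), possibly
  // through an extra transposition instruction when the two test functions
  // are swapped; only genuinely new nodes get evaluation instructions of
  // their own below.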
5442 
5443  if (pgai) { // resize instruction if needed and no equivalent node detected
5444  if (is_uniform) { pgai->exec(); }
5445  else {
5446  if (tensor_to_adapt)
5447  rmi.instructions.push_back(std::move(pgai));
5448  else
5449  rmi.elt_instructions.push_back(std::move(pgai));
5450  }
5451  }
5452 
5453  size_type interpolate_filter_inst = rmi.instructions.size();
5454  if (pnode->node_type == GA_NODE_INTERPOLATE_FILTER) {
5455  pgai = pga_instruction();
5456  rmi.instructions.push_back(std::move(pgai));
5457  if_hierarchy.increment();
5458  new_if_hierarchy.child_of(if_hierarchy);
5459  pif_hierarchy = &new_if_hierarchy;
5460  }
5461 
5462  for (size_type i = 0; i < pnode->children.size(); ++i)
5463  ga_compile_node(pnode->children[i], workspace, gis, rmi, m,
5464  function_case, *pif_hierarchy);
5465 
5466  if (pnode->node_type == GA_NODE_INTERPOLATE_FILTER) {
5467  const std::string &intn = pnode->interpolate_name;
5468  ga_instruction_set::interpolate_info &inin = rmi.interpolate_infos[intn];
5469  pgai = std::make_shared<ga_instruction_interpolate_filter>
5470  (pnode->tensor(), inin, pnode->nbc1,
5471  int(rmi.instructions.size() - interpolate_filter_inst));
5472  rmi.instructions[interpolate_filter_inst].swap(pgai);
5473  pgai = std::make_shared<ga_instruction_copy_tensor>
5474  (pnode->tensor(), pnode->children[0]->tensor());
5475  rmi.instructions.push_back(std::move(pgai));
5476  ga_clear_node_list(pnode->children[0], rmi.node_list);
5477  }
5478 
5479  static scalar_type minus = -scalar_type(1);
5480  size_type nbch = pnode->children.size();
5481  pga_tree_node child0 = (nbch > 0) ? pnode->children[0] : 0;
5482  pga_tree_node child1 = (nbch > 1) ? pnode->children[1] : 0;
5483  bgeot::multi_index mi;
5484  const bgeot::multi_index &size0 = child0 ? child0->t.sizes() : mi;
5485  // const bgeot::multi_index &size1 = child1 ? child1->t.sizes() : mi;
5486  size_type dim0 = child0 ? child0->tensor_order() : 0;
5487  size_type dim1 = child1 ? child1->tensor_order() : 0;
5488 
5489  switch (pnode->node_type) {
5490 
5491  case GA_NODE_PREDEF_FUNC: case GA_NODE_OPERATOR: case GA_NODE_SPEC_FUNC:
5492  case GA_NODE_CONSTANT: case GA_NODE_ALLINDICES: case GA_NODE_ZERO:
5493  case GA_NODE_RESHAPE: case GA_NODE_CROSS_PRODUCT:
5494  case GA_NODE_SWAP_IND: case GA_NODE_IND_MOVE_LAST:
5495  case GA_NODE_CONTRACT: case GA_NODE_INTERPOLATE_FILTER:
5496  break;
5497 
5498  case GA_NODE_X:
5499  GMM_ASSERT1(!function_case,
5500  "No use of X is allowed in scalar functions");
5501  if (pnode->nbc1) {
5502  GA_DEBUG_ASSERT(pnode->tensor().size() == 1, "dimensions mismatch");
5503  GMM_ASSERT1(pnode->nbc1 <= m.dim(),
5504  "Bad index for X in expression");
5505  pgai = std::make_shared<ga_instruction_X_component>
5506  (pnode->tensor()[0], gis.ctx, pnode->nbc1-1);
5507  } else {
5508  if (pnode->tensor().size() != m.dim())
5509  pnode->init_vector_tensor(m.dim());
5510  pgai = std::make_shared<ga_instruction_X>(pnode->tensor(), gis.ctx);
5511  }
5512  rmi.instructions.push_back(std::move(pgai));
5513  break;
5514 
5515  case GA_NODE_ELT_SIZE:
5516  GMM_ASSERT1(!function_case,
5517  "No use of element_size is allowed in functions");
5518  if (pnode->tensor().size() != 1) pnode->init_scalar_tensor(0);
5519  pgai = std::make_shared<ga_instruction_element_size>
5520  (pnode->tensor(), gis.elt_size);
5521  gis.need_elt_size = true;
5522  rmi.instructions.push_back(std::move(pgai));
5523  break;
5524 
5525  case GA_NODE_ELT_K:
5526  GMM_ASSERT1(!function_case,
5527  "No use of element_K is allowed in functions");
5528  pgai = std::make_shared<ga_instruction_element_K>(pnode->tensor(),
5529  gis.ctx);
5530  rmi.instructions.push_back(std::move(pgai));
5531  break;
5532 
5533  case GA_NODE_ELT_B:
5534  GMM_ASSERT1(!function_case,
5535  "No use of element_B is allowed in functions");
5536  pgai = std::make_shared<ga_instruction_element_B>(pnode->tensor(),
5537  gis.ctx);
5538  rmi.instructions.push_back(std::move(pgai));
5539  break;
5540 
5541  case GA_NODE_NORMAL:
5542  {
5543  GMM_ASSERT1(!function_case,
5544  "No use of Normal is allowed in functions");
5545  if (pnode->tensor().size() != m.dim())
5546  pnode->init_vector_tensor(m.dim());
5547  const mesh_im_level_set *mimls
5548  = dynamic_cast<const mesh_im_level_set *>(rmi.im);
5549  if (mimls && mimls->location()==mesh_im_level_set::INTEGRATE_BOUNDARY) {
5550  // Call with ctx (Gauss point)
5551  pgai = std::make_shared<ga_instruction_level_set_normal_vector>
5552  (pnode->tensor(), mimls, gis.ctx);
5553  rmi.instructions.push_back(std::move(pgai));
5554  } else {
5555  pgai = std::make_shared<ga_instruction_copy_Normal>
5556  (pnode->tensor(), gis.Normal);
5557  rmi.instructions.push_back(std::move(pgai));
5558  }
5559  }
5560  break;
5561 
5562  case GA_NODE_INTERPOLATE_X:
5563  case GA_NODE_INTERPOLATE_NORMAL:
5564  GMM_ASSERT1(!function_case,
5565  "No use of Interpolate is allowed in functions");
5566  if (pnode->tensor().size() != m.dim())
5567  pnode->init_vector_tensor(m.dim());
5568  if (pnode->node_type == GA_NODE_INTERPOLATE_X)
5569  pgai = std::make_shared<ga_instruction_copy_interpolated_small_vect>
5570  (pnode->tensor(),
5571  rmi.interpolate_infos[pnode->interpolate_name].pt_y,
5572  rmi.interpolate_infos[pnode->interpolate_name]);
5573  else if (pnode->node_type == GA_NODE_INTERPOLATE_NORMAL)
5574  pgai = std::make_shared<ga_instruction_copy_Normal>
5575  (pnode->tensor(),
5576  rmi.interpolate_infos[pnode->interpolate_name].Normal);
5577  rmi.instructions.push_back(std::move(pgai));
5578  break;
5579 
5580  case GA_NODE_INTERPOLATE_ELT_K:
5581  case GA_NODE_INTERPOLATE_ELT_B:
5582  GMM_ASSERT1(!function_case,
5583  "No use of Interpolate is allowed in functions");
5584  if (pnode->node_type == GA_NODE_INTERPOLATE_ELT_K)
5585  pgai = std::make_shared<ga_instruction_element_K>
5586  (pnode->tensor(),
5587  rmi.interpolate_infos[pnode->interpolate_name].ctx);
5588  else if (pnode->node_type == GA_NODE_INTERPOLATE_ELT_B)
5589  pgai = std::make_shared<ga_instruction_element_B>
5590  (pnode->tensor(),
5591  rmi.interpolate_infos[pnode->interpolate_name].ctx);
5592  rmi.instructions.push_back(std::move(pgai));
5593  break;
5594 
5595  case GA_NODE_SECONDARY_DOMAIN_X:
5596  case GA_NODE_SECONDARY_DOMAIN_NORMAL:
5597  {
5598  GMM_ASSERT1(!function_case,
5599  "No use of Secondary_domain is allowed in functions");
5600  auto psd = workspace.secondary_domain(pnode->interpolate_name);
5601  size_type sddim = psd->mim().linked_mesh().dim();
5602  if (pnode->tensor().size() != sddim)
5603  pnode->init_vector_tensor(sddim);
5604  if (pnode->node_type == GA_NODE_SECONDARY_DOMAIN_X)
5605  pgai = std::make_shared<ga_instruction_X>
5606  (pnode->tensor(), rmi.secondary_domain_infos.ctx);
5607  else if (pnode->node_type == GA_NODE_SECONDARY_DOMAIN_NORMAL)
5608  pgai = std::make_shared<ga_instruction_copy_Normal>
5609  (pnode->tensor(), rmi.secondary_domain_infos.Normal);
5610  rmi.instructions.push_back(std::move(pgai));
5611  }
5612  break;
5613 
5614  case GA_NODE_VAL: case GA_NODE_GRAD:
5615  case GA_NODE_HESS: case GA_NODE_DIVERG:
5616  case GA_NODE_ELEMENTARY_VAL: case GA_NODE_ELEMENTARY_GRAD:
5617  case GA_NODE_ELEMENTARY_HESS: case GA_NODE_ELEMENTARY_DIVERG:
5618  case GA_NODE_XFEM_PLUS_VAL: case GA_NODE_XFEM_PLUS_GRAD:
5619  case GA_NODE_XFEM_PLUS_HESS: case GA_NODE_XFEM_PLUS_DIVERG:
5620  case GA_NODE_XFEM_MINUS_VAL: case GA_NODE_XFEM_MINUS_GRAD:
5621  case GA_NODE_XFEM_MINUS_HESS: case GA_NODE_XFEM_MINUS_DIVERG:
5622  {
5623  bool is_elementary = (pnode->node_type == GA_NODE_ELEMENTARY_VAL ||
5624  pnode->node_type == GA_NODE_ELEMENTARY_GRAD ||
5625  pnode->node_type == GA_NODE_ELEMENTARY_HESS ||
5626  pnode->node_type == GA_NODE_ELEMENTARY_DIVERG);
5627  if (function_case) {
5628  GMM_ASSERT1(!is_elementary,
5629  "No elementary transformation is allowed in functions");
5630  GMM_ASSERT1(pnode->node_type != GA_NODE_XFEM_PLUS_VAL &&
5631  pnode->node_type != GA_NODE_XFEM_PLUS_GRAD &&
5632  pnode->node_type != GA_NODE_XFEM_PLUS_HESS &&
5633  pnode->node_type != GA_NODE_XFEM_PLUS_DIVERG,
5634  "Xfem_plus not allowed in functions");
5635  GMM_ASSERT1(pnode->node_type != GA_NODE_XFEM_MINUS_VAL &&
5636  pnode->node_type != GA_NODE_XFEM_MINUS_GRAD &&
5637  pnode->node_type != GA_NODE_XFEM_MINUS_HESS &&
5638  pnode->node_type != GA_NODE_XFEM_MINUS_DIVERG,
5639  "Xfem_minus not allowed in functions");
5640  const mesh_fem *mf = workspace.associated_mf(pnode->name);
5641  const im_data *imd = workspace.associated_im_data(pnode->name);
5642  GMM_ASSERT1(!mf, "No fem expression is allowed in "
5643  "function expression");
5644  GMM_ASSERT1(!imd, "No integration method data is allowed in "
5645  "function expression");
5646  if (gmm::vect_size(workspace.value(pnode->name)) == 1)
5647  pgai = std::make_shared<ga_instruction_copy_scalar>
5648  (pnode->tensor()[0], (workspace.value(pnode->name))[0]);
5649  else
5650  pgai = std::make_shared<ga_instruction_copy_vect>
5651  (pnode->tensor().as_vector(), workspace.value(pnode->name));
5652  rmi.instructions.push_back(std::move(pgai));
5653  } else {
5654  const mesh_fem *mf = workspace.associated_mf(pnode->name), *mfo=mf;
5655  const im_data *imd = workspace.associated_im_data(pnode->name);
5656 
5657  if (is_elementary) {
5658  mf = workspace.associated_mf(pnode->elementary_target);
5659  GMM_ASSERT1(mf && mfo,
5660  "Wrong context for elementary transformation");
5661  GMM_ASSERT1(&(mfo->linked_mesh()) == &(m),
5662  "The finite element of variable " << pnode->name
5663  << " has to be defined on the same mesh as the "
5664  << "integration method or interpolation used");
5665  }
5666 
5667  if (imd) {
5668  GMM_ASSERT1(pnode->node_type == GA_NODE_VAL,
5669  "Only values can be extracted on im_data (no " <<
5670  "gradient, Hessian, xfem or elementary transformation" <<
5671  " allowed)");
5672  pgai = std::make_shared<ga_instruction_extract_local_im_data>
5673  (pnode->tensor(), *imd, workspace.value(pnode->name),
5674  gis.pai, gis.ctx, workspace.qdim(pnode->name));
5675  rmi.instructions.push_back(std::move(pgai));
5676  } else {
5677  GMM_ASSERT1(mf, "Internal error");
5678 
5679  GMM_ASSERT1(&(mf->linked_mesh()) == &(m),
5680  "The finite element of variable " <<
5681  (is_elementary ? pnode->elementary_target : pnode->name)
5682  << " has to be defined on the same mesh as the "
5683  << "integration method or interpolation used");
5684 
5685  // An instruction for extracting local dofs of the variable.
5686  if (rmi.local_dofs.count(pnode->name) == 0) {
5687  rmi.local_dofs[pnode->name] = base_vector(1);
5688  extend_variable_in_gis(workspace, pnode->name, gis);
5689  // cout << "local dof of " << pnode->name << endl;
5690  size_type qmult2 = mfo->get_qdim();
5691  if (qmult2 > 1 && !(mfo->is_uniformly_vectorized()))
5692  qmult2 = size_type(-1);
5693  pgai = std::make_shared<ga_instruction_slice_local_dofs>
5694  (*mfo, *(gis.extended_vars[pnode->name]), gis.ctx,
5695  rmi.local_dofs[pnode->name],
5696  workspace.qdim(pnode->name) / mfo->get_qdim(), qmult2);
5697  rmi.elt_instructions.push_back(std::move(pgai));
5698  }
5699 
5700  // An instruction for pfp update
5701  if (mf->is_uniform()) {
5702  if (rmi.pfps.count(mf) == 0) {
5703  rmi.pfps[mf] = 0;
5704  pgai = std::make_shared<ga_instruction_update_pfp>
5705  (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
5706  rmi.begin_instructions.push_back(std::move(pgai));
5707  }
5708  } else if (rmi.pfps.count(mf) == 0 ||
5709  !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
5710  rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
5711  rmi.pfps[mf] = 0;
5712  pgai = std::make_shared<ga_instruction_update_pfp>
5713  (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
5714  rmi.instructions.push_back(std::move(pgai));
5715  }
5716 
5717  // An instruction for the base value
5718  pgai = pga_instruction();
5719  switch (pnode->node_type) {
5720  case GA_NODE_VAL: case GA_NODE_ELEMENTARY_VAL:
5721  if (rmi.base.count(mf) == 0 ||
5722  !if_hierarchy.is_compatible(rmi.base_hierarchy[mf])) {
5723  rmi.base_hierarchy[mf].push_back(if_hierarchy);
5724  pgai = std::make_shared<ga_instruction_val_base>
5725  (rmi.base[mf], gis.ctx, *mf, rmi.pfps[mf]);
5726  }
5727  break;
5728  case GA_NODE_XFEM_PLUS_VAL:
5729  if (rmi.xfem_plus_base.count(mf) == 0 ||
5730  !if_hierarchy.is_compatible(rmi.xfem_plus_base_hierarchy[mf]))
5731  {
5732  rmi.xfem_plus_base_hierarchy[mf].push_back(if_hierarchy);
5733  pgai = std::make_shared<ga_instruction_xfem_plus_val_base>
5734  (rmi.xfem_plus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
5735  }
5736  break;
5737  case GA_NODE_XFEM_MINUS_VAL:
5738  if (rmi.xfem_minus_base.count(mf) == 0 ||
5739  !if_hierarchy.is_compatible(rmi.xfem_minus_base_hierarchy[mf]))
5740  {
5741  rmi.xfem_minus_base_hierarchy[mf].push_back(if_hierarchy);
5742  pgai = std::make_shared<ga_instruction_xfem_minus_val_base>
5743  (rmi.xfem_minus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
5744  }
5745  break;
5746  case GA_NODE_GRAD: case GA_NODE_DIVERG:
5747  case GA_NODE_ELEMENTARY_GRAD: case GA_NODE_ELEMENTARY_DIVERG:
5748  if (rmi.grad.count(mf) == 0 ||
5749  !if_hierarchy.is_compatible(rmi.grad_hierarchy[mf])) {
5750  rmi.grad_hierarchy[mf].push_back(if_hierarchy);
5751  pgai = std::make_shared<ga_instruction_grad_base>
5752  (rmi.grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
5753  }
5754  break;
5755  case GA_NODE_XFEM_PLUS_GRAD: case GA_NODE_XFEM_PLUS_DIVERG:
5756  if (rmi.xfem_plus_grad.count(mf) == 0 ||
5757  !if_hierarchy.is_compatible(rmi.xfem_plus_grad_hierarchy[mf]))
5758  {
5759  rmi.xfem_plus_grad_hierarchy[mf].push_back(if_hierarchy);
5760  pgai = std::make_shared<ga_instruction_xfem_plus_grad_base>
5761  (rmi.xfem_plus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
5762  }
5763  break;
5764  case GA_NODE_XFEM_MINUS_GRAD: case GA_NODE_XFEM_MINUS_DIVERG:
5765  if (rmi.xfem_minus_grad.count(mf) == 0 ||
5766  !if_hierarchy.is_compatible(rmi.xfem_minus_grad_hierarchy[mf]))
5767  {
5768  rmi.xfem_minus_grad_hierarchy[mf].push_back(if_hierarchy);
5769  pgai = std::make_shared<ga_instruction_xfem_minus_grad_base>
5770  (rmi.xfem_minus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
5771  }
5772  break;
5773  case GA_NODE_HESS: case GA_NODE_ELEMENTARY_HESS:
5774  if (rmi.hess.count(mf) == 0 ||
5775  !if_hierarchy.is_compatible(rmi.hess_hierarchy[mf])) {
5776  rmi.hess_hierarchy[mf].push_back(if_hierarchy);
5777  pgai = std::make_shared<ga_instruction_hess_base>
5778  (rmi.hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
5779  }
5780  break;
5781  case GA_NODE_XFEM_PLUS_HESS:
5782  if (rmi.xfem_plus_hess.count(mf) == 0 ||
5783  !if_hierarchy.is_compatible(rmi.xfem_plus_hess_hierarchy[mf]))
5784  {
5785  rmi.xfem_plus_hess_hierarchy[mf].push_back(if_hierarchy);
5786  pgai = std::make_shared<ga_instruction_xfem_plus_hess_base>
5787  (rmi.xfem_plus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
5788  }
5789  break;
5790  case GA_NODE_XFEM_MINUS_HESS:
5791  if (rmi.xfem_minus_hess.count(mf) == 0 ||
5792  !if_hierarchy.is_compatible(rmi.xfem_minus_hess_hierarchy[mf]))
5793  {
5794  rmi.xfem_minus_hess_hierarchy[mf].push_back(if_hierarchy);
5795  pgai = std::make_shared<ga_instruction_xfem_minus_hess_base>
5796  (rmi.xfem_minus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
5797  }
5798  break;
5799 
5800  default : GMM_ASSERT1(false, "Internal error");
5801  }
5802  if (pgai) rmi.instructions.push_back(std::move(pgai));
5803 
5804  // The eval instruction
5805  switch (pnode->node_type) {
5806  case GA_NODE_VAL: // --> t(target_dim*Qmult)
5807  pgai = std::make_shared<ga_instruction_val>
5808  (pnode->tensor(), rmi.base[mf], rmi.local_dofs[pnode->name],
5809  workspace.qdim(pnode->name));
5810  break;
5811  case GA_NODE_GRAD: // --> t(target_dim*Qmult,N)
5812  pgai = std::make_shared<ga_instruction_grad>
5813  (pnode->tensor(), rmi.grad[mf],
5814  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5815  break;
5816  case GA_NODE_HESS: // --> t(target_dim*Qmult,N,N)
5817  pgai = std::make_shared<ga_instruction_hess>
5818  (pnode->tensor(), rmi.hess[mf],
5819  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5820  break;
5821  case GA_NODE_DIVERG: // --> t(1)
5822  pgai = std::make_shared<ga_instruction_diverg>
5823  (pnode->tensor(), rmi.grad[mf],
5824  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5825  break;
5826  case GA_NODE_XFEM_PLUS_VAL: // --> t(target_dim*Qmult)
5827  pgai = std::make_shared<ga_instruction_val>
5828  (pnode->tensor(), rmi.xfem_plus_base[mf],
5829  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5830  break;
5831  case GA_NODE_XFEM_PLUS_GRAD: // --> t(target_dim*Qmult,N)
5832  pgai = std::make_shared<ga_instruction_grad>
5833  (pnode->tensor(), rmi.xfem_plus_grad[mf],
5834  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5835  break;
5836  case GA_NODE_XFEM_PLUS_HESS: // --> t(target_dim*Qmult,N,N)
5837  pgai = std::make_shared<ga_instruction_hess>
5838  (pnode->tensor(), rmi.xfem_plus_hess[mf],
5839  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5840  break;
5841  case GA_NODE_XFEM_PLUS_DIVERG: // --> t(1)
5842  pgai = std::make_shared<ga_instruction_diverg>
5843  (pnode->tensor(), rmi.xfem_plus_grad[mf],
5844  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5845  break;
5846  case GA_NODE_XFEM_MINUS_VAL: // --> t(target_dim*Qmult)
5847  pgai = std::make_shared<ga_instruction_val>
5848  (pnode->tensor(), rmi.xfem_minus_base[mf],
5849  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5850  break;
5851  case GA_NODE_XFEM_MINUS_GRAD: // --> t(target_dim*Qmult,N)
5852  pgai = std::make_shared<ga_instruction_grad>
5853  (pnode->tensor(), rmi.xfem_minus_grad[mf],
5854  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5855  break;
5856  case GA_NODE_XFEM_MINUS_HESS: // --> t(target_dim*Qmult,N,N)
5857  pgai = std::make_shared<ga_instruction_hess>
5858  (pnode->tensor(), rmi.xfem_minus_hess[mf],
5859  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5860  break;
5861  case GA_NODE_XFEM_MINUS_DIVERG: // --> t(1)
5862  pgai = std::make_shared<ga_instruction_diverg>
5863  (pnode->tensor(), rmi.xfem_minus_grad[mf],
5864  rmi.local_dofs[pnode->name], workspace.qdim(pnode->name));
5865  break;
5866  case GA_NODE_ELEMENTARY_VAL:
5867  { // --> t(target_dim*Qmult)
5868  ga_instruction_set::elementary_trans_info &eti
5869  = rmi.elementary_trans_infos
5870  [std::make_tuple(pnode->elementary_name, mfo, mf)];
5871  pgai =
5872  std::make_shared<ga_instruction_elementary_trans_val>
5873  (pnode->tensor(), rmi.base[mf],
5874  rmi.local_dofs[pnode->name],
5875  workspace.qdim(pnode->elementary_target),
5876  workspace.elementary_transformation(pnode->elementary_name),
5877  *mfo, *mf, gis.ctx, eti.M, eti.icv);
5878  }
5879  break;
5880  case GA_NODE_ELEMENTARY_GRAD:
5881  { // --> t(target_dim*Qmult,N)
5882  ga_instruction_set::elementary_trans_info &eti
5883  = rmi.elementary_trans_infos
5884  [std::make_tuple(pnode->elementary_name, mfo, mf)];
5885  pgai =
5886  std::make_shared<ga_instruction_elementary_trans_grad>
5887  (pnode->tensor(), rmi.grad[mf],
5888  rmi.local_dofs[pnode->name],
5889  workspace.qdim(pnode->elementary_target),
5890  workspace.elementary_transformation(pnode->elementary_name),
5891  *mfo, *mf, gis.ctx, eti.M, eti.icv);
5892  }
5893  break;
5894  case GA_NODE_ELEMENTARY_HESS:
5895  { // --> t(target_dim*Qmult,N,N)
5896  ga_instruction_set::elementary_trans_info &eti
5897  = rmi.elementary_trans_infos
5898  [std::make_tuple(pnode->elementary_name, mfo, mf)];
5899  pgai =
5900  std::make_shared<ga_instruction_elementary_trans_hess>
5901  (pnode->tensor(), rmi.hess[mf],
5902  rmi.local_dofs[pnode->name],
5903  workspace.qdim(pnode->elementary_target),
5904  workspace.elementary_transformation(pnode->elementary_name),
5905  *mfo, *mf, gis.ctx, eti.M, eti.icv);
5906  }
5907  break;
5908  case GA_NODE_ELEMENTARY_DIVERG:
5909  { // --> t(1)
5910  ga_instruction_set::elementary_trans_info &eti
5911  = rmi.elementary_trans_infos
5912  [std::make_tuple(pnode->elementary_name, mfo, mf)];
5913  pgai =
5914  std::make_shared<ga_instruction_elementary_trans_diverg>
5915  (pnode->tensor(), rmi.grad[mf],
5916  rmi.local_dofs[pnode->name],
5917  workspace.qdim(pnode->elementary_target),
5918  workspace.elementary_transformation(pnode->elementary_name),
5919  *mfo, *mf, gis.ctx, eti.M, eti.icv);
5920  }
5921  break;
5922  default: break;
5923  }
5924  rmi.instructions.push_back(std::move(pgai));
5925  }
5926  }
5927  }
5928  break;
5929 
5930  case GA_NODE_SECONDARY_DOMAIN_VAL: case GA_NODE_SECONDARY_DOMAIN_GRAD:
5931  case GA_NODE_SECONDARY_DOMAIN_HESS: case GA_NODE_SECONDARY_DOMAIN_DIVERG:
5932  {
5933  GMM_ASSERT1(!function_case, "internal error");
5934  const mesh_fem *mf = workspace.associated_mf(pnode->name);
5935  const im_data *imd = workspace.associated_im_data(pnode->name);
5936  const std::string &intn = pnode->interpolate_name;
5937  auto &sdi = rmi.secondary_domain_infos;
5938 
5939  fem_interpolation_context *pctx = &(sdi.ctx);
5940  papprox_integration pai = sdi.pai;
5941  psecondary_domain psd = workspace.secondary_domain(intn);
5942 
5943  if (imd) {
5944  pgai = std::make_shared<ga_instruction_extract_local_im_data>
5945  (pnode->tensor(), *imd, workspace.value(pnode->name),
5946  pai, *pctx, workspace.qdim(pnode->name));
5947  rmi.instructions.push_back(std::move(pgai));
5948  } else {
5949  GMM_ASSERT1(mf, "Internal error");
5950  GMM_ASSERT1(&(mf->linked_mesh()) == &(psd->mim().linked_mesh()),
5951  "The finite element of variable " << pnode->name <<
5952  " has to be defined on the same mesh as the "
5953  "integration method or interpolation used on the "
5954  "secondary domain");
5955 
5956  // An instruction for extracting local dofs of the variable.
5957  if (sdi.local_dofs.count(pnode->name) == 0) {
5958  sdi.local_dofs[pnode->name] = base_vector(1);
5959  extend_variable_in_gis(workspace, pnode->name, gis);
5960  size_type qmult2 = mf->get_qdim();
5961  if (qmult2 > 1 && !(mf->is_uniformly_vectorized()))
5962  qmult2 = size_type(-1);
5963  pgai = std::make_shared<ga_instruction_slice_local_dofs>
5964  (*mf, *(gis.extended_vars[pnode->name]), *pctx,
5965  sdi.local_dofs[pnode->name],
5966  workspace.qdim(pnode->name) / mf->get_qdim(), qmult2);
5967  rmi.elt_instructions.push_back(std::move(pgai));
5968  }
5969 
5970  // An instruction for pfp update
5971  if (mf->is_uniform()) {
5972  if (sdi.pfps.count(mf) == 0) {
5973  sdi.pfps[mf] = 0;
5974  pgai = std::make_shared<ga_instruction_update_pfp>
5975  (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
5976  rmi.begin_instructions.push_back(std::move(pgai));
5977  }
5978  } else if (sdi.pfps.count(mf) == 0 ||
5979  !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
5980  rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
5981  sdi.pfps[mf] = 0;
5982  pgai = std::make_shared<ga_instruction_update_pfp>
5983  (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
5984  rmi.instructions.push_back(std::move(pgai));
5985  }
5986 
5987  // An instruction for the base value
5988  pgai = pga_instruction();
5989  switch (pnode->node_type) {
5990  case GA_NODE_SECONDARY_DOMAIN_VAL:
5991  if (sdi.base.count(mf) == 0 ||
5992  !(if_hierarchy.is_compatible(rmi.base_hierarchy[mf]))) {
5993  rmi.base_hierarchy[mf].push_back(if_hierarchy);
5994  pgai = std::make_shared<ga_instruction_val_base>
5995  (sdi.base[mf], *pctx, *mf, sdi.pfps[mf]);
5996  }
5997  break;
5998  case GA_NODE_SECONDARY_DOMAIN_GRAD:
5999  case GA_NODE_SECONDARY_DOMAIN_DIVERG:
6000  if (sdi.grad.count(mf) == 0 ||
6001  !(if_hierarchy.is_compatible(rmi.grad_hierarchy[mf]))) {
6002  rmi.grad_hierarchy[mf].push_back(if_hierarchy);
6003  pgai = std::make_shared<ga_instruction_grad_base>
6004  (sdi.grad[mf], *pctx, *mf, sdi.pfps[mf]);
6005  }
6006  break;
6007  case GA_NODE_SECONDARY_DOMAIN_HESS:
6008  if (sdi.hess.count(mf) == 0 ||
6009  !(if_hierarchy.is_compatible(rmi.hess_hierarchy[mf]))) {
6010  rmi.hess_hierarchy[mf].push_back(if_hierarchy);
6011  pgai = std::make_shared<ga_instruction_hess_base>
6012  (sdi.hess[mf], *pctx, *mf, sdi.pfps[mf]);
6013  }
6014  break;
6015  default : GMM_ASSERT1(false, "Internal error");
6016  }
6017  if (pgai) rmi.instructions.push_back(std::move(pgai));
6018 
6019  // The eval instruction
6020  switch (pnode->node_type) {
6021  case GA_NODE_SECONDARY_DOMAIN_VAL: // --> t(target_dim*Qmult)
6022  pgai = std::make_shared<ga_instruction_val>
6023  (pnode->tensor(), sdi.base[mf], sdi.local_dofs[pnode->name],
6024  workspace.qdim(pnode->name));
6025  break;
6026  case GA_NODE_SECONDARY_DOMAIN_GRAD: // --> t(target_dim*Qmult,N)
6027  pgai = std::make_shared<ga_instruction_grad>
6028  (pnode->tensor(), sdi.grad[mf],
6029  sdi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6030  break;
6031  case GA_NODE_SECONDARY_DOMAIN_HESS: // --> t(target_dim*Qmult,N,N)
6032  pgai = std::make_shared<ga_instruction_hess>
6033  (pnode->tensor(), sdi.hess[mf],
6034  sdi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6035  break;
6036  case GA_NODE_SECONDARY_DOMAIN_DIVERG: // --> t(1)
6037  pgai = std::make_shared<ga_instruction_diverg>
6038  (pnode->tensor(), sdi.grad[mf],
6039  sdi.local_dofs[pnode->name], workspace.qdim(pnode->name));
6040  break;
6041  default: break;
6042  }
6043  rmi.instructions.push_back(std::move(pgai));
6044  }
6045  }
6046  break;
6047 
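      // Value/gradient/Hessian/divergence of a variable seen through an
      // interpolate transformation; for variable groups, the mesh_fem and the
      // data vector are resolved at run time via rmi.interpolate_infos.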
6048  case GA_NODE_INTERPOLATE_VAL: case GA_NODE_INTERPOLATE_GRAD:
6049  case GA_NODE_INTERPOLATE_HESS: case GA_NODE_INTERPOLATE_DIVERG:
6050  {
6051  extend_variable_in_gis(workspace, pnode->name, gis);
6052 
6053  const mesh_fem *mfn = workspace.associated_mf(pnode->name), **mfg = 0;
6054  const std::string &intn = pnode->interpolate_name;
6055  const base_vector *Un = gis.extended_vars[pnode->name], **Ug = 0;
6056  fem_interpolation_context *pctx = &(rmi.interpolate_infos[intn].ctx);
6057  const mesh **m2 = &(rmi.interpolate_infos[intn].m);
6058  if (workspace.variable_group_exists(pnode->name)) {
6059  ga_instruction_set::variable_group_info &vgi =
6060  rmi.interpolate_infos[intn].groups_info[pnode->name];
6061  mfg = &(vgi.mf); mfn = 0; Ug = &(vgi.U); Un = 0;
6062  }
6063 
6064  if (pnode->node_type == GA_NODE_INTERPOLATE_VAL) {
6065  // --> t(target_dim*Qmult)
6066  pgai = std::make_shared<ga_instruction_interpolate_val>
6067  (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
6068  workspace.qdim(pnode->name),
6069  gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
6070  } else if (pnode->node_type == GA_NODE_INTERPOLATE_GRAD) {
6071  // --> t(target_dim*Qmult,N)
6072  pgai = std::make_shared<ga_instruction_interpolate_grad>
6073  (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
6074  workspace.qdim(pnode->name),
6075  gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
6076  } else if (pnode->node_type == GA_NODE_INTERPOLATE_HESS) {
6077  // --> t(target_dim*Qmult,N,N)
6078  pgai = std::make_shared<ga_instruction_interpolate_hess>
6079  (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
6080  workspace.qdim(pnode->name),
6081  gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
6082  } else { // --> t(1)
6083  pgai = std::make_shared<ga_instruction_interpolate_diverg>
6084  (pnode->tensor(), m2, mfn, mfg, Un, Ug, *pctx,
6085  workspace.qdim(pnode->name),
6086  gis.ipt, gis.fp_pool, rmi.interpolate_infos[intn]);
6087  }
6088  rmi.instructions.push_back(std::move(pgai));
6089  }
6090  break;
6091 
6092  case GA_NODE_INTERPOLATE_DERIVATIVE:
6093  GMM_ASSERT1(!function_case,
6094  "No use of Interpolate is allowed in functions");
6095  pgai = std::make_shared<ga_instruction_copy_tensor_possibly_void>
6096  (pnode->tensor(),
6097  rmi.interpolate_infos[pnode->interpolate_name_der]
6098  .derivatives[var_trans_pair(pnode->name, pnode->interpolate_name)]);
6099  rmi.instructions.push_back(std::move(pgai));
6100  break;
6101 
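      // Test functions, including the elementary transformation and Xfem
      // plus/minus variants: update the pfp if needed, compute the
      // base/grad/hess values and copy (or transform) them into the node tensor.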
6102  case GA_NODE_VAL_TEST: case GA_NODE_GRAD_TEST:
6103  case GA_NODE_HESS_TEST: case GA_NODE_DIVERG_TEST:
6104  case GA_NODE_ELEMENTARY_VAL_TEST: case GA_NODE_ELEMENTARY_GRAD_TEST:
6105  case GA_NODE_ELEMENTARY_HESS_TEST: case GA_NODE_ELEMENTARY_DIVERG_TEST:
6106  case GA_NODE_XFEM_PLUS_VAL_TEST: case GA_NODE_XFEM_PLUS_GRAD_TEST:
6107  case GA_NODE_XFEM_PLUS_HESS_TEST: case GA_NODE_XFEM_PLUS_DIVERG_TEST:
6108  case GA_NODE_XFEM_MINUS_VAL_TEST: case GA_NODE_XFEM_MINUS_GRAD_TEST:
6109  case GA_NODE_XFEM_MINUS_HESS_TEST: case GA_NODE_XFEM_MINUS_DIVERG_TEST:
6110  // GMM_ASSERT1(!function_case,
6111  // "Test functions not allowed in functions");
6112  {
6113  bool is_elementary = (pnode->node_type==GA_NODE_ELEMENTARY_VAL_TEST ||
6114  pnode->node_type==GA_NODE_ELEMENTARY_GRAD_TEST ||
6115  pnode->node_type==GA_NODE_ELEMENTARY_HESS_TEST ||
6116  pnode->node_type==GA_NODE_ELEMENTARY_DIVERG_TEST);
6117  const mesh_fem *mf = workspace.associated_mf(pnode->name), *mfo=mf;
6118  if (is_elementary) {
6119  mf = workspace.associated_mf(pnode->elementary_target);
6120  GMM_ASSERT1(mf && mfo,
6121  "Wrong context for elementary transformation");
6122  GMM_ASSERT1(&(mfo->linked_mesh()) == &(m),
6123  "The finite element of variable " << pnode->name
6124  << " has to be defined on the same mesh as the "
6125  << "integration method or interpolation used");
6126  }
6127 
6128  if (mf) {
6129  GMM_ASSERT1(&(mf->linked_mesh()) == &(m),
6130  "The finite element of variable " <<
6131  (is_elementary ? pnode->elementary_target : pnode->name)
6132  << " and the applied integration method have to be"
6133  << " defined on the same mesh");
6134 
6135  // An instruction for pfp update
6136  if (is_uniform) {
6137  if (rmi.pfps.count(mf) == 0) {
6138  rmi.pfps[mf] = 0;
6139  pgai = std::make_shared<ga_instruction_update_pfp>
6140  (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
6141  rmi.begin_instructions.push_back(std::move(pgai));
6142  }
6143  } else if (rmi.pfps.count(mf) == 0 ||
6144  !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
6145  rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
6146  rmi.pfps[mf] = 0;
6147  pgai = std::make_shared<ga_instruction_update_pfp>
6148  (*mf, rmi.pfps[mf], gis.ctx, gis.fp_pool);
6149  rmi.instructions.push_back(std::move(pgai));
6150  }
6151 
6152  // An instruction for the base value
6153  pgai = pga_instruction();
6154  switch (pnode->node_type) {
6155  case GA_NODE_VAL_TEST: case GA_NODE_ELEMENTARY_VAL_TEST:
6156  if (rmi.base.count(mf) == 0 ||
6157  !if_hierarchy.is_compatible(rmi.base_hierarchy[mf])) {
6158  rmi.base_hierarchy[mf].push_back(if_hierarchy);
6159  pgai = std::make_shared<ga_instruction_val_base>
6160  (rmi.base[mf], gis.ctx, *mf, rmi.pfps[mf]);
6161  }
6162  break;
6163  case GA_NODE_XFEM_PLUS_VAL_TEST:
6164  if (rmi.xfem_plus_base.count(mf) == 0 ||
6165  !if_hierarchy.is_compatible(rmi.xfem_plus_base_hierarchy[mf]))
6166  {
6167  rmi.xfem_plus_base_hierarchy[mf].push_back(if_hierarchy);
6168  pgai = std::make_shared<ga_instruction_xfem_plus_val_base>
6169  (rmi.xfem_plus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
6170  }
6171  break;
6172  case GA_NODE_XFEM_MINUS_VAL_TEST:
6173  if (rmi.xfem_minus_base.count(mf) == 0 ||
6174  !if_hierarchy.is_compatible(rmi.xfem_minus_base_hierarchy[mf]))
6175  {
6176  rmi.xfem_minus_base_hierarchy[mf].push_back(if_hierarchy);
6177  pgai = std::make_shared<ga_instruction_xfem_minus_val_base>
6178  (rmi.xfem_minus_base[mf], gis.ctx, *mf, rmi.pfps[mf]);
6179  }
6180  break;
6181  case GA_NODE_GRAD_TEST: case GA_NODE_DIVERG_TEST:
6182  case GA_NODE_ELEMENTARY_GRAD_TEST:
6183  case GA_NODE_ELEMENTARY_DIVERG_TEST:
6184  if (rmi.grad.count(mf) == 0 ||
6185  !if_hierarchy.is_compatible(rmi.grad_hierarchy[mf])) {
6186  rmi.grad_hierarchy[mf].push_back(if_hierarchy);
6187  pgai = std::make_shared<ga_instruction_grad_base>
6188  (rmi.grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
6189  }
6190  break;
6191  case GA_NODE_XFEM_PLUS_GRAD_TEST: case GA_NODE_XFEM_PLUS_DIVERG_TEST:
6192  if (rmi.xfem_plus_grad.count(mf) == 0 ||
6193  !if_hierarchy.is_compatible(rmi.xfem_plus_grad_hierarchy[mf]))
6194  {
6195  rmi.xfem_plus_grad_hierarchy[mf].push_back(if_hierarchy);
6196  pgai = std::make_shared<ga_instruction_xfem_plus_grad_base>
6197  (rmi.xfem_plus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
6198  }
6199  break;
6200  case GA_NODE_XFEM_MINUS_GRAD_TEST:
6201  case GA_NODE_XFEM_MINUS_DIVERG_TEST:
6202  if (rmi.xfem_minus_grad.count(mf) == 0 ||
6203  !if_hierarchy.is_compatible(rmi.xfem_minus_grad_hierarchy[mf]))
6204  {
6205  rmi.xfem_minus_grad_hierarchy[mf].push_back(if_hierarchy);
6206  pgai = std::make_shared<ga_instruction_xfem_minus_grad_base>
6207  (rmi.xfem_minus_grad[mf], gis.ctx, *mf, rmi.pfps[mf]);
6208  }
6209  break;
6210  case GA_NODE_HESS_TEST: case GA_NODE_ELEMENTARY_HESS_TEST:
6211  if (rmi.hess.count(mf) == 0 ||
6212  !if_hierarchy.is_compatible(rmi.hess_hierarchy[mf])) {
6213  rmi.hess_hierarchy[mf].push_back(if_hierarchy);
6214  pgai = std::make_shared<ga_instruction_hess_base>
6215  (rmi.hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
6216  }
6217  break;
6218  case GA_NODE_XFEM_PLUS_HESS_TEST:
6219  if (rmi.xfem_plus_hess.count(mf) == 0 ||
6220  !if_hierarchy.is_compatible(rmi.xfem_plus_hess_hierarchy[mf]))
6221  {
6222  rmi.xfem_plus_hess_hierarchy[mf].push_back(if_hierarchy);
6223  pgai = std::make_shared<ga_instruction_xfem_plus_hess_base>
6224  (rmi.xfem_plus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
6225  }
6226  break;
6227  case GA_NODE_XFEM_MINUS_HESS_TEST:
6228  if (rmi.xfem_minus_hess.count(mf) == 0 ||
6229  !if_hierarchy.is_compatible(rmi.xfem_minus_hess_hierarchy[mf]))
6230  {
6231  rmi.xfem_minus_hess_hierarchy[mf].push_back(if_hierarchy);
6232  pgai = std::make_shared<ga_instruction_xfem_minus_hess_base>
6233  (rmi.xfem_minus_hess[mf], gis.ctx, *mf, rmi.pfps[mf]);
6234  }
6235  break;
6236 
6237  default : GMM_ASSERT1(false, "Internal error");
6238  }
6239  if (pgai) rmi.instructions.push_back(std::move(pgai));
6240 
6241  // The copy of the real_base_value
6242  switch(pnode->node_type) {
6243  case GA_NODE_VAL_TEST:
6244  // --> t(Qmult*ndof,Qmult*target_dim)
6245  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
6246  pnode->t.set_sparsity(1, mf->get_qdim());
6247  tensor_to_clear = true;
6248  pgai = std::make_shared<ga_instruction_copy_vect_val_base>
6249  (pnode->tensor(), rmi.base[mf], mf->get_qdim());
6250  } else {
6251  pgai = std::make_shared<ga_instruction_copy_val_base>
6252  (pnode->tensor(), rmi.base[mf], mf->get_qdim());
6253  }
6254  break;
6255  case GA_NODE_GRAD_TEST:
6256  // --> t(Qmult*ndof,Qmult*target_dim,N)
6257  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
6258  pnode->t.set_sparsity(2, mf->get_qdim());
6259  tensor_to_clear = true;
6260  pgai = std::make_shared<ga_instruction_copy_vect_grad_base>
6261  (pnode->tensor(), rmi.grad[mf], mf->get_qdim());
6262  } else {
6263  pgai = std::make_shared<ga_instruction_copy_grad_base>
6264  (pnode->tensor(), rmi.grad[mf], mf->get_qdim());
6265  }
6266  break;
6267  case GA_NODE_HESS_TEST:
6268  // --> t(Qmult*ndof,Qmult*target_dim,N,N)
6269  pgai = std::make_shared<ga_instruction_copy_hess_base>
6270  (pnode->tensor(), rmi.hess[mf], mf->get_qdim());
6271  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6272  pnode->t.set_sparsity(3, mf->get_qdim());
6273  break;
6274  case GA_NODE_DIVERG_TEST:
6275  // --> t(Qmult*ndof)
6276  pgai = std::make_shared<ga_instruction_copy_diverg_base>
6277  (pnode->tensor(), rmi.grad[mf], mf->get_qdim());
6278  break;
6279  case GA_NODE_XFEM_PLUS_VAL_TEST:
6280  // -->t(Qmult*ndof,Qmult*target_dim)
6281  pgai = std::make_shared<ga_instruction_copy_val_base>
6282  (pnode->tensor(), rmi.xfem_plus_base[mf], mf->get_qdim());
6283  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6284  pnode->t.set_sparsity(1, mf->get_qdim());
6285  break;
6286  case GA_NODE_XFEM_PLUS_GRAD_TEST:
6287  // --> t(Qmult*ndof,Qmult*target_dim,N)
6288  pgai = std::make_shared<ga_instruction_copy_grad_base>
6289  (pnode->tensor(), rmi.xfem_plus_grad[mf], mf->get_qdim());
6290  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6291  pnode->t.set_sparsity(2, mf->get_qdim());
6292  break;
6293  case GA_NODE_XFEM_PLUS_HESS_TEST:
6294  // --> t(Qmult*ndof,Qmult*target_dim,N,N)
6295  pgai = std::make_shared<ga_instruction_copy_hess_base>
6296  (pnode->tensor(), rmi.xfem_plus_hess[mf], mf->get_qdim());
6297  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6298  pnode->t.set_sparsity(3, mf->get_qdim());
6299  break;
6300  case GA_NODE_XFEM_PLUS_DIVERG_TEST:
6301  // --> t(Qmult*ndof)
6302  pgai = std::make_shared<ga_instruction_copy_diverg_base>
6303  (pnode->tensor(), rmi.xfem_plus_grad[mf], mf->get_qdim());
6304  break;
6305  case GA_NODE_XFEM_MINUS_VAL_TEST:
6306  // -->t(Qmult*ndof,Qmult*target_dim)
6307  pgai = std::make_shared<ga_instruction_copy_val_base>
6308  (pnode->tensor(), rmi.xfem_minus_base[mf], mf->get_qdim());
6309  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6310  pnode->t.set_sparsity(1, mf->get_qdim());
6311  break;
6312  case GA_NODE_XFEM_MINUS_GRAD_TEST:
6313  // --> t(Qmult*ndof,Qmult*target_dim,N)
6314  pgai = std::make_shared<ga_instruction_copy_grad_base>
6315  (pnode->tensor(), rmi.xfem_minus_grad[mf], mf->get_qdim());
6316  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6317  pnode->t.set_sparsity(2, mf->get_qdim());
6318  break;
6319  case GA_NODE_XFEM_MINUS_HESS_TEST:
6320  // --> t(Qmult*ndof,Qmult*target_dim,N,N)
6321  pgai = std::make_shared<ga_instruction_copy_hess_base>
6322  (pnode->tensor(), rmi.xfem_minus_hess[mf], mf->get_qdim());
6323  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6324  pnode->t.set_sparsity(3, mf->get_qdim());
6325  break;
6326  case GA_NODE_XFEM_MINUS_DIVERG_TEST:
6327  // --> t(Qmult*ndof)
6328  pgai = std::make_shared<ga_instruction_copy_diverg_base>
6329  (pnode->tensor(), rmi.xfem_minus_grad[mf], mf->get_qdim());
6330  break;
6331  case GA_NODE_ELEMENTARY_VAL_TEST:
6332  { // --> t(Qmult*ndof,Qmult*target_dim)
6333  ga_instruction_set::elementary_trans_info &eti
6334  = rmi.elementary_trans_infos
6335  [std::make_tuple(pnode->elementary_name, mfo, mf)];
6336  pgai =
6337  std::make_shared<ga_instruction_elementary_trans_val_base>
6338  (pnode->tensor(), rmi.base[mf], mf->get_qdim(),
6339  workspace.elementary_transformation(pnode->elementary_name),
6340  *mfo, *mf, gis.ctx, eti.M, eti.icv);
6341  }
6342  break;
6343  case GA_NODE_ELEMENTARY_GRAD_TEST:
6344  { // --> t(Qmult*ndof,Qmult*target_dim,N)
6345  ga_instruction_set::elementary_trans_info &eti
6346  = rmi.elementary_trans_infos
6347  [std::make_tuple(pnode->elementary_name, mfo, mf)];
6348  pgai =
6349  std::make_shared<ga_instruction_elementary_trans_grad_base>
6350  (pnode->tensor(), rmi.grad[mf], mf->get_qdim(),
6351  workspace.elementary_transformation(pnode->elementary_name),
6352  *mfo, *mf, gis.ctx, eti.M, eti.icv);
6353  }
6354  break;
6355  case GA_NODE_ELEMENTARY_HESS_TEST:
6356  { // --> t(Qmult*ndof,Qmult*target_dim,N,N)
6357  ga_instruction_set::elementary_trans_info &eti
6358  = rmi.elementary_trans_infos
6359  [std::make_tuple(pnode->elementary_name, mfo, mf)];
6360  pgai =
6361  std::make_shared<ga_instruction_elementary_trans_hess_base>
6362  (pnode->tensor(), rmi.hess[mf], mf->get_qdim(),
6363  workspace.elementary_transformation(pnode->elementary_name),
6364  *mfo, *mf, gis.ctx, eti.M, eti.icv);
6365  }
6366  break;
6367  case GA_NODE_ELEMENTARY_DIVERG_TEST:
6368  { // --> t(Qmult*ndof)
6369  ga_instruction_set::elementary_trans_info &eti
6370  = rmi.elementary_trans_infos
6371  [std::make_tuple(pnode->elementary_name, mfo, mf)];
6372  pgai =
6373  std::make_shared<ga_instruction_elementary_trans_diverg_base>
6374  (pnode->tensor(), rmi.grad[mf], mf->get_qdim(),
6375  workspace.elementary_transformation(pnode->elementary_name),
6376  *mfo, *mf, gis.ctx, eti.M, eti.icv);
6377  }
6378  break;
6379  default: break;
6380  }
6381  if (pgai) rmi.instructions.push_back(std::move(pgai));
6382  }
6383  workspace.add_temporary_interval_for_unreduced_variable(pnode->name);
6384  }
6385  break;
6386 
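      // Test functions on the secondary domain: same pattern as for ordinary
      // test functions, but using the secondary domain context sdi.ctx and its
      // own pfp/base/grad/hess storage.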
6387  case GA_NODE_SECONDARY_DOMAIN_VAL_TEST:
6388  case GA_NODE_SECONDARY_DOMAIN_GRAD_TEST:
6389  case GA_NODE_SECONDARY_DOMAIN_HESS_TEST:
6390  case GA_NODE_SECONDARY_DOMAIN_DIVERG_TEST:
6391  {
6392  GMM_ASSERT1(!function_case, "internal error");
6393  const mesh_fem *mf = workspace.associated_mf(pnode->name);
6394  const std::string &intn = pnode->interpolate_name;
6395  auto &sdi = rmi.secondary_domain_infos;
6396 
6397  fem_interpolation_context *pctx = &(sdi.ctx);
6398  papprox_integration pai = sdi.pai;
6399  psecondary_domain psd = workspace.secondary_domain(intn);
6400  if (mf) {
6401  GMM_ASSERT1(&(mf->linked_mesh()) == &(psd->mim().linked_mesh()),
6402  "The finite element of variable " << pnode->name <<
6403  " and the applied integration method have to be"
6404  " defined on the same mesh for secondary domain");
6405 
6406  // An instruction for pfp update
6407  if (is_uniform) {
6408  if (sdi.pfps.count(mf) == 0) {
6409  sdi.pfps[mf] = 0;
6410  pgai = std::make_shared<ga_instruction_update_pfp>
6411  (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
6412  rmi.begin_instructions.push_back(std::move(pgai));
6413  }
6414  } else if (sdi.pfps.count(mf) == 0 ||
6415  !if_hierarchy.is_compatible(rmi.pfp_hierarchy[mf])) {
6416  rmi.pfp_hierarchy[mf].push_back(if_hierarchy);
6417  sdi.pfps[mf] = 0;
6418  pgai = std::make_shared<ga_instruction_update_pfp>
6419  (*mf, sdi.pfps[mf], *pctx, gis.fp_pool);
6420  rmi.instructions.push_back(std::move(pgai));
6421  }
6422 
6423  // An instruction for the base value
6424  pgai = pga_instruction();
6425  switch (pnode->node_type) {
6426  case GA_NODE_SECONDARY_DOMAIN_VAL_TEST:
6427  if (sdi.base.count(mf) == 0 ||
6428  !(if_hierarchy.is_compatible(rmi.base_hierarchy[mf]))) {
6429  rmi.base_hierarchy[mf].push_back(if_hierarchy);
6430  pgai = std::make_shared<ga_instruction_val_base>
6431  (sdi.base[mf], *pctx, *mf, sdi.pfps[mf]);
6432  }
6433  break;
6434  case GA_NODE_SECONDARY_DOMAIN_GRAD_TEST:
6435  case GA_NODE_SECONDARY_DOMAIN_DIVERG_TEST:
6436  if (sdi.grad.count(mf) == 0 ||
6437  !(if_hierarchy.is_compatible(rmi.grad_hierarchy[mf]))) {
6438  rmi.grad_hierarchy[mf].push_back(if_hierarchy);
6439  pgai = std::make_shared<ga_instruction_grad_base>
6440  (sdi.grad[mf], *pctx, *mf, sdi.pfps[mf]);
6441  }
6442  break;
6443  case GA_NODE_SECONDARY_DOMAIN_HESS_TEST:
6444  if (sdi.hess.count(mf) == 0 ||
6445  !(if_hierarchy.is_compatible(rmi.hess_hierarchy[mf]))) {
6446  rmi.hess_hierarchy[mf].push_back(if_hierarchy);
6447  pgai = std::make_shared<ga_instruction_hess_base>
6448  (sdi.hess[mf], *pctx, *mf, sdi.pfps[mf]);
6449  }
6450  break;
6451  default : GMM_ASSERT1(false, "Internal error");
6452  }
6453  if (pgai) rmi.instructions.push_back(std::move(pgai));
6454 
6455  // The copy of the real_base_value
6456  switch(pnode->node_type) {
6457  case GA_NODE_SECONDARY_DOMAIN_VAL_TEST:
6458  // --> t(Qmult*ndof,Qmult*target_dim)
6459  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
6460  pnode->t.set_sparsity(1, mf->get_qdim());
6461  tensor_to_clear = true;
6462  pgai = std::make_shared<ga_instruction_copy_vect_val_base>
6463  (pnode->tensor(), sdi.base[mf], mf->get_qdim());
6464  } else {
6465  pgai = std::make_shared<ga_instruction_copy_val_base>
6466  (pnode->tensor(), sdi.base[mf], mf->get_qdim());
6467  }
6468  break;
6469  case GA_NODE_SECONDARY_DOMAIN_GRAD_TEST:
6470  // --> t(Qmult*ndof,Qmult*target_dim,N)
6471  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized()) {
6472  pnode->t.set_sparsity(2, mf->get_qdim());
6473  tensor_to_clear = true;
6474  pgai = std::make_shared<ga_instruction_copy_vect_grad_base>
6475  (pnode->tensor(), sdi.grad[mf], mf->get_qdim());
6476  } else {
6477  pgai = std::make_shared<ga_instruction_copy_grad_base>
6478  (pnode->tensor(), sdi.grad[mf], mf->get_qdim());
6479  }
6480  break;
6481  case GA_NODE_SECONDARY_DOMAIN_HESS_TEST:
6482  // --> t(Qmult*ndof,Qmult*target_dim,N,N)
6483  pgai = std::make_shared<ga_instruction_copy_hess_base>
6484  (pnode->tensor(), sdi.hess[mf], mf->get_qdim());
6485  if (mf->get_qdim() > 1 && mf->is_uniformly_vectorized())
6486  pnode->t.set_sparsity(3, mf->get_qdim());
6487  break;
6488  case GA_NODE_SECONDARY_DOMAIN_DIVERG_TEST:
6489  // --> t(Qmult*ndof)
6490  pgai = std::make_shared<ga_instruction_copy_diverg_base>
6491  (pnode->tensor(), sdi.grad[mf], mf->get_qdim());
6492  break;
6493  default: break;
6494  }
6495  if (pgai) rmi.instructions.push_back(std::move(pgai));
6496  }
6497  workspace.add_temporary_interval_for_unreduced_variable(pnode->name);
6498  }
6499  break;
6500 
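      // Test functions seen through an interpolate transformation.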
6501  case GA_NODE_INTERPOLATE_VAL_TEST: case GA_NODE_INTERPOLATE_GRAD_TEST:
6502  case GA_NODE_INTERPOLATE_HESS_TEST: case GA_NODE_INTERPOLATE_DIVERG_TEST:
6503  {
6504  const mesh_fem *mfn = workspace.associated_mf(pnode->name), **mfg = 0;
6505  const std::string &intn = pnode->interpolate_name;
6506  const mesh **m2 = &(rmi.interpolate_infos[intn].m);
6507  if (workspace.variable_group_exists(pnode->name)) {
6508  ga_instruction_set::variable_group_info &vgi =
6509  rmi.interpolate_infos[intn].groups_info[pnode->name];
6510  mfg = &(vgi.mf); mfn = 0;
6511  }
6512 
6513  if (pnode->node_type == GA_NODE_INTERPOLATE_VAL_TEST) {
6514  // --> t(Qmult*ndof,Qmult*target_dim)
6515  pgai = std::make_shared<ga_instruction_interpolate_val_base>
6516  (pnode->tensor(), m2, mfn, mfg, gis.ipt,
6517  workspace.qdim(pnode->name), rmi.interpolate_infos[intn],
6518  gis.fp_pool);
6519  } else if (pnode->node_type == GA_NODE_INTERPOLATE_GRAD_TEST) {
6520  // --> t(Qmult*ndof,Qmult*target_dim,N)
6521  pgai = std::make_shared<ga_instruction_interpolate_grad_base>
6522  (pnode->tensor(), m2, mfn, mfg, gis.ipt,
6523  workspace.qdim(pnode->name),
6524  rmi.interpolate_infos[intn], gis.fp_pool);
6525  } else if (pnode->node_type == GA_NODE_INTERPOLATE_HESS_TEST) {
6526  // --> t(Qmult*ndof,Qmult*target_dim,N,N)
6527  pgai = std::make_shared<ga_instruction_interpolate_hess_base>
6528  (pnode->tensor(), m2, mfn, mfg, gis.ipt,
6529  workspace.qdim(pnode->name),
6530  rmi.interpolate_infos[intn], gis.fp_pool);
6531  } else { // if (pnode->node_type == GA_NODE_INTERPOLATE_DIVERG_TEST) {
6532  // --> t(Qmult*ndof)
6533  pgai = std::make_shared<ga_instruction_interpolate_diverg_base>
6534  (pnode->tensor(), m2, mfn, mfg, gis.ipt,
6535  workspace.qdim(pnode->name),
6536  rmi.interpolate_infos[intn], gis.fp_pool);
6537  }
6538  rmi.instructions.push_back(std::move(pgai));
6539  workspace.add_temporary_interval_for_unreduced_variable(pnode->name);
6540  }
6541  break;
6542 
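      // Operations: scalar operands are dispatched to dedicated scalar
      // instructions, sparsity is propagated to the result when possible and
      // unrolled variants are preferred when is_uniform is set.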
6543  case GA_NODE_OP:
6544  switch(pnode->op_type) {
6545 
6546  case GA_PLUS:
6547  if (pnode->tensor().size() == 1) {
6548  GA_DEBUG_ASSERT(child0->tensor().size() == 1,
6549  "Internal error: child0 not scalar");
6550  GA_DEBUG_ASSERT(child1->tensor().size() == 1,
6551  "Internal error: child1 not scalar");
6552  pgai = std::make_shared<ga_instruction_scalar_add>
6553  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
6554  } else {
6555  pgai = std::make_shared<ga_instruction_add>
6556  (pnode->tensor(), child0->tensor(), child1->tensor());
6557  }
6558  if (child0->t.sparsity() == child1->t.sparsity()
6559  && child0->t.qdim() == child1->t.qdim())
6560  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6561  rmi.instructions.push_back(std::move(pgai));
6562  break;
6563 
6564  case GA_MINUS:
6565  if (pnode->tensor().size() == 1) {
6566  GA_DEBUG_ASSERT(child0->tensor().size() == 1,
6567  "Internal error: child0 not scalar");
6568  GA_DEBUG_ASSERT(child1->tensor().size() == 1,
6569  "Internal error: child1 not scalar");
6570  pgai = std::make_shared<ga_instruction_scalar_sub>
6571  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
6572  } else {
6573  pgai = std::make_shared<ga_instruction_sub>
6574  (pnode->tensor(), child0->tensor(), child1->tensor());
6575  }
6576  if (child0->t.sparsity() == child1->t.sparsity()
6577  && child0->t.qdim() == child1->t.qdim())
6578  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6579  rmi.instructions.push_back(std::move(pgai));
6580  break;
6581 
6582  case GA_UNARY_MINUS:
6583  if (pnode->tensor().size() == 1) {
6584  GA_DEBUG_ASSERT(child0->tensor().size() == 1, "Internal error");
6585  pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
6586  (pnode->tensor()[0], child0->tensor()[0], minus);
6587  } else {
6588  pgai = std::make_shared<ga_instruction_scalar_mult>
6589  (pnode->tensor(), child0->tensor(), minus);
6590  }
6591  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6592  rmi.instructions.push_back(std::move(pgai));
6593  break;
6594 
6595 
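      // Reduction operations: s1 is the product of the contracted dimensions
      // of both operands, hence s2 is the common dimension shared by the two
      // operands in the contraction.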
6596  case GA_DOT: case GA_COLON: case GA_MULT:
6597  {
6598  size_type tps0 = child0->tensor_proper_size();
6599  size_type tps1 = child1->tensor_proper_size();
6600  size_type s1 = (tps0 * tps1) / pnode->tensor_proper_size();
6601  size_type s2 = size_type(round(sqrt(scalar_type(s1))));
6602 
6603  pgai = pga_instruction();
6604  if ((pnode->op_type == GA_DOT && dim1 <= 1) ||
6605  (pnode->op_type == GA_COLON && dim1 <= 2) ||
6606  (pnode->op_type == GA_MULT && dim0 == 4) ||
6607  (pnode->op_type == GA_MULT && dim1 <= 1) ||
6608  child0->tensor().size() == 1 || tps1 == 1) {
6609 
6610  if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
6611  pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
6612  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
6613  }
6614  else if (child0->tensor().size() == 1) {
6615  pnode->t.set_sparsity(child1->t.sparsity(), child1->t.qdim());
6616  pgai = std::make_shared<ga_instruction_scalar_mult>
6617  (pnode->tensor(), child1->tensor(), child0->tensor()[0]);
6618  }
6619  else if (child1->tensor().size() == 1) {
6620  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6621  pgai = std::make_shared<ga_instruction_scalar_mult>
6622  (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
6623  }
6624  else if (pnode->test_function_type < 3) {
6625  if (tps0 == 1) {
6626  if (is_uniform) // Unrolled instruction
6627  pgai = ga_uniform_instruction_simple_tmult
6628  (pnode->tensor(), child0->tensor(), child1->tensor());
6629  else
6630  pgai = std::make_shared<ga_instruction_simple_tmult>
6631  (pnode->tensor(), child0->tensor(), child1->tensor());
6632  } else {
6633  if (tps1 == 1) {
6634  if (is_uniform) // Unrolled instruction
6635  pgai = ga_uniform_instruction_simple_tmult
6636  (pnode->tensor(), child1->tensor(), child0->tensor());
6637  else
6638  pgai = std::make_shared<ga_instruction_simple_tmult>
6639  (pnode->tensor(), child1->tensor(), child0->tensor());
6640  } else if (is_uniform) // Unrolled instruction
6641  pgai = ga_uniform_instruction_contraction_switch
6642  (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
6643  else // Unrolled instruction
6644  pgai = ga_instruction_contraction_switch
6645  (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
6646  }
6647  } else {
6648  if (child1->test_function_type == 1 ||
6649  child1->test_function_type == 3) {
6650  if (child1->test_function_type == 3 ||
6651  child1->tensor_proper_size() <= s2) {
6652  if (tps0 == 1) {
6653  if (is_uniform) { // Unrolled instruction
6654  pgai = ga_uniform_instruction_simple_tmult
6655  (pnode->tensor(), child1->tensor(), child0->tensor());
6656  } else
6657  pgai = std::make_shared<ga_instruction_simple_tmult>
6658  (pnode->tensor(), child1->tensor(), child0->tensor());
6659  } else if (is_uniform) // Unrolled instruction
6660  pgai = ga_uniform_instruction_contraction_switch
6661  (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
6662  else // Unrolled instruction
6663  pgai = ga_instruction_contraction_switch
6664  (pnode->t, child0->t, child1->t, s2, tensor_to_clear);
6665  } else
6666  pgai = std::make_shared<ga_instruction_spec_contraction>
6667  (pnode->tensor(), child1->tensor(), child0->tensor(), s2);
6668  } else if (child1->test_function_type == 0 ||
6669  (child0->tensor_proper_size() == s2 &&
6670  child1->tensor_proper_size() == s2)) {
6671  if (tps0 == 1) {
6672  if (is_uniform) { // Unrolled instruction
6673  pgai = ga_uniform_instruction_simple_tmult
6674  (pnode->tensor(), child0->tensor(), child1->tensor());
6675  } else
6676  pgai = std::make_shared<ga_instruction_simple_tmult>
6677  (pnode->tensor(), child0->tensor(), child1->tensor());
6678  } else {
6679  if (is_uniform) // Unrolled instruction
6680  pgai = ga_uniform_instruction_contraction_switch
6681  (pnode->t, child1->t, child0->t, s2, tensor_to_clear);
6682  else // Unrolled instruction
6683  pgai = ga_instruction_contraction_switch
6684  (pnode->t, child1->t, child0->t, s2, tensor_to_clear);
6685  }
6686  } else {
6687  if (child0->tensor_proper_size() == s2)
6688  pgai = ga_uniform_instruction_contraction_switch
6689  (pnode->t, child1->t, child0->t, s2, tensor_to_clear);
6690  else if (child1->tensor_proper_size() == s2)
6691  pgai = std::make_shared<ga_instruction_spec_contraction>
6692  (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
6693  else
6694  pgai = std::make_shared<ga_instruction_spec2_contraction>
6695  (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
6696  }
6697  }
6698  } else { // GA_MULT or GA_DOT for dim1 > 1 or GA_COLON for dim1 > 2
6699  // and child1->tensor_proper_size() > 1
6700  if (pnode->test_function_type < 3) {
6701  if (tps0 == 1) {
6702  if (is_uniform) // Unrolled instruction
6703  pgai = ga_uniform_instruction_simple_tmult
6704  (pnode->tensor(), child0->tensor(), child1->tensor());
6705  else
6706  pgai = std::make_shared<ga_instruction_simple_tmult>
6707  (pnode->tensor(), child0->tensor(), child1->tensor());
6708  } else {
6709  if (child1->test_function_type == 0)
6710  pgai = std::make_shared<ga_instruction_matrix_mult>
6711  (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
6712  else
6713  pgai = std::make_shared<ga_instruction_matrix_mult_spec>
6714  (pnode->tensor(), child0->tensor(), child1->tensor(),
6715  s2, tps0/s2, tps1/s2);
6716  }
6717  } else {
6718  if (child0->tensor_proper_size() == 1) {
6719  if (child0->test_function_type == 0 ||
6720  child0->test_function_type == 1) {
6721  if (is_uniform) // Unrolled instruction
6722  pgai = ga_uniform_instruction_simple_tmult
6723  (pnode->tensor(), child0->tensor(), child1->tensor());
6724  else
6725  pgai = std::make_shared<ga_instruction_simple_tmult>
6726  (pnode->tensor(), child0->tensor(), child1->tensor());
6727  } else
6728  pgai = std::make_shared<ga_instruction_spec_tmult>
6729  (pnode->tensor(), child1->tensor(), child0->tensor(),
6730  tps1, tps0);
6731  } else {
6732  if (child1->test_function_type == 0)
6733  pgai = std::make_shared<ga_instruction_matrix_mult>
6734  (pnode->tensor(), child0->tensor(), child1->tensor(), s2);
6735  else if (child1->test_function_type == 2)
6736  pgai = std::make_shared<ga_instruction_matrix_mult_spec>
6737  (pnode->tensor(), child0->tensor(), child1->tensor(),
6738  s2, tps0/s2, tps1/s2);
6739  else
6740  pgai = std::make_shared<ga_instruction_matrix_mult_spec2>
6741  (pnode->tensor(), child0->tensor(), child1->tensor(),
6742  s2, tps0/s2, tps1/s2);
6743  }
6744  }
6745  }
6746  rmi.instructions.push_back(std::move(pgai));
6747  }
6748  break;
6749 
6750  case GA_DIV:
6751  if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
6752  pgai = std::make_shared<ga_instruction_scalar_scalar_div>
6753  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
6754  } else if (child1->tensor().size() == 1) {
6755  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6756  pgai = std::make_shared<ga_instruction_scalar_div>
6757  (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
6758  } else GMM_ASSERT1(false, "Internal error");
6759  rmi.instructions.push_back(std::move(pgai));
6760  break;
6761 
6762  case GA_PRINT:
6763  pnode->t.set_to_copy(child0->t);
6764  pgai = std::make_shared<ga_instruction_print_tensor>
6765  (pnode->tensor(), child0, gis.ctx, gis.nbpt, gis.ipt);
6766  rmi.instructions.push_back(std::move(pgai));
6767  break;
6768 
6769  case GA_QUOTE:
6770  if (pnode->tensor_proper_size() > 1) {
6771  size_type n1 = child0->tensor_proper_size(0);
6772  size_type n2 = (child0->tensor_order() > 1) ?
6773  child0->tensor_proper_size(1) : 1;
6774  size_type nn = 1;
6775  for (size_type i = 2; i < child0->tensor_order(); ++i)
6776  nn *= child0->tensor_proper_size(i);
6777  if (child0->nb_test_functions() == 0)
6778  pgai = std::make_shared<ga_instruction_transpose_no_test>
6779  (pnode->tensor(), child0->tensor(), n1, n2, nn);
6780  else
6781  pgai = std::make_shared<ga_instruction_transpose>
6782  (pnode->tensor(), child0->tensor(), n1, n2, nn);
6783  rmi.instructions.push_back(std::move(pgai));
6784  } else {
6785  pnode->t.set_to_copy(child0->t);
6786  }
6787  break;
6788 
6789  case GA_SYM:
6790  if (pnode->tensor_proper_size() != 1) {
6791  pgai = std::make_shared<ga_instruction_sym>
6792  (pnode->tensor(), child0->tensor());
6793  rmi.instructions.push_back(std::move(pgai));
6794  } else {
6795  pnode->t.set_to_copy(child0->t);
6796  }
6797  break;
6798 
6799  case GA_SKEW:
6800  {
6801  pgai = std::make_shared<ga_instruction_skew>
6802  (pnode->tensor(), child0->tensor());
6803  rmi.instructions.push_back(std::move(pgai));
6804  }
6805  break;
6806 
6807  case GA_TRACE:
6808  {
6809  size_type N = (child0->tensor_proper_size() == 1) ? 1:size0.back();
6810  if (N == 1) {
6811  pnode->t.set_to_copy(child0->t);
6812  } else {
6813  pgai = std::make_shared<ga_instruction_trace>
6814  (pnode->tensor(), child0->tensor(), N);
6815  rmi.instructions.push_back(std::move(pgai));
6816  }
6817  }
6818  break;
6819 
6820  case GA_DEVIATOR:
6821  {
6822  size_type N = (child0->tensor_proper_size() == 1) ? 1:size0.back();
6823  pgai = std::make_shared<ga_instruction_deviator>
6824  (pnode->tensor(), child0->tensor(), N);
6825  rmi.instructions.push_back(std::move(pgai));
6826  }
6827  break;
6828 
6829  case GA_DOTMULT:
6830 
6831  if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
6832  pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
6833  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
6834  } else if (child0->tensor().size() == 1) {
6835  pnode->t.set_sparsity(child1->t.sparsity(), child1->t.qdim());
6836  pgai = std::make_shared<ga_instruction_scalar_mult>
6837  (pnode->tensor(), child1->tensor(), child0->tensor()[0]);
6838  }
6839  else if (child1->tensor().size() == 1) {
6840  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6841  pgai = std::make_shared<ga_instruction_scalar_mult>
6842  (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
6843  }
6844  else if (child1->test_function_type == 0)
6845  pgai = std::make_shared<ga_instruction_dotmult>
6846  (pnode->tensor(), child0->tensor(), child1->tensor());
6847  else if (child0->test_function_type == 0)
6848  pgai = std::make_shared<ga_instruction_dotmult>
6849  (pnode->tensor(), child1->tensor(), child0->tensor());
6850  else if (child0->test_function_type == 1)
6851  pgai = std::make_shared<ga_instruction_dotmult_spec>
6852  (pnode->tensor(), child0->tensor(), child1->tensor());
6853  else
6854  pgai = std::make_shared<ga_instruction_dotmult_spec>
6855  (pnode->tensor(), child1->tensor(), child0->tensor());
6856 
6857  rmi.instructions.push_back(std::move(pgai));
6858  break;
6859 
6860 
6861  case GA_DOTDIV:
6862  if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
6863  pgai = std::make_shared<ga_instruction_scalar_scalar_div>
6864  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
6865  } else if (child1->tensor().size() == 1) {
6866  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6867  pgai = std::make_shared<ga_instruction_scalar_div>
6868  (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
6869  } else if (child1->test_function_type == 0) {
6870  pgai = std::make_shared<ga_instruction_dotdiv>
6871  (pnode->tensor(), child0->tensor(), child1->tensor());
6872  } else GMM_ASSERT1(false, "Internal error");
6873  rmi.instructions.push_back(std::move(pgai));
6874  break;
6875 
6876 
6877  case GA_TMULT:
6878  if (child0->tensor().size() == 1 && child1->tensor().size() == 1) {
6879  pgai = std::make_shared<ga_instruction_scalar_scalar_mult>
6880  (pnode->tensor()[0], child0->tensor()[0], child1->tensor()[0]);
6881  } else if (child0->tensor().size() == 1) {
6882  pnode->t.set_sparsity(child1->t.sparsity(), child1->t.qdim());
6883  pgai = std::make_shared<ga_instruction_scalar_mult>
6884  (pnode->tensor(), child1->tensor(), child0->tensor()[0]);
6885  }
6886  else if (child1->tensor().size() == 1) {
6887  pnode->t.set_sparsity(child0->t.sparsity(), child0->t.qdim());
6888  pgai = std::make_shared<ga_instruction_scalar_mult>
6889  (pnode->tensor(), child0->tensor(), child1->tensor()[0]);
6890  }
6891  else if (child1->test_function_type == 0) {
6892  if (is_uniform) // Unrolled instruction
6893  pgai = ga_uniform_instruction_simple_tmult
6894  (pnode->tensor(), child0->tensor(), child1->tensor());
6895  else
6896  pgai = std::make_shared<ga_instruction_simple_tmult>
6897  (pnode->tensor(), child0->tensor(), child1->tensor());
6898  } else if (child1->tensor_proper_size() == 1)
6899  pgai = std::make_shared<ga_instruction_spec2_tmult>
6900  (pnode->tensor(), child0->tensor(), child1->tensor());
6901  else
6902  pgai = std::make_shared<ga_instruction_spec_tmult>
6903  (pnode->tensor(), child0->tensor(), child1->tensor(),
6904  child0->tensor_proper_size(),
6905  child1->tensor_proper_size());
6906 
6907  rmi.instructions.push_back(std::move(pgai));
6908  break;
6909 
6910  default:GMM_ASSERT1(false, "Unexpected operation. Internal error.");
6911  }
6912  break;
6913 
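      // Explicit matrix/tensor written in the weak form: its components are
      // gathered either as tensors (when test functions are involved) or as
      // scalars.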
6914  case GA_NODE_C_MATRIX:
6915  {
6916  if (pnode->test_function_type) {
6917  std::vector<const base_tensor *> components(pnode->children.size());
6918  for (size_type i = 0; i < pnode->children.size(); ++i)
6919  components[i] = &(pnode->children[i]->tensor());
6920  pgai = std::make_shared<ga_instruction_c_matrix_with_tests>
6921  (pnode->tensor(), components);
6922  } else {
6923  std::vector<scalar_type *> components(pnode->children.size());
6924  for (size_type i = 0; i < pnode->children.size(); ++i)
6925  components[i] = &(pnode->children[i]->tensor()[0]);
6926  pgai = std::make_shared<ga_instruction_simple_c_matrix>
6927  (pnode->tensor(), components);
6928  }
6929  rmi.instructions.push_back(std::move(pgai));
6930  }
6931  break;
6932 
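      // GA_NODE_PARAMS covers Reshape, Cross_product, Index_move_last,
      // Swap_indices, Contract, predefined scalar functions, nonlinear
      // operators and plain component/slice access of a tensor.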
6933  case GA_NODE_PARAMS:
6934  if (child0->node_type == GA_NODE_RESHAPE) {
6935  pgai = std::make_shared<ga_instruction_copy_tensor>(pnode->tensor(),
6936  child1->tensor());
6937  rmi.instructions.push_back(std::move(pgai));
6938  } else if (child0->node_type == GA_NODE_CROSS_PRODUCT) {
6939  pga_tree_node child2 = pnode->children[2];
6940  if (child1->test_function_type==2 && child2->test_function_type==1)
6941  pgai = std::make_shared<ga_instruction_cross_product_tf>
6942  (pnode->tensor(), child2->tensor(), child1->tensor(), true);
6943  else if (child1->test_function_type || child2->test_function_type)
6944  pgai = std::make_shared<ga_instruction_cross_product_tf>
6945  (pnode->tensor(), child1->tensor(), child2->tensor(), false);
6946  else
6947  pgai = std::make_shared<ga_instruction_cross_product>
6948  (pnode->tensor(), child1->tensor(), child2->tensor());
6949  rmi.instructions.push_back(std::move(pgai));
6950  } else if (child0->node_type == GA_NODE_IND_MOVE_LAST) {
6951  size_type ind;
6952  ind = size_type(round(pnode->children[2]->tensor()[0])-1);
6953  size_type ii2 = 1;
6954  for (size_type i = 0; i < child1->tensor_order(); ++i)
6955  if (i>ind) ii2 *= child1->tensor_proper_size(i);
6956  size_type nn = child1->tensor_proper_size(ind);
6957  pgai = std::make_shared<ga_instruction_index_move_last>
6958  (pnode->tensor(), child1->tensor(), nn, ii2);
6959  rmi.instructions.push_back(std::move(pgai));
6960  } else if (child0->node_type == GA_NODE_SWAP_IND) {
6961  size_type ind[4];
6962  for (size_type i = 2; i < 4; ++i)
6963  ind[i] = size_type(round(pnode->children[i]->tensor()[0])-1);
6964  if (ind[2] > ind[3]) std::swap(ind[2], ind[3]);
6965  size_type ii2 = 1, ii3 = 1;
6966  for (size_type i = 0; i < child1->tensor_order(); ++i) {
6967  if (i>ind[2] && i<ind[3]) ii2 *= child1->tensor_proper_size(i);
6968  if (i>ind[3]) ii3 *= child1->tensor_proper_size(i);
6969  }
6970  size_type nn1 = child1->tensor_proper_size(ind[2]);
6971  size_type nn2 = child1->tensor_proper_size(ind[3]);
6972 
6973  pgai = std::make_shared<ga_instruction_swap_indices>
6974  (pnode->tensor(), child1->tensor(), nn1, nn2, ii2, ii3);
6975  rmi.instructions.push_back(std::move(pgai));
6976  } else if (child0->node_type == GA_NODE_CONTRACT) {
6977  std::vector<size_type> ind(2), indsize(2);
6978  pga_tree_node child2(0);
6979  if (pnode->children.size() == 4)
6980  { ind[0] = 2; ind[1] = 3; }
6981  else if (pnode->children.size() == 5)
6982  { ind[0] = 2; ind[1] = 4; child2 = pnode->children[3]; }
6983  else if (pnode->children.size() == 7) {
6984  ind.resize(4); indsize.resize(4);
6985  ind[0] = 2; ind[1] = 3; ind[2] = 5; ind[3] = 6;
6986  child2 = pnode->children[4];
6987  }
6988  size_type kk = 0, ll = 1;
6989  for (size_type i = 1; i < pnode->children.size(); ++i) {
6990  if (i == ind[kk]) {
6991  ind[kk] = size_type(round(pnode->children[i]->tensor()[0])-1);
6992  indsize[kk] = pnode->children[ll]->tensor_proper_size(ind[kk]);
6993  ++kk;
6994  } else ll = i;
6995  }
6996 
6997  if (pnode->children.size() == 4) {
6998  size_type i1 = ind[0], i2 = ind[1];
6999  if (i1 > i2) std::swap(i1, i2);
7000  size_type ii2 = 1, ii3 = 1;
7001  for (size_type i = 0; i < child1->tensor_order(); ++i) {
7002  if (i > i1 && i < i2) ii2 *= child1->tensor_proper_size(i);
7003  if (i > i2) ii3 *= child1->tensor_proper_size(i);
7004  }
7005  pgai = std::make_shared<ga_instruction_contract_1_1>
7006  (pnode->tensor(), child1->tensor(), indsize[0], ii2, ii3);
7007  }
7008  else if (pnode->children.size() == 5) {
7009  // Particular cases should be detected (ii2=ii3=1 in particular).
7010  size_type i1 = ind[0], i2 = ind[1];
7011  size_type ii1 = 1, ii2 = 1, ii3 = 1, ii4 = 1;
7012  for (size_type i = 0; i < child1->tensor_order(); ++i) {
7013  if (i < i1) ii1 *= child1->tensor_proper_size(i);
7014  if (i > i1) ii2 *= child1->tensor_proper_size(i);
7015  }
7016  for (size_type i = 0; i < child2->tensor_order(); ++i) {
7017  if (i < i2) ii3 *= child2->tensor_proper_size(i);
7018  if (i > i2) ii4 *= child2->tensor_proper_size(i);
7019  }
7020  if (child1->test_function_type==1 && child2->test_function_type==2)
7021  pgai = std::make_shared<ga_instruction_contract_2_1_rev>
7022  (pnode->tensor(), child1->tensor(), child2->tensor(),
7023  indsize[0], ii1, ii2, ii3, ii4);
7024  else
7025  pgai = std::make_shared<ga_instruction_contract_2_1>
7026  (pnode->tensor(), child1->tensor(), child2->tensor(),
7027  indsize[0], ii1, ii2, ii3, ii4);
7028  }
7029  else if (pnode->children.size() == 7) {
7030  // Particular cases should be detected (ii2=ii3=1 in particular).
7031  size_type i1 = ind[0], i2 = ind[1], i3 = ind[2], i4 = ind[3];
7032  size_type nn1 = indsize[0], nn2 = indsize[1];
7033  size_type ii1 = 1, ii2 = 1, ii3 = 1, ii4 = 1, ii5 = 1, ii6 = 1;
7034  if (i1 > i2)
7035  { std::swap(i1, i2); std::swap(i3, i4); std::swap(nn1, nn2); }
7036  for (size_type i = 0; i < child1->tensor_order(); ++i) {
7037  if (i < i1) ii1 *= child1->tensor_proper_size(i);
7038  if (i > i1 && i < i2) ii2 *= child1->tensor_proper_size(i);
7039  if (i > i2) ii3 *= child1->tensor_proper_size(i);
7040  }
7041  for (size_type i = 0; i < child2->tensor_order(); ++i) {
7042  if (i < i3 && i < i4) ii4 *= child2->tensor_proper_size(i);
7043  if ((i > i3 && i < i4) || (i > i4 && i < i3))
7044  ii5 *= child2->tensor_proper_size(i);
7045  if (i > i3 && i > i4) ii6 *= child2->tensor_proper_size(i);
7046  }
7047  if (child1->test_function_type==1 && child2->test_function_type==2)
7048  pgai = std::make_shared<ga_instruction_contract_2_2_rev>
7049  (pnode->tensor(), child1->tensor(), child2->tensor(),
7050  nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6, i4 < i3);
7051  else
7052  pgai = std::make_shared<ga_instruction_contract_2_2>
7053  (pnode->tensor(), child1->tensor(), child2->tensor(),
7054  nn1, nn2, ii1, ii2, ii3, ii4, ii5, ii6, i4 < i3);
7055  }
7056  rmi.instructions.push_back(std::move(pgai));
7057  } else if (child0->node_type == GA_NODE_PREDEF_FUNC) {
7058 
7059  std::string name = child0->name;
7060  const ga_predef_function_tab &PREDEF_FUNCTIONS
7061  = dal::singleton<ga_predef_function_tab>::instance(0);
7062  ga_predef_function_tab::const_iterator it = PREDEF_FUNCTIONS.find(name);
7063  const ga_predef_function &F = it->second;
7064  size_type nbargs = F.nbargs();
7065  pga_tree_node child2 = (nbargs == 2) ? pnode->children[2] : child1;
7066 
7067  if (nbargs == 1) {
7068  if (child1->tensor().size() == 1) {
7069  if (F.ftype() == 0)
7070  pgai = std::make_shared<ga_instruction_eval_func_1arg_1res>
7071  (pnode->tensor()[0], child1->tensor()[0], F.f1());
7072  else
7073  pgai = std::make_shared<ga_instruction_eval_func_1arg_1res_expr>
7074  (pnode->tensor()[0], child1->tensor()[0], F);
7075  } else {
7076  if (F.ftype() == 0)
7077  pgai = std::make_shared<ga_instruction_eval_func_1arg>
7078  (pnode->tensor(), child1->tensor(), F.f1());
7079  else
7080  pgai = std::make_shared<ga_instruction_eval_func_1arg_expr>
7081  (pnode->tensor(), child1->tensor(), F);
7082  }
7083  } else {
7084  if (child1->tensor().size() == 1 && child2->tensor().size() == 1) {
7085  if (F.ftype() == 0)
7086  pgai = std::make_shared<ga_instruction_eval_func_2arg_1res>
7087  (pnode->tensor()[0], child1->tensor()[0], child2->tensor()[0],
7088  F.f2());
7089  else
7090  pgai = std::make_shared<ga_instruction_eval_func_2arg_1res_expr>
7091  (pnode->tensor()[0], child1->tensor()[0], child2->tensor()[0],
7092  F);
7093  } else if (child1->tensor().size() == 1) {
7094  if (F.ftype() == 0)
7095  pgai =
7096  std::make_shared<ga_instruction_eval_func_2arg_first_scalar>
7097  (pnode->tensor(), child1->tensor(), child2->tensor(), F.f2());
7098  else
7099  pgai =
7100  std::make_shared<ga_instruction_eval_func_2arg_first_scalar_expr>
7101  (pnode->tensor(), child1->tensor(), child2->tensor(), F);
7102  } else if (child2->tensor().size() == 1) {
7103  if (F.ftype() == 0)
7104  pgai =
7105  std::make_shared<ga_instruction_eval_func_2arg_second_scalar>
7106  (pnode->tensor(), child1->tensor(), child2->tensor(), F.f2());
7107  else
7108  pgai =
7109  std::make_shared<ga_instruction_eval_func_2arg_second_scalar_expr>
7110  (pnode->tensor(), child1->tensor(), child2->tensor(), F);
7111  } else {
7112  if (F.ftype() == 0)
7113  pgai = std::make_shared<ga_instruction_eval_func_2arg>
7114  (pnode->tensor(), child1->tensor(), child2->tensor(), F.f2());
7115  else
7116  pgai = std::make_shared<ga_instruction_eval_func_2arg_expr>
7117  (pnode->tensor(), child1->tensor(), child2->tensor(), F);
7118  }
7119  }
7120  rmi.instructions.push_back(std::move(pgai));
7121 
7122  } else if (child0->node_type == GA_NODE_SPEC_FUNC) {
7123 
7124  GMM_ASSERT1(false, "Internal error");
7125 
7126  } else if (child0->node_type == GA_NODE_OPERATOR) {
7127 
7128  ga_predef_operator_tab &PREDEF_OPERATORS
7129  = dal::singleton<ga_predef_operator_tab>::instance(0);
7130  ga_predef_operator_tab::T::iterator it
7131  = PREDEF_OPERATORS.tab.find(child0->name);
7132  const ga_nonlinear_operator &OP = *(it->second);
7133  ga_nonlinear_operator::arg_list args;
7134  for (size_type i = 1; i < pnode->children.size(); ++i)
7135  args.push_back(&(pnode->children[i]->tensor()));
7136 
7137  if (child0->der1 && child0->der2 == 0) {
7138  pgai = std::make_shared<ga_instruction_eval_derivative_OP>
7139  (pnode->tensor(), OP, args, child0->der1);
7140  } else if (child0->der1 && child0->der2) {
7141  pgai = std::make_shared<ga_instruction_eval_second_derivative_OP>
7142  (pnode->tensor(), OP, args, child0->der1, child0->der2);
7143  } else {
7144  pgai = std::make_shared<ga_instruction_eval_OP>(pnode->tensor(),
7145  OP, args);
7146  }
7147  rmi.instructions.push_back(std::move(pgai));
7148 
7149  } else { // Access to a component of the tensor
7150  bgeot::multi_index mi1(size0.size()), indices;
7151  size_type nb_test = pnode->nb_test_functions();
7152  if (pnode->tensor().size() == 1) {
7153  for (size_type i = 0; i < child0->tensor_order(); ++i)
7154  mi1[i+nb_test] = size_type(round(pnode->children[i+1]->tensor()[0])-1);
7155  pgai = std::make_shared<ga_instruction_copy_scalar>
7156  (pnode->tensor()[0], child0->tensor()(mi1));
7157  } else {
7158  for (size_type i = 0; i < nb_test; ++i) indices.push_back(i);
7159  for (size_type i = 0; i < child0->tensor_order(); ++i) {
7160  if (pnode->children[i+1]->node_type != GA_NODE_ALLINDICES)
7161  mi1[i+nb_test]
7162  = size_type(round(pnode->children[i+1]->tensor()[0])- 1);
7163  else
7164  indices.push_back(i+nb_test);
7165  }
7166  pgai = std::make_shared<ga_instruction_tensor_slice>
7167  (pnode->tensor(), child0->tensor(), mi1, indices);
7168  }
7169  rmi.instructions.push_back(std::move(pgai));
7170  }
7171 
7172  break;
7173 
7174  default:GMM_ASSERT1(false, "Unexpected node type " << pnode->node_type
7175  << " in compilation. Internal error.");
7176  }
7177  if (tensor_to_clear) {
7178  gmm::clear(pnode->tensor().as_vector());
7179  if (!is_uniform) {
7180  pgai = std::make_shared<ga_instruction_clear_tensor>(pnode->tensor());
7181  rmi.elt_instructions.push_back(std::move(pgai));
7182  }
7183  }
7184  rmi.node_list[pnode->hash_value].push_back(pnode);
7185  } // ga_compile_node
7186 
7187  void ga_compile_function(ga_workspace &workspace,
7188  ga_instruction_set &gis, bool scalar) {
7189  for (size_type i = 0; i < workspace.nb_trees(); ++i) {
7190  const ga_workspace::tree_description &td = workspace.tree_info(i);
7191 
7192  gis.trees.push_back(*(td.ptree));
7193  pga_tree_node root = gis.trees.back().root;
7194  if (root) {
7195  GMM_ASSERT1(!scalar || (root->tensor().size() == 1),
7196  "The result of the given expression is not a scalar");
7197  ga_instruction_set::region_mim rm(td.mim, td.rg, 0);
7198  gis.all_instructions[rm].m = td.m;
7199  ga_if_hierarchy if_hierarchy;
7200  ga_compile_node(root, workspace, gis, gis.all_instructions[rm],
7201  *(td.m), true, if_hierarchy);
7202 
7203  gis.coeff = scalar_type(1);
7204  pga_instruction pgai;
7205  workspace.assembled_tensor() = root->tensor();
7206  pgai = std::make_shared<ga_instruction_add_to_coeff>
7207  (workspace.assembled_tensor(), root->tensor(), gis.coeff);
7208  gis.all_instructions[rm].instructions.push_back(std::move(pgai));
7209  }
7210  }
7211  }
7212 
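  // Collects the interpolate transformations used in a tree (and those for
  // which derivatives are needed), together with the variable groups accessed
  // through each of them.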
7213  static bool ga_node_used_interpolates
7214  (const pga_tree_node pnode, const ga_workspace &workspace,
7215  std::map<std::string, std::set<std::string> > &interpolates,
7216  std::set<std::string> &interpolates_der) {
7217  bool found = false;
7218  bool intrpl(pnode->node_type == GA_NODE_INTERPOLATE_VAL ||
7219  pnode->node_type == GA_NODE_INTERPOLATE_GRAD ||
7220  pnode->node_type == GA_NODE_INTERPOLATE_HESS ||
7221  pnode->node_type == GA_NODE_INTERPOLATE_DIVERG);
7222  bool intrpl_test(pnode->node_type == GA_NODE_INTERPOLATE_VAL_TEST ||
7223  pnode->node_type == GA_NODE_INTERPOLATE_GRAD_TEST ||
7224  pnode->node_type == GA_NODE_INTERPOLATE_HESS_TEST ||
7225  pnode->node_type == GA_NODE_INTERPOLATE_DIVERG_TEST);
7226 
7227  if (intrpl || intrpl_test ||
7228  pnode->node_type == GA_NODE_INTERPOLATE_FILTER ||
7229  pnode->node_type == GA_NODE_INTERPOLATE_X ||
7230  pnode->node_type == GA_NODE_INTERPOLATE_NORMAL) {
7231  interpolates[pnode->interpolate_name].size();
7232  if (intrpl || intrpl_test) {
7233  if (workspace.variable_group_exists(pnode->name))
7234  interpolates[pnode->interpolate_name].insert(pnode->name);
7235  }
7236  found = true;
7237  }
7238  if (pnode->node_type == GA_NODE_INTERPOLATE_DERIVATIVE) {
7239  interpolates_der.insert(pnode->interpolate_name_der);
7240  interpolates[pnode->interpolate_name_der].size();
7241  if (workspace.variable_group_exists(pnode->name))
7242  interpolates[pnode->interpolate_name_der].insert(pnode->name);
7243  }
7244  for (size_type i = 0; i < pnode->children.size(); ++i)
7245  found = ga_node_used_interpolates(pnode->children[i], workspace,
7246  interpolates, interpolates_der)
7247  || found;
7248  return found;
7249  }
7250 
7251 
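  // Adds, once per transformation and region, the instruction performing the
  // transformation call (special-cased for neighbor_element) and the update
  // of the variable group information attached to it.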
7252  static void ga_compile_interpolate_trans
7253  (const pga_tree_node pnode, const ga_workspace &workspace,
7254  ga_instruction_set &gis, ga_instruction_set::region_mim_instructions &rmi,
7255  const mesh &m) {
7256 
7257  std::set<std::string> interpolates_der;
7258  std::map<std::string, std::set<std::string> > transformations;
7259  ga_node_used_interpolates(pnode, workspace, transformations,
7260  interpolates_der);
7261 
7262  for (const auto &transformation : transformations) {
7263  const std::string &transname = transformation.first;
7264  bool compute_der = (interpolates_der.count(transname) != 0);
7265  if (rmi.transformations.count(transname) == 0 ||
7266  (compute_der && rmi.transformations_der.count(transname) == 0)) {
7267  rmi.transformations[transname].size();
7268  gis.transformations.insert(transname);
7269  if (compute_der) rmi.transformations_der.insert(transname);
7270  pga_instruction pgai;
7271  if (transname.compare("neighbor_element") == 0 ||
7272  transname.compare("neighbour_elt") == 0) {
7273  pgai = std::make_shared<ga_instruction_neighbor_transformation_call>
7274  (workspace, rmi.interpolate_infos[transname],
7275  workspace.interpolate_transformation(transname), gis.ctx,
7276  m, gis.ipt, gis.pai, gis.gp_pool, gis.neighbor_corresp);
7277  } else {
7278  pgai = std::make_shared<ga_instruction_transformation_call>
7279  (workspace, rmi.interpolate_infos[transname],
7280  workspace.interpolate_transformation(transname), gis.ctx,
7281  gis.Normal, m, compute_der);
7282  }
7283  if (pgai) rmi.instructions.push_back(std::move(pgai));
7284  }
7285 
7286  for (const std::string &nodename : transformation.second) {
7287  if (rmi.transformations[transname].count(nodename) == 0) {
7288  auto&& inin = rmi.interpolate_infos[transname];
7289  pga_instruction pgai =
7290  std::make_shared<ga_instruction_update_group_info>
7291  (workspace, gis, inin, nodename, inin.groups_info[nodename]);
7292  rmi.instructions.push_back(std::move(pgai));
7293  rmi.transformations[transname].insert(nodename);
7294  }
7295  }
7296  }
7297  }
7298 
7299  void ga_compile_interpolation(ga_workspace &workspace,
7300  ga_instruction_set &gis) {
7301  gis.transformations.clear();
7302  gis.all_instructions.clear();
7303  for (size_type i = 0; i < workspace.nb_trees(); ++i) {
7304  const ga_workspace::tree_description &td = workspace.tree_info(i);
7305  if (td.operation != ga_workspace::ASSEMBLY) {
7306  gis.trees.push_back(*(td.ptree));
7307 
7308  // Semantic analysis mainly to evaluate fixed size variables and data
7309  const mesh *m = td.m;
7310  GMM_ASSERT1(m, "Internal error");
7311  ga_semantic_analysis(gis.trees.back(), workspace, *m,
7312  ref_elt_dim_of_mesh(*m, *(td.rg)), true, false);
7313  pga_tree_node root = gis.trees.back().root;
7314  if (root) {
7315  // Compile tree
7316  ga_instruction_set::region_mim rm(td.mim, td.rg, 0);
7317  auto &rmi = gis.all_instructions[rm];
7318  rmi.m = td.m;
7319  rmi.im = td.mim;
7320  // rmi.interpolate_infos.clear();
7321  ga_compile_interpolate_trans(root, workspace, gis, rmi, *(td.m));
7322  ga_compile_node(root, workspace, gis, rmi, *(td.m), false,
7323  rmi.current_hierarchy);
7324 
7325  // After compile tree
7326  workspace.assembled_tensor() = root->tensor();
7327  pga_instruction pgai = std::make_shared<ga_instruction_add_to>
7328  (workspace.assembled_tensor(), root->tensor());
7329  rmi.instructions.push_back(std::move(pgai));
7330  }
7331  }
7332  }
7333  }
7334 
7335 
7336  struct var_set : std::map<std::string,size_type> {
7337  // This class indexes variable names in the order of their addition
7338  size_type operator[](const std::string &name) {
7339  if (name.empty()) return size_type(-1);
7340  size_type id = size();
7341  auto it = find(name);
7342  if (it == end()) {
7343  emplace(name, id);
7344  return id;
7345  }
7346  return it->second;
7347  }
7348  std::string operator[](const size_type &id) const {
7349  for (const auto &key_value : *this) // brute force reverse search
7350  if (key_value.second == id)
7351  return key_value.first;
7352  return std::string("");
7353  }
7354  };
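  // Illustrative usage sketch (hypothetical variable names, not part of the
  // assembly code): a var_set assigns a new id to each name on first access
  // and supports a brute force reverse lookup.
  //
  //   var_set vs;
  //   size_type iu = vs["u"];            // 0 (inserted on first access)
  //   size_type ip = vs["p"];            // 1
  //   // vs["u"] == 0                     (already registered, same id)
  //   // vs[size_type(1)] == "p"          (reverse lookup by id)
  //   // vs[std::string()] == size_type(-1)  (empty names are invalid)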
7355 
7356 
7357  struct condensation_description {
7358  var_set Ivars, Jvars, Qvars; // sets of variables involved in condensation
7359  // Clusters of intercoupled condensed variables and subdiagonally coupled
7360  // primary variables for each cluster
7361  std::vector<std::set<size_type>> Qclusters, Jclusters;
7362  // Each element of Qclusters contains a group of intercoupled condensed
7363  // variables. Due to the couplings within each group, all variables of the
7364  // same group need to be condensed out simultaneously. By definition, two
7365  // clusters cannot share a common variable.
7366  // cluster index of each condensed variable (index into Qclusters)
7367  std::vector<size_type> cluster_of_Qvar;
7368  // Matrices of pointers to submatrices for all coupling terms
7369  gmm::dense_matrix<base_tensor *> KQQ, // diagonal
7370  KQJ, KQJpr, // subdiagonal
7371  KIQ, // superdiagonal
7372  KIJ; // outcome
7373  std::vector<base_tensor *> RI, // res. vector of coupled primary variables
7374  RQpr; // partial solution for condensed variables (initially stores residuals)
7375  };
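  // Sketch of the block system the members above are presumably meant to
  // represent (classical static condensation of the internal variables Q
  // against the retained primary variables I/J; sign conventions and the
  // actual inversion are handled inside the condensation instructions):
  //
  //   [ KIJ  KIQ ] [ dJ ]   [ RI ]
  //   [ KQJ  KQQ ] [ dQ ] = [ RQ ]
  //
  //   condensed system : (KIJ - KIQ*KQQ^{-1}*KQJ) dJ = RI - KIQ*KQQ^{-1}*RQ
  //   local recovery   :  dQ = KQQ^{-1}*(RQ - KQJ*dJ)
  //
  //   KQJpr and RQpr then hold the per-cluster products KQQ^{-1}*KQJ and
  //   KQQ^{-1}*RQ (up to sign), computed cluster by cluster.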
7376 
7377  void ga_compile(ga_workspace &workspace,
7378  ga_instruction_set &gis, size_type order, bool condensation) {
7379  gis.transformations.clear();
7380  gis.all_instructions.clear();
7381  gis.unreduced_terms.clear();
7382  workspace.clear_temporary_variable_intervals();
7383 
7384  std::map<const ga_instruction_set::region_mim, condensation_description>
7385  condensations;
7386 
7387  if (condensation && order == 2) {
7388  for (size_type i = 0; i < workspace.nb_trees(); ++i) {
7389  ga_workspace::tree_description &td = workspace.tree_info(i);
7390  if (td.order != 2 && td.order != size_type(-1))
7391  continue;
7392  ga_tree tree(*(td.ptree)); // temporary tree (not used later)
7393  ga_semantic_analysis(tree, workspace, td.mim->linked_mesh(),
7394  ref_elt_dim_of_mesh(td.mim->linked_mesh(),*(td.rg)),
7395  true, false);
7396  pga_tree_node root = tree.root;
7397  if (root) {
7398  const bool
7399  v1_is_intern = workspace.is_internal_variable(root->name_test1),
7400  v2_is_intern = workspace.is_internal_variable(root->name_test2);
7401  if (v1_is_intern || v2_is_intern) {
7402  GMM_ASSERT1(tree.secondary_domain.empty(),
7403  "Condensed variable cannot be used in secondary domain");
7404 
7405  for (const auto &key_val : condensations) {
7406  const ga_instruction_set::region_mim rm0 = key_val.first;
7407  const condensation_description &CC0 = key_val.second;
7408  if (rm0.mim() == td.mim && rm0.region() != td.rg
7409  && (CC0.Qvars.count(root->name_test1) ||
7410  CC0.Qvars.count(root->name_test2))) {
7411  mesh_region intrsct = getfem::mesh_region::intersection
7412  (*(rm0.region()), *(td.rg));
7413  GMM_ASSERT1(intrsct.is_empty(),
7414  "Cannot condense coupled variables between "
7415  "intersecting regions");
7416  }
7417  }
7418  const ga_instruction_set::region_mim rm(td.mim, td.rg, nullptr);
7419 
7420  condensation_description &CC = condensations[rm];
7421  size_type
7422  q1 = v1_is_intern ? CC.Qvars[root->name_test1] : size_type(-1),
7423  q2 = v2_is_intern ? CC.Qvars[root->name_test2] : size_type(-1);
7424  GMM_ASSERT1(q1 != size_type(-1) || q2 != size_type(-1), "Error");
7425  std::vector<size_type> selected_clusters;
7426  for (size_type j=0; j < CC.Qclusters.size(); ++j)
7427  if (CC.Qclusters[j].count(q1) || CC.Qclusters[j].count(q2))
7428  selected_clusters.push_back(j);
7429 
7430  if (selected_clusters.empty()) { // create new cluster
7431  CC.Qclusters.push_back(std::set<size_type>());
7432  if (q1 != size_type(-1)) CC.Qclusters.back().insert(q1);
7433  if (q2 != size_type(-1)) CC.Qclusters.back().insert(q2);
7434  } else { // add into existing cluster / merge clusters together
7435  auto &target = CC.Qclusters[selected_clusters[0]];
7436  if (q1 != size_type(-1)) target.insert(q1);
7437  if (q2 != size_type(-1)) target.insert(q2);
7438  for (size_type j=selected_clusters.size()-1; j > 0; --j) {
7439  auto &source = CC.Qclusters[selected_clusters[j]];
7440  target.insert(source.begin(), source.end());
7441  CC.Qclusters.erase(CC.Qclusters.begin() + selected_clusters[j]);
7442  }
7443  }
7444  } // is_internal_variable
7445  } // if (root)
7446  } // for (size_type i = 0; i < workspace.nb_trees(); ++i)
7447 
7448  for (auto &key_value : condensations) {
7449  condensation_description &CC = key_value.second;
7450  //for (const auto &cluster : CC.Qclusters) {
7451  // cout << "Clusters of coupled variables:" << endl;
7452  // for (const auto &varid : cluster) cout << "/" << CC.Qvars[varid];
7453  // cout << "/" << endl;
7454  //}
7455  size_type Qsize = CC.Qvars.size();
7456 
7457  // Jclusters will hold all J variables each cluster is coupled to
7458  CC.Jclusters.resize(CC.Qclusters.size());
7459 
7460  CC.cluster_of_Qvar.resize(Qsize);
7461  for (size_type i=0; i < CC.Qclusters.size(); ++i)
7462  for (const size_type &var : CC.Qclusters[i])
7463  CC.cluster_of_Qvar[var] = i;
7464 
7465  // Qvars: all condensed variables
7466  // Qclusters: definition of clusters of intercoupled variables of Qvars
7467  // cluster_of_Qvar: dictionary for which cluster each variable belongs to
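        // Hypothetical example: with Qvars = {"p1"->0, "p2"->1, "p3"->2},
        // where p1 and p2 are coupled to each other while p3 is only coupled
        // to primary variables, this gives Qclusters = {{0,1},{2}} and
        // cluster_of_Qvar = [0, 0, 1].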
7468  CC.KQQ.resize(Qsize, Qsize);
7469  CC.RQpr.resize(Qsize);
7470  for (size_type q=0; q < Qsize; ++q) {
7471  bgeot::multi_index mi(1);
7472  mi[0] = workspace.associated_im_data(CC.Qvars[q])->nb_tensor_elem();
7473  gis.condensation_tensors.push_back // memory allocation
7474  (std::make_shared<base_tensor>(mi));
7475  CC.RQpr[q] = gis.condensation_tensors.back().get();
7476  }
7477  }
7478  } // if (condensation && order == 2)
7479 
7480  std::array<ga_workspace::operation_type,3>
7481  phases{ga_workspace::PRE_ASSIGNMENT,
7482  ga_workspace::ASSEMBLY,
7483  ga_workspace::POST_ASSIGNMENT};
7484  for (const auto &phase : phases) {
7485 
7486  for (size_type i = 0; i < workspace.nb_trees(); ++i) {
7487  ga_workspace::tree_description &td = workspace.tree_info(i);
7488  if (td.operation != phase)
7489  continue; // skip this tree in this phase
7490 
7491  if (td.order == order || td.order == size_type(-1)) {
7492  std::list<ga_tree> &trees = (phase == ga_workspace::ASSEMBLY)
7493  ? gis.trees
7494  : gis.interpolation_trees;
7495  trees.push_back(*(td.ptree));
7496  // Semantic analysis mainly to evaluate fixed size variables and data
7497  ga_semantic_analysis(trees.back(), workspace, td.mim->linked_mesh(),
7498  ref_elt_dim_of_mesh(td.mim->linked_mesh(),*(td.rg)),
7499  true, false);
7500  pga_tree_node root = trees.back().root;
7501  if (root) {
7502  // Compile tree
7503  // cout << "Will compile "; ga_print_node(root, cout); cout << endl;
7504 
7505  psecondary_domain psd(0);
7506  if (trees.back().secondary_domain.size())
7507  psd = workspace.secondary_domain(trees.back().secondary_domain);
7508  ga_instruction_set::region_mim rm(td.mim, td.rg, psd);
7509  auto &rmi = gis.all_instructions[rm];
7510  rmi.m = td.m;
7511  rmi.im = td.mim;
7512  // rmi.interpolate_infos.clear();
7513  ga_compile_interpolate_trans(root, workspace, gis, rmi, *(td.m));
7514  ga_compile_node(root, workspace, gis, rmi, *(td.m), false,
7515  rmi.current_hierarchy);
7516  // cout << "compilation finished "; ga_print_node(root, cout);
7517  // cout << endl;
7518 
7519  if (phase != ga_workspace::ASSEMBLY) { // Assignment/interpolation
7520  if (!td.varname_interpolation.empty()) {
7521  auto *imd
7522  = workspace.associated_im_data(td.varname_interpolation);
7523  auto &V = const_cast<model_real_plain_vector &>
7524  (workspace.value(td.varname_interpolation));
7525  GMM_ASSERT1(imd, "Internal error");
7526  auto pgai = std::make_shared<ga_instruction_assignment>
7527  (root->tensor(), V, gis.ctx, imd);
7528  rmi.instructions.push_back(std::move(pgai));
7529  }
7530  } else { // Addition of an assembly instruction
7531  pga_instruction pgai;
7532  switch(order) {
7533  case 0: {
7534  workspace.assembled_tensor() = root->tensor();
7535  pgai = std::make_shared<ga_instruction_add_to_coeff>
7536  (workspace.assembled_tensor(), root->tensor(), gis.coeff);
7537  break;
7538  }
7539  case 1: {
7540  GMM_ASSERT1(root->tensor_proper_size() == 1,
7541  "Invalid vector or tensor quantity. An order 1 "
7542  "weak form has to be a scalar quantity");
7543  const mesh_fem * const
7544  mf = workspace.associated_mf(root->name_test1);
7545  const im_data * const
7546  imd = workspace.associated_im_data(root->name_test1);
7547  workspace.add_temporary_interval_for_unreduced_variable
7548  (root->name_test1);
7549 
7550  base_vector &Vu = workspace.unreduced_vector(),
7551  &Vr = workspace.assembled_vector();
7552  if (mf) {
7553  const std::string &intn1 = root->interpolate_name_test1;
7554  bool secondary = !intn1.empty() &&
7555  workspace.secondary_domain_exists(intn1);
7556  fem_interpolation_context
7557  &ctx = intn1.empty() ? gis.ctx
7558  : (secondary ? rmi.secondary_domain_infos.ctx
7559  : rmi.interpolate_infos[intn1].ctx);
7560  bool interpolate =
7561  !(intn1.empty() || intn1 == "neighbor_element"
7562  || intn1 == "neighbour_elt" || secondary);
7563 
7564  if (intn1.size() && !secondary &&
7565  workspace.variable_group_exists(root->name_test1)) {
7566  ga_instruction_set::variable_group_info
7567  &vgi = rmi.interpolate_infos[intn1]
7568  .groups_info[root->name_test1];
7569  pgai = std::make_shared<ga_instruction_vector_assembly_mf>
7570  (root->tensor(), Vr, Vu, ctx,
7571  vgi.I, vgi.mf, vgi.reduced_mf,
7572  gis.coeff, gis.nbpt, gis.ipt, interpolate);
7573  for (const std::string &name
7574  : workspace.variable_group(root->name_test1))
7575  gis.unreduced_terms.emplace(name, "");
7576  } else {
7577  base_vector &V = mf->is_reduced() ? Vu : Vr;
7578  const gmm::sub_interval
7579  &I = mf->is_reduced()
7580  ? workspace.temporary_interval_of_variable
7581  (root->name_test1)
7582  : workspace.interval_of_variable(root->name_test1);
7583  pgai = std::make_shared<ga_instruction_vector_assembly_mf>
7584  (root->tensor(), V, ctx, I, *mf,
7585  gis.coeff, gis.nbpt, gis.ipt, interpolate);
7586  if (mf->is_reduced())
7587  gis.unreduced_terms.emplace(root->name_test1, "");
7588  }
7589  } else if (imd) {
7590  GMM_ASSERT1(root->interpolate_name_test1.size() == 0,
7591  "Interpolate transformation on integration "
7592  "point variable");
7593  if (!workspace.is_internal_variable(root->name_test1) ||
7594  condensation)
7595  pgai = std::make_shared<ga_instruction_vector_assembly_imd>
7596  (root->tensor(), Vr, gis.ctx,
7597  workspace.interval_of_variable(root->name_test1),
7598  *imd, gis.coeff, gis.ipt);
7599  // Variable root->name_test1 can be internal or not
7600  } else {
7601  pgai = std::make_shared<ga_instruction_vector_assembly>
7602  (root->tensor(), Vr,
7603  workspace.interval_of_variable(root->name_test1),
7604  gis.coeff);
7605  }
7606  break;
7607  }
7608  case 2: {
7609  GMM_ASSERT1(root->tensor_proper_size() == 1,
7610  "Invalid vector or tensor quantity. An order 2 "
7611  "weak form has to be a scalar quantity");
7612  const mesh_fem *mf1=workspace.associated_mf(root->name_test1),
7613  *mf2=workspace.associated_mf(root->name_test2);
7614  const im_data
7615  *imd1 = workspace.associated_im_data(root->name_test1),
7616  *imd2 = workspace.associated_im_data(root->name_test2);
7617  const std::string &intn1 = root->interpolate_name_test1,
7618  &intn2 = root->interpolate_name_test2;
7619  bool secondary1 = intn1.size() &&
7620  workspace.secondary_domain_exists(intn1);
7621  bool secondary2 = intn2.size() &&
7622  workspace.secondary_domain_exists(intn2);
7623  fem_interpolation_context
7624  &ctx1 = intn1.empty() ? gis.ctx
7625  : (secondary1 ? rmi.secondary_domain_infos.ctx
7626  : rmi.interpolate_infos[intn1].ctx),
7627  &ctx2 = intn2.empty() ? gis.ctx
7628  : (secondary2 ? rmi.secondary_domain_infos.ctx
7629  : rmi.interpolate_infos[intn2].ctx);
7630  bool interpolate = !(intn1.empty() || intn1 == "neighbor_element"
7631  || intn1 == "neighbour_elt"
7632  || secondary1) ||
7633  !(intn2.empty() || intn2 == "neighbor_element"
7634  || intn2 == "neighbour_elt"
7635  || secondary2);
7636 
7637  workspace.add_temporary_interval_for_unreduced_variable
7638  (root->name_test1);
7639  workspace.add_temporary_interval_for_unreduced_variable
7640  (root->name_test2);
7641 
7642  bool has_var_group1 = (!intn1.empty() && !secondary1 &&
7643  workspace.variable_group_exists
7644  (root->name_test1));
7645  bool has_var_group2 = (!intn2.empty() && !secondary2 &&
7646  workspace.variable_group_exists
7647  (root->name_test2));
7648  bool simple = !interpolate &&
7649  !has_var_group1 && !has_var_group2 &&
7650  mf1 && !(mf1->is_reduced()) &&
7651  mf2 && !(mf2->is_reduced());
7652 
7653  // ga instructions write into one of the following matrices
7654  auto &Krr = workspace.assembled_matrix();
7655  auto &Kru = workspace.col_unreduced_matrix();
7656  auto &Kur = workspace.row_unreduced_matrix();
7657  auto &Kuu = workspace.row_col_unreduced_matrix();
7658 
7659  if (simple) { // --> Krr
7660  const gmm::sub_interval
7661  &I1 = workspace.interval_of_variable(root->name_test1),
7662  &I2 = workspace.interval_of_variable(root->name_test2);
7663  const scalar_type
7664  &alpha1 = workspace.factor_of_variable(root->name_test1),
7665  &alpha2 = workspace.factor_of_variable(root->name_test2);
7666  if (mf1->get_qdim() == 1 && mf2->get_qdim() == 1)
7667  pgai = std::make_shared
7668  <ga_instruction_matrix_assembly_standard_scalar>
7669  (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
7670  alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
7671  else if (root->sparsity() == 10 && root->t.qdim() == 2)
7672  pgai = std::make_shared
7673  <ga_instruction_matrix_assembly_standard_vector_opt10<2>>
7674  (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
7675  alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
7676  else if (root->sparsity() == 10 && root->t.qdim() == 3)
7677  pgai = std::make_shared
7678  <ga_instruction_matrix_assembly_standard_vector_opt10<3>>
7679  (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
7680  alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
7681  else
7682  pgai = std::make_shared
7683  <ga_instruction_matrix_assembly_standard_vector>
7684  (root->tensor(), Krr, ctx1, ctx2, I1, I2, mf1, mf2,
7685  alpha1, alpha2, gis.coeff, gis.nbpt, gis.ipt);
7686  } else if (condensation &&
7687  workspace.is_internal_variable(root->name_test1) &&
7688  workspace.is_internal_variable(root->name_test2)) {
7689  // diagonal condensation matrix KQQ
7690  // Only memory allocation, gathering of relevant pointers
7691  // and data summation instructions
7692  GMM_ASSERT1(imd1 && imd2, "Internal error");
7693  GMM_ASSERT1(!interpolate, "Internal error");
7694  size_type s1 = imd1->nb_tensor_elem();
7695  size_type s2 = imd2->nb_tensor_elem();
7696 
7697  condensation_description &CC = condensations[rm];
7698  GMM_ASSERT1(CC.Qvars.count(root->name_test1) > 0 &&
7699  CC.Qvars.count(root->name_test2) > 0,
7700  "Internal error");
7701  size_type q1 = CC.Qvars[root->name_test1],
7702  q2 = CC.Qvars[root->name_test2];
7703  if (!CC.KQQ(q1,q2)) {
7704  // allocate a new matrix
7705  gis.condensation_tensors.push_back
7706  (std::make_shared<base_tensor>(s1,s2));
7707  CC.KQQ(q1,q2) = gis.condensation_tensors.back().get();
7708  pgai = std::make_shared<ga_instruction_copy_vect>
7709  (CC.KQQ(q1,q2)->as_vector(), root->tensor().as_vector());
7710  } else {
7711  // addition instruction to the previously allocated matrix
7712  pgai = std::make_shared<ga_instruction_add_to>
7713  (*CC.KQQ(q1,q2), root->tensor());
7714  }
7715  rmi.instructions.push_back(std::move(pgai));
7716  } else if (condensation &&
7717  workspace.is_internal_variable(root->name_test1)) {
7718  // subdiagonal condensation matrix KQJ
7719  // Only memory allocation, gathering of relevant pointers
7720  // and data summation instructions
7721  GMM_ASSERT1(imd1, "Internal error");
7722  GMM_ASSERT1(!interpolate, "Internal error");
7723  size_type s1 = imd1->nb_tensor_elem();
7724 
7725  condensation_description &CC = condensations[rm];
7726  GMM_ASSERT1(CC.Qvars.count(root->name_test1),
7727  "Internal error");
7728  size_type q1 = CC.Qvars[root->name_test1],
7729  j2 = CC.Jvars[root->name_test2];
7730  CC.Jclusters[CC.cluster_of_Qvar[q1]].insert(j2);
7731  if (q1 >= CC.KQJ.nrows() || j2 >= CC.KQJ.ncols())
7732  CC.KQJ.resize(std::max(CC.KQJ.nrows(), q1+1),
7733  std::max(CC.KQJ.ncols(), j2+1));
7734  if (!CC.KQJ(q1,j2)) {
7735  // allocate a new matrix. Here we do not know the size as
7736  // it may change dynamically, but for now, just use the
7737  // size of root->tensor()
7738  gis.condensation_tensors.push_back
7739  (std::make_shared<base_tensor>(root->tensor()));
7740  GMM_ASSERT1(root->tensor().size(0) == s1, "Internal error");
7741  CC.KQJ(q1,j2) = gis.condensation_tensors.back().get();
7742  pgai = std::make_shared<ga_instruction_copy_vect>
7743  (CC.KQJ(q1,j2)->as_vector(), root->tensor().as_vector());
7744  } else {
7745  // an extra matrix for this entry has already been
7746  // allocated, so just add the current tensor to it
7747  pgai = std::make_shared<ga_instruction_add_to>
7748  (*CC.KQJ(q1,j2), root->tensor());
7749  }
7750  rmi.instructions.push_back(std::move(pgai));
7751  } else if (condensation &&
7752  workspace.is_internal_variable(root->name_test2)) {
7753  // superdiagonal condensation matrix KIQ
7754  // Only memory allocation, gathering of relevant pointers
7755  // and data summation instructions
7756  GMM_ASSERT1(imd2, "Internal error");
7757  GMM_ASSERT1(!interpolate, "Internal error");
7758  size_type s2 = imd2->nb_tensor_elem();
7759 
7760  condensation_description &CC = condensations[rm];
7761  GMM_ASSERT1(CC.Qvars.count(root->name_test2),
7762  "Internal error");
7763  size_type i1 = CC.Ivars[root->name_test1],
7764  q2 = CC.Qvars[root->name_test2];
7765  if (i1 >= CC.KIQ.nrows() || q2 >= CC.KIQ.ncols())
7766  CC.KIQ.resize(std::max(CC.KIQ.nrows(), i1+1),
7767  std::max(CC.KIQ.ncols(), q2+1));
7768  if (!CC.KIQ(i1,q2)) {
7769  // allocate a new matrix. Here we do not know the size as
7770  // it may change dynamically, but for now, just use the
7771  // size of root->tensor()
7772  gis.condensation_tensors.push_back
7773  (std::make_shared<base_tensor>(root->tensor()));
7774  GMM_ASSERT1(root->tensor().size(1) == s2,
7775  "Internal error");
7776  CC.KIQ(i1,q2) = gis.condensation_tensors.back().get();
7777  pgai = std::make_shared<ga_instruction_copy_vect>
7778  (CC.KIQ(i1,q2)->as_vector(), root->tensor().as_vector());
7779  } else {
7780  // an extra matrix for this entry has already been
7781  // allocated, so just add the current tensor to it
7782  pgai = std::make_shared<ga_instruction_add_to>
7783  (*CC.KIQ(i1,q2), root->tensor());
7784  }
7785  rmi.instructions.push_back(std::move(pgai));
7786  } else if (!workspace.is_internal_variable(root->name_test1) &&
7787  !workspace.is_internal_variable(root->name_test2)) {
7788 
7789  if ((mf1 && mf1->is_reduced()) || (mf2 && mf2->is_reduced())
7790  || has_var_group1 || has_var_group2)
7791  gis.unreduced_terms.emplace(root->name_test1,
7792  root->name_test2);
7793 
7794  auto &Kxu = (mf1 && mf1->is_reduced()) ? Kuu : Kru;
7795  auto &Kxr = (mf1 && mf1->is_reduced()) ? Kur : Krr;
7796  auto &Kux = (mf2 && mf2->is_reduced()) ? Kuu : Kur;
7797  auto &Krx = (mf2 && mf2->is_reduced()) ? Kru : Krr;
7798  auto &Kxx = (mf2 && mf2->is_reduced()) ? Kxu : Kxr;
7799 
7800  const scalar_type
7801  &alpha1 = workspace.factor_of_variable(root->name_test1),
7802  &alpha2 = workspace.factor_of_variable(root->name_test2);
7803 
7804  if (has_var_group1) {
7805  ga_instruction_set::variable_group_info
7806  &vgi1 = rmi.interpolate_infos[intn1]
7807  .groups_info[root->name_test1];
7808  if (has_var_group2) {
7809  ga_instruction_set::variable_group_info
7810  &vgi2 = rmi.interpolate_infos[intn2]
7811  .groups_info[root->name_test2];
7812  pgai = std::make_shared
7813  <ga_instruction_matrix_assembly_mf_mf>
7814  (root->tensor(), Krr, Kru, Kur, Kuu, ctx1, ctx2,
7815  vgi1, vgi2,
7816  gis.coeff, gis.nbpt, gis.ipt, interpolate);
7817  } else {
7818  const gmm::sub_interval &I2 = mf2 && mf2->is_reduced()
7819  ? workspace.temporary_interval_of_variable
7820  (root->name_test2)
7821  : workspace.interval_of_variable(root->name_test2);
7822  if (mf2)
7823  pgai = std::make_shared
7824  <ga_instruction_matrix_assembly_mf_mf>
7825  (root->tensor(), Krx, Kux, ctx1, ctx2,
7826  vgi1, I2, *mf2, alpha2,
7827  gis.coeff, gis.nbpt, gis.ipt, interpolate);
7828  else // for global variable imd2 == 0
7829  pgai = std::make_shared
7830  <ga_instruction_matrix_assembly_mf_imd>
7831  (root->tensor(), Krr, Kur, ctx1, ctx2,
7832  vgi1, I2, imd2, alpha2, gis.coeff, gis.ipt);
7833  }
7834  } else { // !has_var_group1
7835  const gmm::sub_interval &I1 = mf1 && mf1->is_reduced()
7836  ? workspace.temporary_interval_of_variable
7837  (root->name_test1)
7838  : workspace.interval_of_variable(root->name_test1);
7839  if (has_var_group2) {
7840  ga_instruction_set::variable_group_info
7841  &vgi2 = rmi.interpolate_infos[intn2]
7842  .groups_info[root->name_test2];
7843  if (mf1)
7844  pgai = std::make_shared
7845  <ga_instruction_matrix_assembly_mf_mf>
7846  (root->tensor(), Kxr, Kxu, ctx1, ctx2,
7847  I1, *mf1, alpha1, vgi2,
7848  gis.coeff, gis.nbpt, gis.ipt, interpolate);
7849  else // for global variable imd1 == 0
7850  pgai = std::make_shared
7851  <ga_instruction_matrix_assembly_imd_mf>
7852  (root->tensor(), Krr, Kru, ctx1, ctx2,
7853  I1, imd1, alpha1, vgi2, gis.coeff, gis.ipt);
7854  } else { // !has_var_group2
7855  const gmm::sub_interval &I2 = mf2 && mf2->is_reduced()
7856  ? workspace.temporary_interval_of_variable
7857  (root->name_test2)
7858  : workspace.interval_of_variable(root->name_test2);
7859  if (mf1 && mf2)
7860  pgai = std::make_shared
7861  <ga_instruction_matrix_assembly_mf_mf>
7862  (root->tensor(), Kxx, ctx1, ctx2,
7863  I1, *mf1, alpha1, I2, *mf2, alpha2,
7864  gis.coeff, gis.nbpt, gis.ipt, interpolate);
7865  else if (mf1) // for global variable imd2 == 0
7866  pgai = std::make_shared
7867  <ga_instruction_matrix_assembly_mf_imd>
7868  (root->tensor(), Kxr, ctx1, ctx2,
7869  I1, *mf1, alpha1, I2, imd2, alpha2,
7870  gis.coeff, gis.ipt);
7871  else if (mf2)
7872  pgai = std::make_shared
7873  <ga_instruction_matrix_assembly_imd_mf>
7874  (root->tensor(), Krx, ctx1, ctx2,
7875  I1, imd1, alpha1, I2, *mf2, alpha2,
7876  gis.coeff, gis.ipt);
7877  else
7878  pgai = std::make_shared
7879  <ga_instruction_matrix_assembly_imd_imd>
7880  (root->tensor(), Krr, ctx1, ctx2,
7881  I1, imd1, alpha1, I2, imd2, alpha2,
7882  gis.coeff, gis.ipt);
7883  }
7884  }
7885  } // if (!simple)
7886  break;
7887  } // case 2
7888  } // switch(order)
7889  if (pgai)
7890  rmi.instructions.push_back(std::move(pgai));
7891  }
7892  } // if (root)
7893  } // if (td.order == order || td.order == size_type(-1))
7894  } // for (size_type i = 0; i < workspace.nb_trees(); ++i)
7895 
7896  if (condensation && order == 2 && phase == ga_workspace::ASSEMBLY) {
7897 
7898  auto &Krr = workspace.assembled_matrix();
7899  auto &Kru = workspace.col_unreduced_matrix();
7900  auto &Kur = workspace.row_unreduced_matrix();
7901  auto &Kuu = workspace.row_col_unreduced_matrix();
7902 
7903  for (auto &&key_val : condensations) {
7904  const ga_instruction_set::region_mim rm = key_val.first;
7905  condensation_description &CC = key_val.second;
7906  auto &rmi = gis.all_instructions[rm];
7907 
7908  CC.KQJpr.resize(CC.KQJ.nrows(), CC.KQJ.ncols());
7909  for (size_type k=0; k < CC.KQJpr.size(); ++k) {
7910  gis.condensation_tensors.push_back // memory allocation
7911  (std::make_shared<base_tensor>(2,2)); // placeholder size, resized by the condensation instruction
7912  CC.KQJpr[k] = gis.condensation_tensors.back().get();
7913  }
7914 
7915  pga_instruction pgai;
7916 
7917  // Add one diagonal/subdiagonal condensation instruction per cluster
7918  for (size_type k=0; k < CC.Qclusters.size(); ++k) {
7919  // extract condensed variables residuals from
7920  // workspace.assembled_vector() into RQpr
7921  for (size_type q1 : CC.Qclusters[k]) {
7922  std::string name_test1 = CC.Qvars[q1];
7923  const im_data *imd1 = workspace.associated_im_data(name_test1);
7924  const gmm::sub_interval
7925  &I1 = workspace.interval_of_variable(name_test1);
7926  pgai =
7927  std::make_shared<ga_instruction_extract_residual_on_imd_dofs>
7928  (*(CC.RQpr[q1]), workspace.cached_vector(), // cached_V --> CC.RQpr[q1]
7929  gis.ctx, I1, *imd1, gis.ipt);
7930  rmi.instructions.push_back(std::move(pgai));
7931  }
7932 
7933  // the exec() of this instruction calculates KQJpr including any
7934  // necessary size update to match the sizes of KQJ, upon size change
7935  // of primary variables J
7936  pgai = std::make_shared<ga_instruction_condensation_sub>
7937  (CC.KQJpr, CC.RQpr, CC.KQQ, CC.KQJ, CC.Qclusters[k], gis.coeff); // factor_of_variable()?
7938  rmi.instructions.push_back(std::move(pgai));
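            // The instruction operates cluster-wise: for the condensed
            // variables Q_k = Qclusters[k] it combines the blocks KQQ(q,q'),
            // KQJ(q,j) and the residuals RQpr(q), q,q' in Q_k, producing the
            // partially solved KQJpr(q,j) and RQpr(q) consumed by the
            // superdiagonal and assembly instructions below (presumably a
            // per-cluster solve with the KQQ block, cf. the sketch after
            // condensation_description).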
7939 
7940  // assemble/store KQJpr/RQpr matrices/vectors into the
7941  // corresponding global matrix/vector
7942  for (size_type q1 : CC.Qclusters[k]) {
7943  std::string name_test1 = CC.Qvars[q1];
7944  const im_data *imd1 = workspace.associated_im_data(name_test1);
7945 // const scalar_type
7946 // &alpha1 = workspace.factor_of_variable(name_test1); // TODO
7947  const gmm::sub_interval
7948  &I1 = workspace.interval_of_variable(name_test1);
7949  GMM_ASSERT1(imd1, "Internal error");
7950  for (size_type j2 : CC.Jclusters[k]) {
7951  std::string name_test2 = CC.Jvars[j2];
7952  const mesh_fem *mf2 = workspace.associated_mf(name_test2); // TODO: name_test2 variable group
7953  const im_data *imd2 = workspace.associated_im_data(name_test2);
7954 // const std::string &intn2 = root->interpolate_name_test2;
7955 // GMM_ASSERT1(intn2.empty(), "Coupling of internal variables "
7956 // "with interpolated variables not "
7957 // "implemented yet");
7958 // const scalar_type
7959 // &alpha2 = workspace.factor_of_variable(name_test2); // TODO
7960  const gmm::sub_interval
7961  &I2 = mf2 && mf2->is_reduced()
7962  ? workspace.temporary_interval_of_variable(name_test2)
7963  : workspace.interval_of_variable(name_test2);
7964  const base_tensor &Kq1j2pr = *(CC.KQJpr(q1,j2)); // <- input
7965  model_real_sparse_matrix
7966  &KQJpr = mf2 && mf2->is_reduced()
7967  ? workspace.col_unreduced_matrix()
7968  : workspace.internal_coupling_matrix(); // <- output
7969  if (mf2) {
7970  pgai =
7971  std::make_shared<ga_instruction_matrix_assembly_imd_mf>
7972  (Kq1j2pr, KQJpr, gis.ctx, gis.ctx,
7973  I1, imd1, gis.ONE, I2, *mf2, gis.ONE, gis.ONE, gis.ipt); // without gis.coeff
7974  // TODO: name_test2 variable group
7975  if (mf2->is_reduced())
7976  gis.unreduced_terms.emplace(name_test1, name_test2);
7977  } else // for global variable imd2 == 0
7978  pgai =
7979  std::make_shared<ga_instruction_matrix_assembly_imd_imd>
7980  (Kq1j2pr, KQJpr, gis.ctx, gis.ctx,
7981  I1, imd1, gis.ONE, I2, imd2, gis.ONE, gis.ONE, gis.ipt); // without gis.coeff
7982  rmi.instructions.push_back(std::move(pgai));
7983  } // for j2
7984  const bool initialize = true;
7985  pgai = std::make_shared<ga_instruction_vector_assembly_imd>
7986  (*(CC.RQpr[q1]), workspace.assembled_vector(), // <- overwriting internal variables residual with internal solution
7987  gis.ctx, I1, *imd1, gis.ONE, gis.ipt, initialize); // without gis.coeff
7988  rmi.instructions.push_back(std::move(pgai));
7989  } // for q1
7990  }
7991 
7992  // Add superdiagonal condensation instructions
7993  for (size_type i1=0; i1 < CC.Ivars.size(); ++i1) {
7994 
7995  std::string name_test1 = CC.Ivars[i1];
7996  const mesh_fem *mf1 = workspace.associated_mf(name_test1); // TODO: name_test1 variable group
7997  const im_data *imd1 = workspace.associated_im_data(name_test1);
7998  const scalar_type
7999  &alpha1 = workspace.factor_of_variable(name_test1);
8000  const gmm::sub_interval
8001  &I1 = mf1 && mf1->is_reduced()
8002  ? workspace.temporary_interval_of_variable(name_test1)
8003  : workspace.interval_of_variable(name_test1);
8004 
8005  // Q_of_J[j2] will hold all condensed variables q that couple
8006  // variable i1 to each variable j2
8007  std::vector<std::set<size_type>> Q_of_J(CC.Jvars.size());
8008  for (size_type q=0; q < CC.Qvars.size(); ++q)
8009  if (CC.KIQ(i1,q)) {
8010  size_type cid = CC.cluster_of_Qvar[q];
8011  for (size_type j : CC.Jclusters[cid])
8012  Q_of_J[j].insert(q);
8013  }
8014 
8015  for (size_type j2=0; j2 < CC.Jvars.size(); ++j2) {
8016  if (Q_of_J[j2].size()) { // a coupling between i1 and j2 exists
8017  std::vector<base_tensor *> Ki1Q, KQj2;
8018  for (size_type q : Q_of_J[j2]) {
8019  Ki1Q.push_back(CC.KIQ(i1,q));
8020  KQj2.push_back(CC.KQJpr(q,j2));
8021  }
8022  // allocate a tensor for storing the coupling between i1 and j2
8023  gis.condensation_tensors.push_back
8024  (std::make_shared<base_tensor>());
8025  base_tensor &Kij = *gis.condensation_tensors.back();
8026  pgai = std::make_shared<ga_instruction_condensation_super_K>
8027  (Kij, Ki1Q, KQj2);
8028  rmi.instructions.push_back(std::move(pgai));
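              // Kij presumably accumulates the sum over q of Ki1Q[q]*KQj2[q],
              // i.e. the contribution of this (i1,j2) pair to the condensed
              // KIQ*(KQQ^{-1}*KQJ) term of the Schur complement.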
8029  // add assembly instruction
8030  std::string name_test2 = CC.Jvars[j2];
8031  const mesh_fem *mf2 = workspace.associated_mf(name_test2); // TODO: name_test2 variable group
8032  const im_data *imd2 = workspace.associated_im_data(name_test2);
8033  // Here assuming interpolate_name_test1.empty() &&
8034  // interpolate_name_test2.empty() &&
8035  // !(secondary1 || secondary2) && !interpolate;
8036  const scalar_type
8037  &alpha2 = workspace.factor_of_variable(name_test2);
8038  const gmm::sub_interval
8039  &I2 = mf2 && mf2->is_reduced()
8040  ? workspace.temporary_interval_of_variable(name_test2)
8041  : workspace.interval_of_variable(name_test2);
8042 
8043  auto &Kxu = (mf1 && mf1->is_reduced()) ? Kuu : Kru;
8044  auto &Kxr = (mf1 && mf1->is_reduced()) ? Kur : Krr;
8045  auto &Krx = (mf2 && mf2->is_reduced()) ? Kru : Krr;
8046  auto &Kxx = (mf2 && mf2->is_reduced()) ? Kxu : Kxr;
8047 
8048  if ((mf1 && mf1->is_reduced()) || (mf2 && mf2->is_reduced()))
8049  gis.unreduced_terms.emplace(name_test1, name_test2);
8050 
8051  if (mf1 && mf2) // TODO: name_test1 or name_test2 variable group
8052  pgai = std::make_shared
8053  <ga_instruction_matrix_assembly_mf_mf>
8054  (Kij, Kxx, gis.ctx, gis.ctx,
8055  I1, *mf1, alpha1, I2, *mf2, alpha2,
8056  gis.coeff, gis.nbpt, gis.ipt, false);
8057  else if (mf1) // for global variable imd2 == 0
8058  pgai = std::make_shared
8059  <ga_instruction_matrix_assembly_mf_imd>
8060  (Kij, Kxr, gis.ctx, gis.ctx,
8061  I1, *mf1, alpha1, I2, imd2, alpha2,
8062  gis.coeff, gis.ipt);
8063  else if (mf2)
8064  pgai = std::make_shared
8065  <ga_instruction_matrix_assembly_imd_mf>
8066  (Kij, Krx, gis.ctx, gis.ctx,
8067  I1, imd1, alpha1, I2, *mf2, alpha2,
8068  gis.coeff, gis.ipt);
8069  else
8070  pgai = std::make_shared
8071  <ga_instruction_matrix_assembly_imd_imd>
8072  (Kij, Krr, gis.ctx, gis.ctx,
8073  I1, imd1, alpha1, I2, imd2, alpha2,
8074  gis.coeff, gis.ipt);
8075  rmi.instructions.push_back(std::move(pgai));
8076  } // if (Q_of_J[j2].size())
8077  } // for j2
8078 
8079  // RHS condensation instructions
8080  std::vector<base_tensor *> Ki1Q, RQpr;
8081  for (size_type q=0; q < CC.Qvars.size(); ++q)
8082  if (CC.KIQ(i1,q)) {
8083  Ki1Q.push_back(CC.KIQ(i1,q));
8084  RQpr.push_back(CC.RQpr[q]);
8085  }
8086  gis.condensation_tensors.push_back
8087  (std::make_shared<base_tensor>());
8088  base_tensor &Ri = *gis.condensation_tensors.back();
8089  pgai = std::make_shared<ga_instruction_condensation_super_R>
8090  (Ri, Ki1Q, RQpr);
8091  rmi.instructions.push_back(std::move(pgai));
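            // Ri presumably accumulates the sum over q of Ki1Q[q]*RQpr[q],
            // the corresponding KIQ*(KQQ^{-1}*RQ) contribution to the
            // condensed right hand side.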
8092 
8093  base_vector &R = (mf1 && mf1->is_reduced()) ? workspace.unreduced_vector()
8094  : workspace.assembled_vector();
8095  if (mf1)
8096  pgai = std::make_shared<ga_instruction_vector_assembly_mf>
8097  (Ri, R, gis.ctx, I1, *mf1, gis.coeff, gis.nbpt, gis.ipt, false);
8098  else if (imd1)
8099  pgai = std::make_shared<ga_instruction_vector_assembly_imd>
8100  (Ri, R, gis.ctx, I1, *imd1, gis.coeff, gis.ipt);
8101  else
8102  pgai = std::make_shared<ga_instruction_vector_assembly>
8103  (Ri, R, I1, gis.coeff);
8104  rmi.instructions.push_back(std::move(pgai));
8105  } // for i1
8106  } // for (const auto &key_val : condensations)
8107  } // if (phase == ga_workspace::ASSEMBLY)
8108  } // for (const auto &phase : phases)
8109 
8110  } // ga_compile(...)
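  // A minimal usage sketch (assuming a ga_workspace `workspace` that has
  // already been fed with expressions and variables, as the model/workspace
  // assembly routines do); the compiled instruction set is then executed by
  // ga_exec below:
  //
  //   ga_instruction_set gis;
  //   ga_compile(workspace, gis, 2, false); // order 2: tangent matrix terms
  //   ga_exec(gis, workspace);              // fills workspace.assembled_matrix()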
8111 
8112 
8113 
8114  //=========================================================================
8115  // Execution of a compiled set of assembly terms
8116  //=========================================================================
8117 
8118 
8119  void ga_function_exec(ga_instruction_set &gis) {
8120 
8121  for (auto &&instr : gis.all_instructions) {
8122  const auto &gil = instr.second.instructions;
8123  for (size_type j = 0; j < gil.size(); ++j) j += gil[j]->exec(); // exec() returns how many following instructions to skip
8124  }
8125  }
8126 
8127  void ga_interpolation_exec(ga_instruction_set &gis,
8128  ga_workspace &workspace,
8129  ga_interpolation_context &gic) {
8130  base_matrix G;
8131  base_small_vector un, up;
8132 
8133  for (const std::string &t : gis.transformations)
8134  workspace.interpolate_transformation(t)->init(workspace);
8135 
8136  for (auto &&instr : gis.all_instructions) {
8137 
8138  const getfem::mesh_im &mim = *(instr.first.mim());
8139  const mesh_region &region = *(instr.first.region());
8140  const getfem::mesh &m = *(instr.second.m);
8141  GMM_ASSERT1(&m == &(gic.linked_mesh()),
8142  "Incompatibility of meshes in interpolation");
8143  const auto &gilb = instr.second.begin_instructions;
8144  const auto &gile = instr.second.elt_instructions;
8145  const auto &gil = instr.second.instructions;
8146 
8147  // iteration on elements (or faces of elements)
8148  std::vector<size_type> ind;
8149  auto pai_old = papprox_integration{};
8150  for (getfem::mr_visitor v(region, m, true); !v.finished(); ++v) {
8151  if (gic.use_mim()) {
8152  if (!mim.convex_index().is_in(v.cv())) continue;
8153  gis.pai = mim.int_method_of_element(v.cv())->approx_method();
8154  } else
8155  gis.pai = 0;
8156 
8157  ind.resize(0);
8158  bgeot::pstored_point_tab pspt
8159  = gic.ppoints_for_element(v.cv(), v.f(), ind);
8160 
8161  if (pspt.get() && ind.size() && pspt->size()) {
8162  m.points_of_convex(v.cv(), G);
8163  bgeot::pgeometric_trans pgt = m.trans_of_convex(v.cv());
8164  up.resize(G.nrows());
8165  un.resize(pgt->dim());
8166 
8167  if (gis.ctx.have_pgp() && gis.ctx.pgt() == pgt && pai_old == gis.pai) {
8168  gis.ctx.change(gis.ctx.pgp(), 0, 0, G, v.cv(), v.f());
8169  } else {
8170  if (!(gic.use_pgp(v.cv()))) {
8171  gis.ctx.change(pgt, 0, (*pspt)[0], G, v.cv(), v.f());
8172  } else {
8173  gis.ctx.change(gis.gp_pool(pgt, pspt), 0, 0, G, v.cv(), v.f());
8174  }
8175  }
8176  pai_old = gis.pai;
8177 
8178  if (gis.need_elt_size)
8179  gis.elt_size = m.convex_radius_estimate(v.cv()) * scalar_type(2);
8180 
8181  // iterations on interpolation points
8182  gis.nbpt = pspt->size();
8183  for (size_type ii = 0; ii < ind.size(); ++ii) {
8184  gis.ipt = ii;
8185  if (gis.ctx.have_pgp()) gis.ctx.set_ii(ind[ii]);
8186  else gis.ctx.set_xref((*pspt)[gis.ipt]);
8187 
8188  if (ii == 0 || !(pgt->is_linear())) {
8189  // Computation of unit normal vector in case of a boundary
8190  if (v.f() != short_type(-1)) {
8191  const base_matrix& B = gis.ctx.B();
8192  gmm::copy(pgt->normals()[v.f()], un);
8193  gmm::mult(B, un, up);
8194  scalar_type nup = gmm::vect_norm2(up);
8195  gmm::scale(up,1.0/nup);
8196  gmm::clean(up, 1e-13);
8197  gis.Normal = up;
8198  } else gis.Normal.resize(0);
8199  }
8200  gmm::clear(workspace.assembled_tensor().as_vector());
8201  if (ii == 0) {
8202  for (size_type j = 0; j < gilb.size(); ++j) j += gilb[j]->exec();
8203  for (size_type j = 0; j < gile.size(); ++j) j += gile[j]->exec();
8204  }
8205  for (size_type j = 0; j < gil.size(); ++j) j += gil[j]->exec();
8206  gic.store_result(v.cv(), ind[ii], workspace.assembled_tensor());
8207  }
8208  }
8209  }
8210  }
8211  for (const std::string &t : gis.transformations)
8212  workspace.interpolate_transformation(t)->finalize();
8213 
8214  gic.finalize();
8215  }
8216 
8217  void ga_exec(ga_instruction_set &gis, ga_workspace &workspace) {
8218  base_matrix G1, G2;
8219  base_small_vector un;
8220  scalar_type J1(0), J2(0);
8221 
8222  for (const std::string &t : gis.transformations)
8223  workspace.interpolate_transformation(t)->init(workspace);
8224 
8225  for (auto &instr : gis.all_instructions) {
8226  const getfem::mesh_im &mim = *(instr.first.mim());
8227  psecondary_domain psd = instr.first.psd();
8228  const getfem::mesh &m = *(instr.second.m);
8229  GMM_ASSERT1(&m == &(mim.linked_mesh()), "Incompatibility of meshes");
8230  const auto &gilb = instr.second.begin_instructions;
8231  const auto &gile = instr.second.elt_instructions;
8232  const auto &gil = instr.second.instructions;
8233 
8234  // if (gilb.size()) cout << "Begin instructions\n";
8235  // for (size_type j = 0; j < gilb.size(); ++j)
8236  // cout << typeid(*(gilb[j])).name() << endl;
8237  // if (gile.size()) cout << "\nElement instructions\n";
8238  // for (size_type j = 0; j < gile.size(); ++j)
8239  // cout << typeid(*(gile[j])).name() << endl;
8240  // cout << "\nGauss pt instructions\n";
8241  // for (size_type j = 0; j < gil.size(); ++j)
8242  // cout << typeid(*(gil[j])).name() << endl;
8243 
8244  if (!psd) { // standard integration on a single domain
8245 
8246  const mesh_region &region = *(instr.first.region());
8247 
8248  // iteration on elements (or faces of elements)
8249  size_type old_cv = size_type(-1);
8250  bgeot::pgeometric_trans pgt = 0, pgt_old = 0;
8251  pintegration_method pim = 0;
8252  papprox_integration pai = 0;
8253  bgeot::pstored_point_tab pspt = 0, old_pspt = 0;
8254  bgeot::pgeotrans_precomp pgp = 0;
8255  bool first_gp = true;
8256  for (getfem::mr_visitor v(region, m, true); !v.finished(); ++v) {
8257  if (mim.convex_index().is_in(v.cv())) {
8258  // cout << "proceed with elt " << v.cv() << " face " << v.f()<<endl;
8259  if (v.cv() != old_cv) {
8260  pgt = m.trans_of_convex(v.cv());
8261  pim = mim.int_method_of_element(v.cv());
8262  m.points_of_convex(v.cv(), G1);
8263 
8264  if (pim->type() == IM_NONE) continue;
8265  GMM_ASSERT1(pim->type() == IM_APPROX, "Sorry, exact methods "
8266  "cannot be used in high level generic assembly");
8267  pai = pim->approx_method();
8268  pspt = pai->pintegration_points();
8269  if (pspt->size()) {
8270  if (pgp && gis.pai == pai && pgt_old == pgt) {
8271  gis.ctx.change(pgp, 0, 0, G1, v.cv(), v.f());
8272  } else {
8273  if (pai->is_built_on_the_fly()) {
8274  gis.ctx.change(pgt, 0, (*pspt)[0], G1, v.cv(), v.f());
8275  pgp = 0;
8276  } else {
8277  pgp = gis.gp_pool(pgt, pspt);
8278  gis.ctx.change(pgp, 0, 0, G1, v.cv(), v.f());
8279  }
8280  pgt_old = pgt; gis.pai = pai;
8281  }
8282  if (gis.need_elt_size)
8283  gis.elt_size = convex_radius_estimate(pgt, G1)*scalar_type(2);
8284  }
8285  old_cv = v.cv();
8286  } else {
8287  if (pim->type() == IM_NONE) continue;
8288  gis.ctx.set_face_num(v.f());
8289  }
8290  if (pspt != old_pspt) { first_gp = true; old_pspt = pspt; }
8291  if (pspt->size()) {
8292  // Iterations on Gauss points
8293  size_type first_ind = 0;
8294  if (v.f() != short_type(-1)) {
8295  gis.nbpt = pai->nb_points_on_face(v.f());
8296  first_ind = pai->ind_first_point_on_face(v.f());
8297  } else {
8298  gis.nbpt = pai->nb_points_on_convex();
8299  }
8300  for (gis.ipt = 0; gis.ipt < gis.nbpt; ++(gis.ipt)) {
8301  if (pgp) gis.ctx.set_ii(first_ind+gis.ipt);
8302  else gis.ctx.set_xref((*pspt)[first_ind+gis.ipt]);
8303  if (gis.ipt == 0 || !(pgt->is_linear())) {
8304  J1 = gis.ctx.J();
8305  // Computation of unit normal vector in case of a boundary
8306  if (v.f() != short_type(-1)) {
8307  gis.Normal.resize(G1.nrows());
8308  un.resize(pgt->dim());
8309  gmm::copy(pgt->normals()[v.f()], un);
8310  gmm::mult(gis.ctx.B(), un, gis.Normal);
8311  scalar_type nup = gmm::vect_norm2(gis.Normal);
8312  J1 *= nup;
8313  gmm::scale(gis.Normal, 1.0/nup);
8314  gmm::clean(gis.Normal, 1e-13);
8315  } else gis.Normal.resize(0);
8316  }
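                  // un being the face normal on the reference element, B*un
                  // points (by Nanson's formula) along the real outward
                  // normal and |B*un| is the ratio of the surface Jacobian to
                  // the volume Jacobian, hence the rescaling of J1 and the
                  // normalization of gis.Normal above.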
8317  auto ipt_coeff = pai->coeff(first_ind+gis.ipt);
8318  gis.coeff = J1 * ipt_coeff;
8319  bool enable_ipt = (gmm::abs(ipt_coeff) > 0.0 ||
8320  workspace.include_empty_int_points());
8321  if (!enable_ipt) gis.coeff = scalar_type(0);
8322  if (first_gp) {
8323  for (size_type j=0; j < gilb.size(); ++j) j+=gilb[j]->exec();
8324  first_gp = false;
8325  }
8326  if (gis.ipt == 0) {
8327  for (size_type j=0; j < gile.size(); ++j) j+=gile[j]->exec();
8328  }
8329  if (enable_ipt || gis.ipt == 0 || gis.ipt == gis.nbpt-1) {
8330  for (size_type j=0; j < gil.size(); ++j) j+=gil[j]->exec();
8331  }
8332  GA_DEBUG_INFO("");
8333  }
8334  }
8335  }
8336  }
8337  GA_DEBUG_INFO("-----------------------------");
8338 
8339  } else { // Integration on the product of two domains (secondary domain)
8340 
8341  auto &sdi = instr.second.secondary_domain_infos;
8342  const mesh_region &region1 = *(instr.first.region());
8343 
8344  // iteration on elements (or faces of elements)
8345  size_type old_cv1=size_type(-1), old_cv2=size_type(-1);
8346  size_type nbpt1 = 0, nbpt2 = 0;
8347  bgeot::pgeometric_trans pgt1 = 0, pgt1_old = 0, pgt2 = 0, pgt2_old = 0;
8348  pintegration_method pim1 = 0, pim2 = 0;
8349  papprox_integration pai1 = 0, pai2 = 0;
8350  bgeot::pstored_point_tab pspt1=0, old_pspt1=0, pspt2=0, old_pspt2=0;
8351  bgeot::pgeotrans_precomp pgp1 = 0, pgp2 = 0;
8352  bool first_gp = true;
8353  for (getfem::mr_visitor v1(region1, m, true); !v1.finished(); ++v1) {
8354  if (mim.convex_index().is_in(v1.cv())) {
8355  // cout << "proceed with elt " << v1.cv()<<" face " << v1.f()<<endl;
8356  if (v1.cv() != old_cv1) {
8357  pgt1 = m.trans_of_convex(v1.cv());
8358  pim1 = mim.int_method_of_element(v1.cv());
8359  m.points_of_convex(v1.cv(), G1);
8360 
8361  if (pim1->type() == IM_NONE) continue;
8362  GMM_ASSERT1(pim1->type() == IM_APPROX, "Sorry, exact methods "
8363  "cannot be used in high level generic assembly");
8364  pai1 = pim1->approx_method();
8365  pspt1 = pai1->pintegration_points();
8366  if (pspt1->size()) {
8367  if (pgp1 && gis.pai == pai1 && pgt1_old == pgt1) {
8368  gis.ctx.change(pgp1, 0, 0, G1, v1.cv(), v1.f());
8369  } else {
8370  if (pai1->is_built_on_the_fly()) {
8371  gis.ctx.change(pgt1, 0, (*pspt1)[0], G1, v1.cv(), v1.f());
8372  pgp1 = 0;
8373  } else {
8374  pgp1 = gis.gp_pool(pgt1, pspt1);
8375  gis.ctx.change(pgp1, 0, 0, G1, v1.cv(), v1.f());
8376  }
8377  pgt1_old = pgt1; gis.pai = pai1;
8378  }
8379  if (gis.need_elt_size)
8380  gis.elt_size = convex_radius_estimate(pgt1,G1)*scalar_type(2);
8381  }
8382  old_cv1 = v1.cv();
8383  } else {
8384  if (pim1->type() == IM_NONE) continue;
8385  gis.ctx.set_face_num(v1.f());
8386  }
8387  if (pspt1 != old_pspt1) { first_gp = true; old_pspt1 = pspt1; }
8388  if (pspt1->size()) {
8389  // iterations on Gauss points
8390  size_type first_ind1 = 0;
8391  if (v1.f() != short_type(-1)) {
8392  nbpt1 = pai1->nb_points_on_face(v1.f());
8393  first_ind1 = pai1->ind_first_point_on_face(v1.f());
8394  } else {
8395  nbpt1 = pai1->nb_points_on_convex();
8396  }
8397 
8398  const mesh &m2 = psd->mim().linked_mesh();
8399  const mesh_region &region2 = psd->give_region(m, v1.cv(), v1.f());
8400  for (getfem::mr_visitor v2(region2, m2, true);
8401  !v2.finished(); ++v2) {
8402  if (v2.cv() != old_cv2) {
8403  pgt2 = m2.trans_of_convex(v2.cv());
8404  pim2 = psd->mim().int_method_of_element(v2.cv());
8405  m2.points_of_convex(v2.cv(), G2);
8406 
8407  if (pim2->type() == IM_NONE) continue;
8408  GMM_ASSERT1(pim2->type() == IM_APPROX, "Sorry, exact methods "
8409  "cannot be used in high level generic assembly");
8410  pai2 = pim2->approx_method();
8411  pspt2 = pai2->pintegration_points();
8412  if (pspt2->size()) {
8413  if (pgp2 && sdi.pai == pai2 && pgt2_old == pgt2) {
8414  sdi.ctx.change(pgp2, 0, 0, G2, v2.cv(), v2.f());
8415  } else {
8416  if (pai2->is_built_on_the_fly()) {
8417  sdi.ctx.change(pgt2, 0, (*pspt2)[0], G2,v2.cv(),v2.f());
8418  pgp2 = 0;
8419  } else {
8420  pgp2 = gis.gp_pool(pgt2, pspt2);
8421  sdi.ctx.change(pgp2, 0, 0, G2, v2.cv(), v2.f());
8422  }
8423  pgt2_old = pgt2; sdi.pai = pai2;
8424  }
8425  }
8426  old_cv2 = v2.cv();
8427  } else {
8428  if (pim2->type() == IM_NONE) continue;
8429  sdi.ctx.set_face_num(v2.f());
8430  }
8431  if (pspt2 != old_pspt2) { first_gp = true; old_pspt2 = pspt2; }
8432  if (pspt2->size()) {
8433  // iterations on Gauss points
8434  size_type first_ind2 = 0;
8435  if (v2.f() != short_type(-1)) {
8436  nbpt2 = pai2->nb_points_on_face(v2.f());
8437  first_ind2 = pai2->ind_first_point_on_face(v2.f());
8438  } else {
8439  nbpt2 = gis.nbpt = pai2->nb_points_on_convex();
8440  }
8441  gis.nbpt = nbpt1 * nbpt2;
8442  gis.ipt = 0;
8443  for (size_type ipt1=0; ipt1 < nbpt1; ++ipt1) {
8444  for (size_type ipt2=0; ipt2 < nbpt2; ++ipt2, ++(gis.ipt)) {
8445 
8446  if (pgp1) gis.ctx.set_ii(first_ind1+ipt1);
8447  else gis.ctx.set_xref((*pspt1)[first_ind1+ipt1]);
8448  if (pgp2) sdi.ctx.set_ii(first_ind2+ipt2);
8449  else sdi.ctx.set_xref((*pspt2)[first_ind2+ipt2]);
8450 
8451  if (gis.ipt == 0 || !(pgt1->is_linear())) {
8452  J1 = gis.ctx.J();
8453  if (v1.f() != short_type(-1)) {
8454  gis.Normal.resize(G1.nrows());
8455  un.resize(pgt1->dim());
8456  gmm::copy(pgt1->normals()[v1.f()], un);
8457  gmm::mult(gis.ctx.B(), un, gis.Normal);
8458  scalar_type nup = gmm::vect_norm2(gis.Normal);
8459  J1 *= nup;
8460  gmm::scale(gis.Normal, 1.0/nup);
8461  gmm::clean(gis.Normal, 1e-13);
8462  } else gis.Normal.resize(0);
8463  }
8464 
8465  if (gis.ipt == 0 || !(pgt2->is_linear())) {
8466  J2 = sdi.ctx.J();
8467  if (v2.f() != short_type(-1)) {
8468  sdi.Normal.resize(G2.nrows());
8469  un.resize(pgt2->dim());
8470  gmm::copy(pgt2->normals()[v2.f()], un);
8471  gmm::mult(sdi.ctx.B(), un, sdi.Normal);
8472  scalar_type nup = gmm::vect_norm2(sdi.Normal);
8473  J2 *= nup;
8474  gmm::scale(sdi.Normal, 1.0/nup);
8475  gmm::clean(sdi.Normal, 1e-13);
8476  } else sdi.Normal.resize(0);
8477  }
8478 
8479  auto ipt_coeff = pai1->coeff(first_ind1+ipt1)
8480  * pai2->coeff(first_ind2+ipt2);
8481  gis.coeff = J1 * J2 * ipt_coeff;
8482  bool enable_ipt = (gmm::abs(ipt_coeff) > 0.0 ||
8483  workspace.include_empty_int_points());
8484  if (!enable_ipt) gis.coeff = scalar_type(0);
8485 
8486  if (first_gp) {
8487  for (size_type j=0; j < gilb.size(); ++j)
8488  j+=gilb[j]->exec();
8489  first_gp = false;
8490  }
8491  if (gis.ipt == 0) {
8492  for (size_type j=0; j < gile.size(); ++j)
8493  j+=gile[j]->exec();
8494  }
8495  if (enable_ipt || gis.ipt == 0 || gis.ipt == gis.nbpt-1) {
8496  for (size_type j=0; j < gil.size(); ++j)
8497  j+=gil[j]->exec();
8498  }
8499  GA_DEBUG_INFO("");
8500  }
8501  }
8502  }
8503  }
8504  }
8505  }
8506  }
8507  GA_DEBUG_INFO("-----------------------------");
8508  }
8509 
8510  }
8511 
8512  for (const std::string &t : gis.transformations)
8513  workspace.interpolate_transformation(t)->finalize();
8514  }
8515 
8516 
8517 } /* end of namespace */