9. 矩阵和向量的组装#

Firedrake 通过 TSFC [HMLH18] 生成局部组装内核 (如单元刚度矩阵的组装), 然后使用 PyOP2 构建全局内核. TSFC 调用 FInAT [KM19] 生成基函数的求值公式, 并使用 loopy 生成代码.

9.1. 查看生成的 C 代码#

from firedrake import *
from textwrap import indent

mesh = RectangleMesh(10, 10, 1, 1)
V = FunctionSpace(mesh, 'CG', 1)
u, v = TrialFunction(V), TestFunction(V)

x, y = SpatialCoordinate(mesh)
f = Function(V, name='f').interpolate(sin(x))
f_tilde = conditional(f > 0.8, f, 0)

a = u*v*dx - f_tilde*v*dx

uh = Function(V, name='u_h')
prob = LinearVariationalProblem(lhs(a), rhs(a), uh)
solver = LinearVariationalSolver(prob)

solver.solve()
firedrake:WARNING OMP_NUM_THREADS is not set or is set to a value greater than 1, we suggest setting OMP_NUM_THREADS=1 to improve performance

9.1.1. 组装矩阵的 C 代码#

print("Code for mass matrix:\n")
assembler = solver._ctx._assemble_jac.__self__
for parloop in assembler.parloops(solver._ctx._jac):
    kernel = parloop.global_kernel
    local_kernel = kernel.local_kernel
    print(indent(kernel.code_to_compile, "  "))
Code for mass matrix:
  #include <complex.h>
  #include <math.h>
  #include <petsc.h>
  #include <petsc.h>
  #include <stdint.h>
  #include <stdbool.h>
  #include <math.h>

  static void form00_cell_integral(double *__restrict__ A, double const *__restrict__ coords);
  static void form00_cell_integral(double *__restrict__ A, double const *__restrict__ coords)
  {
    double t0[3 * 3] = { 0.6666666666666666, 0.1666666666666667, 0.16666666666666666, 0.16666666666666669, 0.16666666666666669, 0.6666666666666665, 0.16666666666666669, 0.6666666666666666, 0.16666666666666666 };
    double t1;
    double t2;
    double t3;
    double t4[3] = { 0.16666666666666666, 0.16666666666666666, 0.16666666666666666 };
    double t5;
    double t6;

    t1 = -1.0 * coords[0];
    t2 = -1.0 * coords[1];
    t3 = fabs((t1 + coords[2]) * (t2 + coords[5]) + -1.0 * (t1 + coords[4]) * (t2 + coords[3]));
    for (int32_t ip = 0; ip <= 2; ++ip)
    {
      t5 = t4[ip] * t3;
      for (int32_t j = 0; j <= 2; ++j)
      {
        t6 = t0[3 * ip + j] * t5;
        for (int32_t k = 0; k <= 2; ++k)
          A[3 * j + k] = A[3 * j + k] + t0[3 * ip + k] * t6;
      }
    }

  }

  void wrap_form00_cell_integral(int32_t const start, int32_t const end, Mat const mat0, double const *__restrict__ dat0, int32_t const *__restrict__ map0)
  {
    double t0[3 * 3];
    double t1[3 * 2];

    for (int32_t n = start; n <= -1 + end; ++n)
    {
      {
        int32_t const i21 = 0;

        for (int32_t i22 = 0; i22 <= 2; ++i22)
          for (int32_t i23 = 0; i23 <= 1; ++i23)
            t1[2 * i22 + i23] = dat0[2 * map0[3 * n + i22] + i23];
      }
      {
        int32_t const i15 = 0;

        for (int32_t i16 = 0; i16 <= 2; ++i16)
        {
          int32_t const i17 = 0;

          {
            int32_t const i18 = 0;

            for (int32_t i19 = 0; i19 <= 2; ++i19)
            {
              int32_t const i20 = 0;

              t0[3 * i16 + i19] = (double) (0.0);
            }
          }
        }
      }
      form00_cell_integral(&(t0[0]), &(t1[0]));
      MatSetValuesBlockedLocal(mat0, 3, &(map0[3 * n]), 3, &(map0[3 * n]), &(t0[0]), ADD_VALUES);
    }
  }

9.1.2. 组装右端项的 C 代码#

print("\n\nCode for right hand:\n")
residual_assembler = solver._ctx._assemble_residual.__self__
for parloop in residual_assembler.parloops(solver._ctx._F):
    kernel = parloop.global_kernel
    print(indent(kernel.code_to_compile, "  "))
Code for right hand:
  #include <complex.h>
  #include <math.h>
  #include <petsc.h>
  #include <stdbool.h>
  #include <stdint.h>
  #include <stdbool.h>
  #include <math.h>

  static void form0_cell_integral(double *__restrict__ A, double const *__restrict__ coords, double const *__restrict__ w_0, double const *__restrict__ w_1);
  static void form0_cell_integral(double *__restrict__ A, double const *__restrict__ coords, double const *__restrict__ w_0, double const *__restrict__ w_1)
  {
    double t0[3 * 3] = { 0.6666666666666666, 0.1666666666666667, 0.16666666666666666, 0.16666666666666669, 0.16666666666666669, 0.6666666666666665, 0.16666666666666669, 0.6666666666666666, 0.16666666666666666 };
    double t1;
    double t2;
    double t3;
    double t4[3] = { 0.16666666666666666, 0.16666666666666666, 0.16666666666666666 };
    double t5;
    double t6;

    t1 = -1.0 * coords[0];
    t2 = -1.0 * coords[1];
    t3 = fabs((t1 + coords[2]) * (t2 + coords[5]) + -1.0 * (t1 + coords[4]) * (t2 + coords[3]));
    for (int32_t ip = 0; ip <= 2; ++ip)
    {
      t5 = t0[3 * ip] * w_0[0] + t0[1 + 3 * ip] * w_0[1] + t0[2 + 3 * ip] * w_0[2];
      t6 = t4[ip] * t3 * (-1.0 * ((t5 > 0.8) ? t5 : (double) (0.0)) + t0[3 * ip] * w_1[0] + t0[1 + 3 * ip] * w_1[1] + t0[2 + 3 * ip] * w_1[2]);
      for (int32_t j = 0; j <= 2; ++j)
        A[j] = A[j] + t0[3 * ip + j] * t6;
    }

  }

  void wrap_form0_cell_integral(int32_t const start, int32_t const end, double *__restrict__ dat0, double const *__restrict__ dat1, double const *__restrict__ dat2, double const *__restrict__ dat3, int32_t const *__restrict__ map0)
  {
    double t0[3];
    double t1[3 * 2];
    double t2[3];
    double t3[3];

    for (int32_t n = start; n <= -1 + end; ++n)
    {
      for (int32_t i19 = 0; i19 <= 2; ++i19)
      {
        {
          int32_t const i23 = 0;

          {
            int32_t const i24 = 0;

            t3[i19] = dat3[map0[3 * n + i19]];
          }
        }
        {
          int32_t const i21 = 0;

          {
            int32_t const i22 = 0;

            t2[i19] = dat2[map0[3 * n + i19]];
          }
        }
        {
          int32_t const i18 = 0;

          for (int32_t i20 = 0; i20 <= 1; ++i20)
            t1[2 * i19 + i20] = dat1[2 * map0[3 * n + i19] + i20];
        }
      }
      {
        int32_t const i15 = 0;

        for (int32_t i16 = 0; i16 <= 2; ++i16)
        {
          int32_t const i17 = 0;

          t0[i16] = (double) (0.0);
        }
      }
      form0_cell_integral(&(t0[0]), &(t1[0]), &(t2[0]), &(t3[0]));
      {
        int32_t const i13 = 0;

        {
          int32_t const i14 = 0;

          for (int32_t i12 = 0; i12 <= 2; ++i12)
            dat0[map0[3 * n + i12]] = dat0[map0[3 * n + i12]] + t0[i12];
        }
      }
    }
  }

9.2. 生成向量和矩阵组装代码的代码#

  1. ParloopFormAssembler.local_kernels

  2. ParloopFormAssembler.parloopbuilder

  3. _GlobalKernelBuilder.build

以下代码来自 firedrake/preconditioner/patch.py, 它的作用为生成局部内核, 即单元上矩阵或残差的组装代码.

def matrix_funptr(form, state):
    from firedrake.tsfc_interface import compile_form
    test, trial = map(operator.methodcaller("function_space"), form.arguments())
    if test != trial:
        raise NotImplementedError("Only for matching test and trial spaces")

    if state is not None:
        interface = make_builder(dont_split=(state, ))
    else:
        interface = None

    kernels = compile_form(form, "subspace_form", split=False, interface=interface)

    cell_kernels = []
    int_facet_kernels = []
    for kernel in kernels:
        kinfo = kernel.kinfo

        # OK, now we've validated the kernel, let's build the callback
        args = []

        if kinfo.integral_type == "cell":
            get_map = operator.methodcaller("cell_node_map")
            kernels = cell_kernels
        elif kinfo.integral_type == "interior_facet":
            get_map = operator.methodcaller("interior_facet_node_map")
            kernels = int_facet_kernels
        else:
            get_map = None

        # build args
        # ...

        wrapper_knl_args = tuple(a.global_kernel_arg for a in args)
        mod = op2.GlobalKernel(kinfo.kernel, wrapper_knl_args, subset=True)
        kernels.append(CompiledKernel(mod.compile(iterset.comm), kinfo))

    return cell_kernels, int_facet_kernels

9.3. 双线性形式#

9.3.1. 创建双线性形式#

import firedrake as fd

mesh = fd.RectangleMesh(10, 10, 1, 1)
V1 = fd.FunctionSpace(mesh, 'CG', 1)
V2 = fd.FunctionSpace(mesh, 'CG', 2)

U = V1*V2

u, v = fd.TrialFunction(U), fd.TestFunction(U)

g = fd.Function(U)
h = fd.Function(V1)
r = fd.Function(V2)

form = fd.inner(fd.grad(u[0]*h*g[0]), fd.grad(v[0]))*fd.dx + fd.inner(u[1]*g[1]*r, v[1])*fd.dx

9.3.2. 查看 Form 表达式#

from ufl.formatting import ufl2unicode

# The unicode string can not show correctly in jupyter file
ustr = ufl2unicode.ufl2unicode(form)
print(ustr)
∬[rest of domain] v⃗[1] w₁₉ u⃗[1] w⃗₁₅[1] + ∑[i{subscript_number(ii._count)}]((𝐠𝐫𝐚𝐝 v⃗)[0,i{subscript_number(ii._count)}] [w⃗₁₅[0] [u⃗[0] (𝐠𝐫𝐚𝐝 w₁₇)[i{subscript_number(ii._count)}] + [(𝐠𝐫𝐚𝐝 u⃗)[0,i{subscript_number(ii._count)}] ∀ i{subscript_number(ii._count)}][i{subscript_number(ii._count)}] w₁₇ ∀ i{subscript_number(ii._count)}][i{subscript_number(ii._count)}] + [(𝐠𝐫𝐚𝐝 w⃗₁₅)[0,i{subscript_number(ii._count)}] ∀ i{subscript_number(ii._count)}][i{subscript_number(ii._count)}] u⃗[0] w₁₇ ∀ i{subscript_number(ii._count)}][i{subscript_number(ii._count)}]) 𝐝𝐱
from ufl.utils.formatting import tree_format
print(tree_format(form))
Form:
    Integral:
        integral type: cell
        subdomain id: everywhere
        integrand:
            Conj
                Inner
                (
                    Grad
                        Indexed
                        (
                            Argument(WithGeometry(MixedFunctionSpace(IndexedProxyFunctionSpace(<firedrake.mesh.MeshTopology object at 0x14d168c80>, FiniteElement('Lagrange', triangle, 1), name=None, index=0, component=None), IndexedProxyFunctionSpace(<firedrake.mesh.MeshTopology object at 0x14d168c80>, FiniteElement('Lagrange', triangle, 2), name=None, index=1, component=None), name='None_None'), Mesh(VectorElement(FiniteElement('Lagrange', triangle, 1), dim=2), 8)), 0, None)
                            MultiIndex((FixedIndex(0),))
                        )
                    Grad
                        Product
                        (
                            Indexed
                            (
                                Coefficient(WithGeometry(MixedFunctionSpace(IndexedProxyFunctionSpace(<firedrake.mesh.MeshTopology object at 0x14d168c80>, FiniteElement('Lagrange', triangle, 1), name=None, index=0, component=None), IndexedProxyFunctionSpace(<firedrake.mesh.MeshTopology object at 0x14d168c80>, FiniteElement('Lagrange', triangle, 2), name=None, index=1, component=None), name='None_None'), Mesh(VectorElement(FiniteElement('Lagrange', triangle, 1), dim=2), 8)), 15)
                                MultiIndex((FixedIndex(0),))
                            )
                            Product
                            (
                                Indexed
                                (
                                    Argument(WithGeometry(MixedFunctionSpace(IndexedProxyFunctionSpace(<firedrake.mesh.MeshTopology object at 0x14d168c80>, FiniteElement('Lagrange', triangle, 1), name=None, index=0, component=None), IndexedProxyFunctionSpace(<firedrake.mesh.MeshTopology object at 0x14d168c80>, FiniteElement('Lagrange', triangle, 2), name=None, index=1, component=None), name='None_None'), Mesh(VectorElement(FiniteElement('Lagrange', triangle, 1), dim=2), 8)), 1, None)
                                    MultiIndex((FixedIndex(0),))
                                )
                                Coefficient(WithGeometry(FunctionSpace(<firedrake.mesh.MeshTopology object at 0x14d168c80>, FiniteElement('Lagrange', triangle, 1), name=None), Mesh(VectorElement(FiniteElement('Lagrange', triangle, 1), dim=2), 8)), 17)
                            )
                        )
                )
    Integral:
        integral type: cell
        subdomain id: everywhere
        integrand:
            Product
            (
                Product
                (
                    Coefficient(WithGeometry(FunctionSpace(<firedrake.mesh.MeshTopology object at 0x14d168c80>, FiniteElement('Lagrange', triangle, 2), name=None), Mesh(VectorElement(FiniteElement('Lagrange', triangle, 1), dim=2), 8)), 19)
                    Product
                    (
                        Indexed
                        (
                            Argument(WithGeometry(MixedFunctionSpace(IndexedProxyFunctionSpace(<firedrake.mesh.MeshTopology object at 0x14d168c80>, FiniteElement('Lagrange', triangle, 1), name=None, index=0, component=None), IndexedProxyFunctionSpace(<firedrake.mesh.MeshTopology object at 0x14d168c80>, FiniteElement('Lagrange', triangle, 2), name=None, index=1, component=None), name='None_None'), Mesh(VectorElement(FiniteElement('Lagrange', triangle, 1), dim=2), 8)), 1, None)
                            MultiIndex((FixedIndex(1),))
                        )
                        Indexed
                        (
                            Coefficient(WithGeometry(MixedFunctionSpace(IndexedProxyFunctionSpace(<firedrake.mesh.MeshTopology object at 0x14d168c80>, FiniteElement('Lagrange', triangle, 1), name=None, index=0, component=None), IndexedProxyFunctionSpace(<firedrake.mesh.MeshTopology object at 0x14d168c80>, FiniteElement('Lagrange', triangle, 2), name=None, index=1, component=None), name='None_None'), Mesh(VectorElement(FiniteElement('Lagrange', triangle, 1), dim=2), 8)), 15)
                            MultiIndex((FixedIndex(1),))
                        )
                    )
                )
                Conj
                    Indexed
                    (
                        Argument(WithGeometry(MixedFunctionSpace(IndexedProxyFunctionSpace(<firedrake.mesh.MeshTopology object at 0x14d168c80>, FiniteElement('Lagrange', triangle, 1), name=None, index=0, component=None), IndexedProxyFunctionSpace(<firedrake.mesh.MeshTopology object at 0x14d168c80>, FiniteElement('Lagrange', triangle, 2), name=None, index=1, component=None), name='None_None'), Mesh(VectorElement(FiniteElement('Lagrange', triangle, 1), dim=2), 8)), 0, None)
                        MultiIndex((FixedIndex(1),))
                    )
            )

9.4. 生成局部内核#

相关代码: firedrake.tsfc_interface.compile_form

from firedrake.tsfc_interface import compile_form

kernels = compile_form(form, "subspace_form", split=False, interface=None)

通过 loopy 生成代码

import loopy

idx, kinfo = kernels[0]
code = loopy.generate_code_v2(kinfo[0].code)

# print the code
# print(code.device_code())

9.4.1. 具体构造过程#

from firedrake.parameters import parameters as default_parameters
from firedrake.tsfc_interface import TSFCKernel
from tsfc.ufl_utils import extract_firedrake_constants

parameters = default_parameters["form_compiler"].copy()

nargs = len(form.arguments())
iterable = ([(None, )*nargs, form], )

idx, f = iterable[0]
name = "test_form"

numbering = form.terminal_numbering()
coefficient_numbers = tuple(
    numbering[c] for c in form.coefficients()
)
constant_numbers = tuple(
    numbering[c] for c in extract_firedrake_constants(f)
)
prefix = name + "".join(map(str, (i for i in idx if i is not None)))

kinfos = TSFCKernel(f, prefix, parameters,
                    coefficient_numbers,
                    constant_numbers,
                    interface=None, diagonal=False).kernels

9.4.2. TSFCKernel.__init__#

from tsfc import compile_form as tsfc_compile_form
from firedrake.tsfc_interface import as_pyop2_local_kernel, KernelInfo

tree = tsfc_compile_form(form=f, prefix=name, parameters=parameters,
                                 interface=None,
                                 diagonal=False)

_kernels = []
for kernel in tree:
    coefficient_numbers_per_kernel = tuple(
        (coefficient_numbers[index], subindices)
        for index, subindices in kernel.coefficient_numbers
    )
    constant_numbers_per_kernel = constant_numbers
    events = (kernel.event,)
    pyop2_kernel = as_pyop2_local_kernel(kernel.ast, kernel.name,
                                         len(kernel.arguments),
                                         flop_count=kernel.flop_count,
                                         events=events)
    _kernels.append(KernelInfo(kernel=pyop2_kernel,
                              integral_type=kernel.integral_type,
                              oriented=kernel.oriented,
                              subdomain_id=kernel.subdomain_id,
                              domain_number=kernel.domain_number,
                              coefficient_numbers=coefficient_numbers_per_kernel,
                              constant_numbers=constant_numbers_per_kernel,
                              needs_cell_facets=False,
                              pass_layer_arg=False,
                              needs_cell_sizes=kernel.needs_cell_sizes,
                              arguments=kernel.arguments,
                              events=events))

9.4.3. tsfc.driver.compile_form#

from tsfc.driver import compile_integral
from tsfc.parameters import default_parameters as tsfc_default_parameters, is_complex
from tsfc import fem, ufl_utils

complex_mode = parameters and is_complex(parameters.get("scalar_type"))

# Preprocess UFL form in a format suitable for TSFC
fd = ufl_utils.compute_form_data(form, complex_mode=complex_mode)

kernels = []
for integral_data in fd.integral_data:
    kernel = compile_integral(integral_data, fd, prefix=name, parameters=parameters, interface=None, diagonal=False)
    if kernel is not None:
        kernels.append(kernel)
# fd.preprocessed_form.arguments()
print(type(fd.integral_data[0]))
print(fd.integral_data[0].integrals[0].metadata())
<class 'ufl.algorithms.domain_analysis.IntegralData'>
{'estimated_polynomial_degree': 8}

9.4.4. tsfc.driver.compile_integral#

编译 ufl 积分表达式为组装核

interfaceNone 时, 默认使用 KernelBuilder 进行构建.

from tsfc.kernel_interface.firedrake_loopy import KernelBuilder

KernelBuilder.compile_integrand 调用 tsfc.kernel_interface.common.set_quad_rule 设置数值积分公式, 然后调用 tsfc.fem.compile_ufl 转化 ufl 表达式为 GEM [HMLH18]. 这里会调用 FInAT 生成基函数在积分点处的求值公式.

builder.construct_integrals 应用数值积分公式生成相应表达式.

9.5. 生成全局内核#

from pyop2 import op2
from firedrake.utils import cached_property, complex_mode, IntType
from firedrake.preconditioners.patch import LocalDat, LocalMat

import operator
import numpy

test, trial = map(operator.methodcaller("function_space"), form.arguments())

准备全局参数

args = []

if kinfo.integral_type == "cell":
    get_map = operator.methodcaller("cell_node_map")
elif kinfo.integral_type == "interior_facet":
    get_map = operator.methodcaller("interior_facet_node_map")
else:
    get_map = None

toset = op2.Set(1, comm=test.comm)
dofset = op2.DataSet(toset, 1)
arity = sum(m.arity*s.cdim
            for m, s in zip(get_map(test),
                            test.dof_dset))

iterset = get_map(test).iterset
entity_node_map = op2.Map(iterset,
                          toset, arity,
                          values=numpy.zeros(iterset.total_size*arity, dtype=IntType))
mat = LocalMat(dofset)

arg = mat(op2.INC, (entity_node_map, entity_node_map))
args.append(arg)

mesh = form.ufl_domains()[kinfo.domain_number]
arg = mesh.coordinates.dat(op2.READ, get_map(mesh.coordinates))
args.append(arg)
for n, indices in kinfo.coefficient_numbers:
    c = form.coefficients()[n]
    for ind in indices:
        c_ = c.subfunctions[ind]
        map_ = get_map(c_)
        arg = c_.dat(op2.READ, map_)
        args.append(arg)

if kinfo.integral_type == "interior_facet":
    arg = test.ufl_domain().interior_facets.local_facet_dat(op2.READ)
    args.append(arg)

wrapper_knl_args = tuple(a.global_kernel_arg for a in args)

构建全局组装内核

mod = op2.GlobalKernel(kinfo.kernel, wrapper_knl_args, subset=True)

输出代码

print(mod.code_to_compile)

References

[HMLH18] (1,2)

Miklós Homolya, Lawrence Mitchell, Fabio Luporini, and David A. Ham. TSFC: a structure-preserving form compiler. SIAM Journal on Scientific Computing, 40(3):C401–C428, jan 2018. doi:10.1137/17m1130642.

[KM19]

Robert C. Kirby and Lawrence Mitchell. Code generation for generally mapped finite elements. ACM Transactions on Mathematical Software, 45(4):1–23, dec 2019. doi:10.1145/3361745.