9. 矩阵和向量的组装#
Firedrake 通过 TSFC [HMLH18] 生成局部组装内核 (如单元刚度矩阵的组装), 然后使用 PyOP2 构建全局内核. TSFC 调用 FInAT
[KM19] 生成基函数的求值公式, 并使用 loopy 生成代码.
9.1. 查看生成的 C 代码#
from firedrake import *
from textwrap import indent
mesh = RectangleMesh(10, 10, 1, 1)
V = FunctionSpace(mesh, 'CG', 1)
u, v = TrialFunction(V), TestFunction(V)
x, y = SpatialCoordinate(mesh)
f = Function(V, name='f').interpolate(sin(x))
f_tilde = conditional(f > 0.8, f, 0)
a = u*v*dx - f_tilde*v*dx
uh = Function(V, name='u_h')
prob = LinearVariationalProblem(lhs(a), rhs(a), uh)
solver = LinearVariationalSolver(prob)
solver.solve()
firedrake:WARNING OMP_NUM_THREADS is not set or is set to a value greater than 1, we suggest setting OMP_NUM_THREADS=1 to improve performance
9.1.1. 组装矩阵的 C 代码#
print("Code for mass matrix:\n")
assembler = solver._ctx._assemble_jac.__self__
for parloop in assembler.parloops(solver._ctx._jac):
kernel = parloop.global_kernel
local_kernel = kernel.local_kernel
print(indent(kernel.code_to_compile, " "))
Code for mass matrix:
#include <complex.h>
#include <math.h>
#include <petsc.h>
#include <petsc.h>
#include <stdint.h>
#include <stdbool.h>
#include <math.h>
static void form00_cell_integral(double *__restrict__ A, double const *__restrict__ coords);
static void form00_cell_integral(double *__restrict__ A, double const *__restrict__ coords)
{
double t0[3 * 3] = { 0.6666666666666666, 0.1666666666666667, 0.16666666666666666, 0.16666666666666669, 0.16666666666666669, 0.6666666666666665, 0.16666666666666669, 0.6666666666666666, 0.16666666666666666 };
double t1;
double t2;
double t3;
double t4[3] = { 0.16666666666666666, 0.16666666666666666, 0.16666666666666666 };
double t5;
double t6;
t1 = -1.0 * coords[0];
t2 = -1.0 * coords[1];
t3 = fabs((t1 + coords[2]) * (t2 + coords[5]) + -1.0 * (t1 + coords[4]) * (t2 + coords[3]));
for (int32_t ip = 0; ip <= 2; ++ip)
{
t5 = t4[ip] * t3;
for (int32_t j = 0; j <= 2; ++j)
{
t6 = t0[3 * ip + j] * t5;
for (int32_t k = 0; k <= 2; ++k)
A[3 * j + k] = A[3 * j + k] + t0[3 * ip + k] * t6;
}
}
}
void wrap_form00_cell_integral(int32_t const start, int32_t const end, Mat const mat0, double const *__restrict__ dat0, int32_t const *__restrict__ map0)
{
double t0[3 * 3];
double t1[3 * 2];
for (int32_t n = start; n <= -1 + end; ++n)
{
{
int32_t const i21 = 0;
for (int32_t i22 = 0; i22 <= 2; ++i22)
for (int32_t i23 = 0; i23 <= 1; ++i23)
t1[2 * i22 + i23] = dat0[2 * map0[3 * n + i22] + i23];
}
{
int32_t const i15 = 0;
for (int32_t i16 = 0; i16 <= 2; ++i16)
{
int32_t const i17 = 0;
{
int32_t const i18 = 0;
for (int32_t i19 = 0; i19 <= 2; ++i19)
{
int32_t const i20 = 0;
t0[3 * i16 + i19] = (double) (0.0);
}
}
}
}
form00_cell_integral(&(t0[0]), &(t1[0]));
MatSetValuesBlockedLocal(mat0, 3, &(map0[3 * n]), 3, &(map0[3 * n]), &(t0[0]), ADD_VALUES);
}
}
9.1.2. 组装右端项的 C 代码#
print("\n\nCode for right hand:\n")
residual_assembler = solver._ctx._assemble_residual.__self__
for parloop in residual_assembler.parloops(solver._ctx._F):
kernel = parloop.global_kernel
print(indent(kernel.code_to_compile, " "))
Code for right hand:
#include <complex.h>
#include <math.h>
#include <petsc.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdbool.h>
#include <math.h>
static void form0_cell_integral(double *__restrict__ A, double const *__restrict__ coords, double const *__restrict__ w_0, double const *__restrict__ w_1);
static void form0_cell_integral(double *__restrict__ A, double const *__restrict__ coords, double const *__restrict__ w_0, double const *__restrict__ w_1)
{
double t0[3 * 3] = { 0.6666666666666666, 0.1666666666666667, 0.16666666666666666, 0.16666666666666669, 0.16666666666666669, 0.6666666666666665, 0.16666666666666669, 0.6666666666666666, 0.16666666666666666 };
double t1;
double t2;
double t3;
double t4[3] = { 0.16666666666666666, 0.16666666666666666, 0.16666666666666666 };
double t5;
double t6;
t1 = -1.0 * coords[0];
t2 = -1.0 * coords[1];
t3 = fabs((t1 + coords[2]) * (t2 + coords[5]) + -1.0 * (t1 + coords[4]) * (t2 + coords[3]));
for (int32_t ip = 0; ip <= 2; ++ip)
{
t5 = t0[3 * ip] * w_0[0] + t0[1 + 3 * ip] * w_0[1] + t0[2 + 3 * ip] * w_0[2];
t6 = t4[ip] * t3 * (-1.0 * ((t5 > 0.8) ? t5 : (double) (0.0)) + t0[3 * ip] * w_1[0] + t0[1 + 3 * ip] * w_1[1] + t0[2 + 3 * ip] * w_1[2]);
for (int32_t j = 0; j <= 2; ++j)
A[j] = A[j] + t0[3 * ip + j] * t6;
}
}
void wrap_form0_cell_integral(int32_t const start, int32_t const end, double *__restrict__ dat0, double const *__restrict__ dat1, double const *__restrict__ dat2, double const *__restrict__ dat3, int32_t const *__restrict__ map0)
{
double t0[3];
double t1[3 * 2];
double t2[3];
double t3[3];
for (int32_t n = start; n <= -1 + end; ++n)
{
for (int32_t i19 = 0; i19 <= 2; ++i19)
{
{
int32_t const i23 = 0;
{
int32_t const i24 = 0;
t3[i19] = dat3[map0[3 * n + i19]];
}
}
{
int32_t const i21 = 0;
{
int32_t const i22 = 0;
t2[i19] = dat2[map0[3 * n + i19]];
}
}
{
int32_t const i18 = 0;
for (int32_t i20 = 0; i20 <= 1; ++i20)
t1[2 * i19 + i20] = dat1[2 * map0[3 * n + i19] + i20];
}
}
{
int32_t const i15 = 0;
for (int32_t i16 = 0; i16 <= 2; ++i16)
{
int32_t const i17 = 0;
t0[i16] = (double) (0.0);
}
}
form0_cell_integral(&(t0[0]), &(t1[0]), &(t2[0]), &(t3[0]));
{
int32_t const i13 = 0;
{
int32_t const i14 = 0;
for (int32_t i12 = 0; i12 <= 2; ++i12)
dat0[map0[3 * n + i12]] = dat0[map0[3 * n + i12]] + t0[i12];
}
}
}
}
9.2. 生成向量和矩阵组装代码的代码#
以下代码来自 firedrake/preconditioner/patch.py
, 它的作用为生成局部内核, 即单元上矩阵或残差的组装代码.
def matrix_funptr(form, state):
from firedrake.tsfc_interface import compile_form
test, trial = map(operator.methodcaller("function_space"), form.arguments())
if test != trial:
raise NotImplementedError("Only for matching test and trial spaces")
if state is not None:
interface = make_builder(dont_split=(state, ))
else:
interface = None
kernels = compile_form(form, "subspace_form", split=False, interface=interface)
cell_kernels = []
int_facet_kernels = []
for kernel in kernels:
kinfo = kernel.kinfo
# OK, now we've validated the kernel, let's build the callback
args = []
if kinfo.integral_type == "cell":
get_map = operator.methodcaller("cell_node_map")
kernels = cell_kernels
elif kinfo.integral_type == "interior_facet":
get_map = operator.methodcaller("interior_facet_node_map")
kernels = int_facet_kernels
else:
get_map = None
# build args
# ...
wrapper_knl_args = tuple(a.global_kernel_arg for a in args)
mod = op2.GlobalKernel(kinfo.kernel, wrapper_knl_args, subset=True)
kernels.append(CompiledKernel(mod.compile(iterset.comm), kinfo))
return cell_kernels, int_facet_kernels
9.3. 双线性形式#
9.3.1. 创建双线性形式#
import firedrake as fd
mesh = fd.RectangleMesh(10, 10, 1, 1)
V1 = fd.FunctionSpace(mesh, 'CG', 1)
V2 = fd.FunctionSpace(mesh, 'CG', 2)
U = V1*V2
u, v = fd.TrialFunction(U), fd.TestFunction(U)
g = fd.Function(U)
h = fd.Function(V1)
r = fd.Function(V2)
form = fd.inner(fd.grad(u[0]*h*g[0]), fd.grad(v[0]))*fd.dx + fd.inner(u[1]*g[1]*r, v[1])*fd.dx
9.3.2. 查看 Form 表达式#
from ufl.formatting import ufl2unicode
# The unicode string can not show correctly in jupyter file
ustr = ufl2unicode.ufl2unicode(form)
print(ustr)
∬[rest of domain] v⃗[1] w₁₉ u⃗[1] w⃗₁₅[1] + ∑[i{subscript_number(ii._count)}]((𝐠𝐫𝐚𝐝 v⃗)[0,i{subscript_number(ii._count)}] [w⃗₁₅[0] [u⃗[0] (𝐠𝐫𝐚𝐝 w₁₇)[i{subscript_number(ii._count)}] + [(𝐠𝐫𝐚𝐝 u⃗)[0,i{subscript_number(ii._count)}] ∀ i{subscript_number(ii._count)}][i{subscript_number(ii._count)}] w₁₇ ∀ i{subscript_number(ii._count)}][i{subscript_number(ii._count)}] + [(𝐠𝐫𝐚𝐝 w⃗₁₅)[0,i{subscript_number(ii._count)}] ∀ i{subscript_number(ii._count)}][i{subscript_number(ii._count)}] u⃗[0] w₁₇ ∀ i{subscript_number(ii._count)}][i{subscript_number(ii._count)}]) 𝐝𝐱
from ufl.utils.formatting import tree_format
print(tree_format(form))
Form:
Integral:
integral type: cell
subdomain id: everywhere
integrand:
Conj
Inner
(
Grad
Indexed
(
Argument(WithGeometry(MixedFunctionSpace(IndexedProxyFunctionSpace(<firedrake.mesh.MeshTopology object at 0x14d168c80>, FiniteElement('Lagrange', triangle, 1), name=None, index=0, component=None), IndexedProxyFunctionSpace(<firedrake.mesh.MeshTopology object at 0x14d168c80>, FiniteElement('Lagrange', triangle, 2), name=None, index=1, component=None), name='None_None'), Mesh(VectorElement(FiniteElement('Lagrange', triangle, 1), dim=2), 8)), 0, None)
MultiIndex((FixedIndex(0),))
)
Grad
Product
(
Indexed
(
Coefficient(WithGeometry(MixedFunctionSpace(IndexedProxyFunctionSpace(<firedrake.mesh.MeshTopology object at 0x14d168c80>, FiniteElement('Lagrange', triangle, 1), name=None, index=0, component=None), IndexedProxyFunctionSpace(<firedrake.mesh.MeshTopology object at 0x14d168c80>, FiniteElement('Lagrange', triangle, 2), name=None, index=1, component=None), name='None_None'), Mesh(VectorElement(FiniteElement('Lagrange', triangle, 1), dim=2), 8)), 15)
MultiIndex((FixedIndex(0),))
)
Product
(
Indexed
(
Argument(WithGeometry(MixedFunctionSpace(IndexedProxyFunctionSpace(<firedrake.mesh.MeshTopology object at 0x14d168c80>, FiniteElement('Lagrange', triangle, 1), name=None, index=0, component=None), IndexedProxyFunctionSpace(<firedrake.mesh.MeshTopology object at 0x14d168c80>, FiniteElement('Lagrange', triangle, 2), name=None, index=1, component=None), name='None_None'), Mesh(VectorElement(FiniteElement('Lagrange', triangle, 1), dim=2), 8)), 1, None)
MultiIndex((FixedIndex(0),))
)
Coefficient(WithGeometry(FunctionSpace(<firedrake.mesh.MeshTopology object at 0x14d168c80>, FiniteElement('Lagrange', triangle, 1), name=None), Mesh(VectorElement(FiniteElement('Lagrange', triangle, 1), dim=2), 8)), 17)
)
)
)
Integral:
integral type: cell
subdomain id: everywhere
integrand:
Product
(
Product
(
Coefficient(WithGeometry(FunctionSpace(<firedrake.mesh.MeshTopology object at 0x14d168c80>, FiniteElement('Lagrange', triangle, 2), name=None), Mesh(VectorElement(FiniteElement('Lagrange', triangle, 1), dim=2), 8)), 19)
Product
(
Indexed
(
Argument(WithGeometry(MixedFunctionSpace(IndexedProxyFunctionSpace(<firedrake.mesh.MeshTopology object at 0x14d168c80>, FiniteElement('Lagrange', triangle, 1), name=None, index=0, component=None), IndexedProxyFunctionSpace(<firedrake.mesh.MeshTopology object at 0x14d168c80>, FiniteElement('Lagrange', triangle, 2), name=None, index=1, component=None), name='None_None'), Mesh(VectorElement(FiniteElement('Lagrange', triangle, 1), dim=2), 8)), 1, None)
MultiIndex((FixedIndex(1),))
)
Indexed
(
Coefficient(WithGeometry(MixedFunctionSpace(IndexedProxyFunctionSpace(<firedrake.mesh.MeshTopology object at 0x14d168c80>, FiniteElement('Lagrange', triangle, 1), name=None, index=0, component=None), IndexedProxyFunctionSpace(<firedrake.mesh.MeshTopology object at 0x14d168c80>, FiniteElement('Lagrange', triangle, 2), name=None, index=1, component=None), name='None_None'), Mesh(VectorElement(FiniteElement('Lagrange', triangle, 1), dim=2), 8)), 15)
MultiIndex((FixedIndex(1),))
)
)
)
Conj
Indexed
(
Argument(WithGeometry(MixedFunctionSpace(IndexedProxyFunctionSpace(<firedrake.mesh.MeshTopology object at 0x14d168c80>, FiniteElement('Lagrange', triangle, 1), name=None, index=0, component=None), IndexedProxyFunctionSpace(<firedrake.mesh.MeshTopology object at 0x14d168c80>, FiniteElement('Lagrange', triangle, 2), name=None, index=1, component=None), name='None_None'), Mesh(VectorElement(FiniteElement('Lagrange', triangle, 1), dim=2), 8)), 0, None)
MultiIndex((FixedIndex(1),))
)
)
9.4. 生成局部内核#
相关代码: firedrake.tsfc_interface.compile_form
from firedrake.tsfc_interface import compile_form
kernels = compile_form(form, "subspace_form", split=False, interface=None)
通过 loopy
生成代码
import loopy
idx, kinfo = kernels[0]
code = loopy.generate_code_v2(kinfo[0].code)
# print the code
# print(code.device_code())
9.4.1. 具体构造过程#
from firedrake.parameters import parameters as default_parameters
from firedrake.tsfc_interface import TSFCKernel
from tsfc.ufl_utils import extract_firedrake_constants
parameters = default_parameters["form_compiler"].copy()
nargs = len(form.arguments())
iterable = ([(None, )*nargs, form], )
idx, f = iterable[0]
name = "test_form"
numbering = form.terminal_numbering()
coefficient_numbers = tuple(
numbering[c] for c in form.coefficients()
)
constant_numbers = tuple(
numbering[c] for c in extract_firedrake_constants(f)
)
prefix = name + "".join(map(str, (i for i in idx if i is not None)))
kinfos = TSFCKernel(f, prefix, parameters,
coefficient_numbers,
constant_numbers,
interface=None, diagonal=False).kernels
9.4.2. TSFCKernel.__init__
#
from tsfc import compile_form as tsfc_compile_form
from firedrake.tsfc_interface import as_pyop2_local_kernel, KernelInfo
tree = tsfc_compile_form(form=f, prefix=name, parameters=parameters,
interface=None,
diagonal=False)
_kernels = []
for kernel in tree:
coefficient_numbers_per_kernel = tuple(
(coefficient_numbers[index], subindices)
for index, subindices in kernel.coefficient_numbers
)
constant_numbers_per_kernel = constant_numbers
events = (kernel.event,)
pyop2_kernel = as_pyop2_local_kernel(kernel.ast, kernel.name,
len(kernel.arguments),
flop_count=kernel.flop_count,
events=events)
_kernels.append(KernelInfo(kernel=pyop2_kernel,
integral_type=kernel.integral_type,
oriented=kernel.oriented,
subdomain_id=kernel.subdomain_id,
domain_number=kernel.domain_number,
coefficient_numbers=coefficient_numbers_per_kernel,
constant_numbers=constant_numbers_per_kernel,
needs_cell_facets=False,
pass_layer_arg=False,
needs_cell_sizes=kernel.needs_cell_sizes,
arguments=kernel.arguments,
events=events))
9.4.3. tsfc.driver.compile_form
#
from tsfc.driver import compile_integral
from tsfc.parameters import default_parameters as tsfc_default_parameters, is_complex
from tsfc import fem, ufl_utils
complex_mode = parameters and is_complex(parameters.get("scalar_type"))
# Preprocess UFL form in a format suitable for TSFC
fd = ufl_utils.compute_form_data(form, complex_mode=complex_mode)
kernels = []
for integral_data in fd.integral_data:
kernel = compile_integral(integral_data, fd, prefix=name, parameters=parameters, interface=None, diagonal=False)
if kernel is not None:
kernels.append(kernel)
# fd.preprocessed_form.arguments()
print(type(fd.integral_data[0]))
print(fd.integral_data[0].integrals[0].metadata())
<class 'ufl.algorithms.domain_analysis.IntegralData'>
{'estimated_polynomial_degree': 8}
9.4.4. tsfc.driver.compile_integral
#
编译 ufl
积分表达式为组装核
interface
为 None
时, 默认使用 KernelBuilder
进行构建.
from tsfc.kernel_interface.firedrake_loopy import KernelBuilder
KernelBuilder.compile_integrand
调用
tsfc.kernel_interface.common.set_quad_rule
设置数值积分公式, 然后调用 tsfc.fem.compile_ufl
转化 ufl
表达式为 GEM
[HMLH18]. 这里会调用 FInAT
生成基函数在积分点处的求值公式.
builder.construct_integrals
应用数值积分公式生成相应表达式.
9.5. 生成全局内核#
from pyop2 import op2
from firedrake.utils import cached_property, complex_mode, IntType
from firedrake.preconditioners.patch import LocalDat, LocalMat
import operator
import numpy
test, trial = map(operator.methodcaller("function_space"), form.arguments())
准备全局参数
args = []
if kinfo.integral_type == "cell":
get_map = operator.methodcaller("cell_node_map")
elif kinfo.integral_type == "interior_facet":
get_map = operator.methodcaller("interior_facet_node_map")
else:
get_map = None
toset = op2.Set(1, comm=test.comm)
dofset = op2.DataSet(toset, 1)
arity = sum(m.arity*s.cdim
for m, s in zip(get_map(test),
test.dof_dset))
iterset = get_map(test).iterset
entity_node_map = op2.Map(iterset,
toset, arity,
values=numpy.zeros(iterset.total_size*arity, dtype=IntType))
mat = LocalMat(dofset)
arg = mat(op2.INC, (entity_node_map, entity_node_map))
args.append(arg)
mesh = form.ufl_domains()[kinfo.domain_number]
arg = mesh.coordinates.dat(op2.READ, get_map(mesh.coordinates))
args.append(arg)
for n, indices in kinfo.coefficient_numbers:
c = form.coefficients()[n]
for ind in indices:
c_ = c.subfunctions[ind]
map_ = get_map(c_)
arg = c_.dat(op2.READ, map_)
args.append(arg)
if kinfo.integral_type == "interior_facet":
arg = test.ufl_domain().interior_facets.local_facet_dat(op2.READ)
args.append(arg)
wrapper_knl_args = tuple(a.global_kernel_arg for a in args)
构建全局组装内核
mod = op2.GlobalKernel(kinfo.kernel, wrapper_knl_args, subset=True)
输出代码
print(mod.code_to_compile)
References
Miklós Homolya, Lawrence Mitchell, Fabio Luporini, and David A. Ham. TSFC: a structure-preserving form compiler. SIAM Journal on Scientific Computing, 40(3):C401–C428, jan 2018. doi:10.1137/17m1130642.
Robert C. Kirby and Lawrence Mitchell. Code generation for generally mapped finite elements. ACM Transactions on Mathematical Software, 45(4):1–23, dec 2019. doi:10.1145/3361745.