diff --git a/devito/ir/clusters/algorithms.py b/devito/ir/clusters/algorithms.py index 6399f12559..c52075afb9 100644 --- a/devito/ir/clusters/algorithms.py +++ b/devito/ir/clusters/algorithms.py @@ -259,10 +259,18 @@ def guard(clusters): # Separate out the indirect ConditionalDimensions, which only serve # the purpose of protecting from OOB accesses cds = [d for d in cds if not d.indirect] + modes = [cd.relation for cd in cds] + if modes.count('strict') > 1: + print(modes, cds, {m == 'strict' for m in modes}) + raise CompilationError("Only one `strict` condition" + "can be used in an equation") + elif 'strict' in modes: + mode = 'strict' + else: + mode = sympy.And if sympy.And in modes else sympy.Or # Chain together all `cds` conditions from all expressions in `c` guards = {} - mode = sympy.Or for cd in cds: # `BOTTOM` parent implies a guard that lives outside of # any iteration space, which corresponds to the placeholder None @@ -279,7 +287,6 @@ def guard(clusters): # Pull `cd` from any expr condition = guards.setdefault(k, []) - mode = mode and cd.relation for e in exprs: try: condition.append(e.conditionals[cd]) @@ -296,7 +303,10 @@ def guard(clusters): # Combination `mode` is And by default. # If all conditions are Or then Or combination `mode` is used. - guards = {d: mode(*v, evaluate=False) for d, v in guards.items()} + if mode == 'strict': + guards = {d: v[0] for d, v in guards.items()} + else: + guards = {d: mode(*v, evaluate=False) for d, v in guards.items()} # Construct a guarded Cluster processed.append(c.rebuild(exprs=exprs, guards=guards)) diff --git a/devito/ir/equations/equation.py b/devito/ir/equations/equation.py index 8d72704b79..bbf37fb836 100644 --- a/devito/ir/equations/equation.py +++ b/devito/ir/equations/equation.py @@ -11,7 +11,9 @@ ) from devito.symbolics import IntDiv, limits_mapper, uxreplace from devito.tools import Pickable, Tag, frozendict -from devito.types import Eq, Inc, ReduceMax, ReduceMin, ReduceMinMax, relational_min +from devito.types import ( + Eq, Inc, ReduceMax, ReduceMin, ReduceMinMax, relational_min, relational_shift +) __all__ = [ 'ClusterizedEq', @@ -222,7 +224,7 @@ def __new__(cls, *args, **kwargs): relations=ordering.relations, mode='partial') ispace = IterationSpace(intervals, iterators) - # Construct the conditionals and replace the ConditionalDimensions in `expr` + # Construct the conditionals conditionals = {} for d in ordering: if not d.is_Conditional: @@ -234,13 +236,31 @@ def __new__(cls, *args, **kwargs): if d._factor is not None: cond = d.relation(cond, GuardFactor(d)) conditionals[d] = cond + + # Merge conditionals when possible. E.g if we have an implicit_dim + # and there is a dimension with the same parent, we ca merged + # its condition + for d in input_expr.implicit_dims: + if d not in conditionals: + continue + for cd in dict(conditionals): + if cd.parent == d.parent and cd != d: + cond = conditionals.pop(d) + if d.relation == 'strict': + conditionals[cd] = conditionals[d] = cond + else: + mode = cd.relation and d.relation + conditionals[cd] = mode(cond, conditionals[cd]) + break + + # Replace the ConditionalDimensions in `expr` + for d, cond in conditionals.items(): # Replace dimension with index index = d.index if d.condition is not None and d in expr.free_symbols: - index = index - relational_min(d.condition, d.parent) - expr = uxreplace(expr, {d: IntDiv(index, d.symbolic_factor)}) - - conditionals = frozendict(conditionals) + index = index - relational_min(cond, d.parent) + shift = relational_shift(cond, d.parent) + expr = uxreplace(expr, {d: IntDiv(index, d.symbolic_factor) + shift}) # Lower all Differentiable operations into SymPy operations rhs = diff2sympy(expr.rhs) diff --git a/devito/ir/support/vector.py b/devito/ir/support/vector.py index 02e26e2a02..79d84605ba 100644 --- a/devito/ir/support/vector.py +++ b/devito/ir/support/vector.py @@ -128,6 +128,7 @@ def __lt__(self, other): return True elif q_positive(i): return False + raise TypeError("Non-comparable index functions") from e return False @@ -164,6 +165,7 @@ def __gt__(self, other): return True elif q_negative(i): return False + raise TypeError("Non-comparable index functions") from e return False @@ -203,6 +205,7 @@ def __le__(self, other): return True elif q_positive(i): return False + raise TypeError("Non-comparable index functions") from e # Note: unlike `__lt__`, if we end up here, then *it is* <=. For example, diff --git a/devito/passes/clusters/asynchrony.py b/devito/passes/clusters/asynchrony.py index e32190ddef..5bdc7b63b9 100644 --- a/devito/passes/clusters/asynchrony.py +++ b/devito/passes/clusters/asynchrony.py @@ -1,6 +1,6 @@ from collections import defaultdict -from sympy import true +from sympy import Mod, true from devito.ir import ( Backward, Forward, GuardBoundNext, PrefetchUpdate, Queue, ReleaseLock, SyncArray, @@ -78,7 +78,8 @@ def callback(self, clusters, prefix): d = self.key0(c0) if d is not dim: continue - + if d in c0.guards and not c0.guards[d].has(Mod): + continue protected = self._schedule_waitlocks(c0, d, clusters, locks, syncs) self._schedule_withlocks(c0, d, protected, locks, syncs) @@ -193,7 +194,7 @@ def memcpy_prefetch(clusters, key0, sregistry): if c.properties.is_prefetchable(d._defines): _actions_from_update_memcpy(c, d, clusters, actions, sregistry) elif d.is_Custom and is_integer(c.ispace[d].size): - _actions_from_init(c, d, actions) + _actions_from_init(c, d, clusters, actions) # Attach the computed Actions processed = [] @@ -214,7 +215,7 @@ def memcpy_prefetch(clusters, key0, sregistry): return processed -def _actions_from_init(c, d, actions): +def _actions_from_init(c, d, clusters, actions): e = c.exprs[0] function = e.rhs.function target = e.lhs.function diff --git a/devito/passes/clusters/buffering.py b/devito/passes/clusters/buffering.py index 2a8c78e7fc..3f6ea45da6 100644 --- a/devito/passes/clusters/buffering.py +++ b/devito/passes/clusters/buffering.py @@ -1,9 +1,9 @@ from collections import defaultdict, namedtuple from functools import cached_property -from itertools import chain +from itertools import chain, groupby import numpy as np -from sympy import S +from sympy import Mod, S, simplify from devito.exceptions import CompilationError from devito.ir import ( @@ -14,7 +14,7 @@ from devito.passes.clusters.utils import is_memcpy from devito.symbolics import IntDiv, retrieve_functions, uxreplace from devito.tools import ( - Stamp, as_mapper, as_tuple, filter_ordered, flatten, frozendict, is_integer, + Stamp, as_list, as_mapper, as_tuple, filter_ordered, flatten, frozendict, is_integer, timed_pass ) from devito.types import Array, CustomDimension, Eq, ModuloDimension @@ -142,18 +142,22 @@ def callback(self, clusters, prefix): return clusters d = prefix[-1].dim - key = lambda f, *args: f in self.mapper + key = lambda f, *args: any(f == ff for ff, _ in self.mapper) bfmap = map_buffered_functions(clusters, key) # A BufferDescriptor is a simple data structure storing additional # information about a buffer, harvested from the subset of `clusters` # that access it - descriptors = {b: BufferDescriptor(f, b, bfmap[f]) - for f, b in self.mapper.items() - if f in bfmap} + descriptors = {} + for (f, g), b in self.mapper.items(): + if f in bfmap: + fc = [c for c in bfmap[f] if c.guards == g] + descriptors.setdefault(b, []).append(BufferDescriptor(f, b, fc, g)) # Are we inside the right `d`? - descriptors = {b: v for b, v in descriptors.items() if d in v.itdims} + descriptors = {b: [vi for vi in v if d in vi.itdims] + for b, v in descriptors.items()} + descriptors = {b: v for b, v in descriptors.items() if v} if not descriptors: return clusters @@ -168,22 +172,29 @@ def callback(self, clusters, prefix): # Substitution rules to replace buffered Functions with buffers # E.g., `usave[time+1, x+1, y+1] -> ub0[t1, x+1, y+1]` subs = {} - for b, v in descriptors.items(): - accesses = chain(*[c.scope[v.f] for c in v.clusters]) - index_mapper = {i: mds[(v.xd, i)] for i in v.indices} - for a in accesses: - subs[a.access] = b.indexed[[index_mapper.get(i, i) for i in a]] + for b, vb in descriptors.items(): + for v in vb: + for c in v.clusters: + if c.guards.get(d) != v.guards.get(d): + continue + subs.setdefault(c, {}) + accesses = c.scope[v.f] + index_mapper = {i: mds[(v.xd, i)] for i in v.indices} + for a in accesses: + subs[c][a.access] = b.indexed[[index_mapper.get(i, i) for i in a]] processed = [] for c in clusters: # If a buffer is read but never written, then we need to add # an Eq to step through the next slot # E.g., `ub[0, x] = usave[time+2, x]` - for _, v in descriptors.items(): + for v in chain.from_iterable(descriptors.values()): if not v.is_readonly: continue if c not in v.firstread: continue + if c.guards.get(d) != v.guards.get(d): + continue idxf = v.last_idx[c] idxb = mds[(v.xd, idxf)] @@ -201,9 +212,9 @@ def callback(self, clusters, prefix): guards = c.guards.xandg(v.xd, GuardBound(0, v.first_idx.f)) else: guards = c.guards - properties = c.properties.sequentialize(d) - if not isinstance(d, BufferDimension): + g = c.guards.get(d, None) + if not isinstance(d, BufferDimension) and (g is None or g.has(Mod)): properties = properties.prefetchable(d) # `c` may be a HaloTouch Cluster, so with no vision of the `bdims` properties = properties.parallelize(v.bdims).affine(v.bdims) @@ -213,7 +224,7 @@ def callback(self, clusters, prefix): processed.append(Cluster(expr, ispace, guards, properties, syncs)) # Substitute the buffered Functions with the buffers - exprs = [uxreplace(e, subs) for e in c.exprs] + exprs = [uxreplace(e, subs.get(c, {})) for e in c.exprs] ispace = c.ispace.augment(subiters) properties = c.properties.sequentialize(d) processed.append( @@ -222,11 +233,13 @@ def callback(self, clusters, prefix): # Append the copy-back if `c` is the last-write of some buffers # E.g., `usave[time+1, x] = ub[t1, x]` - for _, v in descriptors.items(): + for v in chain.from_iterable(descriptors.values()): if v.is_readonly: continue if c not in v.lastwrite: continue + if c.guards.get(d) != v.guards.get(d): + continue idxf = v.last_idx[c] idxb = mds[(v.xd, idxf)] @@ -261,36 +274,37 @@ def callback(self, clusters, prefix): return init + processed def _optimize(self, clusters, descriptors): - for b, v in descriptors.items(): - if v.is_writeonly: - # `b` might be written by multiple, potentially mutually - # exclusive, equations. For example, two equations that have or - # will have complementary guards, hence only one will be - # executed. In such a case, we can split the equations over - # separate IterationSpaces - key0 = lambda: Stamp() - elif v.is_readonly: - # `b` is read multiple times -- this could just be the case of - # coupled equations, so we more cautiously perform a - # "buffer-wise" splitting of the IterationSpaces (i.e., only - # relevant if there are at least two read-only buffers) - stamp = Stamp() - key0 = lambda: stamp # noqa: B023 - else: - continue - - processed = [] - for c in clusters: - if b not in c.functions: - processed.append(c) + for b, vb in descriptors.items(): + for v in vb: + if v.is_writeonly: + # `b` might be written by multiple, potentially mutually + # exclusive, equations. For example, two equations that have or + # will have complementary guards, hence only one will be + # executed. In such a case, we can split the equations over + # separate IterationSpaces + key0 = lambda: Stamp() + elif v.is_readonly: + # `b` is read multiple times -- this could just be the case of + # coupled equations, so we more cautiously perform a + # "buffer-wise" splitting of the IterationSpaces (i.e., only + # relevant if there are at least two read-only buffers) + stamp = Stamp() + key0 = lambda: stamp # noqa: B023 + else: continue - key1 = lambda d: not d._defines & v.dim._defines # noqa: B023 - dims = c.ispace.project(key1).itdims - ispace = c.ispace.lift(dims, key0()) - processed.append(c.rebuild(ispace=ispace)) + processed = [] + for c in clusters: + if b not in c.functions: + processed.append(c) + continue + + key1 = lambda d: not d._defines & v.dim._defines # noqa: B023 + dims = c.ispace.project(key1).itdims + ispace = c.ispace.lift(dims, key0()) + processed.append(c.rebuild(ispace=ispace)) - clusters = processed + clusters = processed return clusters @@ -306,7 +320,7 @@ def _reuse(self, init, clusters, descriptors): cbk = lambda v: v mapper = as_mapper(descriptors, key=lambda b: b._signature) - mapper = {k: cbk(v) for k, v in mapper.items() if cbk(v)} + mapper = {k: as_list([cbk(v) for v in vb if cbk(v)]) for k, vb in mapper.items()} subs = {} drop = set() @@ -357,60 +371,75 @@ def generate_buffers(clusters, key, sregistry, options, **kwargs): # {buffered Function -> Buffer} xds = {} mapper = {} + extras = {} for f, clusters in bfmap.items(): - exprs = flatten(c.exprs for c in clusters) - - bdims = key(f, exprs) - - dims = [d for d in f.dimensions if d not in bdims] - if len(dims) != 1: - raise CompilationError(f"Unsupported multi-dimensional `buffering` " - f"required by `{f}`") - dim = dims.pop() - - if is_buffering(exprs): - # Multi-level buffering - # NOTE: a bit rudimentary (we could go through the exprs one by one - # instead), but it's much shorter this way - buffers = [f for f in retrieve_functions(exprs) if f.is_Array] - assert len(buffers) == 1, "Unexpected form of multi-level buffering" - buffer, = buffers - xd = buffer.indices[dim] - else: - size = infer_buffer_size(f, dim, clusters) - - if async_degree is not None: - if async_degree < size: - warning( - 'Ignoring provided asynchronous degree as it would be ' - f'too small for the required buffer (provided {async_degree}, ' - f'but need at least {size} for `{f.name}`)' - ) - else: - size = async_degree + for k, ck in groupby(clusters, key=lambda c: c.guards): + ck = list(ck) + exprs = flatten(c.exprs for c in ck) + + bdims = key(f, exprs) + + dims = [d for d in f.dimensions if d not in bdims] + if len(dims) != 1: + raise CompilationError(f"Unsupported multi-dimensional `buffering` " + f"required by `{f}`") + dim = dims.pop() + if k and not dim._defines & k.keys(): + extras.setdefault(f, []).append(k) + continue - # A special CustomDimension to use in place of `dim` in the buffer - try: - xd = xds[(dim, size)] - except KeyError: - name = sregistry.make_name(prefix='db') - xd = xds[(dim, size)] = BufferDimension(name, 0, size-1, size, dim) - - # The buffer dimensions - dimensions = list(f.dimensions) - assert dim in f.dimensions - dimensions[dimensions.index(dim)] = xd - - # Finally create the actual buffer - cls = callback or Array - name = sregistry.make_name(prefix=f'{f.name}b') - # We specify the padding to match the input Function's one, so that - # the array can be used in place of the Function with valid strides - # Plain Array do not track mapped so we default to no padding - padding = 0 if cls is Array else f.padding - mapper[f] = cls(name=name, dimensions=dimensions, dtype=f.dtype, - padding=padding, grid=f.grid, halo=f.halo, - space='mapped', mapped=f, f=f) + if is_buffering(exprs): + # Multi-level buffering + # NOTE: a bit rudimentary (we could go through the exprs one by one + # instead), but it's much shorter this way + buffers = [f for f in retrieve_functions(exprs) if f.is_Array] + assert len(buffers) == 1, "Unexpected form of multi-level buffering" + buffer, = buffers + xd = buffer.indices[dim] + else: + + size = infer_buffer_size(f, dim, ck) + + if async_degree is not None: + if async_degree < size: + warning( + 'Ignoring provided asynchronous degree as it would be ' + 'too small for the required buffer' + f' (provided {async_degree}, ' + f'but need at least {size} for `{f.name}`)' + ) + else: + size = async_degree + + # A special CustomDimension to use in place of `dim` in the buffer + try: + xd = xds[(dim, size, k)] + except KeyError: + name = sregistry.make_name(prefix='db') + xd = xds[(dim, size, k)] = BufferDimension(name, 0, size-1, size, dim) + + # The buffer dimensions + dimensions = list(f.dimensions) + assert dim in f.dimensions + dimensions[dimensions.index(dim)] = xd + + # Finally create the actual buffer + cls = callback or Array + name = sregistry.make_name(prefix=f'{f.name}b') + # We specify the padding to match the input Function's one, so that + # the array can be used in place of the Function with valid strides + # Plain Array do not track mapped so we default to no padding + padding = 0 if cls is Array else f.padding + mapper[(f, k)] = cls(name=name, dimensions=dimensions, dtype=f.dtype, + padding=padding, grid=f.grid, halo=f.halo, + space='mapped', mapped=f, f=f) + + for f, k in extras.items(): + for (ff, kk) in dict(mapper): + if f == ff: + for ki in k: + if ki.keys() & set(mapper[(ff, kk)].dimensions): + mapper[(f, ki)] = mapper[(ff, kk)] return mapper @@ -430,10 +459,11 @@ def map_buffered_functions(clusters, key): class BufferDescriptor: - def __init__(self, f, b, clusters): + def __init__(self, f, b, clusters, guards): self.f = f self.b = b self.clusters = clusters + self.guards = guards self.xd, = b.find(BufferDimension) self.bdims = tuple(d for d in b.dimensions if d is not self.xd) @@ -443,7 +473,7 @@ def __init__(self, f, b, clusters): self.indices = extract_indices(f, self.dim, clusters) def __repr__(self): - return f"Descriptor[{self.f} -> {self.b}]" + return f"Descriptor[{self.f} -> {self.b}], {self.guards}" @property def size(self): @@ -658,7 +688,7 @@ def make_mds(descriptors, prefix, sregistry): inspecting all buffers so that ModuloDimensions are reused when possible. """ mds = defaultdict(int) - for v in descriptors.values(): + for v in chain.from_iterable(descriptors.values()): size = v.xd.symbolic_size if size == 1: @@ -675,7 +705,7 @@ def make_mds(descriptors, prefix, sregistry): key = lambda i: -np.inf if i - p == 0 else (i - p) # noqa: B023 indices = sorted(v.indices, key=key) - for i in indices: + for k, i in enumerate(indices): k = (v.xd, i) if k in mds: continue @@ -700,42 +730,42 @@ def init_buffers(descriptors, options): init_onwrite = options['buf-init-onwrite'] init = [] - for b, v in descriptors.items(): - f = v.f - - if v.is_read: - # Special case: avoid initialization in the case of double (or - # multiple) buffering because it's completely unnecessary - if v.is_double_buffering: - continue - - lhs = b.indexify()._subs(v.xd, v.first_idx.b) - rhs = f.indexify()._subs(v.dim, v.first_idx.f) + for b, vb in descriptors.items(): + for v in vb: + f = v.f + + if v.is_read: + # Special case: avoid initialization in the case of double (or + # multiple) buffering because it's completely unnecessary + if v.is_double_buffering: + continue + lhs = b.indexify()._subs(v.xd, v.first_idx.b) + rhs = f.indexify()._subs(v.dim, v.first_idx.f) - elif v.is_write and init_onwrite(f): - lhs = b.indexify() - rhs = S.Zero + elif v.is_write and init_onwrite(f): + lhs = b.indexify() + rhs = S.Zero - else: - continue + else: + continue - expr = Eq(lhs, rhs) - expr = lower_exprs(expr) + expr = Eq(lhs, rhs) + expr = lower_exprs(expr) - ispace = v.write_to + ispace = v.write_to - guards = {} - guards[None] = GuardBound(v.dim.root.symbolic_min, v.dim.root.symbolic_max) - if v.is_read: - guards[v.xd] = GuardBound(0, v.first_idx.f) + guards = {} + guards[None] = GuardBound(v.dim.root.symbolic_min, v.dim.root.symbolic_max) + if v.is_read: + guards[v.xd] = GuardBound(0, v.first_idx.f) - properties = Properties() - properties = properties.affine(ispace.itdims) - properties = properties.parallelize(ispace.itdims) + properties = Properties() + properties = properties.affine(ispace.itdims) + properties = properties.parallelize(ispace.itdims) - syncs = {None: [InitArray(None, b)]} + syncs = {None: [InitArray(None, b)]} - init.append(Cluster(expr, ispace, guards, properties, syncs)) + init.append(Cluster(expr, ispace, guards, properties, syncs)) return init @@ -775,7 +805,7 @@ def infer_buffer_size(f, dim, clusters): slots = [Vector(i) for i in slots] size = int((vmax(*slots) - vmin(*slots) + 1)[0]) - return size + return simplify(size) def offset_from_centre(d, indices): diff --git a/devito/types/dimension.py b/devito/types/dimension.py index 41c1f90b88..f5bb1e3757 100644 --- a/devito/types/dimension.py +++ b/devito/types/dimension.py @@ -912,6 +912,45 @@ class ConditionalDimension(DerivedDimension): } } + A ConditionalDimension may also be combined with other ConditionalDimensions + appearing in the same equation (for instance via ``implicit_dims``). The + ``relation`` argument controls how the individual conditions are merged: + + * ``And`` (default): all conditions must hold (mutually exclusive merging). + * ``Or``: any one condition is enough for the if-branch to fire. + * ``'strict'``: this condition takes precedence over every other condition + attached to the same equation. At most one strict condition is allowed + per equation. + + The example below sets every even ``x`` index to 1 via subsampling, and + additionally forces the right ``x`` edge to be written by combining the two + conditions with ``Or``. + + >>> from sympy import Or + >>> from devito import Grid, CondEq + >>> grid = Grid(shape=(10, 10)) + >>> x, y = grid.dimensions + >>> c_even = ConditionalDimension(name='ceven', parent=x, factor=2, relation=Or) + >>> c_edge = ConditionalDimension(name='cedge', parent=x, + ... condition=CondEq(x, x.symbolic_max), + ... relation=Or) + >>> f = Function(name='f', grid=grid, dimensions=(c_even, y), + ... shape=(5, 10), space_order=0) + >>> op = Operator(Eq(f, 1, implicit_dims=c_edge)) + + The Operator generates the following for-loop (pseudocode), where the two + ``Or`` conditions are fused into a single guard. + + .. code-block:: C + + for (int x = x_m; x <= x_M; x += 1) { + if (x == x_M || x%cevenf == 0) { + for (int y = y_m; y <= y_M; y += 1) { + f[x / cevenf][y] = 1; + } + } + } + """ is_NonlinearDerived = True diff --git a/devito/types/relational.py b/devito/types/relational.py index 731ec29bc7..bf2e625a41 100644 --- a/devito/types/relational.py +++ b/devito/types/relational.py @@ -3,7 +3,8 @@ import sympy -__all__ = ['Ge', 'Gt', 'Le', 'Lt', 'Ne', 'relational_max', 'relational_min'] +__all__ = ['Ge', 'Gt', 'Le', 'Lt', 'Ne', 'relational_max', 'relational_min', + 'relational_shift'] class AbstractRel: @@ -37,7 +38,7 @@ class Le(AbstractRel, sympy.Le): rhs : expr-like, optional The right-hand side. Defaults to 0. subdomain : SubDomain, optional - To restrict the evalaution of the relation to a particular sub-region in the + To restrict the evaluation of the relation to a particular sub-region in the computational domain. Examples @@ -72,7 +73,7 @@ class Lt(AbstractRel, sympy.Lt): rhs : expr-like, optional The right-hand side. Defaults to 0. subdomain : SubDomain, optional - To restrict the evalaution of the relation to a particular sub-region in the + To restrict the evaluation of the relation to a particular sub-region in the computational domain. Examples @@ -107,7 +108,7 @@ class Ge(AbstractRel, sympy.Ge): rhs : expr-like, optional The right-hand side. Defaults to 0. subdomain : SubDomain, optional - To restrict the evalaution of the relation to a particular sub-region in the + To restrict the evaluation of the relation to a particular sub-region in the computational domain. Examples @@ -142,7 +143,7 @@ class Gt(AbstractRel, sympy.Gt): rhs : expr-like, optional The right-hand side. Defaults to 0. subdomain : SubDomain, optional - To restrict the evalaution of the relation to a particular sub-region in the + To restrict the evaluation of the relation to a particular sub-region in the computational domain. Examples @@ -177,7 +178,7 @@ class Ne(AbstractRel, sympy.Ne): rhs : expr-like, optional The right-hand side. Defaults to 0. subdomain : SubDomain, optional - To restrict the evalaution of the relation to a particular sub-region in the + To restrict the evaluation of the relation to a particular sub-region in the computational domain. Examples @@ -291,3 +292,40 @@ def _(expr, s): return expr.gts else: return sympy.S.Infinity + + +def relational_shift(expr, s): + """ + Infer shift incurred by the expression. Generally only + applies when a CondEq is used as it adds a single value. + """ + if not expr.has(s): + return 0 + + return _relational_shift(expr, s) + + +@singledispatch +def _relational_shift(s, expr): + return 0 + + +@_relational_shift.register(sympy.Or) +@_relational_shift.register(sympy.And) +def _(expr, s): + return sum([_relational_shift(e, s) for e in expr.args]) + + +@_relational_shift.register(sympy.Eq) +def _(expr, s): + if isinstance(expr.lhs, sympy.Mod): + return 0 + + try: + # Not a stepping dimension, no shift + assert not expr.lhs.is_Time + return 0 + except (AttributeError, AssertionError): + # Stepping dimension (time), requires shift + from devito.symbolics.extended_dtypes import INT + return INT(Ge(*expr.args)) diff --git a/examples/userapi/05_conditional_dimension.ipynb b/examples/userapi/05_conditional_dimension.ipynb index f0a7a1b531..a8e628545b 100644 --- a/examples/userapi/05_conditional_dimension.ipynb +++ b/examples/userapi/05_conditional_dimension.ipynb @@ -231,6 +231,45 @@ " }\n", " }\n", "\n", + " A ConditionalDimension may also be combined with other ConditionalDimensions\n", + " appearing in the same equation (for instance via ``implicit_dims``). The\n", + " ``relation`` argument controls how the individual conditions are merged:\n", + "\n", + " * ``And`` (default): all conditions must hold (mutually exclusive merging).\n", + " * ``Or``: any one condition is enough for the if-branch to fire.\n", + " * ``'strict'``: this condition takes precedence over every other condition\n", + " attached to the same equation. At most one strict condition is allowed\n", + " per equation.\n", + "\n", + " The example below sets every even ``x`` index to 1 via subsampling, and\n", + " additionally forces the right ``x`` edge to be written by combining the two\n", + " conditions with ``Or``.\n", + "\n", + " >>> from sympy import Or\n", + " >>> from devito import Grid, CondEq\n", + " >>> grid = Grid(shape=(10, 10))\n", + " >>> x, y = grid.dimensions\n", + " >>> c_even = ConditionalDimension(name='ceven', parent=x, factor=2, relation=Or)\n", + " >>> c_edge = ConditionalDimension(name='cedge', parent=x,\n", + " ... condition=CondEq(x, x.symbolic_max),\n", + " ... relation=Or)\n", + " >>> f = Function(name='f', grid=grid, dimensions=(c_even, y),\n", + " ... shape=(5, 10), space_order=0)\n", + " >>> op = Operator(Eq(f, 1, implicit_dims=c_edge))\n", + "\n", + " The Operator generates the following for-loop (pseudocode), where the two\n", + " ``Or`` conditions are fused into a single guard.\n", + "\n", + " .. code-block:: C\n", + "\n", + " for (int x = x_m; x <= x_M; x += 1) {\n", + " if (x == x_M || x%cevenf == 0) {\n", + " for (int y = y_m; y <= y_M; y += 1) {\n", + " f[x / cevenf][y] = 1;\n", + " }\n", + " }\n", + " }\n", + "\n", " \n" ] } @@ -566,13 +605,6 @@ "execution_count": 9, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Operator `Kernel` ran in 0.01 s\n" - ] - }, { "name": "stdout", "output_type": "stream", @@ -589,7 +621,20 @@ " }\n", " }\n", "}\n", - "STOP(section0,timers)\n", + "STOP(section0,timers)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Operator `Kernel` ran in 0.01 s\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "\n", " Data in g \n", " [ 0. 1. 2. 3. 4. 5. 6. 7. 8. 9. 10. 11. 12. 13. 14. 15.]\n", @@ -833,7 +878,7 @@ "source": [ "# Example C: Combining ConditionalDimension\n", "\n", - "In some cases, a `ConditionalDimension` might be used in combination with an implicit_dim to handle specific cases. This combination can be made mutually exclusive (And) or inclusive (Or).\n", + "In some cases, a `ConditionalDimension` might be used in combination with an implicit_dim to handle specific cases. This combination can be made mutually exclusive (And, default), inclusive (Or) or `strict`. When the relation is set to `strict` then this condition takes precedence over all other conditions. There cannot be more than one strict condition within an equation.\n", "\n", "As an example, let's consider the following case:\n", "\n", @@ -852,8 +897,12 @@ "\n", "x, y = grid.dimensions\n", "c_even = ConditionalDimension(name='ceven', parent=x, factor=2, relation=Or)\n", - "c_edge = ConditionalDimension(name='cedge', parent=grid.dimensions[0], condition=CondEq(x, x.symbolic_max), relation=Or)\n", - "c_edge_a = ConditionalDimension(name='cedge', parent=grid.dimensions[0], condition=CondEq(x, x.symbolic_max-1))\n", + "c_edge = ConditionalDimension(name='cedge', parent=grid.dimensions[0],\n", + " condition=CondEq(x, x.symbolic_max), relation=Or)\n", + "c_edge_a = ConditionalDimension(name='cedge', parent=grid.dimensions[0],\n", + " condition=CondEq(x, x.symbolic_max-1))\n", + "c_edge_s = ConditionalDimension(name='cedge', parent=grid.dimensions[0],\n", + " condition=CondEq(x, x.symbolic_max-2), relation='strict')\n", "\n", "f = Function(name=\"f\", grid=grid, dimensions=(c_even, y), shape=(5, 10), space_order=0)\n", "\n", @@ -943,7 +992,7 @@ "data": { "text/plain": [ "PerformanceSummary([(PerfKey(name='section0', rank=None),\n", - " PerfEntry(time=0.000124, gflopss=0.0, gpointss=0.0, oi=0.0, ops=0, itershapes=[]))])" + " PerfEntry(time=0.000153, gflopss=0.0, gpointss=0.0, oi=0.0, ops=0, itershapes=[]))])" ] }, "execution_count": 14, @@ -1062,7 +1111,7 @@ "data": { "text/plain": [ "PerformanceSummary([(PerfKey(name='section0', rank=None),\n", - " PerfEntry(time=0.000129, gflopss=0.0, gpointss=0.0, oi=0.0, ops=0, itershapes=[]))])" + " PerfEntry(time=0.00010899999999999999, gflopss=0.0, gpointss=0.0, oi=0.0, ops=0, itershapes=[]))])" ] }, "execution_count": 16, @@ -1101,6 +1150,130 @@ "assert np.all(f.data[-1, :] == 1)" ] }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Operator `Kernel` ran in 0.01 s\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/* Devito generated code for Operator `Kernel` */\n", + "\n", + "#define _POSIX_C_SOURCE 200809L\n", + "#define START(S) struct timeval start_ ## S , end_ ## S ; gettimeofday(&start_ ## S , NULL);\n", + "#define STOP(S,T) gettimeofday(&end_ ## S, NULL); T->S += (double)(end_ ## S .tv_sec-start_ ## S.tv_sec)+(double)(end_ ## S .tv_usec-start_ ## S .tv_usec)/1000000;\n", + "\n", + "#include \"stdlib.h\"\n", + "#include \"math.h\"\n", + "#include \"sys/time.h\"\n", + "#include \"xmmintrin.h\"\n", + "#include \"pmmintrin.h\"\n", + "#include \"omp.h\"\n", + "\n", + "struct dataobj\n", + "{\n", + " void *restrict data;\n", + " int * size;\n", + " unsigned long nbytes;\n", + " unsigned long * npsize;\n", + " unsigned long * dsize;\n", + " int * hsize;\n", + " int * hofs;\n", + " int * oofs;\n", + " void * dmap;\n", + "} ;\n", + "\n", + "struct profiler\n", + "{\n", + " double section0;\n", + "} ;\n", + "\n", + "\n", + "int Kernel(struct dataobj *restrict f_vec, const int x_M, const int cevenf, const int x_m, const int y_M, const int y_m, const int nthreads, struct profiler * timers)\n", + "{\n", + " float (*restrict f)[f_vec->size[1]] __attribute__ ((aligned (64))) = (float (*)[f_vec->size[1]]) f_vec->data;\n", + "\n", + " /* Flush denormal numbers to zero in hardware */\n", + " _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);\n", + " _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);\n", + "\n", + " START(section0)\n", + " #pragma omp parallel num_threads(nthreads)\n", + " {\n", + " #pragma omp for schedule(static,1)\n", + " for (int x = x_m; x <= x_M; x += 1)\n", + " {\n", + " if (x == x_M - 2)\n", + " {\n", + " #pragma omp simd aligned(f:64)\n", + " for (int y = y_m; y <= y_M; y += 1)\n", + " {\n", + " f[x / cevenf][y] = 1;\n", + " }\n", + " }\n", + " }\n", + " }\n", + " STOP(section0,timers)\n", + "\n", + " return 0;\n", + "}\n", + "\n" + ] + }, + { + "data": { + "text/plain": [ + "PerformanceSummary([(PerfKey(name='section0', rank=None),\n", + " PerfEntry(time=0.000108, gflopss=0.0, gpointss=0.0, oi=0.0, ops=0, itershapes=[]))])" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# NBVAL_IGNORE_OUTPUT\n", + "# In this case, since the edge condition is strict, it will be the only conditions\n", + "op = Operator(Eq(f, 1, implicit_dims=c_edge_s))\n", + "print(op)\n", + "f.data.fill(0)\n", + "op()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n", + " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n", + " [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]\n" + ] + } + ], + "source": [ + "print(f.data)\n", + "assert np.all(f.data[:-2, :] == 0)\n", + "assert np.all(f.data[-1, :] == 0)\n", + "assert np.all(f.data[-2, :] == 1)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -1114,7 +1287,7 @@ "metadata": { "celltoolbar": "Tags", "kernelspec": { - "display_name": "devitoenv (3.11.0)", + "display_name": "Python 3", "language": "python", "name": "python3" }, diff --git a/tests/test_buffering.py b/tests/test_buffering.py index 0a0037c223..80e1f281ee 100644 --- a/tests/test_buffering.py +++ b/tests/test_buffering.py @@ -754,7 +754,7 @@ def test_buffer_reuse(): assert all(np.all(vsave.data[i-1] == i + 1) for i in range(1, nt + 1)) -def test_multi_cond(): +def test_multi_cond_v0(): grid = Grid((3, 3)) nt = 5 @@ -764,24 +764,99 @@ def test_multi_cond(): ntmod = (nt - 1) * factor + 1 ct1 = ConditionalDimension(name="ct1", parent=grid.time_dim, - factor=factor, relation=Or) + factor=factor) ctend = ConditionalDimension(name="ctend", parent=grid.time_dim, condition=CondEq(grid.time_dim, ntmod - 2), - relation=Or) + relation='strict') f = TimeFunction(grid=grid, name='f', time_order=0, space_order=0, save=nt, time_dim=ct1) T = TimeFunction(grid=grid, name='T', time_order=0, space_order=0) eqs = [Eq(T, grid.time_dim)] - # this to save times from 0 to nt - 2 + # This saves + # - All subsampled times since ct1 is the dimension of f eqs.append(Eq(f, T)) - # this to save the last time sample nt - 1 - eqs.append(Eq(f.forward, T+1, implicit_dims=ctend)) + # - The last time step (ntmod - 2) through ctend (since it's set as ct1 or ctend) + eqs.append(Eq(f, T, implicit_dims=ctend)) # run operator with buffering op = Operator(eqs, opt='buffering') op.apply(time_m=0, time_M=ntmod-2) - for i in range(nt): + for i in range(nt-1): assert np.allclose(f.data[i], i*2) + assert np.allclose(f.data[nt-1], ntmod - 2) + + +def test_multi_cond_v1(): + grid = Grid((3, 3)) + nt = 5 + + x, y = grid.dimensions + + factor = 2 + ntmod = (nt - 1) * factor + 1 + + ct1 = ConditionalDimension(name="ct1", parent=grid.time_dim, + factor=factor, relation=Or, + condition=CondEq(grid.time_dim, ntmod - 2)) + + f = TimeFunction(grid=grid, name='f', time_order=0, + space_order=0, save=nt, time_dim=ct1) + T = TimeFunction(grid=grid, name='T', time_order=0, space_order=0) + + eqs = [Eq(T, grid.time_dim)] + # This saves + # - All subsampled times since ct1 is the dimension of f with factor 2 + # - The last time step (ntmod - 2) since ct1 also has the condition for ntmod - 2 + eqs.append(Eq(f, T)) + + # run operator with buffering + op = Operator(eqs, opt='buffering') + op.apply(time_m=0, time_M=ntmod-2) + + for i in range(nt-1): + assert np.allclose(f.data[i], i*2) + assert np.allclose(f.data[nt-1], ntmod - 2) + + +@pytest.mark.parametrize("factor", [1, 2, 3]) +def test_buffering_multi_cond(factor): + grid = Grid((16, 16)) + + nt = 5 + ntmod = (nt - 1) * factor + 1 + + ct0 = ConditionalDimension(name="ct0", parent=grid.time_dim, factor=factor, + relation=Or) + f = TimeFunction(grid=grid, name='f', time_order=0, space_order=0, + time_dim=ct0, save=nt) + T = TimeFunction(grid=grid, name='T', time_order=0, space_order=0) + + eqs = [] + eqs.append(Eq(T, grid.time_dim)) + + # conditional dimension for the last sample in the operator + ctend = ConditionalDimension(name="ctend", parent=grid.time_dim, + condition=CondEq(grid.time_dim, ntmod - 2), + relation='strict') + + eqs.append(Eq(f, T)) # this to save times from 0 to nt - 2 + # this to save the last time sample nt - 1 + eqs.append(Eq(f, T+1, implicit_dims=ctend)) + + # run operator with serialization + op = Operator(eqs, opt='buffering') + op.apply(time_m=0, time_M=ntmod-2) + + # Now run backward as well with buffering + + f_all = TimeFunction(grid=grid, name='f_all', time_order=0, + space_order=0, time_dim=ct0, save=nt) + + eq_all = [Eq(f_all, f)] + eq_all.append(Eq(f_all, f, implicit_dims=ctend)) + op_all = Operator(eq_all, opt='buffering') + op_all.apply(time_m=0, time_M=ntmod-2) + assert np.allclose(f_all.data[:, 11, 11], factor * np.arange(nt))