From 5c778ca35bc530cfb97a10c83b54827633ec98c5 Mon Sep 17 00:00:00 2001 From: mloubout Date: Fri, 6 Feb 2026 10:35:34 -0500 Subject: [PATCH 1/6] compiler: fix multi-cond async --- devito/finite_differences/finite_difference.py | 2 ++ devito/ir/equations/equation.py | 8 +++++++- devito/passes/clusters/asynchrony.py | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/devito/finite_differences/finite_difference.py b/devito/finite_differences/finite_difference.py index 30199fb3d8..b2aa142e26 100644 --- a/devito/finite_differences/finite_difference.py +++ b/devito/finite_differences/finite_difference.py @@ -157,6 +157,8 @@ def first_derivative(expr, dim, fd_order, **kwargs): def make_derivative(expr, dim, fd_order, deriv_order, side, matvec, x0, coefficients, expand, weights=None): + if deriv_order == 0 and not expr.is_Add: + print(expr, dim, fd_order) # Always expand time derivatives to avoid issue with buffering and streaming. # Time derivative are almost always short stencils and won't benefit from # unexpansion in the rare case the derivative is not evaluated for time stepping. diff --git a/devito/ir/equations/equation.py b/devito/ir/equations/equation.py index 8d72704b79..5bd243e333 100644 --- a/devito/ir/equations/equation.py +++ b/devito/ir/equations/equation.py @@ -238,7 +238,13 @@ def __new__(cls, *args, **kwargs): index = d.index if d.condition is not None and d in expr.free_symbols: index = index - relational_min(d.condition, d.parent) - expr = uxreplace(expr, {d: IntDiv(index, d.symbolic_factor)}) + # If there is a condition we might access on a non-factor + # index and need to make sure we don't overwrite the previous + # index + num = index + d.symbolic_factor - 1 + else: + num = index + expr = uxreplace(expr, {d: IntDiv(num, d.symbolic_factor)}) conditionals = frozendict(conditionals) diff --git a/devito/passes/clusters/asynchrony.py b/devito/passes/clusters/asynchrony.py index e32190ddef..59cdcaaf48 100644 --- a/devito/passes/clusters/asynchrony.py +++ b/devito/passes/clusters/asynchrony.py @@ -240,7 +240,7 @@ def _actions_from_update_memcpy(c, d, clusters, actions, sregistry): fetch = e.rhs.indices[d] fshift = {Forward: 1, Backward: -1}.get(direction, 0) - findex = fetch + fshift if fetch.find(IntDiv) else fetch._subs(pd, pd + fshift) + findex = fetch._subs(pd, pd + fshift) # If fetching into e.g. `ub[t1]` we might need to prefetch into e.g. `ub[t0]` tindex0 = e.lhs.indices[d] From 8cbdc7f8c325660c7e6a2fa1b588062f630ddf73 Mon Sep 17 00:00:00 2001 From: mloubout Date: Fri, 6 Feb 2026 12:49:44 -0500 Subject: [PATCH 2/6] compiler: fix multi-cond for multi-layer --- devito/ir/equations/equation.py | 11 ++++------- devito/passes/clusters/asynchrony.py | 2 +- devito/passes/clusters/buffering.py | 4 ++-- devito/symbolics/extended_sympy.py | 6 ++++++ devito/types/relational.py | 17 ++++++++++++++++- 5 files changed, 29 insertions(+), 11 deletions(-) diff --git a/devito/ir/equations/equation.py b/devito/ir/equations/equation.py index 5bd243e333..4501545691 100644 --- a/devito/ir/equations/equation.py +++ b/devito/ir/equations/equation.py @@ -11,7 +11,7 @@ ) from devito.symbolics import IntDiv, limits_mapper, uxreplace from devito.tools import Pickable, Tag, frozendict -from devito.types import Eq, Inc, ReduceMax, ReduceMin, ReduceMinMax, relational_min +from devito.types import Eq, Inc, ReduceMax, ReduceMin, ReduceMinMax, relational_min, relational_shift __all__ = [ 'ClusterizedEq', @@ -238,13 +238,10 @@ def __new__(cls, *args, **kwargs): index = d.index if d.condition is not None and d in expr.free_symbols: index = index - relational_min(d.condition, d.parent) - # If there is a condition we might access on a non-factor - # index and need to make sure we don't overwrite the previous - # index - num = index + d.symbolic_factor - 1 + shift = relational_shift(d.condition, d.parent) else: - num = index - expr = uxreplace(expr, {d: IntDiv(num, d.symbolic_factor)}) + shift = 0 + expr = uxreplace(expr, {d: IntDiv(index, d.symbolic_factor) + shift}) conditionals = frozendict(conditionals) diff --git a/devito/passes/clusters/asynchrony.py b/devito/passes/clusters/asynchrony.py index 59cdcaaf48..e32190ddef 100644 --- a/devito/passes/clusters/asynchrony.py +++ b/devito/passes/clusters/asynchrony.py @@ -240,7 +240,7 @@ def _actions_from_update_memcpy(c, d, clusters, actions, sregistry): fetch = e.rhs.indices[d] fshift = {Forward: 1, Backward: -1}.get(direction, 0) - findex = fetch._subs(pd, pd + fshift) + findex = fetch + fshift if fetch.find(IntDiv) else fetch._subs(pd, pd + fshift) # If fetching into e.g. `ub[t1]` we might need to prefetch into e.g. `ub[t0]` tindex0 = e.lhs.indices[d] diff --git a/devito/passes/clusters/buffering.py b/devito/passes/clusters/buffering.py index 2a8c78e7fc..57e0ad591a 100644 --- a/devito/passes/clusters/buffering.py +++ b/devito/passes/clusters/buffering.py @@ -3,7 +3,7 @@ from itertools import chain import numpy as np -from sympy import S +from sympy import S, simplify from devito.exceptions import CompilationError from devito.ir import ( @@ -775,7 +775,7 @@ def infer_buffer_size(f, dim, clusters): slots = [Vector(i) for i in slots] size = int((vmax(*slots) - vmin(*slots) + 1)[0]) - return size + return simplify(size) def offset_from_centre(d, indices): diff --git a/devito/symbolics/extended_sympy.py b/devito/symbolics/extended_sympy.py index 4523fbd2f4..3b3026a08b 100644 --- a/devito/symbolics/extended_sympy.py +++ b/devito/symbolics/extended_sympy.py @@ -18,6 +18,7 @@ ) from devito.types import Symbol from devito.types.basic import Basic +from devito.types.relational import Ge __all__ = ['CondEq', 'CondNe', 'BitwiseNot', 'BitwiseXor', 'BitwiseAnd', # noqa 'LeftShift', 'RightShift', 'IntDiv', 'Terminal', 'CallFromPointer', @@ -47,6 +48,11 @@ def canonical(self): def negated(self): return CondNe(*self.args, evaluate=False) + @property + def _as_min(self): + from devito.symbolics.extended_dtypes import INT + return INT(Ge(*self.args)) + class CondNe(sympy.Ne): diff --git a/devito/types/relational.py b/devito/types/relational.py index 731ec29bc7..e841d3db96 100644 --- a/devito/types/relational.py +++ b/devito/types/relational.py @@ -3,7 +3,8 @@ import sympy -__all__ = ['Ge', 'Gt', 'Le', 'Lt', 'Ne', 'relational_max', 'relational_min'] +__all__ = ['Ge', 'Gt', 'Le', 'Lt', 'Ne', 'relational_max', 'relational_min', + 'relational_shift'] class AbstractRel: @@ -291,3 +292,17 @@ def _(expr, s): return expr.gts else: return sympy.S.Infinity + + +def relational_shift(expr, s): + """ + Infer shift incurred by the expression. Generally only + applies when a CondEq is used as it adds a single value. + """ + if not expr.has(s): + return 0 + + try: + return expr._as_min + except (TypeError, AttributeError): + return 0 From 2e2cc4cf8e7ce321ddf703a64c9b5c08462e71b3 Mon Sep 17 00:00:00 2001 From: mloubout Date: Thu, 12 Feb 2026 09:16:03 -0500 Subject: [PATCH 3/6] api: fix interpolate with complex dtype --- devito/finite_differences/finite_difference.py | 2 -- devito/ir/equations/equation.py | 13 +++++++++++++ devito/passes/clusters/cse.py | 5 +++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/devito/finite_differences/finite_difference.py b/devito/finite_differences/finite_difference.py index b2aa142e26..30199fb3d8 100644 --- a/devito/finite_differences/finite_difference.py +++ b/devito/finite_differences/finite_difference.py @@ -157,8 +157,6 @@ def first_derivative(expr, dim, fd_order, **kwargs): def make_derivative(expr, dim, fd_order, deriv_order, side, matvec, x0, coefficients, expand, weights=None): - if deriv_order == 0 and not expr.is_Add: - print(expr, dim, fd_order) # Always expand time derivatives to avoid issue with buffering and streaming. # Time derivative are almost always short stencils and won't benefit from # unexpansion in the rare case the derivative is not evaluated for time stepping. diff --git a/devito/ir/equations/equation.py b/devito/ir/equations/equation.py index 4501545691..a0af705050 100644 --- a/devito/ir/equations/equation.py +++ b/devito/ir/equations/equation.py @@ -243,6 +243,19 @@ def __new__(cls, *args, **kwargs): shift = 0 expr = uxreplace(expr, {d: IntDiv(index, d.symbolic_factor) + shift}) + # Merge conditionals when possible. E.g if we have an implicit_dim + # and there is a dimension with the same parent, we ca merged + # its condition + for d in input_expr.implicit_dims: + if d not in conditionals: + continue + for cd in dict(conditionals): + if cd.parent == d.parent and cd != d: + cond = conditionals.pop(d) + mode = cd.relation and d.relation + conditionals[cd] = mode(cond, conditionals[cd]) + break + conditionals = frozendict(conditionals) # Lower all Differentiable operations into SymPy operations diff --git a/devito/passes/clusters/cse.py b/devito/passes/clusters/cse.py index b6ce7c58cd..cb96509f9d 100644 --- a/devito/passes/clusters/cse.py +++ b/devito/passes/clusters/cse.py @@ -103,9 +103,14 @@ def cse(cluster, sregistry=None, options=None, **kwargs): if cluster.is_fence: return cluster +<<<<<<< HEAD def make(e): edtype = cse_dtype(e.dtype, dtype) return CTemp(name=sregistry.make_name(), dtype=edtype) +======= + make_dtype = lambda e: cse_dtype(e.dtype, dtype) + make = lambda e: CTemp(name=sregistry.make_name(), dtype=make_dtype(e)) +>>>>>>> 54c5e49e2 (api: fix interpolate with complex dtype) exprs = _cse(cluster, make, min_cost=min_cost, mode=mode) From fa278599f8adfb3f06ff71ef78080facf167a2cf Mon Sep 17 00:00:00 2001 From: mloubout Date: Mon, 16 Feb 2026 13:48:57 -0500 Subject: [PATCH 4/6] api: fix buffering with multiple conditions --- devito/ir/equations/equation.py | 22 +++++++++-------- devito/ir/support/vector.py | 3 +++ devito/passes/clusters/cse.py | 5 ---- devito/types/relational.py | 21 +++++++++++++--- tests/test_buffering.py | 43 +++++++++++++++++++++++++++++---- 5 files changed, 71 insertions(+), 23 deletions(-) diff --git a/devito/ir/equations/equation.py b/devito/ir/equations/equation.py index a0af705050..d68d56ac7a 100644 --- a/devito/ir/equations/equation.py +++ b/devito/ir/equations/equation.py @@ -11,7 +11,9 @@ ) from devito.symbolics import IntDiv, limits_mapper, uxreplace from devito.tools import Pickable, Tag, frozendict -from devito.types import Eq, Inc, ReduceMax, ReduceMin, ReduceMinMax, relational_min, relational_shift +from devito.types import ( + Eq, Inc, ReduceMax, ReduceMin, ReduceMinMax, relational_min, relational_shift +) __all__ = [ 'ClusterizedEq', @@ -222,7 +224,7 @@ def __new__(cls, *args, **kwargs): relations=ordering.relations, mode='partial') ispace = IterationSpace(intervals, iterators) - # Construct the conditionals and replace the ConditionalDimensions in `expr` + # Construct the conditionals conditionals = {} for d in ordering: if not d.is_Conditional: @@ -234,14 +236,6 @@ def __new__(cls, *args, **kwargs): if d._factor is not None: cond = d.relation(cond, GuardFactor(d)) conditionals[d] = cond - # Replace dimension with index - index = d.index - if d.condition is not None and d in expr.free_symbols: - index = index - relational_min(d.condition, d.parent) - shift = relational_shift(d.condition, d.parent) - else: - shift = 0 - expr = uxreplace(expr, {d: IntDiv(index, d.symbolic_factor) + shift}) # Merge conditionals when possible. E.g if we have an implicit_dim # and there is a dimension with the same parent, we ca merged @@ -258,6 +252,14 @@ def __new__(cls, *args, **kwargs): conditionals = frozendict(conditionals) + # Replace the ConditionalDimensions in `expr` + for d, cond in conditionals.items(): + # Replace dimension with index + index = d.index + index = index - relational_min(cond, d.parent) + shift = relational_shift(cond, d.parent) + expr = uxreplace(expr, {d: IntDiv(index, d.symbolic_factor) + shift}) + # Lower all Differentiable operations into SymPy operations rhs = diff2sympy(expr.rhs) diff --git a/devito/ir/support/vector.py b/devito/ir/support/vector.py index 02e26e2a02..79d84605ba 100644 --- a/devito/ir/support/vector.py +++ b/devito/ir/support/vector.py @@ -128,6 +128,7 @@ def __lt__(self, other): return True elif q_positive(i): return False + raise TypeError("Non-comparable index functions") from e return False @@ -164,6 +165,7 @@ def __gt__(self, other): return True elif q_negative(i): return False + raise TypeError("Non-comparable index functions") from e return False @@ -203,6 +205,7 @@ def __le__(self, other): return True elif q_positive(i): return False + raise TypeError("Non-comparable index functions") from e # Note: unlike `__lt__`, if we end up here, then *it is* <=. For example, diff --git a/devito/passes/clusters/cse.py b/devito/passes/clusters/cse.py index cb96509f9d..b6ce7c58cd 100644 --- a/devito/passes/clusters/cse.py +++ b/devito/passes/clusters/cse.py @@ -103,14 +103,9 @@ def cse(cluster, sregistry=None, options=None, **kwargs): if cluster.is_fence: return cluster -<<<<<<< HEAD def make(e): edtype = cse_dtype(e.dtype, dtype) return CTemp(name=sregistry.make_name(), dtype=edtype) -======= - make_dtype = lambda e: cse_dtype(e.dtype, dtype) - make = lambda e: CTemp(name=sregistry.make_name(), dtype=make_dtype(e)) ->>>>>>> 54c5e49e2 (api: fix interpolate with complex dtype) exprs = _cse(cluster, make, min_cost=min_cost, mode=mode) diff --git a/devito/types/relational.py b/devito/types/relational.py index e841d3db96..1fb3e19355 100644 --- a/devito/types/relational.py +++ b/devito/types/relational.py @@ -302,7 +302,22 @@ def relational_shift(expr, s): if not expr.has(s): return 0 - try: - return expr._as_min - except (TypeError, AttributeError): + return _relational_shift(expr, s) + + +@singledispatch +def _relational_shift(s, expr): + return 0 + + +@_relational_shift.register(sympy.Or) +@_relational_shift.register(sympy.And) +def _(expr, s): + return sum([_relational_shift(e, s) for e in expr.args]) + + +@_relational_shift.register(sympy.Eq) +def _(expr, s): + if isinstance(expr.lhs, sympy.Mod): return 0 + return expr._as_min diff --git a/tests/test_buffering.py b/tests/test_buffering.py index 0a0037c223..f89f7262d4 100644 --- a/tests/test_buffering.py +++ b/tests/test_buffering.py @@ -754,7 +754,7 @@ def test_buffer_reuse(): assert all(np.all(vsave.data[i-1] == i + 1) for i in range(1, nt + 1)) -def test_multi_cond(): +def test_multi_cond_v0(): grid = Grid((3, 3)) nt = 5 @@ -774,14 +774,47 @@ def test_multi_cond(): T = TimeFunction(grid=grid, name='T', time_order=0, space_order=0) eqs = [Eq(T, grid.time_dim)] - # this to save times from 0 to nt - 2 + # This saves + # - All subsampled times since ct1 is the dimension of f + # - The last time step (ntmod - 2) through ctend (since it's set as ct1 or ctend) + eqs.append(Eq(f, T, implicit_dims=ctend)) + + # run operator with buffering + op = Operator(eqs, opt='buffering') + op.apply(time_m=0, time_M=ntmod-2) + + for i in range(nt-1): + assert np.allclose(f.data[i], i*2) + assert np.allclose(f.data[nt-1], ntmod - 2) + + +def test_multi_cond_v1(): + grid = Grid((3, 3)) + nt = 5 + + x, y = grid.dimensions + + factor = 2 + ntmod = (nt - 1) * factor + 1 + + ct1 = ConditionalDimension(name="ct1", parent=grid.time_dim, + factor=factor, relation=Or, + condition=CondEq(grid.time_dim, ntmod - 2)) + + f = TimeFunction(grid=grid, name='f', time_order=0, + space_order=0, save=nt, time_dim=ct1) + T = TimeFunction(grid=grid, name='T', time_order=0, space_order=0) + + eqs = [Eq(T, grid.time_dim)] + # This saves + # - All subsampled times since ct1 is the dimension of f with factor 2 + # - The last time step (ntmod - 2) since ct1 also has the condition for ntmod - 2 eqs.append(Eq(f, T)) - # this to save the last time sample nt - 1 - eqs.append(Eq(f.forward, T+1, implicit_dims=ctend)) # run operator with buffering op = Operator(eqs, opt='buffering') op.apply(time_m=0, time_M=ntmod-2) - for i in range(nt): + for i in range(nt-1): assert np.allclose(f.data[i], i*2) + assert np.allclose(f.data[nt-1], ntmod - 2) From 9cc52bb7919ad9bda8a00789a1c8c6f9deadf0d4 Mon Sep 17 00:00:00 2001 From: mloubout Date: Fri, 22 May 2026 11:05:05 -0400 Subject: [PATCH 5/6] compiler: fix various corner case of multi buffering --- devito/ir/equations/equation.py | 24 ++--- devito/passes/clusters/asynchrony.py | 4 +- devito/passes/clusters/buffering.py | 135 +++++++++++++++------------ devito/symbolics/extended_sympy.py | 6 -- devito/types/relational.py | 3 +- tests/test_buffering.py | 42 +++++++++ 6 files changed, 132 insertions(+), 82 deletions(-) diff --git a/devito/ir/equations/equation.py b/devito/ir/equations/equation.py index d68d56ac7a..832a436986 100644 --- a/devito/ir/equations/equation.py +++ b/devito/ir/equations/equation.py @@ -237,6 +237,14 @@ def __new__(cls, *args, **kwargs): cond = d.relation(cond, GuardFactor(d)) conditionals[d] = cond + # Replace the ConditionalDimensions in `expr` + for d, cond in conditionals.items(): + # Replace dimension with index + index = d.index + index = index - relational_min(cond, d.parent) + shift = relational_shift(cond, d.parent) + expr = uxreplace(expr, {d: IntDiv(index, d.symbolic_factor) + shift}) + # Merge conditionals when possible. E.g if we have an implicit_dim # and there is a dimension with the same parent, we ca merged # its condition @@ -247,19 +255,13 @@ def __new__(cls, *args, **kwargs): if cd.parent == d.parent and cd != d: cond = conditionals.pop(d) mode = cd.relation and d.relation - conditionals[cd] = mode(cond, conditionals[cd]) + if issubclass(mode, sympy.Or): + conditionals[d] = cond + conditionals.pop(cd) + else: + conditionals[cd] = mode(cond, conditionals[cd]) break - conditionals = frozendict(conditionals) - - # Replace the ConditionalDimensions in `expr` - for d, cond in conditionals.items(): - # Replace dimension with index - index = d.index - index = index - relational_min(cond, d.parent) - shift = relational_shift(cond, d.parent) - expr = uxreplace(expr, {d: IntDiv(index, d.symbolic_factor) + shift}) - # Lower all Differentiable operations into SymPy operations rhs = diff2sympy(expr.rhs) diff --git a/devito/passes/clusters/asynchrony.py b/devito/passes/clusters/asynchrony.py index e32190ddef..7b7f06c403 100644 --- a/devito/passes/clusters/asynchrony.py +++ b/devito/passes/clusters/asynchrony.py @@ -193,7 +193,7 @@ def memcpy_prefetch(clusters, key0, sregistry): if c.properties.is_prefetchable(d._defines): _actions_from_update_memcpy(c, d, clusters, actions, sregistry) elif d.is_Custom and is_integer(c.ispace[d].size): - _actions_from_init(c, d, actions) + _actions_from_init(c, d, clusters, actions) # Attach the computed Actions processed = [] @@ -214,7 +214,7 @@ def memcpy_prefetch(clusters, key0, sregistry): return processed -def _actions_from_init(c, d, actions): +def _actions_from_init(c, d, clusters, actions): e = c.exprs[0] function = e.rhs.function target = e.lhs.function diff --git a/devito/passes/clusters/buffering.py b/devito/passes/clusters/buffering.py index 57e0ad591a..cde3281bf6 100644 --- a/devito/passes/clusters/buffering.py +++ b/devito/passes/clusters/buffering.py @@ -1,9 +1,9 @@ from collections import defaultdict, namedtuple from functools import cached_property -from itertools import chain +from itertools import chain, groupby import numpy as np -from sympy import S, simplify +from sympy import Mod, S, simplify from devito.exceptions import CompilationError from devito.ir import ( @@ -116,7 +116,7 @@ def key(f): # Then we inject them into the Clusters. This involves creating the # initializing Clusters, and replacing the buffered Functions with the buffers clusters = InjectBuffers(mapper, sregistry, options).process(clusters) - + print(clusters) return clusters @@ -142,14 +142,18 @@ def callback(self, clusters, prefix): return clusters d = prefix[-1].dim - key = lambda f, *args: f in self.mapper + def key(f, *args): + for (ff, _) in self.mapper: + if f == ff: + return True + return False bfmap = map_buffered_functions(clusters, key) # A BufferDescriptor is a simple data structure storing additional # information about a buffer, harvested from the subset of `clusters` # that access it - descriptors = {b: BufferDescriptor(f, b, bfmap[f]) - for f, b in self.mapper.items() + descriptors = {b: BufferDescriptor(f, b, bfmap[f], g) + for (f, g), b in self.mapper.items() if f in bfmap} # Are we inside the right `d`? @@ -184,6 +188,8 @@ def callback(self, clusters, prefix): continue if c not in v.firstread: continue + if not c.guards.get(d) == v.guards.get(d): + continue idxf = v.last_idx[c] idxb = mds[(v.xd, idxf)] @@ -203,7 +209,7 @@ def callback(self, clusters, prefix): guards = c.guards properties = c.properties.sequentialize(d) - if not isinstance(d, BufferDimension): + if not isinstance(d, BufferDimension) and c.guards[d].has(Mod): properties = properties.prefetchable(d) # `c` may be a HaloTouch Cluster, so with no vision of the `bdims` properties = properties.parallelize(v.bdims).affine(v.bdims) @@ -227,6 +233,8 @@ def callback(self, clusters, prefix): continue if c not in v.lastwrite: continue + if not c.guards.get(d) == v.guards.get(d): + continue idxf = v.last_idx[c] idxb = mds[(v.xd, idxf)] @@ -358,59 +366,60 @@ def generate_buffers(clusters, key, sregistry, options, **kwargs): xds = {} mapper = {} for f, clusters in bfmap.items(): - exprs = flatten(c.exprs for c in clusters) - - bdims = key(f, exprs) - - dims = [d for d in f.dimensions if d not in bdims] - if len(dims) != 1: - raise CompilationError(f"Unsupported multi-dimensional `buffering` " - f"required by `{f}`") - dim = dims.pop() - - if is_buffering(exprs): - # Multi-level buffering - # NOTE: a bit rudimentary (we could go through the exprs one by one - # instead), but it's much shorter this way - buffers = [f for f in retrieve_functions(exprs) if f.is_Array] - assert len(buffers) == 1, "Unexpected form of multi-level buffering" - buffer, = buffers - xd = buffer.indices[dim] - else: - size = infer_buffer_size(f, dim, clusters) - - if async_degree is not None: - if async_degree < size: - warning( - 'Ignoring provided asynchronous degree as it would be ' - f'too small for the required buffer (provided {async_degree}, ' - f'but need at least {size} for `{f.name}`)' - ) - else: - size = async_degree - - # A special CustomDimension to use in place of `dim` in the buffer - try: - xd = xds[(dim, size)] - except KeyError: - name = sregistry.make_name(prefix='db') - xd = xds[(dim, size)] = BufferDimension(name, 0, size-1, size, dim) - - # The buffer dimensions - dimensions = list(f.dimensions) - assert dim in f.dimensions - dimensions[dimensions.index(dim)] = xd - - # Finally create the actual buffer - cls = callback or Array - name = sregistry.make_name(prefix=f'{f.name}b') - # We specify the padding to match the input Function's one, so that - # the array can be used in place of the Function with valid strides - # Plain Array do not track mapped so we default to no padding - padding = 0 if cls is Array else f.padding - mapper[f] = cls(name=name, dimensions=dimensions, dtype=f.dtype, - padding=padding, grid=f.grid, halo=f.halo, - space='mapped', mapped=f, f=f) + for k, ck in groupby(clusters, key=lambda c: c.guards): + exprs = flatten(c.exprs for c in ck) + + bdims = key(f, exprs) + + dims = [d for d in f.dimensions if d not in bdims] + if len(dims) != 1: + raise CompilationError(f"Unsupported multi-dimensional `buffering` " + f"required by `{f}`") + dim = dims.pop() + + if is_buffering(exprs): + # Multi-level buffering + # NOTE: a bit rudimentary (we could go through the exprs one by one + # instead), but it's much shorter this way + buffers = [f for f in retrieve_functions(exprs) if f.is_Array] + assert len(buffers) == 1, "Unexpected form of multi-level buffering" + buffer, = buffers + xd = buffer.indices[dim] + else: + size = infer_buffer_size(f, dim, clusters) + + if async_degree is not None: + if async_degree < size: + warning( + 'Ignoring provided asynchronous degree as it would be ' + f'too small for the required buffer (provided {async_degree}, ' + f'but need at least {size} for `{f.name}`)' + ) + else: + size = async_degree + + # A special CustomDimension to use in place of `dim` in the buffer + try: + xd = xds[(dim, size, k)] + except KeyError: + name = sregistry.make_name(prefix='db') + xd = xds[(dim, size, k)] = BufferDimension(name, 0, size-1, size, dim) + + # The buffer dimensions + dimensions = list(f.dimensions) + assert dim in f.dimensions + dimensions[dimensions.index(dim)] = xd + + # Finally create the actual buffer + cls = callback or Array + name = sregistry.make_name(prefix=f'{f.name}b') + # We specify the padding to match the input Function's one, so that + # the array can be used in place of the Function with valid strides + # Plain Array do not track mapped so we default to no padding + padding = 0 if cls is Array else f.padding + mapper[(f, k)] = cls(name=name, dimensions=dimensions, dtype=f.dtype, + padding=padding, grid=f.grid, halo=f.halo, + space='mapped', mapped=f, f=f) return mapper @@ -430,10 +439,11 @@ def map_buffered_functions(clusters, key): class BufferDescriptor: - def __init__(self, f, b, clusters): + def __init__(self, f, b, clusters, guards): self.f = f self.b = b self.clusters = clusters + self.guards = guards self.xd, = b.find(BufferDimension) self.bdims = tuple(d for d in b.dimensions if d is not self.xd) @@ -674,8 +684,9 @@ def make_mds(descriptors, prefix, sregistry): # same strategy is also applied in clusters/algorithms/Stepper key = lambda i: -np.inf if i - p == 0 else (i - p) # noqa: B023 indices = sorted(v.indices, key=key) + v_mds = None - for i in indices: + for k, i in enumerate(indices): k = (v.xd, i) if k in mds: continue diff --git a/devito/symbolics/extended_sympy.py b/devito/symbolics/extended_sympy.py index 3b3026a08b..4523fbd2f4 100644 --- a/devito/symbolics/extended_sympy.py +++ b/devito/symbolics/extended_sympy.py @@ -18,7 +18,6 @@ ) from devito.types import Symbol from devito.types.basic import Basic -from devito.types.relational import Ge __all__ = ['CondEq', 'CondNe', 'BitwiseNot', 'BitwiseXor', 'BitwiseAnd', # noqa 'LeftShift', 'RightShift', 'IntDiv', 'Terminal', 'CallFromPointer', @@ -48,11 +47,6 @@ def canonical(self): def negated(self): return CondNe(*self.args, evaluate=False) - @property - def _as_min(self): - from devito.symbolics.extended_dtypes import INT - return INT(Ge(*self.args)) - class CondNe(sympy.Ne): diff --git a/devito/types/relational.py b/devito/types/relational.py index 1fb3e19355..70c3e39103 100644 --- a/devito/types/relational.py +++ b/devito/types/relational.py @@ -320,4 +320,5 @@ def _(expr, s): def _(expr, s): if isinstance(expr.lhs, sympy.Mod): return 0 - return expr._as_min + from devito.symbolics.extended_dtypes import INT + return INT(Ge(*expr.args)) diff --git a/tests/test_buffering.py b/tests/test_buffering.py index f89f7262d4..525ee72150 100644 --- a/tests/test_buffering.py +++ b/tests/test_buffering.py @@ -818,3 +818,45 @@ def test_multi_cond_v1(): for i in range(nt-1): assert np.allclose(f.data[i], i*2) assert np.allclose(f.data[nt-1], ntmod - 2) + + +@pytest.mark.parametrize("factor", [1, 2, 3]) +def test_buffering_multi_cond(factor): + grid = Grid((16, 16)) + + nt = 5 + ntmod = (nt - 1) * factor + 1 + + ct0 = ConditionalDimension(name="ct0", parent=grid.time_dim, factor=factor, + relation=Or) + f = TimeFunction(grid=grid, name='f', time_order=0, space_order=0, + time_dim=ct0, save=nt) + T = TimeFunction(grid=grid, name='T', time_order=0, space_order=0) + + eqs = [] + eqs.append(Eq(T, grid.time_dim)) + + # conditional dimension for the last sample in the operator + ctend = ConditionalDimension(name="ctend", parent=grid.time_dim, + condition=CondEq(grid.time_dim, ntmod - 2), + relation=Or) + + eqs.append(Eq(f, T)) # this to save times from 0 to nt - 2 + # this to save the last time sample nt - 1 + eqs.append(Eq(f.forward, T+1, implicit_dims=ctend)) + + # run operator with serialization + op = Operator(eqs, opt='buffering') + op.apply(time_m=0, time_M=ntmod-2) + + # Now run backward as well with buffering + + f_all = TimeFunction(grid=grid, name='f_all', time_order=0, + space_order=0, time_dim=ct0, save=nt) + + eq_all = [Eq(f_all, f)] + eq_all.append(Eq(f_all.forward, f.forward, implicit_dims=ctend)) + op_all = Operator(eq_all, opt='buffering') + op_all.apply(time_m=0, time_M=ntmod-2) + + assert np.allclose(f_all.data[:, 11, 11], factor * np.arange(nt)) From 30790f06cbfbd6402e57e2da0a954c64ecf3c72a Mon Sep 17 00:00:00 2001 From: mloubout Date: Thu, 28 May 2026 10:59:03 -0400 Subject: [PATCH 6/6] compiler: support mutli-buffering --- devito/ir/clusters/algorithms.py | 15 +- devito/ir/equations/equation.py | 21 +- devito/passes/clusters/asynchrony.py | 5 +- devito/passes/clusters/buffering.py | 187 ++++++++-------- devito/types/dimension.py | 39 ++++ devito/types/relational.py | 10 +- .../userapi/05_conditional_dimension.ipynb | 201 ++++++++++++++++-- tests/test_buffering.py | 7 +- 8 files changed, 364 insertions(+), 121 deletions(-) diff --git a/devito/ir/clusters/algorithms.py b/devito/ir/clusters/algorithms.py index 6399f12559..017409e68f 100644 --- a/devito/ir/clusters/algorithms.py +++ b/devito/ir/clusters/algorithms.py @@ -259,10 +259,17 @@ def guard(clusters): # Separate out the indirect ConditionalDimensions, which only serve # the purpose of protecting from OOB accesses cds = [d for d in cds if not d.indirect] + modes = [cd.relation for cd in cds] + if len({m == 'strict' for m in modes}) > 1: + raise CompilationError("Only one `strict` condition" + "can be used in an equation") + elif 'strict' in modes: + mode = 'strict' + else: + mode = sympy.And if sympy.And in modes else sympy.Or # Chain together all `cds` conditions from all expressions in `c` guards = {} - mode = sympy.Or for cd in cds: # `BOTTOM` parent implies a guard that lives outside of # any iteration space, which corresponds to the placeholder None @@ -279,7 +286,6 @@ def guard(clusters): # Pull `cd` from any expr condition = guards.setdefault(k, []) - mode = mode and cd.relation for e in exprs: try: condition.append(e.conditionals[cd]) @@ -296,7 +302,10 @@ def guard(clusters): # Combination `mode` is And by default. # If all conditions are Or then Or combination `mode` is used. - guards = {d: mode(*v, evaluate=False) for d, v in guards.items()} + if mode == 'strict': + guards = {d: v[0] for d, v in guards.items()} + else: + guards = {d: mode(*v, evaluate=False) for d, v in guards.items()} # Construct a guarded Cluster processed.append(c.rebuild(exprs=exprs, guards=guards)) diff --git a/devito/ir/equations/equation.py b/devito/ir/equations/equation.py index 832a436986..6fb89ce02d 100644 --- a/devito/ir/equations/equation.py +++ b/devito/ir/equations/equation.py @@ -237,14 +237,6 @@ def __new__(cls, *args, **kwargs): cond = d.relation(cond, GuardFactor(d)) conditionals[d] = cond - # Replace the ConditionalDimensions in `expr` - for d, cond in conditionals.items(): - # Replace dimension with index - index = d.index - index = index - relational_min(cond, d.parent) - shift = relational_shift(cond, d.parent) - expr = uxreplace(expr, {d: IntDiv(index, d.symbolic_factor) + shift}) - # Merge conditionals when possible. E.g if we have an implicit_dim # and there is a dimension with the same parent, we ca merged # its condition @@ -254,14 +246,23 @@ def __new__(cls, *args, **kwargs): for cd in dict(conditionals): if cd.parent == d.parent and cd != d: cond = conditionals.pop(d) - mode = cd.relation and d.relation - if issubclass(mode, sympy.Or): + if d.relation == 'strict': conditionals[d] = cond conditionals.pop(cd) else: + mode = cd.relation and d.relation conditionals[cd] = mode(cond, conditionals[cd]) break + # Replace the ConditionalDimensions in `expr` + for d, cond in conditionals.items(): + # Replace dimension with index + index = d.index + if d.condition is not None and d in expr.free_symbols: + index = index - relational_min(cond, d.parent) + shift = relational_shift(cond, d.parent) + expr = uxreplace(expr, {d: IntDiv(index, d.symbolic_factor) + shift}) + # Lower all Differentiable operations into SymPy operations rhs = diff2sympy(expr.rhs) diff --git a/devito/passes/clusters/asynchrony.py b/devito/passes/clusters/asynchrony.py index 7b7f06c403..5bdc7b63b9 100644 --- a/devito/passes/clusters/asynchrony.py +++ b/devito/passes/clusters/asynchrony.py @@ -1,6 +1,6 @@ from collections import defaultdict -from sympy import true +from sympy import Mod, true from devito.ir import ( Backward, Forward, GuardBoundNext, PrefetchUpdate, Queue, ReleaseLock, SyncArray, @@ -78,7 +78,8 @@ def callback(self, clusters, prefix): d = self.key0(c0) if d is not dim: continue - + if d in c0.guards and not c0.guards[d].has(Mod): + continue protected = self._schedule_waitlocks(c0, d, clusters, locks, syncs) self._schedule_withlocks(c0, d, protected, locks, syncs) diff --git a/devito/passes/clusters/buffering.py b/devito/passes/clusters/buffering.py index cde3281bf6..b30e98059b 100644 --- a/devito/passes/clusters/buffering.py +++ b/devito/passes/clusters/buffering.py @@ -14,7 +14,7 @@ from devito.passes.clusters.utils import is_memcpy from devito.symbolics import IntDiv, retrieve_functions, uxreplace from devito.tools import ( - Stamp, as_mapper, as_tuple, filter_ordered, flatten, frozendict, is_integer, + Stamp, as_list, as_mapper, as_tuple, filter_ordered, flatten, frozendict, is_integer, timed_pass ) from devito.types import Array, CustomDimension, Eq, ModuloDimension @@ -116,7 +116,7 @@ def key(f): # Then we inject them into the Clusters. This involves creating the # initializing Clusters, and replacing the buffered Functions with the buffers clusters = InjectBuffers(mapper, sregistry, options).process(clusters) - print(clusters) + return clusters @@ -142,22 +142,21 @@ def callback(self, clusters, prefix): return clusters d = prefix[-1].dim - def key(f, *args): - for (ff, _) in self.mapper: - if f == ff: - return True - return False + key = lambda f, *args: any(f == ff for ff, _ in self.mapper) bfmap = map_buffered_functions(clusters, key) # A BufferDescriptor is a simple data structure storing additional # information about a buffer, harvested from the subset of `clusters` # that access it - descriptors = {b: BufferDescriptor(f, b, bfmap[f], g) - for (f, g), b in self.mapper.items() - if f in bfmap} + descriptors = {} + for (f, g), b in self.mapper.items(): + if f in bfmap: + descriptors.setdefault(b, []).append(BufferDescriptor(f, b, bfmap[f], g)) # Are we inside the right `d`? - descriptors = {b: v for b, v in descriptors.items() if d in v.itdims} + descriptors = {b: [vi for vi in v if d in vi.itdims] + for b, v in descriptors.items()} + descriptors = {b: v for b, v in descriptors.items() if v} if not descriptors: return clusters @@ -172,23 +171,28 @@ def key(f, *args): # Substitution rules to replace buffered Functions with buffers # E.g., `usave[time+1, x+1, y+1] -> ub0[t1, x+1, y+1]` subs = {} - for b, v in descriptors.items(): - accesses = chain(*[c.scope[v.f] for c in v.clusters]) - index_mapper = {i: mds[(v.xd, i)] for i in v.indices} - for a in accesses: - subs[a.access] = b.indexed[[index_mapper.get(i, i) for i in a]] + for b, vb in descriptors.items(): + for v in vb: + for c in v.clusters: + if c.guards.get(d) != v.guards.get(d): + continue + subs.setdefault(c, {}) + accesses = c.scope[v.f] + index_mapper = {i: mds[(v.xd, i)] for i in v.indices} + for a in accesses: + subs[c][a.access] = b.indexed[[index_mapper.get(i, i) for i in a]] processed = [] for c in clusters: # If a buffer is read but never written, then we need to add # an Eq to step through the next slot # E.g., `ub[0, x] = usave[time+2, x]` - for _, v in descriptors.items(): + for v in chain.from_iterable(descriptors.values()): if not v.is_readonly: continue if c not in v.firstread: continue - if not c.guards.get(d) == v.guards.get(d): + if c.guards.get(d) != v.guards.get(d): continue idxf = v.last_idx[c] @@ -207,9 +211,9 @@ def key(f, *args): guards = c.guards.xandg(v.xd, GuardBound(0, v.first_idx.f)) else: guards = c.guards - properties = c.properties.sequentialize(d) - if not isinstance(d, BufferDimension) and c.guards[d].has(Mod): + g = c.guards.get(d, None) + if not isinstance(d, BufferDimension) and (g is None or g.has(Mod)): properties = properties.prefetchable(d) # `c` may be a HaloTouch Cluster, so with no vision of the `bdims` properties = properties.parallelize(v.bdims).affine(v.bdims) @@ -219,7 +223,7 @@ def key(f, *args): processed.append(Cluster(expr, ispace, guards, properties, syncs)) # Substitute the buffered Functions with the buffers - exprs = [uxreplace(e, subs) for e in c.exprs] + exprs = [uxreplace(e, subs.get(c, {})) for e in c.exprs] ispace = c.ispace.augment(subiters) properties = c.properties.sequentialize(d) processed.append( @@ -228,12 +232,12 @@ def key(f, *args): # Append the copy-back if `c` is the last-write of some buffers # E.g., `usave[time+1, x] = ub[t1, x]` - for _, v in descriptors.items(): + for v in chain.from_iterable(descriptors.values()): if v.is_readonly: continue if c not in v.lastwrite: continue - if not c.guards.get(d) == v.guards.get(d): + if c.guards.get(d) != v.guards.get(d): continue idxf = v.last_idx[c] @@ -269,36 +273,37 @@ def key(f, *args): return init + processed def _optimize(self, clusters, descriptors): - for b, v in descriptors.items(): - if v.is_writeonly: - # `b` might be written by multiple, potentially mutually - # exclusive, equations. For example, two equations that have or - # will have complementary guards, hence only one will be - # executed. In such a case, we can split the equations over - # separate IterationSpaces - key0 = lambda: Stamp() - elif v.is_readonly: - # `b` is read multiple times -- this could just be the case of - # coupled equations, so we more cautiously perform a - # "buffer-wise" splitting of the IterationSpaces (i.e., only - # relevant if there are at least two read-only buffers) - stamp = Stamp() - key0 = lambda: stamp # noqa: B023 - else: - continue - - processed = [] - for c in clusters: - if b not in c.functions: - processed.append(c) + for b, vb in descriptors.items(): + for v in vb: + if v.is_writeonly: + # `b` might be written by multiple, potentially mutually + # exclusive, equations. For example, two equations that have or + # will have complementary guards, hence only one will be + # executed. In such a case, we can split the equations over + # separate IterationSpaces + key0 = lambda: Stamp() + elif v.is_readonly: + # `b` is read multiple times -- this could just be the case of + # coupled equations, so we more cautiously perform a + # "buffer-wise" splitting of the IterationSpaces (i.e., only + # relevant if there are at least two read-only buffers) + stamp = Stamp() + key0 = lambda: stamp # noqa: B023 + else: continue - key1 = lambda d: not d._defines & v.dim._defines # noqa: B023 - dims = c.ispace.project(key1).itdims - ispace = c.ispace.lift(dims, key0()) - processed.append(c.rebuild(ispace=ispace)) + processed = [] + for c in clusters: + if b not in c.functions: + processed.append(c) + continue - clusters = processed + key1 = lambda d: not d._defines & v.dim._defines # noqa: B023 + dims = c.ispace.project(key1).itdims + ispace = c.ispace.lift(dims, key0()) + processed.append(c.rebuild(ispace=ispace)) + + clusters = processed return clusters @@ -314,7 +319,7 @@ def _reuse(self, init, clusters, descriptors): cbk = lambda v: v mapper = as_mapper(descriptors, key=lambda b: b._signature) - mapper = {k: cbk(v) for k, v in mapper.items() if cbk(v)} + mapper = {k: as_list([cbk(v) for v in vb if cbk(v)]) for k, vb in mapper.items()} subs = {} drop = set() @@ -365,8 +370,10 @@ def generate_buffers(clusters, key, sregistry, options, **kwargs): # {buffered Function -> Buffer} xds = {} mapper = {} + extras = {} for f, clusters in bfmap.items(): for k, ck in groupby(clusters, key=lambda c: c.guards): + ck = list(ck) exprs = flatten(c.exprs for c in ck) bdims = key(f, exprs) @@ -374,8 +381,11 @@ def generate_buffers(clusters, key, sregistry, options, **kwargs): dims = [d for d in f.dimensions if d not in bdims] if len(dims) != 1: raise CompilationError(f"Unsupported multi-dimensional `buffering` " - f"required by `{f}`") + f"required by `{f}`") dim = dims.pop() + if k and not dim._defines & k.keys(): + extras.setdefault(f, []).append(k) + continue if is_buffering(exprs): # Multi-level buffering @@ -386,13 +396,15 @@ def generate_buffers(clusters, key, sregistry, options, **kwargs): buffer, = buffers xd = buffer.indices[dim] else: - size = infer_buffer_size(f, dim, clusters) + + size = infer_buffer_size(f, dim, ck) if async_degree is not None: if async_degree < size: warning( 'Ignoring provided asynchronous degree as it would be ' - f'too small for the required buffer (provided {async_degree}, ' + 'too small for the required buffer' + f' (provided {async_degree}, ' f'but need at least {size} for `{f.name}`)' ) else: @@ -421,6 +433,13 @@ def generate_buffers(clusters, key, sregistry, options, **kwargs): padding=padding, grid=f.grid, halo=f.halo, space='mapped', mapped=f, f=f) + for f, k in extras.items(): + for (ff, kk) in dict(mapper): + if f == ff: + for ki in k: + if ki.keys() & set(mapper[(ff, kk)].dimensions): + mapper[(f, ki)] = mapper[(ff, kk)] + return mapper @@ -453,7 +472,7 @@ def __init__(self, f, b, clusters, guards): self.indices = extract_indices(f, self.dim, clusters) def __repr__(self): - return f"Descriptor[{self.f} -> {self.b}]" + return f"Descriptor[{self.f} -> {self.b}], {self.guards}" @property def size(self): @@ -668,7 +687,7 @@ def make_mds(descriptors, prefix, sregistry): inspecting all buffers so that ModuloDimensions are reused when possible. """ mds = defaultdict(int) - for v in descriptors.values(): + for v in chain.from_iterable(descriptors.values()): size = v.xd.symbolic_size if size == 1: @@ -684,7 +703,6 @@ def make_mds(descriptors, prefix, sregistry): # same strategy is also applied in clusters/algorithms/Stepper key = lambda i: -np.inf if i - p == 0 else (i - p) # noqa: B023 indices = sorted(v.indices, key=key) - v_mds = None for k, i in enumerate(indices): k = (v.xd, i) @@ -711,42 +729,43 @@ def init_buffers(descriptors, options): init_onwrite = options['buf-init-onwrite'] init = [] - for b, v in descriptors.items(): - f = v.f - - if v.is_read: - # Special case: avoid initialization in the case of double (or - # multiple) buffering because it's completely unnecessary - if v.is_double_buffering: - continue + for b, vb in descriptors.items(): + for v in vb: + f = v.f + + if v.is_read: + # Special case: avoid initialization in the case of double (or + # multiple) buffering because it's completely unnecessary + if v.is_double_buffering: + continue - lhs = b.indexify()._subs(v.xd, v.first_idx.b) - rhs = f.indexify()._subs(v.dim, v.first_idx.f) + lhs = b.indexify()._subs(v.xd, v.first_idx.b) + rhs = f.indexify()._subs(v.dim, v.first_idx.f) - elif v.is_write and init_onwrite(f): - lhs = b.indexify() - rhs = S.Zero + elif v.is_write and init_onwrite(f): + lhs = b.indexify() + rhs = S.Zero - else: - continue + else: + continue - expr = Eq(lhs, rhs) - expr = lower_exprs(expr) + expr = Eq(lhs, rhs) + expr = lower_exprs(expr) - ispace = v.write_to + ispace = v.write_to - guards = {} - guards[None] = GuardBound(v.dim.root.symbolic_min, v.dim.root.symbolic_max) - if v.is_read: - guards[v.xd] = GuardBound(0, v.first_idx.f) + guards = {} + guards[None] = GuardBound(v.dim.root.symbolic_min, v.dim.root.symbolic_max) + if v.is_read: + guards[v.xd] = GuardBound(0, v.first_idx.f) - properties = Properties() - properties = properties.affine(ispace.itdims) - properties = properties.parallelize(ispace.itdims) + properties = Properties() + properties = properties.affine(ispace.itdims) + properties = properties.parallelize(ispace.itdims) - syncs = {None: [InitArray(None, b)]} + syncs = {None: [InitArray(None, b)]} - init.append(Cluster(expr, ispace, guards, properties, syncs)) + init.append(Cluster(expr, ispace, guards, properties, syncs)) return init diff --git a/devito/types/dimension.py b/devito/types/dimension.py index 41c1f90b88..f5bb1e3757 100644 --- a/devito/types/dimension.py +++ b/devito/types/dimension.py @@ -912,6 +912,45 @@ class ConditionalDimension(DerivedDimension): } } + A ConditionalDimension may also be combined with other ConditionalDimensions + appearing in the same equation (for instance via ``implicit_dims``). The + ``relation`` argument controls how the individual conditions are merged: + + * ``And`` (default): all conditions must hold (mutually exclusive merging). + * ``Or``: any one condition is enough for the if-branch to fire. + * ``'strict'``: this condition takes precedence over every other condition + attached to the same equation. At most one strict condition is allowed + per equation. + + The example below sets every even ``x`` index to 1 via subsampling, and + additionally forces the right ``x`` edge to be written by combining the two + conditions with ``Or``. + + >>> from sympy import Or + >>> from devito import Grid, CondEq + >>> grid = Grid(shape=(10, 10)) + >>> x, y = grid.dimensions + >>> c_even = ConditionalDimension(name='ceven', parent=x, factor=2, relation=Or) + >>> c_edge = ConditionalDimension(name='cedge', parent=x, + ... condition=CondEq(x, x.symbolic_max), + ... relation=Or) + >>> f = Function(name='f', grid=grid, dimensions=(c_even, y), + ... shape=(5, 10), space_order=0) + >>> op = Operator(Eq(f, 1, implicit_dims=c_edge)) + + The Operator generates the following for-loop (pseudocode), where the two + ``Or`` conditions are fused into a single guard. + + .. code-block:: C + + for (int x = x_m; x <= x_M; x += 1) { + if (x == x_M || x%cevenf == 0) { + for (int y = y_m; y <= y_M; y += 1) { + f[x / cevenf][y] = 1; + } + } + } + """ is_NonlinearDerived = True diff --git a/devito/types/relational.py b/devito/types/relational.py index 70c3e39103..08c4d8a144 100644 --- a/devito/types/relational.py +++ b/devito/types/relational.py @@ -38,7 +38,7 @@ class Le(AbstractRel, sympy.Le): rhs : expr-like, optional The right-hand side. Defaults to 0. subdomain : SubDomain, optional - To restrict the evalaution of the relation to a particular sub-region in the + To restrict the evaluation of the relation to a particular sub-region in the computational domain. Examples @@ -73,7 +73,7 @@ class Lt(AbstractRel, sympy.Lt): rhs : expr-like, optional The right-hand side. Defaults to 0. subdomain : SubDomain, optional - To restrict the evalaution of the relation to a particular sub-region in the + To restrict the evaluation of the relation to a particular sub-region in the computational domain. Examples @@ -108,7 +108,7 @@ class Ge(AbstractRel, sympy.Ge): rhs : expr-like, optional The right-hand side. Defaults to 0. subdomain : SubDomain, optional - To restrict the evalaution of the relation to a particular sub-region in the + To restrict the evaluation of the relation to a particular sub-region in the computational domain. Examples @@ -143,7 +143,7 @@ class Gt(AbstractRel, sympy.Gt): rhs : expr-like, optional The right-hand side. Defaults to 0. subdomain : SubDomain, optional - To restrict the evalaution of the relation to a particular sub-region in the + To restrict the evaluation of the relation to a particular sub-region in the computational domain. Examples @@ -178,7 +178,7 @@ class Ne(AbstractRel, sympy.Ne): rhs : expr-like, optional The right-hand side. Defaults to 0. subdomain : SubDomain, optional - To restrict the evalaution of the relation to a particular sub-region in the + To restrict the evaluation of the relation to a particular sub-region in the computational domain. Examples diff --git a/examples/userapi/05_conditional_dimension.ipynb b/examples/userapi/05_conditional_dimension.ipynb index f0a7a1b531..a8e628545b 100644 --- a/examples/userapi/05_conditional_dimension.ipynb +++ b/examples/userapi/05_conditional_dimension.ipynb @@ -231,6 +231,45 @@ " }\n", " }\n", "\n", + " A ConditionalDimension may also be combined with other ConditionalDimensions\n", + " appearing in the same equation (for instance via ``implicit_dims``). The\n", + " ``relation`` argument controls how the individual conditions are merged:\n", + "\n", + " * ``And`` (default): all conditions must hold (mutually exclusive merging).\n", + " * ``Or``: any one condition is enough for the if-branch to fire.\n", + " * ``'strict'``: this condition takes precedence over every other condition\n", + " attached to the same equation. At most one strict condition is allowed\n", + " per equation.\n", + "\n", + " The example below sets every even ``x`` index to 1 via subsampling, and\n", + " additionally forces the right ``x`` edge to be written by combining the two\n", + " conditions with ``Or``.\n", + "\n", + " >>> from sympy import Or\n", + " >>> from devito import Grid, CondEq\n", + " >>> grid = Grid(shape=(10, 10))\n", + " >>> x, y = grid.dimensions\n", + " >>> c_even = ConditionalDimension(name='ceven', parent=x, factor=2, relation=Or)\n", + " >>> c_edge = ConditionalDimension(name='cedge', parent=x,\n", + " ... condition=CondEq(x, x.symbolic_max),\n", + " ... relation=Or)\n", + " >>> f = Function(name='f', grid=grid, dimensions=(c_even, y),\n", + " ... shape=(5, 10), space_order=0)\n", + " >>> op = Operator(Eq(f, 1, implicit_dims=c_edge))\n", + "\n", + " The Operator generates the following for-loop (pseudocode), where the two\n", + " ``Or`` conditions are fused into a single guard.\n", + "\n", + " .. code-block:: C\n", + "\n", + " for (int x = x_m; x <= x_M; x += 1) {\n", + " if (x == x_M || x%cevenf == 0) {\n", + " for (int y = y_m; y <= y_M; y += 1) {\n", + " f[x / cevenf][y] = 1;\n", + " }\n", + " }\n", + " }\n", + "\n", " \n" ] } @@ -566,13 +605,6 @@ "execution_count": 9, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Operator `Kernel` ran in 0.01 s\n" - ] - }, { "name": "stdout", "output_type": "stream", @@ -589,7 +621,20 @@ " }\n", " }\n", "}\n", - "STOP(section0,timers)\n", + "STOP(section0,timers)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Operator `Kernel` ran in 0.01 s\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "\n", " Data in g \n", " [ 0. 1. 2. 3. 4. 5. 6. 7. 8. 9. 10. 11. 12. 13. 14. 15.]\n", @@ -833,7 +878,7 @@ "source": [ "# Example C: Combining ConditionalDimension\n", "\n", - "In some cases, a `ConditionalDimension` might be used in combination with an implicit_dim to handle specific cases. This combination can be made mutually exclusive (And) or inclusive (Or).\n", + "In some cases, a `ConditionalDimension` might be used in combination with an implicit_dim to handle specific cases. This combination can be made mutually exclusive (And, default), inclusive (Or) or `strict`. When the relation is set to `strict` then this condition takes precedence over all other conditions. There cannot be more than one strict condition within an equation.\n", "\n", "As an example, let's consider the following case:\n", "\n", @@ -852,8 +897,12 @@ "\n", "x, y = grid.dimensions\n", "c_even = ConditionalDimension(name='ceven', parent=x, factor=2, relation=Or)\n", - "c_edge = ConditionalDimension(name='cedge', parent=grid.dimensions[0], condition=CondEq(x, x.symbolic_max), relation=Or)\n", - "c_edge_a = ConditionalDimension(name='cedge', parent=grid.dimensions[0], condition=CondEq(x, x.symbolic_max-1))\n", + "c_edge = ConditionalDimension(name='cedge', parent=grid.dimensions[0],\n", + " condition=CondEq(x, x.symbolic_max), relation=Or)\n", + "c_edge_a = ConditionalDimension(name='cedge', parent=grid.dimensions[0],\n", + " condition=CondEq(x, x.symbolic_max-1))\n", + "c_edge_s = ConditionalDimension(name='cedge', parent=grid.dimensions[0],\n", + " condition=CondEq(x, x.symbolic_max-2), relation='strict')\n", "\n", "f = Function(name=\"f\", grid=grid, dimensions=(c_even, y), shape=(5, 10), space_order=0)\n", "\n", @@ -943,7 +992,7 @@ "data": { "text/plain": [ "PerformanceSummary([(PerfKey(name='section0', rank=None),\n", - " PerfEntry(time=0.000124, gflopss=0.0, gpointss=0.0, oi=0.0, ops=0, itershapes=[]))])" + " PerfEntry(time=0.000153, gflopss=0.0, gpointss=0.0, oi=0.0, ops=0, itershapes=[]))])" ] }, "execution_count": 14, @@ -1062,7 +1111,7 @@ "data": { "text/plain": [ "PerformanceSummary([(PerfKey(name='section0', rank=None),\n", - " PerfEntry(time=0.000129, gflopss=0.0, gpointss=0.0, oi=0.0, ops=0, itershapes=[]))])" + " PerfEntry(time=0.00010899999999999999, gflopss=0.0, gpointss=0.0, oi=0.0, ops=0, itershapes=[]))])" ] }, "execution_count": 16, @@ -1101,6 +1150,130 @@ "assert np.all(f.data[-1, :] == 1)" ] }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Operator `Kernel` ran in 0.01 s\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/* Devito generated code for Operator `Kernel` */\n", + "\n", + "#define _POSIX_C_SOURCE 200809L\n", + "#define START(S) struct timeval start_ ## S , end_ ## S ; gettimeofday(&start_ ## S , NULL);\n", + "#define STOP(S,T) gettimeofday(&end_ ## S, NULL); T->S += (double)(end_ ## S .tv_sec-start_ ## S.tv_sec)+(double)(end_ ## S .tv_usec-start_ ## S .tv_usec)/1000000;\n", + "\n", + "#include \"stdlib.h\"\n", + "#include \"math.h\"\n", + "#include \"sys/time.h\"\n", + "#include \"xmmintrin.h\"\n", + "#include \"pmmintrin.h\"\n", + "#include \"omp.h\"\n", + "\n", + "struct dataobj\n", + "{\n", + " void *restrict data;\n", + " int * size;\n", + " unsigned long nbytes;\n", + " unsigned long * npsize;\n", + " unsigned long * dsize;\n", + " int * hsize;\n", + " int * hofs;\n", + " int * oofs;\n", + " void * dmap;\n", + "} ;\n", + "\n", + "struct profiler\n", + "{\n", + " double section0;\n", + "} ;\n", + "\n", + "\n", + "int Kernel(struct dataobj *restrict f_vec, const int x_M, const int cevenf, const int x_m, const int y_M, const int y_m, const int nthreads, struct profiler * timers)\n", + "{\n", + " float (*restrict f)[f_vec->size[1]] __attribute__ ((aligned (64))) = (float (*)[f_vec->size[1]]) f_vec->data;\n", + "\n", + " /* Flush denormal numbers to zero in hardware */\n", + " _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);\n", + " _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);\n", + "\n", + " START(section0)\n", + " #pragma omp parallel num_threads(nthreads)\n", + " {\n", + " #pragma omp for schedule(static,1)\n", + " for (int x = x_m; x <= x_M; x += 1)\n", + " {\n", + " if (x == x_M - 2)\n", + " {\n", + " #pragma omp simd aligned(f:64)\n", + " for (int y = y_m; y <= y_M; y += 1)\n", + " {\n", + " f[x / cevenf][y] = 1;\n", + " }\n", + " }\n", + " }\n", + " }\n", + " STOP(section0,timers)\n", + "\n", + " return 0;\n", + "}\n", + "\n" + ] + }, + { + "data": { + "text/plain": [ + "PerformanceSummary([(PerfKey(name='section0', rank=None),\n", + " PerfEntry(time=0.000108, gflopss=0.0, gpointss=0.0, oi=0.0, ops=0, itershapes=[]))])" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# NBVAL_IGNORE_OUTPUT\n", + "# In this case, since the edge condition is strict, it will be the only conditions\n", + "op = Operator(Eq(f, 1, implicit_dims=c_edge_s))\n", + "print(op)\n", + "f.data.fill(0)\n", + "op()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n", + " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n", + " [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]\n" + ] + } + ], + "source": [ + "print(f.data)\n", + "assert np.all(f.data[:-2, :] == 0)\n", + "assert np.all(f.data[-1, :] == 0)\n", + "assert np.all(f.data[-2, :] == 1)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -1114,7 +1287,7 @@ "metadata": { "celltoolbar": "Tags", "kernelspec": { - "display_name": "devitoenv (3.11.0)", + "display_name": "Python 3", "language": "python", "name": "python3" }, diff --git a/tests/test_buffering.py b/tests/test_buffering.py index 525ee72150..29984b7121 100644 --- a/tests/test_buffering.py +++ b/tests/test_buffering.py @@ -764,10 +764,10 @@ def test_multi_cond_v0(): ntmod = (nt - 1) * factor + 1 ct1 = ConditionalDimension(name="ct1", parent=grid.time_dim, - factor=factor, relation=Or) + factor=factor) ctend = ConditionalDimension(name="ctend", parent=grid.time_dim, condition=CondEq(grid.time_dim, ntmod - 2), - relation=Or) + relation='strict') f = TimeFunction(grid=grid, name='f', time_order=0, space_order=0, save=nt, time_dim=ct1) @@ -776,8 +776,9 @@ def test_multi_cond_v0(): eqs = [Eq(T, grid.time_dim)] # This saves # - All subsampled times since ct1 is the dimension of f + eqs.append(Eq(f, T)) # - The last time step (ntmod - 2) through ctend (since it's set as ct1 or ctend) - eqs.append(Eq(f, T, implicit_dims=ctend)) + eqs.append(Eq(f.forward, T, implicit_dims=ctend)) # run operator with buffering op = Operator(eqs, opt='buffering')