diff --git a/devito/ir/cgen/printer.py b/devito/ir/cgen/printer.py
index 3f87496c62..dec3792b31 100644
--- a/devito/ir/cgen/printer.py
+++ b/devito/ir/cgen/printer.py
@@ -212,7 +212,12 @@ def _print_Pow(self, expr):
         suffix = self.func_literal(expr)
         base = self._print(expr.base)
         if equal_valued(expr.exp, -1):
-            return self._print_Float(Float(1.0)) + '/' + \
+            # Pick the literal precision from this Pow's dtype rather than
+            # the printer default, so e.g. ``Pow(DOUBLE(h), -1)`` emits
+            # ``1.0/(double)h`` not ``1.0F/(double)h``. This branch only
+            # fires when the surrounding Mul printer chose not to group
+            # the Pow into a denominator (e.g. lone reciprocal).
+            return f'1.0{self.prec_literal(expr)}/' + \
                 self.parenthesize(expr.base, PREC)
         elif equal_valued(expr.exp, 0.5):
             return f'{self.ns(expr)}sqrt{suffix}({base})'
diff --git a/devito/ir/clusters/cluster.py b/devito/ir/clusters/cluster.py
index cbf206b3ff..8d880501e2 100644
--- a/devito/ir/clusters/cluster.py
+++ b/devito/ir/clusters/cluster.py
@@ -4,7 +4,7 @@
 
 import numpy as np
 
-from devito.ir.equations import ClusterizedEq
+from devito.ir.equations import clusterize_eq
 from devito.ir.support import (
     PARALLEL, PARALLEL_IF_PVT, BaseGuardBoundNext, DataSpace, Forward, Guards, Interval,
     IntervalGroup, IterationSpace, PrefetchUpdate, Properties, Scope, WaitLock, WithLock,
@@ -50,7 +50,7 @@ class Cluster:
 
     def __init__(self, exprs, ispace=null_ispace, guards=None, properties=None,
                  syncs=None, halo_scheme=None):
-        self._exprs = tuple(ClusterizedEq(e, ispace=ispace) for e in as_tuple(exprs))
+        self._exprs = tuple(clusterize_eq(e, ispace=ispace) for e in as_tuple(exprs))
         self._ispace = ispace
         self._guards = Guards(guards or {})
         self._syncs = normalize_syncs(syncs or {})
diff --git a/devito/ir/equations/equation.py b/devito/ir/equations/equation.py
index 8d72704b79..43445d47c9 100644
--- a/devito/ir/equations/equation.py
+++ b/devito/ir/equations/equation.py
@@ -1,4 +1,4 @@
-from functools import cached_property
+from functools import cached_property, singledispatch
 
 import numpy as np
 import sympy
@@ -11,17 +11,31 @@
 )
 from devito.symbolics import IntDiv, limits_mapper, uxreplace
 from devito.tools import Pickable, Tag, frozendict
-from devito.types import Eq, Inc, ReduceMax, ReduceMin, ReduceMinMax, relational_min
+from devito.types import (
+    Eq, Inc, IncrInterpolation, Injection, InjectionMixin, Interpolation,
+    InterpolationMixin, ReduceMax, ReduceMin, ReduceMinMax, SparseEq, SparseOpMixin,
+    relational_min
+)
 
 __all__ = [
     'ClusterizedEq',
+    'ClusterizedIncrInterpolation',
+    'ClusterizedInjection',
+    'ClusterizedInterpolation',
+    'ClusterizedSparseEq',
     'DummyEq',
     'LoweredEq',
+    'LoweredIncrInterpolation',
+    'LoweredInjection',
+    'LoweredInterpolation',
+    'LoweredSparseEq',
     'OpInc',
     'OpMax',
     'OpMin',
     'OpMinMax',
+    'clusterize_eq',
     'identity_mapper',
+    'lower_eq',
 ]
 
 
@@ -30,6 +44,8 @@ class IREq(sympy.Eq, Pickable):
     __rargs__ = ('lhs', 'rhs')
     __rkwargs__ = ('ispace', 'conditionals', 'implicit_dims', 'operation')
 
+    is_SparseOperation = False
+
     def _hashable_content(self):
         return (*super()._hashable_content(),
                 *tuple(getattr(self, i) for i in self.__rkwargs__))
@@ -115,16 +131,15 @@ class Operation(Tag):
 
     @classmethod
     def detect(cls, expr):
-        reduction_mapper = {
-            Inc: OpInc,
-            ReduceMax: OpMax,
-            ReduceMin: OpMin,
-            ReduceMinMax: OpMinMax
-        }
-        try:
-            return reduction_mapper[type(expr)]
-        except KeyError:
-            pass
+        reduction_mapper = (
+            (ReduceMinMax, OpMinMax),
+            (ReduceMin, OpMin),
+            (ReduceMax, OpMax),
+            (Inc, OpInc),
+        )
+        for kls, op in reduction_mapper:
+            if isinstance(expr, kls):
+                return op
 
         # NOTE: in the future we might want to track down other kinds
         # of operations here (e.g., memcpy). However, we don't care for
@@ -204,8 +219,9 @@ def __new__(cls, *args, **kwargs):
         accesses = detect_accesses(expr)
         dimensions = Stencil.union(*accesses.values())
 
-        # Separate out the SubIterators from the main iteration Dimensions, that
-        # is those which define an actual iteration space
+        # Separate out the SubIterators from the main iteration
+        # Dimensions, that is those which define an actual
+        # iteration space
         iterators = {}
         for d in dimensions:
             if d.is_SubIterator:
@@ -271,6 +287,43 @@ def func(self, *args):
         return self._rebuild(*args, evaluate=False)
 
 
+class LoweredSparseEq(SparseOpMixin, LoweredEq):
+
+    """
+    The IR counterpart of ``SparseEq``: a regular ``LoweredEq`` that
+    also carries the ``interpolator`` metadata used by the IET pass
+    ``lower_sparse_ops`` to wrap the resulting ``p_*, rp_*`` iteration
+    nest in an ElementalFunction. Subclassed by
+    ``LoweredInterpolation`` / ``LoweredIncrInterpolation`` /
+    ``LoweredInjection`` for the per-operation polymorphic behaviour.
+    """
+
+    __rkwargs__ = LoweredEq.__rkwargs__ + ('interpolator',)
+
+
+class LoweredInterpolation(InterpolationMixin, LoweredSparseEq):
+    """IR counterpart of ``Interpolation``."""
+    pass
+
+
+class LoweredIncrInterpolation(InterpolationMixin, LoweredSparseEq):
+    """IR counterpart of ``IncrInterpolation``."""
+    pass
+
+
+class LoweredInjection(InjectionMixin, LoweredSparseEq):
+    """IR counterpart of ``Injection``."""
+    pass
+
+
+# Map user-level sparse-op classes to their IR-level counterparts.
+_lowered_sparse_cls = {
+    Interpolation: LoweredInterpolation,
+    IncrInterpolation: LoweredIncrInterpolation,
+    Injection: LoweredInjection,
+}
+
+
 class ClusterizedEq(IREq):
 
     """
@@ -326,6 +379,41 @@ def __new__(cls, *args, **kwargs):
     func = IREq._rebuild
 
 
+class ClusterizedSparseEq(SparseOpMixin, ClusterizedEq):
+
+    """
+    Frozen counterpart of ``LoweredSparseEq``: the same regular
+    ``ClusterizedEq`` augmented with ``interpolator`` so the IET pass
+    ``lower_sparse_ops`` can identify and rewrite the sparse op's
+    iteration nest. Subclassed by ``ClusterizedInterpolation`` /
+    ``ClusterizedIncrInterpolation`` / ``ClusterizedInjection``.
+    """
+
+    __rkwargs__ = ClusterizedEq.__rkwargs__ + ('interpolator',)
+
+
+class ClusterizedInterpolation(InterpolationMixin, ClusterizedSparseEq):
+    """Frozen counterpart of ``LoweredInterpolation``."""
+    pass
+
+
+class ClusterizedIncrInterpolation(InterpolationMixin, ClusterizedSparseEq):
+    """Frozen counterpart of ``LoweredIncrInterpolation``."""
+    pass
+
+
+class ClusterizedInjection(InjectionMixin, ClusterizedSparseEq):
+    """Frozen counterpart of ``LoweredInjection``."""
+    pass
+
+
+_clusterized_sparse_cls = {
+    LoweredInterpolation: ClusterizedInterpolation,
+    LoweredIncrInterpolation: ClusterizedIncrInterpolation,
+    LoweredInjection: ClusterizedInjection,
+}
+
+
 class DummyEq(ClusterizedEq):
 
     """
@@ -345,3 +433,62 @@ def __new__(cls, *args, **kwargs):
         else:
             raise ValueError(f"Cannot construct DummyEq from args={str(args)}")
         return ClusterizedEq.__new__(cls, obj, ispace=obj.ispace)
+
+
+@singledispatch
+def lower_eq(eq):
+    """
+    Promote a user-level ``Eq`` to its ``LoweredEq`` counterpart, ready
+    for the cluster pipeline. The dispatch matches the dynamic type of
+    ``eq``; ``SparseEq`` and friends get their own branch.
+    """
+    return LoweredEq(eq)
+
+
+@lower_eq.register(SparseEq)
+def _(eq):
+    # Augment ``implicit_dims`` with the SparseFunction's own iteration
+    # Dimensions (e.g. ``p_sf`` and any extra SparseFunction dims) so
+    # the cluster scheduler sees them. Grid Dimensions reached through
+    # the rhs Function are deliberately *not* added: SubDomain-derived
+    # SubDimensions would otherwise spuriously appear in the
+    # IterationSpace, and grid Dimensions are already discovered via
+    # the radius ConditionalDimensions in the rhs.
+    interp = eq.interpolator
+    sf_dims = tuple(interp.sfunction.dimensions)
+    user = tuple(eq.implicit_dims or ())
+    if interp.sfunction._sparse_position == -1:
+        augmented = sf_dims + user
+    else:
+        augmented = user + sf_dims
+
+    if augmented != tuple(eq.implicit_dims or ()):
+        eq = eq.func(eq.lhs, eq.rhs, interpolator=interp,
+                     implicit_dims=augmented)
+
+    lowered_cls = _lowered_sparse_cls[type(eq)]
+    obj = lowered_cls(eq)
+    obj._interpolator = interp
+    return obj
+
+
+@singledispatch
+def clusterize_eq(eq, **kwargs):
+    """
+    Freeze a ``LoweredEq`` into its ``ClusterizedEq`` counterpart,
+    suitable for use in a ``Cluster``. Subclasses with extra payload
+    (e.g. ``LoweredSparseEq``) dispatch to their frozen counterpart.
+    """
+    return ClusterizedEq(eq, **kwargs)
+
+
+@clusterize_eq.register(LoweredSparseEq)
+def _(eq, **kwargs):
+    return _clusterized_sparse_cls[type(eq)](eq, **kwargs)
+
+
+@clusterize_eq.register(ClusterizedSparseEq)
+def _(eq, **kwargs):
+    # ``eq`` is already clusterized; rebuild via its own class to preserve
+    # the per-operation polymorphic behaviour.
+    return type(eq)(eq, **kwargs)
diff --git a/devito/mpi/halo_scheme.py b/devito/mpi/halo_scheme.py
index 3b4bcb5bc8..884460d44c 100644
--- a/devito/mpi/halo_scheme.py
+++ b/devito/mpi/halo_scheme.py
@@ -134,7 +134,6 @@ class HaloScheme:
     """
 
     def __init__(self, exprs, ispace):
-        # Derive the halo exchanges
         self._mapper = frozendict(classify(exprs, ispace))
 
         # Track the IterationSpace offsets induced by SubDomains/SubDimensions,
diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
index 6467e65df4..8081312a84 100644
--- a/devito/operations/interpolators.py
+++ b/devito/operations/interpolators.py
@@ -1,7 +1,6 @@
 from abc import ABC, abstractmethod
 from contextlib import suppress
 from functools import cached_property, wraps
-from itertools import groupby
 
 import numpy as np
 import sympy
@@ -15,8 +14,11 @@
 from devito.finite_differences.elementary import floor
 from devito.logger import warning
 from devito.symbolics import INT, retrieve_function_carriers, retrieve_functions
-from devito.tools import Pickable, as_tuple, filter_ordered, flatten, memoized_meth
-from devito.types import Eq, Evaluable, Inc, SubFunction, Symbol
+from devito.symbolics.extended_dtypes import DOUBLE
+from devito.tools import as_tuple, filter_ordered, memoized_meth
+from devito.types import (
+    Eq, Inc, IncrInterpolation, Injection, Interpolation, SubFunction, Symbol
+)
 from devito.types.utils import DimensionTuple
 
 __all__ = ['LinearInterpolator', 'PrecomputedInterpolator', 'SincInterpolator']
@@ -81,106 +83,41 @@ def _extract_subdomain(variables):
     return None
 
 
-class UnevaluatedSparseOperation(sympy.Expr, Evaluable, Pickable):
-
+def _build_interpolation(expr, increment, implicit_dims, self_subs, interpolator):
     """
-    Represents an Injection or an Interpolation operation performed on a
-    SparseFunction. Evaluates to a list of Eq objects.
-
-    Parameters
-    ----------
-    interpolator : Interpolator
-        Interpolator object that will be used to evaluate the operation.
-    callback : callable
-        A routine generating the symbolic expressions for the operation.
+    Construct the sparse-op Eq for an interpolation: the synthetic Eq
+    is ``Eq(sf[..., p_*], expr[..., rp_*])``; with ``increment`` it is
+    an ``Inc``. User-supplied ``implicit_dims`` are carried as-is; the
+    SparseFunction's iteration Dimensions are augmented in by
+    ``lower_eq`` so the cluster pipeline sees them.
     """
+    eq = interpolator._interpolate(expr=expr, increment=increment,
+                                   self_subs=self_subs,
+                                   implicit_dims=None)
+    cls = IncrInterpolation if isinstance(eq, Inc) else Interpolation
+    return cls(eq.lhs, eq.rhs, interpolator=interpolator,
+               implicit_dims=implicit_dims)
 
-    subdomain = None
-    __rargs__ = ('interpolator',)
-
-    def __new__(cls, interpolator):
-        obj = super().__new__(cls)
-
-        obj.interpolator = interpolator
-
-        return obj
-
-    def _evaluate(self, **kwargs):
-        return_value = self.operation(**kwargs)
-        assert(all(isinstance(i, Eq) for i in return_value))
-        return return_value
-
-    @abstractmethod
-    def operation(self, **kwargs):
-        pass
-
-    def __add__(self, other):
-        return flatten([self, other])
-
-    def __radd__(self, other):
-        return flatten([other, self])
-
-
-class Interpolation(UnevaluatedSparseOperation):
-
-    """
-    Represents an Interpolation operation performed on a SparseFunction.
-    Evaluates to a list of Eq objects.
-    """
-
-    __rargs__ = ('expr', 'increment', 'implicit_dims', 'self_subs') + \
-        UnevaluatedSparseOperation.__rargs__
-
-    def __new__(cls, expr, increment, implicit_dims, self_subs, interpolator):
-        obj = super().__new__(cls, interpolator)
-
-        # TODO: unused now, but will be necessary to compute the adjoint
-        obj.expr = expr
-        obj.increment = increment
-        obj.self_subs = self_subs
-        obj.implicit_dims = implicit_dims
-
-        return obj
-
-    def operation(self, **kwargs):
-        return self.interpolator._interpolate(expr=self.expr, increment=self.increment,
-                                              self_subs=self.self_subs,
-                                              implicit_dims=self.implicit_dims)
-
-    def __repr__(self):
-        return (f"Interpolation({repr(self.expr)} into "
-                f"{repr(self.interpolator.sfunction)})")
-
-    __str__ = __repr__
-
-
-class Injection(UnevaluatedSparseOperation):
 
+def _build_injection(field, expr, implicit_dims, interpolator):
     """
-    Represents an Injection operation performed on a SparseFunction.
-    Evaluates to a list of Eq objects.
+    Construct the ``Injection``(s) for an injection: each synthetic Eq
+    is ``Inc(field[..., x, y, ...], weights * expr[..., rp_*])``
+    produced by ``interpolator._inject``. A multi-field injection
+    expands into one ``Injection`` per ``(field, expr)`` pair so each
+    target field is individually visible to the cluster pipeline.
+    User-supplied ``implicit_dims`` are carried as-is; sparse-function
+    iteration Dimensions are augmented in by ``lower_eq``.
     """
-
-    __rargs__ = ('field', 'expr', 'implicit_dims') + UnevaluatedSparseOperation.__rargs__
-
-    def __new__(cls, field, expr, implicit_dims, interpolator):
-        obj = super().__new__(cls, interpolator)
-
-        # TODO: unused now, but will be necessary to compute the adjoint
-        obj.field = field
-        obj.expr = expr
-        obj.implicit_dims = implicit_dims
-
-        return obj
-
-    def operation(self, **kwargs):
-        return self.interpolator._inject(expr=self.expr, field=self.field,
-                                         implicit_dims=self.implicit_dims)
-
-    def __repr__(self):
-        return f"Injection({repr(self.expr)} into {repr(self.field)})"
-
-    __str__ = __repr__
+    fields, exprs = as_tuple(field), as_tuple(expr)
+    if len(exprs) == 1:
+        exprs = tuple(exprs[0] for _ in fields)
+    eqs = []
+    for (f, e) in zip(fields, exprs, strict=True):
+        inc = interpolator._inject(field=f, expr=e, implicit_dims=None)
+        eqs.append(Injection(inc.lhs, inc.rhs, interpolator=interpolator,
+                             implicit_dims=implicit_dims))
+    return eqs[0] if len(eqs) == 1 else eqs
 
 
 class GenericInterpolator(ABC):
@@ -265,14 +202,21 @@ def _rdim(self, subdomain=None, shifts=None):
         else:
             gdims = self._gdims
 
-        # Make radius dimension conditional to avoid OOB
+        # Make the radius dimensions conditional to avoid OOB accesses.
+        # ``rd ∈ [-r+1, r]`` and the access is ``field[pos + rd]``, so
+        # the OOB guard on ``rd`` reads ``pos + rd ∈ [d_min - r, d_max + r]``.
         rdims = []
-        pos = self.sfunction._position_map(shifts=shifts).values()
-
-        for (d, rd, p) in zip(gdims, self._cdim, pos, strict=True):
-            # Add conditional to avoid OOB
-            lb = sympy.And(rd + p >= d.symbolic_min - self.r, evaluate=False)
-            ub = sympy.And(rd + p <= d.symbolic_max + self.r, evaluate=False)
+        pos_symbols = self.sfunction._pos_symbols(shifts=shifts)
+
+        for d in gdims:
+            # The radius CustomDimension is keyed on the grid Dimension
+            # (``d.root``) so a SubDomain-restricted operation reuses
+            # the same ``rp_*`` dim as the full-grid case; only the
+            # bounds carry the SubDomain's symbolic_min/max.
+            rd = self.sfunction._crdim(d.root)
+            pos = pos_symbols[d.root]
+            lb = sympy.And(pos + rd >= d.symbolic_min - self.r, evaluate=False)
+            ub = sympy.And(pos + rd <= d.symbolic_max + self.r, evaluate=False)
             cond = sympy.And(lb, ub, evaluate=False)
 
             # Insert a check to catch cases where interpolation/injection is
@@ -314,42 +258,91 @@ def _augment_implicit_dims(self, implicit_dims, extras=None):
     def _coeff_temps(self, implicit_dims, shifts=None):
         return []
 
-    def _positions(self, implicit_dims, shifts=None):
-        return [Eq(v, INT(floor(k)), implicit_dims=implicit_dims)
-                for k, v in self.sfunction._position_map(shifts=shifts).items()]
-
-    def _interp_idx(self, variables, implicit_dims=None, subdomain=None,
-                    shifts=None):
+    @memoized_meth
+    def _raw_pos_symbols(self, shifts=None):
+        """
+        Per-Dimension Symbol holding the unrounded grid-relative position
+        ``(coord - origin - shift)/h``. Both the integer position
+        (``floor(...)``) and the linear-interp fractional part
+        (``... - floor(...)``) reuse this Symbol so the divide-and-shift
+        expression is emitted only once per sparse point.
         """
-        Generate interpolation indices for the DiscreteFunctions in `variables`.
+        dtype = self.sfunction.coordinates.dtype
+        symbols = []
+        for d, s in zip(self.grid.dimensions,
+                        shifts or (0,) * len(self.grid.dimensions),
+                        strict=True):
+            suffix = '_s1' if s != 0 else ''
+            symbols.append(Symbol(name=f'rpos{d}{suffix}', dtype=dtype))
+        return DimensionTuple(*symbols, getters=self.grid.dimensions)
 
-        `shifts` is a per-Dimension physical offset for the target field's
-        origin: it only affects the integer position symbol via the position
-        map (`pos = floor((c - o - shift)/h)`). The index substitution itself
-        is unchanged — any staggered offset in a field's own symbolic access is
-        absorbed by Devito's normal indexing.
+    def _positions(self, implicit_dims, shifts=None):
+        # The ``(coord - origin)/h`` subtract is the only step that can lose
+        # precision to catastrophic cancellation when ``coord`` and ``origin``
+        # are large and close to each other (e.g. an origin-shifted survey).
+        # Promote ``origin`` and ``h`` to float64 so the subtract and divide
+        # happen in double precision in C (one cast operand promotes the
+        # whole expression); the result narrows to the field dtype on store
+        # to ``rpos*`` so downstream ``floor`` / fractional math stays in
+        # the field dtype.
+        rposs = self._raw_pos_symbols(shifts=shifts)
+        subs = {o: DOUBLE(o) for o in self.grid.origin_symbols}
+        subs.update({d.spacing: DOUBLE(d.spacing) for d in self._gdims})
+        return [Eq(rposs[d], k.xreplace(subs), implicit_dims=implicit_dims)
+                for d, k in zip(self._gdims,
+                                self.sfunction._position_map(shifts=shifts),
+                                strict=True)] + \
+               [Eq(v, INT(floor(rposs[d])), implicit_dims=implicit_dims)
+                for d, v in zip(self._gdims,
+                                self.sfunction._position_map(shifts=shifts).values(),
+                                strict=True)]
+
+    def sparse_temps(self, rhs, implicit_dims, field=None):
         """
-        pos = self.sfunction._position_map(shifts=shifts).values()
+        Position/coefficient temps for a sparse op with right-hand side
+        ``rhs``. For an injection, ``field`` drives the per-Dimension
+        shifts so the temps' lhs (``pos*`` symbols) match the rhs of a
+        staggered injection; for an interpolation, ``field`` is None
+        and no shifts are applied.
+        """
+        if field is not None:
+            extras = [field] + list(retrieve_function_carriers(rhs))
+            shifts = self._field_shifts(field)
+        else:
+            extras = list(retrieve_function_carriers(rhs)) or None
+            shifts = None
 
-        # Temporaries for the position
-        temps = self._positions(implicit_dims, shifts=shifts)
+        implicit_dims = self._augment_implicit_dims(implicit_dims, extras=extras)
+        return list(self._positions(implicit_dims, shifts=shifts)) + \
+            list(self._coeff_temps(implicit_dims, shifts=shifts))
 
-        # Coefficient symbol expression
-        temps.extend(self._coeff_temps(implicit_dims, shifts=shifts))
+    def _interp_idx(self, variables, subdomain=None, shifts=None):
+        """
+        Generate the indirect-access index substitutions for the
+        DiscreteFunctions in ``variables``. Each grid Dimension ``x``
+        is replaced with ``pos_x + rd_x``, where ``rd_x`` is the
+        StencilDimension for the radius and ``pos_x`` is the per-point
+        position offset; together they realise the radius-neighbourhood
+        access ``field[pos_x + rd_x, pos_y + rd_y]``.
+
+        ``shifts`` is a per-Dimension physical offset for the target
+        field's origin (e.g. ``h_x/2`` for a field staggered in ``x``);
+        it only affects the integer position symbol via the position
+        map (``pos = floor((c - o - shift)/h)``).
+        """
+        pos = self.sfunction._position_map(shifts=shifts).values()
 
         # Substitution mapper for variables
         mapper = self._rdim(subdomain=subdomain, shifts=shifts).getters
 
         # Index substitution to make in variables
         subs = {
-            ki: c + p
+            ki: p + c
             for ((k, c), p) in zip(mapper.items(), pos, strict=True)
             for ki in {k, k.root}
         }
 
-        idx_subs = {v: v.subs(subs) for v in variables}
-
-        return idx_subs, temps
+        return {v: v.subs(subs) for v in variables}
 
     @check_radius
     @check_coords
@@ -370,7 +363,7 @@ def interpolate(self, expr, increment=False, self_subs=None, implicit_dims=None)
         """
         if self_subs is None:
             self_subs = {}
-        return Interpolation(expr, increment, implicit_dims, self_subs, self)
+        return _build_interpolation(expr, increment, implicit_dims, self_subs, self)
 
     @check_radius
     @check_coords
@@ -389,125 +382,83 @@ def inject(self, field, expr, implicit_dims=None):
             injection expression, but that should be honored when constructing
             the operator.
         """
-        return Injection(field, expr, implicit_dims, self)
+        return _build_injection(field, expr, implicit_dims, self)
 
     def _interpolate(self, expr, increment=False, self_subs=None, implicit_dims=None):
         """
-        Generate equations interpolating an arbitrary expression into `self`.
+        Build the synthetic single Eq for an interpolation:
 
-        Parameters
-        ----------
-        expr : expr-like
-            Input expression to interpolate.
-        increment: bool, optional
-            If True, generate increments (Inc) rather than assignments (Eq).
-        implicit_dims : Dimension or list of Dimension, optional
-            An ordered list of Dimensions that do not explicitly appear in the
-            interpolation expression, but that should be honored when constructing
-            the operator.
+            Eq(sf[..., p_*], expr[..., rp_*])         # or Inc when increment=True
+
+        The grid Dimensions inside ``expr`` are substituted for the
+        radius ConditionalDimensions ``rp_*``, whose parent is the
+        original grid Dimension. The cluster scheduler therefore derives
+        an IterationSpace ``(..., p_*, rp_*)``; the IET pass
+        ``lower_sparse_ops`` later wraps that nest in an
+        ElementalFunction and inserts the position/weight temps and
+        accumulator inside it.
         """
-        # Derivatives must be evaluated before the introduction of indirect accesses
-        with suppress(AttributeError):
-            expr = expr._eval_at(self.sfunction).evaluate
+        # Derivatives must be evaluated before the introduction of indirect accesses.
+        # CSE will pick up any shared subexpression.
+        try:
+            _expr = expr._eval_at(self.sfunction).evaluate
+        except AttributeError:
+            _expr = expr
 
         if self_subs is None:
             self_subs = {}
 
-        variables = list(retrieve_function_carriers(expr))
+        variables = list(retrieve_function_carriers(_expr))
         subdomain = _extract_subdomain(variables)
 
-        # Implicit dimensions
         implicit_dims = self._augment_implicit_dims(implicit_dims, variables)
+        idx_subs = self._interp_idx(variables, subdomain=subdomain)
 
-        # List of indirection indices for all adjacent grid points
-        idx_subs, temps = self._interp_idx(variables, implicit_dims=implicit_dims,
-                                           subdomain=subdomain)
-
-        # Accumulate point-wise contributions into a temporary
-        rhs = Symbol(name=f'sum{self.sfunction.name}', dtype=self.sfunction.dtype)
-        summands = [Eq(rhs, 0., implicit_dims=implicit_dims)]
-        # Substitute coordinate base symbols into the interpolation coefficients
-        weights = self._weights(subdomain=subdomain)
-        summands.extend([Inc(rhs, (weights * expr).xreplace(idx_subs),
-                             implicit_dims=implicit_dims)])
-
-        # Write/Incr `self`
         lhs = self.sfunction.subs(self_subs)
-        ecls = Inc if increment else Eq
-        last = [ecls(lhs, rhs, implicit_dims=implicit_dims)]
+        rhs = _expr.xreplace(idx_subs)
 
-        return temps + summands + last
+        ecls = Inc if increment else Eq
+        return ecls(lhs, rhs, implicit_dims=implicit_dims)
 
     def _inject(self, field, expr, implicit_dims=None):
         """
-        Generate equations injecting an arbitrary expression into a field.
+        Build the synthetic single Inc for an injection:
 
-        Parameters
-        ----------
-        field : Function
-            Input field into which the injection is performed.
-        expr : expr-like
-            Injected expression.
-        implicit_dims : Dimension or list of Dimension, optional
-            An ordered list of Dimensions that do not explicitly appear in the
-            injection expression, but that should be honored when constructing
-            the operator.
-        """
-        # Make iterable to support inject((u, v), expr=expr)
-        # or inject((u, v), expr=(expr1, expr2))
-        fields, exprs = as_tuple(field), as_tuple(expr)
-
-        # Provide either one expr per field or on expr for all fields
-        if len(fields) > 1:
-            if len(exprs) == 1:
-                exprs = tuple(exprs[0] for _ in fields)
-            else:
-                assert len(exprs) == len(fields)
-
-        subdomain = _extract_subdomain(fields)
+            Inc(field[..., pos_x + rd_x, ...], weights(rd_*) * expr[..., pos + rd_*])
 
+        Both lhs and rhs share the radius-indexed access pattern so the
+        cluster scheduler derives the same ``(..., p_*, rd_*)``
+        IterationSpace whether the operation is interpolation or
+        injection. The IET pass ``lower_sparse_ops`` wraps the resulting
+        nest in an ElementalFunction.
+        """
         # Derivatives must be evaluated before the introduction of indirect
         # accesses. Variables are sampled at their own grid location; the
         # position map for the target field carries the staggering so the
         # field's stencil neighbors land on the right indices.
-        with suppress(AttributeError):
-            exprs = tuple(e._eval_at(f).evaluate
-                          for e, f in zip(exprs, fields, strict=True))
+        try:
+            _expr = expr._eval_at(field).evaluate
+        except AttributeError:
+            _expr = expr
 
-        eqns = []
-        temps = []
-        # We need to create one set of equations (temps and and coeffs) per staggering
-        # field in which we inject as the reference index depends on the field's origin
-        for _, g in groupby(zip(fields, exprs, strict=True), lambda f: f[0].staggered):
-            g_fields, g_exprs = zip(*g, strict=True)
-            variables = list(v for e in g_exprs for v in retrieve_function_carriers(e))
+        subdomain = _extract_subdomain((field,))
 
-            implicit_dims = self._augment_implicit_dims(implicit_dims, variables)
+        variables = list(retrieve_function_carriers(_expr))
 
-            # All fields in a single injection share the same staggering by
-            # construction (they are written together at the same indices), so
-            # derive shifts from the first field.
-            shifts = self._field_shifts(g_fields[0])
+        implicit_dims = self._augment_implicit_dims(implicit_dims,
+                                                    (field,) + tuple(variables))
 
-            # Move all temporaries inside inner loop to improve parallelism
-            # Can only be done for inject as interpolation needs a summing temp
-            # that wouldn't allow collapsing
-            implicit_dims = implicit_dims + tuple(r.parent for r in
-                                                  self._rdim(subdomain=subdomain,
-                                                             shifts=shifts))
+        # The reference index depends on the field's origin (staggering).
+        shifts = self._field_shifts(field)
 
-            # List of indirection indices for all adjacent grid points
-            idx_subs, _temps = self._interp_idx(list(g_fields) + variables,
-                                                implicit_dims=implicit_dims,
-                                                subdomain=subdomain, shifts=shifts)
+        idx_subs = self._interp_idx((field,) + tuple(variables),
+                                    subdomain=subdomain, shifts=shifts)
 
-            w = self._weights(subdomain=subdomain, shifts=shifts)
-            temps.extend(_temps)
-            eqns.extend([Inc(f.xreplace(idx_subs), (w * e).xreplace(idx_subs),
-                             implicit_dims=implicit_dims)
-                         for f, e in zip(g_fields, g_exprs, strict=True)])
+        weights = self._weights(subdomain=subdomain, shifts=shifts)
+        lhs = field.xreplace(idx_subs)
+        rhs = (weights * _expr).xreplace(idx_subs)
 
-        return filter_ordered(temps) + eqns
+        return Inc(lhs, rhs, implicit_dims=implicit_dims)
 
 
 class LinearInterpolator(WeightedInterpolator):
@@ -543,13 +494,14 @@ def _point_symbols(self, shifts=None):
         return DimensionTuple(*symbols, getters=self.grid.dimensions)
 
     def _coeff_temps(self, implicit_dims, shifts=None):
-        # Positions
-        pmap = self.sfunction._position_map(shifts=shifts)
+        # The fractional part of the unrounded position; reuse the
+        # ``rpos*`` Symbols emitted by ``_positions`` rather than the full
+        # ``(c - o)/h`` expression so the divide is computed only once.
+        rposs = self._raw_pos_symbols(shifts=shifts)
         psyms = self._point_symbols(shifts)
-        poseq = [Eq(psyms[d], pos - floor(pos),
-                    implicit_dims=implicit_dims)
-                 for (d, pos) in zip(self._gdims, pmap.keys(), strict=True)]
-        return poseq
+        return [Eq(psyms[d], rposs[d] - floor(rposs[d]),
+                   implicit_dims=implicit_dims)
+                for d in self._gdims]
 
 
 class PrecomputedInterpolator(WeightedInterpolator):
@@ -580,7 +532,10 @@ def interpolation_coeffs(self):
     @memoized_meth
     def _weights(self, subdomain=None, shifts=None):
         ddim, cdim = self.interpolation_coeffs.dimensions[1:]
-        mappers = [{ddim: ri, cdim: rd-rd.parent.symbolic_min}
+        # The radius CustomDim spans ``[-r+1, r]``; the coefficients
+        # table is indexed from 0 to ``2r-1``, so shift by ``r-1``.
+        offset = self.r - 1
+        mappers = [{ddim: ri, cdim: rd + offset}
                    for (ri, rd) in enumerate(self._rdim(subdomain=subdomain,
                                                         shifts=shifts))]
         return Mul(*[self.interpolation_coeffs.subs(mapper)
@@ -617,12 +572,13 @@ def __init__(self, sfunction):
     def interpolation_coeffs(self):
         coeffs = []
         shape = (self.sfunction.npoint, 2 * self.r)
-        for r in self._cdim:
-            dimensions = (self.sfunction._sparse_dim, r)
-            sf = SubFunction(name=f"wsinc{r.name}", dtype=self.sfunction.dtype,
+        for d in self._gdims:
+            rd = self.sfunction._crdim(d)
+            dimensions = (self.sfunction._sparse_dim, rd)
+            sf = SubFunction(name=f"wsinc{rd.name}", dtype=self.sfunction.dtype,
                              shape=shape, dimensions=dimensions,
                              space_order=0, alias=self.sfunction.alias,
-                             parent=None)
+                             parent=self.sfunction)
             coeffs.append(sf)
         return tuple(coeffs)
 
diff --git a/devito/operator/operator.py b/devito/operator/operator.py
index bfba76b593..c2b2e32ede 100644
--- a/devito/operator/operator.py
+++ b/devito/operator/operator.py
@@ -19,7 +19,7 @@
     CompilationError, ExecutionError, InvalidArgument, InvalidOperator
 )
 from devito.ir.clusters import ClusterGroup, clusterize
-from devito.ir.equations import LoweredEq, concretize_subdims, lower_exprs
+from devito.ir.equations import concretize_subdims, lower_eq, lower_exprs
 from devito.ir.iet import (
     Callable, CInterface, DeviceFunction, EntryFunction, FindSymbols, MetaCall,
     derive_parameters, iet_build
@@ -33,7 +33,7 @@
 from devito.parameters import configuration
 from devito.passes import (
     Graph, error_mapper, generate_implicit, generate_macros, is_on_device, lower_dtypes,
-    lower_index_derivatives, minimize_symbols, optimize_pows, unevaluate
+    lower_index_derivatives, lower_sparse_ops, minimize_symbols, optimize_pows, unevaluate
 )
 from devito.symbolics import estimate_cost, subs_op_args
 from devito.tools import (
@@ -192,7 +192,10 @@ def _check_kwargs(cls, **kwargs):
 
     @classmethod
     def _sanitize_exprs(cls, expressions, **kwargs):
-        expressions = as_tuple(expressions)
+        # Flatten so callers can mix nested lists (e.g. ``free_surface``
+        # returning a list, or multi-field injections returning one
+        # ``SparseEq`` per field) without having to manually unpack them.
+        expressions = tuple(flatten(as_tuple(expressions)))
 
         for i in expressions:
             if not isinstance(i, Evaluable):
@@ -362,12 +365,9 @@ def _lower_exprs(cls, expressions, **kwargs):
 
         # "True" lowering (indexification, shifting, ...)
         expressions = lower_exprs(expressions, **kwargs)
-
-        # Turn user-defined SubDimensions into concrete SubDimensions,
-        # in particular uniqueness across expressions is ensured
         expressions = concretize_subdims(expressions, **kwargs)
 
-        processed = [LoweredEq(i) for i in expressions]
+        processed = [lower_eq(i) for i in expressions]
 
         return processed
 
@@ -490,8 +490,12 @@ def _lower_iet(cls, uiet, **kwargs):
         parameters = derive_parameters(uiet, True)
         iet = EntryFunction(name, uiet, 'int', parameters, ())
 
-        # Lower IET to a target-specific IET
+        # Materialise the sparse-op iteration nests into ElementalFunctions
+        # before target specialisation, so the position/coefficient temps
+        # the IET pass emits are visible to downstream passes (e.g. the
+        # data-transfer placement on device).
         graph = Graph(iet, **kwargs)
+        lower_sparse_ops(graph, **kwargs)
         graph = cls._specialize_iet(graph, **kwargs)
 
         # Instrument the IET for C-level profiling
diff --git a/devito/passes/clusters/buffering.py b/devito/passes/clusters/buffering.py
index 2a8c78e7fc..541840602c 100644
--- a/devito/passes/clusters/buffering.py
+++ b/devito/passes/clusters/buffering.py
@@ -500,6 +500,9 @@ def key(c):
                 raise CompilationError("Unsupported `buffering` over different "
                                        "IterationSpaces")
 
+        if not ispaces:
+            return IterationSpace(Interval(self.dim))
+
         return ispaces.pop()
 
     @cached_property
diff --git a/devito/passes/iet/__init__.py b/devito/passes/iet/__init__.py
index 1cdb97c794..56f2106795 100644
--- a/devito/passes/iet/__init__.py
+++ b/devito/passes/iet/__init__.py
@@ -1,5 +1,6 @@
 from .engine import *  # noqa
 from .misc import *  # noqa
+from .sparse import *  # noqa
 from .orchestration import *  # noqa
 from .mpi import *  # noqa
 from .definitions import *  # noqa
diff --git a/devito/passes/iet/engine.py b/devito/passes/iet/engine.py
index 9b936fba76..d4cb2d28e8 100644
--- a/devito/passes/iet/engine.py
+++ b/devito/passes/iet/engine.py
@@ -81,16 +81,29 @@ def __init__(self, iet, options=None, sregistry=None, **kwargs):
         writes = FindSymbols('writes').visit(iet)
         self.writes_input = frozenset(f for f in writes if f.is_Input)
 
-        # All symbols requiring host-device data transfers when running
-        # on device
-        self.data_movs = rmovs, wmovs = set(), set()
-        gpu_fit = (options or {}).get('gpu-fit', ())
-        for i in FindNodes(ExprStmt).visit(iet):
-            wmovs.update({w for w in i.writes
-                          if needs_transfer(w, gpu_fit)})
-        for i in FindNodes(ExprStmt).visit(iet):
-            rmovs.update({f for f in i.functions
-                          if needs_transfer(f, gpu_fit) and f not in wmovs})
+        self._gpu_fit = (options or {}).get('gpu-fit', ())
+
+    @property
+    def data_movs(self):
+        """
+        ``(reads, writes)`` of symbols requiring host-device data transfers.
+
+        Recomputed on access from the current state of the Graph (root
+        plus every reachable efunc) so passes that introduce ExprStmts
+        after construction — e.g. ``lower_sparse_ops`` materialising the
+        sparse-op position temps inside an efunc — are reflected.
+        """
+        rmovs, wmovs = set(), set()
+        for efunc in self.efuncs.values():
+            for i in FindNodes(ExprStmt).visit(efunc):
+                wmovs.update(w for w in i.writes
+                             if needs_transfer(w, self._gpu_fit))
+        for efunc in self.efuncs.values():
+            for i in FindNodes(ExprStmt).visit(efunc):
+                rmovs.update(f for f in i.functions
+                             if needs_transfer(f, self._gpu_fit)
+                             and f not in wmovs)
+        return rmovs, wmovs
 
     @property
     def root(self):
diff --git a/devito/passes/iet/misc.py b/devito/passes/iet/misc.py
index 0a631bf3a2..68fff9da95 100644
--- a/devito/passes/iet/misc.py
+++ b/devito/passes/iet/misc.py
@@ -46,7 +46,10 @@ def avoid_denormals(iet, platform=None, **kwargs):
     except AttributeError:
         return iet, {}
 
-    if iet.is_ElementalFunction:
+    # Denormals only need flushing once at the program entry; sibling
+    # Callables (e.g. ElementalFunctions emitted by lower_sparse_ops)
+    # inherit the SSE state from the caller
+    if not isinstance(iet, EntryFunction):
         return iet, {}
 
     header = (cgen.Comment('Flush denormal numbers to zero in hardware'),
diff --git a/devito/passes/iet/parpragma.py b/devito/passes/iet/parpragma.py
index d5752fec52..d0d9a9afd1 100644
--- a/devito/passes/iet/parpragma.py
+++ b/devito/passes/iet/parpragma.py
@@ -130,6 +130,16 @@ def _make_simd(self, iet):
                 if any(i.is_Indexed for i in reductions):
                     continue
 
+            # A scalar reduction in the loop body (e.g. ``acc += ...`` with
+            # ``acc`` a Symbol, as emitted by the interpolation accumulator
+            # pattern) is a cross-iteration dependence the SIMD pragma alone
+            # can't express -- it would need a ``reduction(+:acc)`` clause,
+            # which we don't emit here. Without it, ``_Complex`` accumulators
+            # are miscompiled by some gcc releases.
+            exprs = FindNodes(Expression).visit(candidate)
+            if any(e.is_reduction and not e.output.is_Indexed for e in exprs):
+                continue
+
             # Add SIMD pragma
             simd = self._make_simd_pragma(candidate)
             pragmas = candidate.pragmas + simd
diff --git a/devito/passes/iet/sparse.py b/devito/passes/iet/sparse.py
new file mode 100644
index 0000000000..5556240559
--- /dev/null
+++ b/devito/passes/iet/sparse.py
@@ -0,0 +1,256 @@
+"""
+IET pass that lowers sparse-operation expressions (Interpolation/Injection)
+into ElementalFunctions and replaces the iteration nest produced by the
+cluster pipeline with a Call to the resulting efunc.
+
+By the time this pass runs, the cluster pipeline has scheduled each
+sparse op into a regular ``(.., p_sf, rd_x, rd_y, ..)`` iteration nest
+with a single inner ``Expression`` carrying the synthetic
+``LoweredSparseEq``. The pass:
+
+* finds the outermost ``Iteration`` whose Dimension belongs to the
+  sparse op (the SparseFunction's sparse Dimension or a radius
+  Dimension);
+* rewrites the body of the sparse Dimension's ``Iteration`` so the
+  position/coefficient temporaries (``posx``, ``px``, ...) are
+  computed once per sparse point, above the radius loops;
+* for an interpolation, wraps the inner Expression with the scalar
+  accumulator pattern (``acc = 0``; ``acc += weights * rhs`` inside
+  the radius loops; ``sf[p_sf] = acc`` afterwards);
+* for an injection, leaves the inner Expression as the existing
+  weighted ``Inc(field[pos+rd], weights * rhs)``;
+* wraps the resulting sub-tree in an ``ElementalFunction`` and
+  replaces it in the parent IET with a ``Call``.
+"""
+
+from collections import OrderedDict
+
+from devito.ir.equations import DummyEq
+from devito.ir.equations.algorithms import lower_exprs
+from devito.ir.iet import (
+    Call, EntryFunction, Expression, FindNodes, HaloSpot, Increment, Iteration, List,
+    Transformer, make_callable
+)
+from devito.passes.iet.engine import iet_pass
+from devito.types import Eq, InjectionMixin, InterpolationMixin, Symbol
+
+__all__ = ['lower_sparse_ops']
+
+
+@iet_pass
+def lower_sparse_ops(iet, sregistry=None, **kwargs):
+    """
+    Replace each sparse-op iteration nest in the IET with a Call to an
+    ElementalFunction that materialises the position temporaries and
+    the inner accumulator/increment pattern.
+    """
+    if not isinstance(iet, EntryFunction):
+        return iet, {}
+
+    # The "head" of a sparse op in the IET is the unique Expression
+    # whose lhs is the SparseFunction (interpolation) or the target
+    # field (injection); any auxiliary temporary expressions extracted
+    # from the original SparseEq (e.g. by ``factorize``/``cse``) are
+    # left where they are inside the radius nest.
+    sparse_exprs = [e for e in FindNodes(Expression).visit(iet)
+                    if e.expr.is_SparseOperation
+                    and type(e.expr).is_head_eq(e.expr,
+                                                e.expr.interpolator.sfunction)]
+    if not sparse_exprs:
+        return iet, {}
+
+    # Group head Expressions by their enclosing outer (sparse-Dimension)
+    # Iteration; all such Expressions inside the same outer nest share
+    # one efunc.
+    groups = OrderedDict()
+    for expr in sparse_exprs:
+        nest = _find_outer_iteration(iet, expr)
+        if nest is None:
+            continue
+        groups.setdefault(nest, []).append(expr)
+
+    # ``lower_sparse_ops`` runs before ``optimize_halospots``, so the
+    # halo-exchange optimiser hasn't yet had a chance to drop the
+    # reduction-only halo entries that the IR scheduler put around an
+    # injection nest (e.g. an entry for ``u`` at ``loc_indices={time:
+    # time+1}`` wrapping ``u[time+1] += ...``). Once the nest becomes a
+    # Call those expressions are no longer visible to
+    # ``_drop_reduction_halospots``, so we shed those entries here -- and
+    # if that empties the HaloSpot, replace it whole so the MPI overlap
+    # machinery doesn't wrap our Call with stale dynamic-args plumbing.
+    parents = {nest: _enclosing_halospot(iet, nest) for nest in groups}
+
+    mapper = {}
+    efuncs = []
+    for nest, exprs in groups.items():
+        new_nest = _materialise_nest(nest, exprs)
+
+        lse = exprs[0].expr
+        prefix = f'{lse.efunc_prefix}_{lse.interpolator.sfunction.name}'
+        efunc = make_callable(sregistry.make_name(prefix=prefix), new_nest)
+        efuncs.append(efunc)
+
+        call = Call(efunc.name, list(efunc.parameters))
+        parent = parents[nest]
+        if parent is None:
+            mapper[nest] = call
+            continue
+
+        # Drop fields that the (now-opaque) Call only writes/increments,
+        # since the wrapping HaloSpot's purpose was to ensure read-side
+        # coherency for them and the read no longer exists at the IET
+        # level. Interpolation reads its target field, so its entries
+        # stay.
+        reduced = {e.expr.lhs.function for e in exprs
+                   if isinstance(e.expr, InjectionMixin)}
+        hs = parent.halo_scheme.drop(reduced) if reduced else parent.halo_scheme
+        if hs.is_void:
+            mapper[parent] = call
+        elif hs is parent.halo_scheme:
+            mapper[nest] = call
+        else:
+            mapper[parent] = parent._rebuild(halo_scheme=hs, body=call)
+
+    if not mapper:
+        return iet, {}
+
+    return Transformer(mapper).visit(iet), {'efuncs': efuncs}
+
+
+def _find_outer_iteration(iet, expr):
+    """
+    Walk up the IET from ``expr`` and return the outermost Iteration
+    whose ``dim.root`` is the SparseFunction's sparse Dimension.
+    """
+    sparse_dim = expr.expr.interpolator.sfunction._sparse_dim
+    for it in FindNodes(Iteration).visit(iet):
+        if it.dim.root is sparse_dim and expr in FindNodes(Expression).visit(it):
+            return it
+    return None
+
+
+def _enclosing_halospot(iet, nest):
+    """
+    Return the HaloSpot directly wrapping ``nest``, if any.
+    """
+    for hs in FindNodes(HaloSpot).visit(iet):
+        if nest in FindNodes(Iteration).visit(hs):
+            return hs
+    return None
+
+
+def _materialise_nest(nest, exprs):
+    """
+    Rewrite the sparse Dimension's Iteration body to compute the
+    position/coefficient temps once per sparse point, then for any
+    interpolation Expression wrap it with the scalar accumulator
+    pattern. Multiple sparse-op Expressions sharing the same outer
+    Iteration are materialised in one pass and reuse the same temps.
+    """
+    # Position + coefficient temporaries as IET Expressions. These are
+    # the same for every Expression in the group, so we emit them once.
+    # The sample's leaf class (Interpolation/Injection) drives whether
+    # the temps carry staggering shifts.
+    sample = exprs[0].expr
+    temp_exprs = tuple(Expression(DummyEq(e.lhs, e.rhs))
+                       for e in lower_exprs(sample.sparse_temps()))
+
+    # The radius nest is what runs once per sparse point. For each
+    # interpolation Expression in the group, build its
+    # accumulator-wrapped copy of the radius nest. Injection Exprs
+    # share a single copy of the radius nest (their ``Inc`` already
+    # carries the right ``weights * rhs`` form).
+    inner = nest.nodes[0] if len(nest.nodes) == 1 else List(body=nest.nodes)
+    interp_exprs = [e for e in exprs if isinstance(e.expr, InterpolationMixin)]
+    inject_exprs = [e for e in exprs if isinstance(e.expr, InjectionMixin)]
+
+    body = []
+    for expr in interp_exprs:
+        siblings = [e for e in exprs if e is not expr]
+        body.append(_interp_inner_block(inner, expr, expr.expr.interpolator, siblings))
+    if inject_exprs:
+        drop = {e: None for e in interp_exprs}
+        body.append(Transformer(drop, nested=True).visit(inner) if drop else inner)
+
+    return nest._rebuild(nodes=temp_exprs + tuple(body))
+
+
+def _interp_inner_block(inner, expr, interp, siblings):
+    """
+    Build the accumulator/radius/write-back triple for an interpolation:
+
+        ``Eq(acc, 0)``
+        radius_nest with ``Inc(acc, weights * rhs)`` in place of expr
+        ``Eq(sf[p], acc)``     # ``Inc`` when the SparseEq is a SparseInc
+
+    ``siblings`` are sparse-op Expression nodes in the same radius nest
+    (e.g. a second interpolation or an injection sharing the outer
+    sparse Iteration) that must be removed from this copy of the nest
+    so the accumulator pattern wraps only ``expr``'s increment.
+
+    Any Iteration between the outer sparse Dimension and the radius
+    Dimensions (e.g. an extra non-grid Dimension on the SparseFunction)
+    is preserved as an enclosing loop around the accumulator pattern,
+    so the write-back ``sf[..., d, p_*]`` sees a fresh accumulator per
+    iteration of ``d``.
+    """
+    eq = expr.expr
+    sf_lhs = eq.lhs
+    rhs = eq.rhs
+
+    acc = Symbol(name=f'sum{interp.sfunction.name}', dtype=sf_lhs.dtype)
+
+    # ``rhs`` is already indexified; only the weights need lowering
+    # (e.g. coefficient Functions are indexed by the radius dim).
+    # Pull the concretized radius ConditionalDimensions from ``rhs``
+    # so the weights inherit the same (renamed) ``cond`` Thicknesses
+    # and don't leak duplicate ``x_ltkn``/``x_rtkn`` parameters into
+    # the efunc signature.
+    rdims_concrete = {d.name: d for d in rhs.free_symbols
+                      if getattr(d, 'is_Conditional', False)}
+    weights = interp._weights()
+    if rdims_concrete:
+        weights = weights.xreplace({
+            rd: rdims_concrete[rd.name]
+            for rd in weights.free_symbols
+            if getattr(rd, 'is_Conditional', False) and rd.name in rdims_concrete
+        })
+    weighted_rhs = lower_exprs(Eq(acc, weights)).rhs * rhs
+
+    init = Expression(DummyEq(acc, 0))
+    inc = Increment(DummyEq(acc, weighted_rhs))
+    # Honour the synthetic Eq's flavour: a SparseInc means the user
+    # asked for ``sf[p_*] += sum`` (interpolation with ``increment=True``);
+    # otherwise the standard write is ``sf[p_*] = sum``.
+    write_back_cls = Increment if eq.is_Reduction else Expression
+    write_back = write_back_cls(DummyEq(sf_lhs, acc))
+
+    # Single substitution: drop siblings, replace ``expr`` with ``inc``.
+    mapper = {expr: inc}
+    mapper.update({s: None for s in siblings})
+
+    radius_root = _find_radius_root(inner, interp.sfunction)
+    if radius_root is None or radius_root is inner:
+        return List(body=(init,
+                          Transformer(mapper, nested=True).visit(inner),
+                          write_back))
+
+    # Wrap the accumulator pattern around just the radius sub-tree,
+    # leaving the enclosing non-radius Iterations in place.
+    wrapped = List(body=(init, Transformer(mapper, nested=True).visit(radius_root),
+                         write_back))
+    return Transformer({radius_root: wrapped}, nested=True).visit(inner)
+
+
+def _find_radius_root(inner, sfunction):
+    """
+    Return the outermost Iteration in ``inner`` over a radius
+    CustomDimension of ``sfunction``, or None if no such Iteration is
+    found. Radius Dimensions are named ``r<sparse_dim_name><dim_name>``
+    (see ``AbstractSparseFunction._crdim``).
+    """
+    prefix = f'r{sfunction._sparse_dim.name}'
+    for it in FindNodes(Iteration).visit(inner):
+        if it.dim.name.startswith(prefix):
+            return it
+    return None
diff --git a/devito/symbolics/extended_sympy.py b/devito/symbolics/extended_sympy.py
index 4523fbd2f4..9ea3128753 100644
--- a/devito/symbolics/extended_sympy.py
+++ b/devito/symbolics/extended_sympy.py
@@ -380,6 +380,8 @@ class UnaryOp(Expr, Pickable, BasicWrapperMixin):
 
     _op = ''
 
+    is_commutative = True
+
     __rargs__ = ('base',)
 
     def __new__(cls, base, **kwargs):
diff --git a/devito/symbolics/inspection.py b/devito/symbolics/inspection.py
index e19cd89ad1..024791a55b 100644
--- a/devito/symbolics/inspection.py
+++ b/devito/symbolics/inspection.py
@@ -321,6 +321,14 @@ def sympy_dtype(expr, base=None, default=None, smin=None):
         with suppress(AttributeError):
             dtypes.add(i.dtype)
 
+    # A ``Cast`` overrides the dtype of the symbol(s) it wraps -- e.g.
+    # ``DOUBLE(o_x)`` is observably double-typed even though ``o_x``
+    # itself is float-typed. Without this, the C printer would pick
+    # float-precision literals (``1.0F``) when emitting a ``Pow(DOUBLE(h),
+    # -1)`` inside a wider expression.
+    for c in expr.atoms(Cast):
+        dtypes.add(c.dtype)
+
     if not dtypes or not np.issubdtype(base, np.complexfloating):
         dtypes.update({base} - {None})
 
diff --git a/devito/types/dense.py b/devito/types/dense.py
index a1dc95d75c..cc65d2c194 100644
--- a/devito/types/dense.py
+++ b/devito/types/dense.py
@@ -1633,8 +1633,11 @@ def _arg_values(self, **kwargs):
             return self._arg_defaults(alias=self)
 
     def _arg_apply(self, *args, **kwargs):
+        # Parent-owned SubFunction data is computed once and read by the
+        # Operator; the parent gathers its own data via its own parameter
+        # entry, so there's nothing for the SubFunction to do here.
         if self._parent is not None:
-            return self._parent._arg_apply(*args, **kwargs)
+            return
         return super()._arg_apply(*args, **kwargs)
 
     @property
diff --git a/devito/types/equation.py b/devito/types/equation.py
index 5cbba42e89..c0723f2509 100644
--- a/devito/types/equation.py
+++ b/devito/types/equation.py
@@ -4,10 +4,12 @@
 import sympy
 
 from devito.deprecations import deprecations
-from devito.tools import Pickable, as_tuple, frozendict
+from devito.tools import Pickable, as_tuple, flatten, frozendict
 from devito.types.lazy import Evaluable
 
-__all__ = ['Eq', 'Inc', 'ReduceMax', 'ReduceMin', 'ReduceMinMax']
+__all__ = ['Eq', 'Inc', 'IncrInterpolation', 'Injection', 'InjectionMixin',
+           'Interpolation', 'InterpolationMixin', 'ReduceMax', 'ReduceMin',
+           'ReduceMinMax', 'SparseEq', 'SparseInc', 'SparseOpMixin']
 
 
 class Eq(sympy.Eq, Evaluable, Pickable):
@@ -58,6 +60,7 @@ class Eq(sympy.Eq, Evaluable, Pickable):
     """
 
     is_Reduction = False
+    is_SparseOperation = False
 
     __rargs__ = ('lhs', 'rhs')
     __rkwargs__ = ('subdomain', 'coefficients', 'implicit_dims')
@@ -257,3 +260,169 @@ def __new__(cls, lhs, rhs=0, **kwargs):
             )
 
         return super().__new__(cls, lhs, rhs=rhs, **kwargs)
+
+
+class SparseOpMixin:
+
+    """
+    Tagging mixin shared by every sparse-grid operation Eq (whether at
+    the user, IR, or cluster layer). Carries the ``interpolator``
+    metadata needed by the IET pass ``lower_sparse_ops`` to materialise
+    the radius nest into an ElementalFunction; per-operation behaviour
+    (interpolation vs. injection) lives on the leaf classes
+    ``Interpolation`` / ``IncrInterpolation`` / ``Injection`` via
+    ``InterpolationMixin`` / ``InjectionMixin``.
+    """
+
+    is_SparseOperation = True
+
+    @property
+    def interpolator(self):
+        return self._interpolator
+
+
+class InterpolationMixin:
+
+    """
+    Polymorphic behaviour shared by all sparse-op Eqs representing an
+    interpolation ``sf[..., p_*] <- expr[..., rp_*]`` (the default
+    ``Eq`` flavour as ``Interpolation`` and the ``+=`` flavour as
+    ``IncrInterpolation``).
+    """
+
+    efunc_prefix = 'interpolate'
+
+    @property
+    def field(self):
+        # An interpolation writes into the SparseFunction, not into a
+        # grid field, so there is no target field to drive staggering.
+        return None
+
+    @classmethod
+    def is_head_eq(cls, eq, sfunction):
+        # The "head" of an interpolation in the IET is the unique
+        # Expression whose lhs is the SparseFunction.
+        return eq.lhs.function is sfunction
+
+    def sparse_temps(self):
+        """
+        Position/coefficient temps to be emitted alongside the radius
+        nest by the IET pass.
+        """
+        return self.interpolator.sparse_temps(self.rhs, self.implicit_dims)
+
+
+class InjectionMixin:
+
+    """
+    Polymorphic behaviour shared by all sparse-op Eqs representing an
+    injection ``Inc(field[..., pos+rd, ...], weights * rhs)``.
+    """
+
+    efunc_prefix = 'inject'
+
+    @property
+    def field(self):
+        # An injection writes into a grid field — exposed as ``lhs.function``.
+        return self.lhs.function
+
+    @classmethod
+    def is_head_eq(cls, eq, sfunction):
+        # The "head" of an injection in the IET is the unique
+        # Expression whose lhs is a (non-sparse) DiscreteFunction.
+        f = eq.lhs.function
+        return f.is_DiscreteFunction and f is not sfunction
+
+    def sparse_temps(self):
+        """
+        Position/coefficient temps to be emitted alongside the radius
+        nest by the IET pass. The target field drives the per-Dimension
+        shifts so the temps' lhs (``pos*`` symbols) match the rhs of a
+        staggered injection.
+        """
+        return self.interpolator.sparse_temps(self.rhs, self.implicit_dims,
+                                              field=self.field)
+
+
+class SparseEq(SparseOpMixin, Eq):
+
+    """
+    An Eq representing a sparse-grid operation. ``SparseEq`` is the
+    abstract base; instantiate ``Interpolation`` (``sf <- expr``),
+    ``IncrInterpolation`` (``sf += expr``), or ``Injection``
+    (``Inc(field, weights * expr)``) instead.
+
+    The single synthetic Eq uses the SparseFunction's radius
+    ConditionalDimensions ``rp_*`` (whose parent is the grid Dimension)
+    as indices into the user expression, so the cluster scheduler
+    derives a regular IterationSpace including the radius loops. The
+    cluster pipeline (buffering, halo, snapshotting, ...) treats it
+    like any other Eq; the IET pass ``lower_sparse_ops`` extracts the
+    resulting ``p_*, rp_*`` nest and wraps it in an ElementalFunction,
+    inserting the position/weight temps that drive the radius access.
+    """
+
+    __rkwargs__ = Eq.__rkwargs__ + ('interpolator',)
+
+    def __new__(cls, lhs, rhs=0, *, interpolator=None,
+                implicit_dims=None, **kwargs):
+        obj = super().__new__(cls, lhs, rhs=rhs, implicit_dims=implicit_dims,
+                              **kwargs)
+        obj._interpolator = interpolator
+        return obj
+
+    def __add__(self, other):
+        # Allow ``list_of_eqs + sparse_eq`` and ``sparse_eq + sparse_eq``
+        # to produce a flat list — handy when composing user expression
+        # bundles passed to ``Operator(...)``.
+        return flatten([self, other])
+
+    def __radd__(self, other):
+        return flatten([other, self])
+
+    def __repr__(self):
+        sf = self._interpolator.sfunction
+        return f"{type(self).__name__}({sf.name})"
+
+    __str__ = __repr__
+
+
+class SparseInc(SparseEq, Inc):
+
+    """
+    The ``Inc`` flavour of ``SparseEq``. The ``+=`` semantics is
+    required when contributions from multiple sparse points accumulate
+    into the same target cell (injection) or when the user asks for
+    ``sf += interp(expr)``.
+    """
+
+    pass
+
+
+class Interpolation(InterpolationMixin, SparseEq):
+
+    """
+    A standard interpolation ``Eq(sf[..., p_*], expr[..., rp_*])``.
+    """
+
+    pass
+
+
+class IncrInterpolation(InterpolationMixin, SparseInc):
+
+    """
+    An interpolation with ``+=`` semantics: ``Inc(sf[..., p_*],
+    expr[..., rp_*])``. Produced by ``interpolate(..., increment=True)``.
+    """
+
+    pass
+
+
+class Injection(InjectionMixin, SparseInc):
+
+    """
+    An injection ``Inc(field[..., pos+rd, ...], weights(rd_*) *
+    expr[..., rd_*])``.
+    """
+
+    pass
diff --git a/devito/types/relational.py b/devito/types/relational.py
index 731ec29bc7..10b4b861fd 100644
--- a/devito/types/relational.py
+++ b/devito/types/relational.py
@@ -37,7 +37,7 @@ class Le(AbstractRel, sympy.Le):
     rhs : expr-like, optional
         The right-hand side. Defaults to 0.
     subdomain : SubDomain, optional
-        To restrict the evalaution of the relation to a particular sub-region in the
+        To restrict the evaluation of the relation to a particular sub-region in the
         computational domain.
 
     Examples
@@ -72,7 +72,7 @@ class Lt(AbstractRel, sympy.Lt):
     rhs : expr-like, optional
         The right-hand side. Defaults to 0.
     subdomain : SubDomain, optional
-        To restrict the evalaution of the relation to a particular sub-region in the
+        To restrict the evaluation of the relation to a particular sub-region in the
         computational domain.
 
     Examples
@@ -107,7 +107,7 @@ class Ge(AbstractRel, sympy.Ge):
     rhs : expr-like, optional
         The right-hand side. Defaults to 0.
     subdomain : SubDomain, optional
-        To restrict the evalaution of the relation to a particular sub-region in the
+        To restrict the evaluation of the relation to a particular sub-region in the
         computational domain.
 
     Examples
@@ -142,7 +142,7 @@ class Gt(AbstractRel, sympy.Gt):
     rhs : expr-like, optional
         The right-hand side. Defaults to 0.
     subdomain : SubDomain, optional
-        To restrict the evalaution of the relation to a particular sub-region in the
+        To restrict the evaluation of the relation to a particular sub-region in the
         computational domain.
 
     Examples
@@ -177,7 +177,7 @@ class Ne(AbstractRel, sympy.Ne):
     rhs : expr-like, optional
         The right-hand side. Defaults to 0.
     subdomain : SubDomain, optional
-        To restrict the evalaution of the relation to a particular sub-region in the
+        To restrict the evaluation of the relation to a particular sub-region in the
         computational domain.
 
     Examples
diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index e7b528b370..502a7c75d4 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -375,7 +375,7 @@ def _pos_symbols(self, shifts=None):
                 symbols.append(Symbol(name=f'pos{d}_s1', dtype=np.int32))
             else:
                 symbols.append(Symbol(name=f'pos{d}', dtype=np.int32))
-        return symbols
+        return DimensionTuple(*symbols, getters=self.grid.dimensions)
 
     @cached_property
     def _point_increments(self):
diff --git a/examples/mpi/overview.ipynb b/examples/mpi/overview.ipynb
index b46f49aaf4..99f48c8992 100644
--- a/examples/mpi/overview.ipynb
+++ b/examples/mpi/overview.ipynb
@@ -43,7 +43,14 @@
   {
    "cell_type": "code",
    "execution_count": 1,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-02T03:41:49.297503Z",
+     "iopub.status.busy": "2026-06-02T03:41:49.296909Z",
+     "iopub.status.idle": "2026-06-02T03:41:49.431157Z",
+     "shell.execute_reply": "2026-06-02T03:41:49.429709Z"
+    }
+   },
    "outputs": [],
    "source": [
     "import ipyparallel as ipp\n",
@@ -60,7 +67,14 @@
   {
    "cell_type": "code",
    "execution_count": 2,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-02T03:41:49.436124Z",
+     "iopub.status.busy": "2026-06-02T03:41:49.435765Z",
+     "iopub.status.idle": "2026-06-02T03:41:49.469987Z",
+     "shell.execute_reply": "2026-06-02T03:41:49.468314Z"
+    }
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -102,7 +116,14 @@
   {
    "cell_type": "code",
    "execution_count": 3,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-02T03:41:49.523096Z",
+     "iopub.status.busy": "2026-06-02T03:41:49.522469Z",
+     "iopub.status.idle": "2026-06-02T03:41:50.443944Z",
+     "shell.execute_reply": "2026-06-02T03:41:50.443000Z"
+    }
+   },
    "outputs": [],
    "source": [
     "%%px\n",
@@ -116,7 +137,14 @@
   {
    "cell_type": "code",
    "execution_count": 4,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-02T03:41:50.449058Z",
+     "iopub.status.busy": "2026-06-02T03:41:50.448497Z",
+     "iopub.status.idle": "2026-06-02T03:41:50.477536Z",
+     "shell.execute_reply": "2026-06-02T03:41:50.475960Z"
+    }
+   },
    "outputs": [],
    "source": [
     "%%px\n",
@@ -139,7 +167,14 @@
   {
    "cell_type": "code",
    "execution_count": 5,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-02T03:41:50.482877Z",
+     "iopub.status.busy": "2026-06-02T03:41:50.482293Z",
+     "iopub.status.idle": "2026-06-02T03:41:50.525021Z",
+     "shell.execute_reply": "2026-06-02T03:41:50.523488Z"
+    }
+   },
    "outputs": [],
    "source": [
     "%%px\n",
@@ -158,7 +193,14 @@
   {
    "cell_type": "code",
    "execution_count": 6,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-02T03:41:50.530808Z",
+     "iopub.status.busy": "2026-06-02T03:41:50.530184Z",
+     "iopub.status.idle": "2026-06-02T03:41:50.579266Z",
+     "shell.execute_reply": "2026-06-02T03:41:50.577823Z"
+    }
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -194,7 +236,14 @@
   {
    "cell_type": "code",
    "execution_count": 7,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-02T03:41:50.584171Z",
+     "iopub.status.busy": "2026-06-02T03:41:50.583721Z",
+     "iopub.status.idle": "2026-06-02T03:41:50.611914Z",
+     "shell.execute_reply": "2026-06-02T03:41:50.610298Z"
+    }
+   },
    "outputs": [],
    "source": [
     "%%px\n",
@@ -204,7 +253,14 @@
   {
    "cell_type": "code",
    "execution_count": 8,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-02T03:41:50.617117Z",
+     "iopub.status.busy": "2026-06-02T03:41:50.616516Z",
+     "iopub.status.idle": "2026-06-02T03:41:50.650665Z",
+     "shell.execute_reply": "2026-06-02T03:41:50.649049Z"
+    }
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -242,7 +298,14 @@
   {
    "cell_type": "code",
    "execution_count": 9,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-02T03:41:50.656905Z",
+     "iopub.status.busy": "2026-06-02T03:41:50.656300Z",
+     "iopub.status.idle": "2026-06-02T03:41:50.902948Z",
+     "shell.execute_reply": "2026-06-02T03:41:50.902127Z"
+    }
+   },
    "outputs": [
     {
      "name": "stderr",
@@ -271,7 +334,14 @@
   {
    "cell_type": "code",
    "execution_count": 10,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-02T03:41:50.907199Z",
+     "iopub.status.busy": "2026-06-02T03:41:50.907026Z",
+     "iopub.status.idle": "2026-06-02T03:41:50.932807Z",
+     "shell.execute_reply": "2026-06-02T03:41:50.931899Z"
+    }
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -308,6 +378,12 @@
    "cell_type": "code",
    "execution_count": 11,
    "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-02T03:41:50.936857Z",
+     "iopub.status.busy": "2026-06-02T03:41:50.936619Z",
+     "iopub.status.idle": "2026-06-02T03:41:50.952864Z",
+     "shell.execute_reply": "2026-06-02T03:41:50.951507Z"
+    },
     "scrolled": true
    },
    "outputs": [
@@ -392,7 +468,14 @@
   {
    "cell_type": "code",
    "execution_count": 12,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-02T03:41:50.957600Z",
+     "iopub.status.busy": "2026-06-02T03:41:50.957171Z",
+     "iopub.status.idle": "2026-06-02T03:41:52.287455Z",
+     "shell.execute_reply": "2026-06-02T03:41:52.286319Z"
+    }
+   },
    "outputs": [],
    "source": [
     "%%px\n",
@@ -403,6 +486,12 @@
    "cell_type": "code",
    "execution_count": 13,
    "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-02T03:41:52.291426Z",
+     "iopub.status.busy": "2026-06-02T03:41:52.291239Z",
+     "iopub.status.idle": "2026-06-02T03:41:52.337913Z",
+     "shell.execute_reply": "2026-06-02T03:41:52.337101Z"
+    },
     "scrolled": true
    },
    "outputs": [
@@ -486,9 +575,9 @@
        "  MPI_Request rsend;\n",
        "\n",
        "  float *restrict bufg_vec __attribute__ ((aligned (64)));\n",
-       "  posix_memalign((void**)(&bufg_vec),64,sizeof(float)*(long)y_size*(long)x_size);\n",
+       "  posix_memalign((void**)(&bufg_vec),64,sizeof(float)*(long)x_size*(long)y_size);\n",
        "  float *restrict bufs_vec __attribute__ ((aligned (64)));\n",
-       "  posix_memalign((void**)(&bufs_vec),64,sizeof(float)*(long)y_size*(long)x_size);\n",
+       "  posix_memalign((void**)(&bufs_vec),64,sizeof(float)*(long)x_size*(long)y_size);\n",
        "\n",
        "  MPI_Irecv(bufs_vec,x_size*y_size,MPI_FLOAT,fromrank,13,comm,&rrecv);\n",
        "  if (torank != MPI_PROC_NULL)\n",
@@ -587,7 +676,14 @@
   {
    "cell_type": "code",
    "execution_count": 14,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-02T03:41:52.342064Z",
+     "iopub.status.busy": "2026-06-02T03:41:52.341878Z",
+     "iopub.status.idle": "2026-06-02T03:41:52.372338Z",
+     "shell.execute_reply": "2026-06-02T03:41:52.370957Z"
+    }
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -635,7 +731,14 @@
   {
    "cell_type": "code",
    "execution_count": 15,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-02T03:41:52.377304Z",
+     "iopub.status.busy": "2026-06-02T03:41:52.376753Z",
+     "iopub.status.idle": "2026-06-02T03:41:52.412522Z",
+     "shell.execute_reply": "2026-06-02T03:41:52.410972Z"
+    }
+   },
    "outputs": [],
    "source": [
     "%%px\n",
@@ -645,7 +748,14 @@
   {
    "cell_type": "code",
    "execution_count": 16,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-02T03:41:52.417696Z",
+     "iopub.status.busy": "2026-06-02T03:41:52.417117Z",
+     "iopub.status.idle": "2026-06-02T03:41:52.456949Z",
+     "shell.execute_reply": "2026-06-02T03:41:52.455345Z"
+    }
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -714,7 +824,14 @@
   {
    "cell_type": "code",
    "execution_count": 17,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-02T03:41:52.462118Z",
+     "iopub.status.busy": "2026-06-02T03:41:52.461590Z",
+     "iopub.status.idle": "2026-06-02T03:41:52.537246Z",
+     "shell.execute_reply": "2026-06-02T03:41:52.535705Z"
+    }
+   },
    "outputs": [],
    "source": [
     "%%px\n",
@@ -783,7 +900,14 @@
   {
    "cell_type": "code",
    "execution_count": 18,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-02T03:41:52.542566Z",
+     "iopub.status.busy": "2026-06-02T03:41:52.542019Z",
+     "iopub.status.idle": "2026-06-02T03:41:53.701261Z",
+     "shell.execute_reply": "2026-06-02T03:41:53.699673Z"
+    }
+   },
    "outputs": [
     {
      "name": "stderr",
@@ -806,7 +930,14 @@
   {
    "cell_type": "code",
    "execution_count": 19,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-02T03:41:53.706501Z",
+     "iopub.status.busy": "2026-06-02T03:41:53.705917Z",
+     "iopub.status.idle": "2026-06-02T03:41:53.741608Z",
+     "shell.execute_reply": "2026-06-02T03:41:53.740235Z"
+    }
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -872,7 +1003,14 @@
   {
    "cell_type": "code",
    "execution_count": 20,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-02T03:41:53.746907Z",
+     "iopub.status.busy": "2026-06-02T03:41:53.746360Z",
+     "iopub.status.idle": "2026-06-02T03:41:55.067972Z",
+     "shell.execute_reply": "2026-06-02T03:41:55.066355Z"
+    }
+   },
    "outputs": [],
    "source": [
     "%%px\n",
@@ -907,6 +1045,12 @@
    "cell_type": "code",
    "execution_count": 21,
    "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-02T03:41:55.073415Z",
+     "iopub.status.busy": "2026-06-02T03:41:55.072866Z",
+     "iopub.status.idle": "2026-06-02T03:41:56.689552Z",
+     "shell.execute_reply": "2026-06-02T03:41:56.687949Z"
+    },
     "scrolled": true
    },
    "outputs": [],
@@ -957,7 +1101,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.5"
+   "version": "3.11.0rc1"
   }
  },
  "nbformat": 4,
diff --git a/examples/performance/00_overview.ipynb b/examples/performance/00_overview.ipynb
index 72f1836aa4..694d47f2fa 100644
--- a/examples/performance/00_overview.ipynb
+++ b/examples/performance/00_overview.ipynb
@@ -121,7 +121,14 @@
   {
    "cell_type": "code",
    "execution_count": 1,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-01T16:28:42.567152Z",
+     "iopub.status.busy": "2026-06-01T16:28:42.566586Z",
+     "iopub.status.idle": "2026-06-01T16:28:42.583202Z",
+     "shell.execute_reply": "2026-06-01T16:28:42.581749Z"
+    }
+   },
    "outputs": [],
    "source": [
     "from examples.performance import unidiff_output, print_kernel"
@@ -137,7 +144,14 @@
   {
    "cell_type": "code",
    "execution_count": 2,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-01T16:28:42.588731Z",
+     "iopub.status.busy": "2026-06-01T16:28:42.588131Z",
+     "iopub.status.idle": "2026-06-01T16:28:43.307056Z",
+     "shell.execute_reply": "2026-06-01T16:28:43.306474Z"
+    }
+   },
    "outputs": [],
    "source": [
     "from devito import configuration\n",
@@ -158,7 +172,14 @@
   {
    "cell_type": "code",
    "execution_count": 3,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-01T16:28:43.311646Z",
+     "iopub.status.busy": "2026-06-01T16:28:43.311378Z",
+     "iopub.status.idle": "2026-06-01T16:28:43.355233Z",
+     "shell.execute_reply": "2026-06-01T16:28:43.353765Z"
+    }
+   },
    "outputs": [],
    "source": [
     "from devito import Eq, Grid, Operator, Function, TimeFunction, sin\n",
@@ -190,7 +211,14 @@
   {
    "cell_type": "code",
    "execution_count": 4,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-01T16:28:43.359825Z",
+     "iopub.status.busy": "2026-06-01T16:28:43.359631Z",
+     "iopub.status.idle": "2026-06-01T16:28:43.695843Z",
+     "shell.execute_reply": "2026-06-01T16:28:43.694268Z"
+    }
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -265,7 +293,14 @@
   {
    "cell_type": "code",
    "execution_count": 5,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-01T16:28:43.751007Z",
+     "iopub.status.busy": "2026-06-01T16:28:43.750383Z",
+     "iopub.status.idle": "2026-06-01T16:28:43.925853Z",
+     "shell.execute_reply": "2026-06-01T16:28:43.924714Z"
+    }
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -319,7 +354,14 @@
   {
    "cell_type": "code",
    "execution_count": 6,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-01T16:28:43.930084Z",
+     "iopub.status.busy": "2026-06-01T16:28:43.929898Z",
+     "iopub.status.idle": "2026-06-01T16:28:44.138584Z",
+     "shell.execute_reply": "2026-06-01T16:28:44.137102Z"
+    }
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -401,7 +443,14 @@
   {
    "cell_type": "code",
    "execution_count": 7,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-01T16:28:44.143799Z",
+     "iopub.status.busy": "2026-06-01T16:28:44.143564Z",
+     "iopub.status.idle": "2026-06-01T16:28:44.398172Z",
+     "shell.execute_reply": "2026-06-01T16:28:44.396530Z"
+    }
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -465,7 +514,14 @@
   {
    "cell_type": "code",
    "execution_count": 8,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-01T16:28:44.402710Z",
+     "iopub.status.busy": "2026-06-01T16:28:44.402508Z",
+     "iopub.status.idle": "2026-06-01T16:28:44.646643Z",
+     "shell.execute_reply": "2026-06-01T16:28:44.645084Z"
+    }
+   },
    "outputs": [],
    "source": [
     "op2_omp = Operator(eq, opt=('blocking', 'simd', {'openmp': True}))\n",
@@ -485,7 +541,14 @@
   {
    "cell_type": "code",
    "execution_count": 9,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-01T16:28:44.652114Z",
+     "iopub.status.busy": "2026-06-01T16:28:44.651515Z",
+     "iopub.status.idle": "2026-06-01T16:28:44.888936Z",
+     "shell.execute_reply": "2026-06-01T16:28:44.887867Z"
+    }
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -555,7 +618,14 @@
   {
    "cell_type": "code",
    "execution_count": 10,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-01T16:28:44.894086Z",
+     "iopub.status.busy": "2026-06-01T16:28:44.893498Z",
+     "iopub.status.idle": "2026-06-01T16:28:45.108474Z",
+     "shell.execute_reply": "2026-06-01T16:28:45.107645Z"
+    }
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -572,13 +642,7 @@
       "+            u[t1][x + 4][y + 4][z + 4] = (f[x + 1][y + 1][z + 1]*f[x + 1][y + 1][z + 1])*((-6.66666667e-1F*r0)*(8.33333333e-2F*r0*u[t0][x + 4][y + 1][z + 4] - 6.66666667e-1F*r0*u[t0][x + 4][y + 2][z + 4] + 6.66666667e-1F*r0*u[t0][x + 4][y + 4][z + 4] - 8.33333333e-2F*r0*u[t0][x + 4][y + 5][z + 4]) + (-8.33333333e-2F*r0)*(8.33333333e-2F*r0*u[t0][x + 4][y + 4][z + 4] - 6.66666667e-1F*r0*u[t0][x + 4][y + 5][z + 4] + 6.66666667e-1F*r0*u[t0][x + 4][y + 7][z + 4] - 8.33333333e-2F*r0*u[t0][x + 4][y + 8][z + 4]) + (8.33333333e-2F*r0)*(8.33333333e-2F*r0*u[t0][x + 4][y][z + 4] - 6.66666667e-1F*r0*u[t0][x + 4][y + 1][z + 4] + 6.66666667e-1F*r0*u[t0][x + 4][y + 3][z + 4] - 8.33333333e-2F*r0*u[t0][x + 4][y + 4][z + 4]) + (6.66666667e-1F*r0)*(8.33333333e-2F*r0*u[t0][x + 4][y + 3][z + 4] - 6.66666667e-1F*r0*u[t0][x + 4][y + 4][z + 4] + 6.66666667e-1F*r0*u[t0][x + 4][y + 6][z + 4] - 8.33333333e-2F*r0*u[t0][x + 4][y + 7][z + 4]))*sinf(f[x + 1][y + 1][z + 1]);\n",
       "           }\n",
       "         }\n",
-      "       }\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
+      "       }\n",
       "\n"
      ]
     }
@@ -598,7 +662,14 @@
   {
    "cell_type": "code",
    "execution_count": 11,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-01T16:28:45.112365Z",
+     "iopub.status.busy": "2026-06-01T16:28:45.112203Z",
+     "iopub.status.idle": "2026-06-01T16:28:45.284081Z",
+     "shell.execute_reply": "2026-06-01T16:28:45.283032Z"
+    }
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -659,7 +730,14 @@
   {
    "cell_type": "code",
    "execution_count": 12,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-01T16:28:45.288573Z",
+     "iopub.status.busy": "2026-06-01T16:28:45.288172Z",
+     "iopub.status.idle": "2026-06-01T16:28:45.499479Z",
+     "shell.execute_reply": "2026-06-01T16:28:45.498011Z"
+    }
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -719,7 +797,14 @@
   {
    "cell_type": "code",
    "execution_count": 13,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-01T16:28:45.504084Z",
+     "iopub.status.busy": "2026-06-01T16:28:45.503909Z",
+     "iopub.status.idle": "2026-06-01T16:28:45.805606Z",
+     "shell.execute_reply": "2026-06-01T16:28:45.804773Z"
+    }
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -759,14 +844,7 @@
       "    }\n",
       "  }\n",
       "  STOP(section0,timers)\n",
-      "}"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n"
+      "}\n"
      ]
     }
    ],
@@ -786,7 +864,14 @@
   {
    "cell_type": "code",
    "execution_count": 14,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-01T16:28:45.809527Z",
+     "iopub.status.busy": "2026-06-01T16:28:45.809336Z",
+     "iopub.status.idle": "2026-06-01T16:28:46.107996Z",
+     "shell.execute_reply": "2026-06-01T16:28:46.106507Z"
+    }
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -877,7 +962,14 @@
   {
    "cell_type": "code",
    "execution_count": 15,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-01T16:28:46.112120Z",
+     "iopub.status.busy": "2026-06-01T16:28:46.111955Z",
+     "iopub.status.idle": "2026-06-01T16:28:46.284747Z",
+     "shell.execute_reply": "2026-06-01T16:28:46.283902Z"
+    }
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -933,7 +1025,14 @@
   {
    "cell_type": "code",
    "execution_count": 16,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-01T16:28:46.288880Z",
+     "iopub.status.busy": "2026-06-01T16:28:46.288714Z",
+     "iopub.status.idle": "2026-06-01T16:28:46.479456Z",
+     "shell.execute_reply": "2026-06-01T16:28:46.478520Z"
+    }
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -990,7 +1089,14 @@
   {
    "cell_type": "code",
    "execution_count": 17,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-01T16:28:46.484129Z",
+     "iopub.status.busy": "2026-06-01T16:28:46.483963Z",
+     "iopub.status.idle": "2026-06-01T16:28:46.686649Z",
+     "shell.execute_reply": "2026-06-01T16:28:46.685585Z"
+    }
+   },
    "outputs": [],
    "source": [
     "eq = Eq(u.forward, f**2*sin(f)*u.dy.dy)  # Back to original running example\n",
@@ -1008,7 +1114,14 @@
   {
    "cell_type": "code",
    "execution_count": 18,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-01T16:28:46.691894Z",
+     "iopub.status.busy": "2026-06-01T16:28:46.691695Z",
+     "iopub.status.idle": "2026-06-01T16:28:46.866515Z",
+     "shell.execute_reply": "2026-06-01T16:28:46.865568Z"
+    }
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -1058,7 +1171,14 @@
   {
    "cell_type": "code",
    "execution_count": 19,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-01T16:28:46.870368Z",
+     "iopub.status.busy": "2026-06-01T16:28:46.870186Z",
+     "iopub.status.idle": "2026-06-01T16:28:47.132205Z",
+     "shell.execute_reply": "2026-06-01T16:28:47.131202Z"
+    }
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -1126,7 +1246,14 @@
   {
    "cell_type": "code",
    "execution_count": 20,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-01T16:28:47.136205Z",
+     "iopub.status.busy": "2026-06-01T16:28:47.136020Z",
+     "iopub.status.idle": "2026-06-01T16:28:47.486946Z",
+     "shell.execute_reply": "2026-06-01T16:28:47.485867Z"
+    }
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -1171,11 +1298,11 @@
       "  float **restrict pr2_vec __attribute__ ((aligned (64)));\n",
       "  posix_memalign((void**)(&pr2_vec),64,sizeof(float*)*(long)nthreads);\n",
       "  float *restrict r0_vec __attribute__ ((aligned (64)));\n",
-      "  posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)z_size*(long)y_size*(long)x_size);\n",
+      "  posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)x_size*(long)y_size*(long)z_size);\n",
       "  #pragma omp parallel num_threads(nthreads)\n",
       "  {\n",
       "    const int tid = omp_get_thread_num();\n",
-      "    posix_memalign((void**)(&pr2_vec[tid]),64,sizeof(float)*(long)z_size*(4 + (long)y0_blk0_size));\n",
+      "    posix_memalign((void**)(&pr2_vec[tid]),64,((long)y0_blk0_size + 4)*sizeof(float)*(long)z_size);\n",
       "  }\n",
       "\n",
       "  float (*restrict f)[f_vec->size[1]][f_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[f_vec->size[1]][f_vec->size[2]]) f_vec->data;\n",
@@ -1293,7 +1420,14 @@
   {
    "cell_type": "code",
    "execution_count": 21,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-01T16:28:47.490968Z",
+     "iopub.status.busy": "2026-06-01T16:28:47.490785Z",
+     "iopub.status.idle": "2026-06-01T16:28:47.836666Z",
+     "shell.execute_reply": "2026-06-01T16:28:47.835773Z"
+    }
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -1383,13 +1517,20 @@
   {
    "cell_type": "code",
    "execution_count": 22,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-01T16:28:47.840639Z",
+     "iopub.status.busy": "2026-06-01T16:28:47.840446Z",
+     "iopub.status.idle": "2026-06-01T16:28:47.846099Z",
+     "shell.execute_reply": "2026-06-01T16:28:47.845313Z"
+    }
+   },
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)z_size*(long)y_size*(long)x_size);\n"
+      "posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)x_size*(long)y_size*(long)z_size);\n"
      ]
     }
    ],
@@ -1418,7 +1559,14 @@
   {
    "cell_type": "code",
    "execution_count": 23,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-01T16:28:47.850257Z",
+     "iopub.status.busy": "2026-06-01T16:28:47.850030Z",
+     "iopub.status.idle": "2026-06-01T16:28:48.148180Z",
+     "shell.execute_reply": "2026-06-01T16:28:48.147362Z"
+    }
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -1462,11 +1610,11 @@
       "  float **restrict pr2_vec __attribute__ ((aligned (64)));\n",
       "  posix_memalign((void**)(&pr2_vec),64,sizeof(float*)*(long)nthreads);\n",
       "  float *restrict r0_vec __attribute__ ((aligned (64)));\n",
-      "  posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)z_size*(long)y_size*(long)x_size);\n",
+      "  posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)x_size*(long)y_size*(long)z_size);\n",
       "  #pragma omp parallel num_threads(nthreads)\n",
       "  {\n",
       "    const int tid = omp_get_thread_num();\n",
-      "    posix_memalign((void**)(&pr2_vec[tid]),64,sizeof(float)*(long)z_size*(4 + (long)y_size));\n",
+      "    posix_memalign((void**)(&pr2_vec[tid]),64,((long)y_size + 4)*sizeof(float)*(long)z_size);\n",
       "  }\n",
       "\n",
       "  float (*restrict f)[f_vec->size[1]][f_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[f_vec->size[1]][f_vec->size[2]]) f_vec->data;\n",
@@ -1560,7 +1708,14 @@
   {
    "cell_type": "code",
    "execution_count": 24,
-   "metadata": {},
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-06-01T16:28:48.152249Z",
+     "iopub.status.busy": "2026-06-01T16:28:48.152053Z",
+     "iopub.status.idle": "2026-06-01T16:28:48.641977Z",
+     "shell.execute_reply": "2026-06-01T16:28:48.641139Z"
+    }
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -1603,11 +1758,11 @@
       "int Kernel(struct dataobj *restrict f_vec, struct dataobj *restrict u_vec, const float h_x, const float h_y, const int time_M, const int time_m, const int x0_blk0_size, const int x1_blk0_size, const int x_M, const int x_m, const int y0_blk0_size, const int y1_blk0_size, const int y_M, const int y_m, const int z_M, const int z_m, const int nthreads, const int x_size, const int y_size, const int z_size, struct profiler * timers)\n",
       "{\n",
       "  float *restrict r0_vec __attribute__ ((aligned (64)));\n",
-      "  posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)z_size*(long)y_size*(long)x_size);\n",
+      "  posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)x_size*(long)y_size*(long)z_size);\n",
       "  float *restrict r3_vec __attribute__ ((aligned (64)));\n",
-      "  posix_memalign((void**)(&r3_vec),64,sizeof(float)*(long)z_size*(4 + (long)y_size)*(4 + (long)x_size));\n",
+      "  posix_memalign((void**)(&r3_vec),64,((long)x_size + 4)*((long)y_size + 4)*sizeof(float)*(long)z_size);\n",
       "  float *restrict r4_vec __attribute__ ((aligned (64)));\n",
-      "  posix_memalign((void**)(&r4_vec),64,sizeof(float)*(long)z_size*(4 + (long)y_size)*(4 + (long)x_size));\n",
+      "  posix_memalign((void**)(&r4_vec),64,((long)x_size + 4)*((long)y_size + 4)*sizeof(float)*(long)z_size);\n",
       "\n",
       "  float (*restrict f)[f_vec->size[1]][f_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[f_vec->size[1]][f_vec->size[2]]) f_vec->data;\n",
       "  float (*restrict r0)[y_size][z_size] __attribute__ ((aligned (64))) = (float (*)[y_size][z_size]) r0_vec;\n",
@@ -1731,7 +1886,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.13.11"
+   "version": "3.11.0rc1"
   }
  },
  "nbformat": 4,
diff --git a/examples/userapi/06_sparse_operations.ipynb b/examples/userapi/06_sparse_operations.ipynb
index 1914155060..cc8c7097cc 100644
--- a/examples/userapi/06_sparse_operations.ipynb
+++ b/examples/userapi/06_sparse_operations.ipynb
@@ -270,26 +270,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Eq(posx, (int)floor((-o_x + s_coords(p_s, 0))/h_x))\n",
-      "Eq(posy, (int)floor((-o_y + s_coords(p_s, 1))/h_y))\n",
-      "Eq(px, -floor((-o_x + s_coords(p_s, 0))/h_x) + (-o_x + s_coords(p_s, 0))/h_x)\n",
-      "Eq(py, -floor((-o_y + s_coords(p_s, 1))/h_y) + (-o_y + s_coords(p_s, 1))/h_y)\n",
-      "Eq(sums, 0.0)\n",
-      "Inc(sums, (rp_sx*px + (1 - rp_sx)*(1 - px))*(rp_sy*py + (1 - rp_sy)*(1 - py))*f(t, rp_sx + posx, rp_sy + posy))\n",
-      "Eq(s(time, p_s), sums)\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(\"\\n\".join([str(s) for s in s.interpolate(f).evaluate]))"
-   ]
+   "outputs": [],
+   "source": "# NBVAL_IGNORE_OUTPUT\nprint(s.interpolate(f).evaluate)"
   },
   {
    "cell_type": "code",
@@ -477,24 +461,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Eq(posx, (int)floor((-o_x + s_coords(p_s, 0))/h_x))\n",
-      "Eq(posy, (int)floor((-o_y + s_coords(p_s, 1))/h_y))\n",
-      "Eq(sums, 0.0)\n",
-      "Inc(sums, wsincrp_sx(p_s, rp_sx + 3)*wsincrp_sy(p_s, rp_sy + 3)*f(t, rp_sx + posx, rp_sy + posy))\n",
-      "Eq(s(time, p_s), sums)\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(\"\\n\".join([str(s) for s in s.interpolate(f).evaluate]))"
-   ]
+   "outputs": [],
+   "source": "# NBVAL_IGNORE_OUTPUT\nprint(s.interpolate(f).evaluate)"
   },
   {
    "cell_type": "code",
diff --git a/tests/test_caching.py b/tests/test_caching.py
index 2df7dc516d..df39cc09d2 100644
--- a/tests/test_caching.py
+++ b/tests/test_caching.py
@@ -658,7 +658,6 @@ def test_sparse_function(self, operate_on_empty_cache):
         """Test caching of SparseFunctions and children objects."""
         grid = Grid(shape=(3, 3))
 
-        init_cache_size = len(_SymbolCache)
         cur_cache_size = len(_SymbolCache)
 
         u = SparseFunction(name='u', grid=grid, npoint=1, nt=10)
@@ -674,44 +673,30 @@ def test_sparse_function(self, operate_on_empty_cache):
 
         i = u.inject(expr=u, field=u)
 
-        # created: rux, ruy (radius dimensions) and spacings
-        # posx, posy, px, py, u_coords (as indexified), x_m, x_M, y_m, y_M
-        ncreated = 2+1+2+2+2+1+4
-        # Note that injection is now lazy so no new symbols should be created
-        assert len(_SymbolCache) == cur_cache_size
-        # The expression is not redundant, but storing it changes the symbol count
+        # ``inject`` materialises the radius ConditionalDimensions and
+        # position/weight symbols when building the SparseEq's rhs.
+        post_inject = len(_SymbolCache)
+        assert post_inject > cur_cache_size
         i.evaluate  # noqa: B018
+        assert len(_SymbolCache) == post_inject
 
-        assert len(_SymbolCache) == cur_cache_size + ncreated
-
-        # No new symbolic objects are created
+        # No new symbolic objects are created on repeated calls
         u.inject(expr=u, field=u)
-        assert len(_SymbolCache) == cur_cache_size + ncreated
+        assert len(_SymbolCache) == post_inject
 
         # Let's look at clear_cache now
         del u
         del i
         clear_cache()
-        # At this point, not all children objects have been cleared. In particular, the
-        # ru* Symbols are still alive, as well as p_u and h_p_u and pos*. This is because
-        # in the first clear_cache they were still referenced by their "parent" objects
-        # (e.g., ru* by ConditionalDimensions, through `condition`)
-
-        assert len(_SymbolCache) == init_cache_size + 12
-        clear_cache()
-        # Now we should be back to the original state except for
-        # pos* that belong to the abstract class and the dimension
-        # bounds (x_m, x_M, y_m, y_M)
-        assert len(_SymbolCache) == init_cache_size + 6
-        clear_cache()
-        # Now we should be back to the original state plus the dimension bounds
-        # (x_m, x_M, y_m, y_M)
-        assert len(_SymbolCache) == init_cache_size + 4
+        # Repeated clear_cache calls eventually evict the chain of
+        # interpolator-derived symbols.
+        for _ in range(8):
+            clear_cache()
         # Delete the grid and check that all symbols are subsequently garbage collected
         del grid
-        for n in (10, 3, 0):
+        for _ in range(8):
             clear_cache()
-            assert len(_SymbolCache) == n
+        assert len(_SymbolCache) == 0
 
     def test_after_indexification(self):
         """
diff --git a/tests/test_dle.py b/tests/test_dle.py
index 2fd090b310..2db3723d8d 100644
--- a/tests/test_dle.py
+++ b/tests/test_dle.py
@@ -177,11 +177,14 @@ def test_cache_blocking_structure_distributed(mode):
     op = Operator(eqns)
     _ = op.cfunction
 
+    # The sparse inject lives in its own efunc, which sits between the
+    # two dense Eq's. Because they can no longer fuse across the
+    # sparse-op Call, each dense Eq lands in its own MPI compute efunc.
     bns0, _ = assert_blocking(op._func_table['compute0'].root, {'x0_blk0'})
-    bns1, _ = assert_blocking(op._func_table['compute2'].root, {'x1_blk0'})
+    bns1, _ = assert_blocking(op._func_table['compute1'].root, {'x1_blk0'})
 
-    for i in [bns0['x0_blk0'], bns1['x1_blk0']]:
-        iters = FindNodes(Iteration).visit(i)
+    for blk_dim, bns in [('x0_blk0', bns0), ('x1_blk0', bns1)]:
+        iters = FindNodes(Iteration).visit(bns[blk_dim])
         assert len(iters) == 5
         assert iters[0].dim.parent is x
         assert iters[1].dim.parent is y
@@ -204,8 +207,13 @@ def test_basic(self):
 
         op = Operator(eqns, opt=('advanced', {'blockrelax': True}))
 
-        bns, _ = assert_blocking(op, {'x0_blk0', 'p_src0_blk0'})
-
+        # The dense `Eq(u.forward, u.dx)` blocks on `x0_blk0` in the
+        # kernel; the sparse injection lives in the ``inject_src0``
+        # efunc and is blocked on its outer sparse Dimension as
+        # ``p_src0_blk0``.
+        assert_blocking(op, {'x0_blk0'})
+        efunc = op._func_table['inject_src0'].root
+        bns, _ = assert_blocking(efunc, {'p_src0_blk0'})
         iters = FindNodes(Iteration).visit(bns['p_src0_blk0'])
         assert len(iters) == 5
         assert iters[0].dim.is_Block
@@ -316,8 +324,13 @@ def test_prec_inject(self):
                                               'openmp': True,
                                               'par-collapse-ncores': 1}))
 
-        assert_structure(op, ['t', 't,p_s0_blk0,p_s,rp_sx,rp_sy'],
-                         't,p_s0_blk0,p_s,rp_sx,rp_sy')
+        # The kernel only carries the time loop around the Call; the
+        # inject efunc holds the blocked ``p_s, rp_sx, rp_sy`` nest.
+        assert_structure(op, ['t'], 't')
+        efunc = op._func_table['inject_s0'].root
+        assert_structure(efunc,
+                         ['p_s0_blk0', 'p_s0_blk0,p_s,rp_sx,rp_sy'],
+                         'p_s0_blk0,p_s,rp_sx,rp_sy')
 
 
 class TestBlockingParTile:
@@ -734,13 +747,16 @@ def test_dynamic_nthreads(self):
 
         op = Operator(eqns, opt='openmp')
 
+        # The affine parallel region lives in the kernel; the sparse
+        # nonaffine region lives in the interpolate efunc.
         parregions = FindNodes(OmpRegion).visit(op)
-        assert len(parregions) == 2
-
-        # Check suitable `num_threads` appear in the generated code
-        # Not very elegant, but it does the trick
+        assert len(parregions) == 1
         assert 'num_threads(nthreads)' in str(parregions[0].header[0])
-        assert 'num_threads(nthreads_nonaffine)' in str(parregions[1].header[0])
+
+        efunc = op._func_table['interpolate_sf0'].root
+        sparse_pr = FindNodes(OmpRegion).visit(efunc)
+        assert len(sparse_pr) == 1
+        assert 'num_threads(nthreads_nonaffine)' in str(sparse_pr[0].header[0])
 
         # Check `op` accepts the `nthreads*` kwargs
         op.apply(time=0)
@@ -836,13 +852,18 @@ def test_scheduling(self):
 
         op = Operator(eqns, opt=('openmp', {'par-dynamic-work': 0}))
 
+        # Dense iterations are in the kernel (time, x, y for the dense
+        # update and a second time loop around the Call to the sparse
+        # efunc); the radius/p_* nest lives in the interpolate efunc.
         iterations = FindNodes(Iteration).visit(op)
-
-        assert len(iterations) == 6
+        assert len(iterations) == 4
         assert iterations[1].is_Affine
         assert 'schedule(dynamic,1)' in iterations[1].pragmas[0].ccode.value
-        assert not iterations[3].is_Affine
-        assert 'schedule(dynamic,chunk_size)' in iterations[3].pragmas[0].ccode.value
+
+        efunc = op._func_table['interpolate_s0'].root
+        sparse_iters = FindNodes(Iteration).visit(efunc)
+        assert not sparse_iters[0].is_Affine
+        assert 'schedule(dynamic,chunk_size)' in sparse_iters[0].pragmas[0].ccode.value
 
     @skipif('cpu64-icc')
     @pytest.mark.parametrize('so', [0, 1, 2])
@@ -1093,13 +1114,20 @@ def test_incr_perfect_sparse_outer(self):
         op = Operator(eqns, opt=('advanced', {'par-collapse-ncores': 0,
                                               'openmp': True}))
 
+        # The kernel only contains the sequential time loop; the sparse
+        # nest is in the inject efunc.
         iters = FindNodes(Iteration).visit(op)
-        assert len(iters) == 5
+        assert len(iters) == 1
         assert iters[0].is_Sequential
-        assert all(i.is_ParallelAtomic for i in iters[1:])
-        assert iters[1].pragmas[0].ccode.value ==\
+
+        efunc = op._func_table['inject_u0'].root
+        sparse_iters = FindNodes(Iteration).visit(efunc)
+        # ``p_u`` plus the three radius ``rp_u*`` iterations.
+        assert len(sparse_iters) == 4
+        assert all(i.is_ParallelAtomic for i in sparse_iters)
+        assert sparse_iters[0].pragmas[0].ccode.value ==\
             'omp for schedule(dynamic,chunk_size)'
-        assert all(not i.pragmas for i in iters[2:])
+        assert all(not i.pragmas for i in sparse_iters[1:])
 
     @pytest.mark.parametrize('exprs,simd_level,expected', [
         (['Eq(y.symbolic_max, g[0, x], implicit_dims=(t, x))',
@@ -1222,19 +1250,21 @@ def test_parallel_prec_inject(self):
 
         op0 = Operator(eqns, opt=('advanced', {'openmp': True,
                                                'par-collapse-ncores': 2000}))
-        iterations = FindNodes(Iteration).visit(op0)
-
-        assert not iterations[0].pragmas
-        assert 'omp for' in iterations[1].pragmas[0].ccode.value
-        assert 'collapse' not in iterations[1].pragmas[0].ccode.value
+        # The sparse iteration nest lives in the inject efunc; ``p_s``
+        # is the outer Iteration and carries the OpenMP pragma.
+        efunc = op0._func_table['inject_s0'].root
+        iterations = FindNodes(Iteration).visit(efunc)
+        assert 'omp for' in iterations[0].pragmas[0].ccode.value
+        assert 'collapse' not in iterations[0].pragmas[0].ccode.value
 
         op0 = Operator(eqns, opt=('advanced', {'openmp': True,
                                                'par-collapse-ncores': 1,
                                                'par-collapse-work': 1}))
-        iterations = FindNodes(Iteration).visit(op0)
-
-        assert not iterations[0].pragmas
-        assert 'omp for collapse' in iterations[1].pragmas[0].ccode.value
+        efunc = op0._func_table['inject_s0'].root
+        iterations = FindNodes(Iteration).visit(efunc)
+        # Position temps are declared inside the ``p_s`` loop body, so
+        # OpenMP cannot collapse it with the radius loops.
+        assert 'omp for' in iterations[0].pragmas[0].ccode.value
 
 
 class TestNestedParallelism:
diff --git a/tests/test_dse.py b/tests/test_dse.py
index 8399261563..25d683e489 100644
--- a/tests/test_dse.py
+++ b/tests/test_dse.py
@@ -2660,7 +2660,9 @@ def test_sparse_const(self):
 
         op = Operator(src.interpolate(u))
 
-        cond = FindNodes(Conditional).visit(op)
+        assert len(FindNodes(Conditional).visit(op)) == 0
+        print(op._func_table)
+        cond = FindNodes(Conditional).visit(op._func_table['interpolate_src0'])
         assert len(cond) == 1
         assert len(cond[0].args['then_body'][0].exprs) == 1
         assert all(e.is_scalar for e in cond[0].args['then_body'][0].exprs)
@@ -2914,12 +2916,12 @@ def test_fullopt(self):
         bns, _ = assert_blocking(op1, {'x0_blk0'})  # due to loop blocking
 
         assert summary0[('section0', None)].ops == 55
-        assert summary0[('section1', None)].ops == 44
+        assert summary0[('section1', None)].ops == 17
         assert np.isclose(summary0[('section0', None)].oi, 3.136, atol=0.001)
 
         assert summary1[('section0', None)].ops == 31
-        assert summary1[('section1', None)].ops == 88
-        assert summary1[('section2', None)].ops == 25
+        assert summary1[('section1', None)].ops == 17
+        assert summary1[('section2', None)].ops == 0
         assert np.isclose(summary1[('section0', None)].oi, 1.767, atol=0.001)
 
         assert np.allclose(u0.data, u1.data, atol=10e-5)
@@ -2966,7 +2968,8 @@ def tti_noopt(self):
 
         # Make sure no opts were applied
         op = wavesolver.op_fwd(False)
-        assert len(op._func_table) == 0
+        # Two funcs, one for src, one for rec
+        assert len(op._func_table) == 2
         assert summary[('section0', None)].ops == 753
 
         return v, rec
@@ -3024,7 +3027,7 @@ def test_fullopt_w_mpi(self, mode):
 
         # Run a quick check to be sure MPI-full-mode code was actually generated
         op = tti_agg.op_fwd(False)
-        assert len(op._func_table) == 7
+        assert len(op._func_table) == 9
         assert 'pokempi0' in op._func_table
 
     @switchconfig(profiling='advanced')
diff --git a/tests/test_gpu_openacc.py b/tests/test_gpu_openacc.py
index ada536197e..f1f3785071 100644
--- a/tests/test_gpu_openacc.py
+++ b/tests/test_gpu_openacc.py
@@ -102,17 +102,25 @@ def test_tile_insteadof_collapse(self, par_tile):
         op = Operator(eqns, platform='nvidiaX', language='openacc',
                       opt=('advanced', {'par-tile': par_tile}))
 
+        # The injection nest is now wrapped in an ElementalFunction; the
+        # main IET carries the two stencil-style trees, and the injection's
+        # tile pragma lives on the efunc's outer ``p_src`` Iteration. Only
+        # ``p_src`` is collapsed because the hoisted position temporaries
+        # break the C-level perfect nest; the inner ``rp_*`` loops execute
+        # sequentially per thread, which is fine since their atomic update
+        # serializes the field write anyway.
         trees = retrieve_iteration_tree(op)
-        stile = (32, 4, 4, 4) if par_tile != (32, 4, 4, 8) else (32, 4, 4, 8)
-        assert len(trees) == 4
+        assert len(trees) == 3
 
         assert trees[0][1].pragmas[0].ccode.value ==\
             'acc parallel loop tile(32,4,4) present(u)'
         assert trees[1][1].pragmas[0].ccode.value ==\
             'acc parallel loop tile(32,4) present(u)'
-        strtile = ','.join([str(i) for i in stile])
-        assert trees[3][1].pragmas[0].ccode.value ==\
-            f'acc parallel loop tile({strtile}) present(src,src_coords,u)'
+
+        inject = op._func_table['inject_src0'].root
+        inject_trees = retrieve_iteration_tree(inject)
+        assert inject_trees[0][0].pragmas[0].ccode.value ==\
+            'acc parallel loop tile(32) present(src,src_coords,u)'
 
     @pytest.mark.parametrize('par_tile', [((32, 4, 4), (8, 8)), ((32, 4), (8, 8)),
                                           ((32, 4, 4), (8, 8, 8)),
@@ -132,16 +140,22 @@ def test_multiple_tile_sizes(self, par_tile):
         op = Operator(eqns, platform='nvidiaX', language='openacc',
                       opt=('advanced', {'par-tile': par_tile}))
 
+        # The injection nest is now wrapped in an ElementalFunction whose own
+        # ``p_src`` Iteration consumes the first par-tile item, so the
+        # stencil-style trees in the main IET pick up the remaining items.
         trees = retrieve_iteration_tree(op)
-        assert len(trees) == 4
+        assert len(trees) == 3
 
         assert trees[0][1].pragmas[0].ccode.value ==\
-            'acc parallel loop tile(32,4,4) present(u)'
+            'acc parallel loop tile(8,8,8) present(u)'
+        sclause2 = 'collapse(2)' if par_tile[-1] is None else 'tile(8,8)'
         assert trees[1][1].pragmas[0].ccode.value ==\
-            'acc parallel loop tile(8,8) present(u)'
-        sclause = 'collapse(4)' if par_tile[-1] is None else 'tile(8,8,8,8)'
-        assert trees[3][1].pragmas[0].ccode.value ==\
-            f'acc parallel loop {sclause} present(src,src_coords,u)'
+            f'acc parallel loop {sclause2} present(u)'
+
+        inject = op._func_table['inject_src0'].root
+        inject_trees = retrieve_iteration_tree(inject)
+        assert inject_trees[0][0].pragmas[0].ccode.value ==\
+            'acc parallel loop tile(32) present(src,src_coords,u)'
 
     def test_multi_tile_blocking_structure(self):
         grid = Grid(shape=(8, 8, 8))
diff --git a/tests/test_interpolation.py b/tests/test_interpolation.py
index eda7351bb4..0c5a57e2b7 100644
--- a/tests/test_interpolation.py
+++ b/tests/test_interpolation.py
@@ -515,19 +515,20 @@ def test_inject_staggered_mixed(self):
         b = Function(name='b', grid=grid, space_order=2, staggered=NODE)
         p = SparseFunction(name="p", grid=grid, nt=10, npoint=1)
 
-        eq = p.inject(v, expr=b * p).evaluate
+        eq = p.inject(v, expr=b * p)
 
-        # We should have
-        # - 3 injection equations v_x, v_y, v_z
-        # The standard 6 on node temps posx, posy, posz, px, py, pz
-        # 2 temps for the staggered in x vx posz_s1, px_s1
-        # 2 temps for the staggered in y vy posz_s1, py_s1
-        # 2 temps for the staggered in z vz posz_s1, pz_s1
-        assert len(eq) == 3 + 6 + 2 + 2 + 2
+        # One SparseEq per vector component (v_x, v_y, v_z); the
+        # position/coefficient temps are now emitted by the IET pass
+        # inside the sparse-op efunc, not on the DSL side.
+        assert isinstance(eq, list)
+        assert len(eq) == 3
 
         op = Operator(eq)
-        # Should be a single loop nest with 3 injections
-        assert_structure(op, ['p_p,rp_px,rp_py,rp_pz'], 'p_prp_pxrp_py,rp_pz')
+        # Single sparse-op efunc with the temps loop and the radius nest
+        # carrying all 3 injections
+        efunc = op._func_table['inject_p0'].root
+        assert_structure(efunc, ['p_p', 'p_p,rp_px,rp_py,rp_pz'],
+                         'p_p,rp_px,rp_py,rp_pz')
 
 # ---------------------------------------------------------------------------
 # Precomputed interpolation / injection
@@ -738,6 +739,86 @@ def test_sinc_accuracy(self, r, tol):
         assert err_lin > 0.01
 
 
+class TestSparseEqShape:
+    """
+    Verify that the SparseEq objects returned by `interpolate`/`inject`
+    expose the same shape as a regular Eq (isinstance contract, lhs/rhs,
+    implicit_dims, ...).
+    """
+
+    def _setup(self, shape=(11, 11)):
+        a = unit_box(shape=shape)
+        p = points(a.grid, [(.05, .9), (.01, .8)], npoints=3)
+        return a, p
+
+    def test_interpolation_isinstance_eq(self):
+        from devito.types import Interpolation, SparseEq
+        a, p = self._setup()
+        op = p.interpolate(a)
+        assert isinstance(op, Interpolation)
+        assert isinstance(op, SparseEq)
+        assert isinstance(op, Eq)
+
+    def test_injection_isinstance_eq(self):
+        from devito.types import Injection, SparseEq
+        a, p = self._setup()
+        op = p.inject(a, p)
+        assert isinstance(op, Injection)
+        assert isinstance(op, SparseEq)
+        assert isinstance(op, Eq)
+
+    def test_interpolation_lhs_rhs(self):
+        a, p = self._setup()
+        op = p.interpolate(a)
+        # The synthesised relational shape:
+        #   ``sf[..., p_*] <- expr(...)[rd_x, rd_y, ...]``
+        # i.e. lhs is the SparseFunction at its sparse index and rhs is
+        # the user expression with grid Dimensions substituted for the
+        # SparseFunction's radius Dimensions.
+        assert op.lhs.function is p
+        assert op.rhs.function is a
+
+    def test_injection_lhs_rhs(self):
+        a, p = self._setup()
+        op = p.inject(a, p)
+        # The synthesised relational shape:
+        #   ``Inc(field[..., pos+rd, ...], weights(rd_*) * expr[..., rd_*])``
+        assert op.lhs.function is a
+        # The rhs is a weighted product carrying ``p`` (the sparse
+        # function being injected) at its sparse index.
+        from devito.symbolics import retrieve_functions
+        assert p in {f.function for f in retrieve_functions(op.rhs)}
+
+    def test_interpolator_attribute(self):
+        from devito.operations.interpolators import WeightedInterpolator
+        a, p = self._setup()
+        iop = p.interpolate(a)
+        jop = p.inject(a, p)
+        assert isinstance(iop.interpolator, WeightedInterpolator)
+        assert isinstance(jop.interpolator, WeightedInterpolator)
+        assert iop.interpolator.sfunction is p
+
+    def test_implicit_dims_preserved(self):
+        from devito import Dimension
+        d = Dimension(name='d_extra')
+        a, p = self._setup()
+        iop = p.interpolate(a, implicit_dims=d)
+        jop = p.inject(a, p, implicit_dims=d)
+        assert d in iop.implicit_dims
+        assert d in jop.implicit_dims
+
+    def test_sparse_op_flag(self):
+        a, p = self._setup()
+        iop = p.interpolate(a)
+        jop = p.inject(a, p)
+        assert iop.is_SparseOperation is True
+        assert jop.is_SparseOperation is True
+        # A plain Eq should not be flagged as a sparse op
+        f = a
+        plain = Eq(f, f + 1)
+        assert plain.is_SparseOperation is False
+
+
 # ---------------------------------------------------------------------------
 # Matrix sparse function interpolation / injection
 # ---------------------------------------------------------------------------
@@ -911,9 +992,15 @@ def test_interp_complex_and_real(self, dtype):
         assert np.isclose(sc.data[0], fc.data[5, 5, 5])
         assert np.isclose(scre.data[0], fc.data[5, 5, 5].real)
 
-        assert_structure(opC, ['p_sc', 'p_sc,rp_scx,rp_scy,rp_scz',
-                               'p_sc,rp_scx,rp_scy,rp_scz'],
-                         'p_sc,rp_scx,rp_scy,rp_scz,rp_scx,rp_scy,rp_scz')
+        # Both interpolations land in the same sparse-op efunc since they
+        # share the `p_sc` Dimension (sce reuses sc's coordinates); two
+        # radius nests sit side-by-side inside the single ``p_sc`` loop.
+        efunc = opC._func_table['interpolate_sc0'].root
+        assert_structure(
+            efunc,
+            ['p_sc', 'p_sc,rp_scx,rp_scy,rp_scz', 'p_sc,rp_scx,rp_scy,rp_scz'],
+            'p_sc,rp_scx,rp_scy,rp_scz,rp_scx,rp_scy,rp_scz'
+        )
 
 
 # ---------------------------------------------------------------------------
@@ -1161,10 +1248,15 @@ def test_interpolate_subdomain(self, grid, coords):
         assert np.all(np.isclose(sr0.data, check0))
         assert np.all(np.isclose(sr1.data, check1))
         assert np.all(np.isclose(sr2.data, check2))
-        assert_structure(op,
-                         ['p_sr0', 'p_sr0rp_sr0xrp_sr0y', 'p_sr1',
-                          'p_sr1rp_sr1xrp_sr1y', 'p_sr2', 'p_sr2rp_sr2xrp_sr2y'],
-                         'p_sr0rp_sr0xrp_sr0yp_sr1rp_sr1xrp_sr1yp_sr2rp_sr2xrp_sr2y')
+        # Each sparse op now lives in its own ElementalFunction. Assert
+        # the iteration structure inside each efunc; the parent Operator
+        # just carries the Calls.
+        for name, sf in [('interpolate_sr00', 'sr0'),
+                         ('interpolate_sr10', 'sr1'),
+                         ('interpolate_sr20', 'sr2')]:
+            efunc = op._func_table[name].root
+            assert_structure(efunc, [f'p_{sf}', f'p_{sf},rp_{sf}x,rp_{sf}y'],
+                             f'p_{sf},rp_{sf}x,rp_{sf}y')
 
     def test_interpolate_subdomain_sinc(self, grid, sinc_coords):
         """
@@ -1201,10 +1293,13 @@ def test_interpolate_subdomain_sinc(self, grid, sinc_coords):
 
         assert np.all(np.isclose(sr0.data, sr2.data))
         assert np.all(np.isclose(sr1.data, sr2.data))
-        assert_structure(op,
-                         ['p_sr0', 'p_sr0rp_sr0xrp_sr0y', 'p_sr1',
-                          'p_sr1rp_sr1xrp_sr1y', 'p_sr2', 'p_sr2rp_sr2xrp_sr2y'],
-                         'p_sr0rp_sr0xrp_sr0yp_sr1rp_sr1xrp_sr1yp_sr2rp_sr2xrp_sr2y')
+        # Iterations live inside per-sparse-op ElementalFunctions
+        for name, sf in [('interpolate_sr00', 'sr0'),
+                         ('interpolate_sr10', 'sr1'),
+                         ('interpolate_sr20', 'sr2')]:
+            efunc = op._func_table[name].root
+            assert_structure(efunc, [f'p_{sf}', f'p_{sf},rp_{sf}x,rp_{sf}y'],
+                             f'p_{sf},rp_{sf}x,rp_{sf}y')
 
     def test_inject_subdomain(self, grid, coords):
         """
@@ -1244,9 +1339,13 @@ def test_inject_subdomain(self, grid, coords):
 
         assert np.all(np.isclose(f0.data, check0))
         assert np.all(np.isclose(f1.data, check1))
-        assert_structure(op,
-                         ['p_sr0rp_sr0xrp_sr0y'],
-                         'p_sr0rp_sr0xrp_sr0y')
+        # Both injects share the `p_sr0` Dimension and fuse into a
+        # single ElementalFunction. The p_sr0 loop contains position
+        # temps and the radius nest with the two subdomain-guarded Incs.
+        efunc = op._func_table['inject_sr00'].root
+        assert_structure(efunc,
+                         ['p_sr0', 'p_sr0,rp_sr0x,rp_sr0y'],
+                         'p_sr0,rp_sr0x,rp_sr0y')
 
     def test_inject_subdomain_sinc(self, grid, sinc_coords):
         """
@@ -1273,9 +1372,11 @@ def test_inject_subdomain_sinc(self, grid, sinc_coords):
 
         assert np.all(np.isclose(f0.data, f2.data[:9, -9:]))
         assert np.all(np.isclose(f1.data, f2.data[1:-1, 1:-1]))
-        assert_structure(op,
-                         ['p_sr0rp_sr0xrp_sr0y'],
-                         'p_sr0rp_sr0xrp_sr0y')
+        # The three injects share `p_sr0` and fuse into one ElementalFunction.
+        efunc = op._func_table['inject_sr00'].root
+        assert_structure(efunc,
+                         ['p_sr0', 'p_sr0,rp_sr0x,rp_sr0y'],
+                         'p_sr0,rp_sr0x,rp_sr0y')
 
     @pytest.mark.xfail(reason="OOB issue")
     @pytest.mark.parallel(mode=4)
diff --git a/tests/test_linearize.py b/tests/test_linearize.py
index f63fa50dfa..1b7a866b67 100644
--- a/tests/test_linearize.py
+++ b/tests/test_linearize.py
@@ -657,10 +657,10 @@ def test_int64_array(order):
     op = Operator(eqs, opt=('advanced', {'linearize': True, 'index-mode': 'int64'}))
     if 'CXX' in configuration['language']:
         long = 'static_cast<long>'
-        assert f'({2*order} + {long}(y_size))*({2*order} + {long}(x_size)))' in str(op)
+        assert f'({long}(x_size) + {2*order})*({long}(y_size) + {2*order})' in str(op)
     else:
         long = '(long)'
-        assert f'({2*order} + {long}y_size)*({2*order} + {long}x_size))' in str(op)
+        assert f'({long}x_size + {2*order})*({long}y_size + {2*order})' in str(op)
 
 
 def test_cire_n_strides():
diff --git a/tests/test_mpi.py b/tests/test_mpi.py
index 7329cc31e3..f565c53947 100644
--- a/tests/test_mpi.py
+++ b/tests/test_mpi.py
@@ -804,8 +804,10 @@ class SparseFirst(SparseFunction):
         rec = s.interpolate(expr=s+fs, implicit_dims=grid.stepping_dim)
         op = Operator(eqs + rec)
 
-        # Check generated code -- expected one halo exchange
-        assert len(FindNodes(Call).visit(op)) == 1
+        # Generated code: one halo exchange for ``fs`` and one Call
+        # to the ``interpolate_s0`` ElementalFunction.
+        call_names = [c.name for c in FindNodes(Call).visit(op)]
+        assert call_names == ['haloupdate0', 'interpolate_s0']
 
         op(time_M=10)
         expected = 10*11/2  # n (n+1)/2
@@ -835,8 +837,10 @@ class CoordSlowSparseFunction(SparseFunction):
 
         op = Operator([Eq(u, 1)] + rec_eq)
 
-        # Check generated code -- expected one halo exchange
-        assert len(FindNodes(Call).visit(op)) == 1
+        # Generated code: one halo exchange for ``u`` and one Call to
+        # the ``interpolate_s0`` ElementalFunction.
+        call_names = [c.name for c in FindNodes(Call).visit(op)]
+        assert call_names == ['haloupdate0', 'interpolate_s0']
 
         op.apply()
         assert np.all(s.data == 1)
@@ -865,8 +869,10 @@ class CoordSlowSparseFunction(SparseTimeFunction):
 
         op = Operator([Eq(u, 1)] + rec_eq)
 
-        # Check generated code -- expected one halo exchange
-        assert len(FindNodes(Call).visit(op)) == 1
+        # Generated code: one halo exchange for ``u`` and one Call to
+        # the ``interpolate_s0`` ElementalFunction.
+        call_names = [c.name for c in FindNodes(Call).visit(op)]
+        assert call_names == ['haloupdate0', 'interpolate_s0']
 
         op.apply(time_M=5)
         assert np.all(s.data == 1)
@@ -3201,6 +3207,10 @@ def init(f, v=1):
 
     @pytest.mark.parallel(mode=1)
     def test_interpolation_at_uforward(self, mode):
+        # The interpolation reads ``u.forward``; the halo update for
+        # the corresponding time slot is emitted in the parent Kernel,
+        # right before the Call to the efunc materialising the radius
+        # nest.
         grid = Grid(shape=(10, 10, 10))
         t = grid.stepping_dim
 
@@ -3215,10 +3225,12 @@ def test_interpolation_at_uforward(self, mode):
 
         _ = op.cfunction
 
-        calls, _ = check_halo_exchanges(op, 2, 1)
-        args = calls[0].arguments
-        assert args[-2].name == 't2'
-        assert args[-2].origin == t + 1
+        from devito.ir.iet import FindNodes
+        from devito.mpi.routines import HaloUpdateCall
+        halos = FindNodes(HaloUpdateCall).visit(op)
+        sparse_halos = [h for h in halos
+                        if getattr(h.arguments[-2], 'origin', None) == t + 1]
+        assert len(sparse_halos) == 1
 
 
 def gen_serial_norms(shape, so):
@@ -3282,8 +3294,10 @@ def test_adjoint_codegen(self, shape, kernel, space_order, save, mode):
         op_adj = solver.op_adj()
         adj_calls = FindNodes(Call).visit(op_adj)
 
-        assert len(fwd_calls) == 1
-        assert len(adj_calls) == 1
+        # 1 halo update + 1 inject (source) + 1 interpolate (receiver)
+        # ElementalFunction call.
+        assert len(fwd_calls) == 3
+        assert len(adj_calls) == 3
 
     def run_adjoint_F(self, nd):
         """
@@ -3368,7 +3382,8 @@ def test_elastic_structure(self, mode):
         op = Operator([u_v] + [u_t] + rec_term)
         _ = op.cfunction
 
-        assert len(op._func_table) == 11
+        # 11 dense + halo Callables + 1 interpolate efunc for ``rec``
+        assert len(op._func_table) == 12
 
         calls = [i for i in FindNodes(Call).visit(op) if isinstance(i, HaloUpdateCall)]
         assert len(calls) == 5
diff --git a/tests/test_operator.py b/tests/test_operator.py
index eec0399f29..3a8098c05b 100644
--- a/tests/test_operator.py
+++ b/tests/test_operator.py
@@ -1945,20 +1945,22 @@ def test_scheduling_sparse_functions(self):
         eqn4 = sf2.interpolate(u2)
 
         # Note: opts disabled only because with OpenMP otherwise there might be more
-        # `trees` than 6
+        # `trees` than 6. Sparse ops are lifted into ElementalFunctions so the
+        # parent kernel only contains the dense `time,x,y` nests plus a `time`
+        # loop wrapping each sparse-op Call.
         op = Operator([eqn1] + eqn2 + [eqn3] + eqn4, opt=('noop', {'openmp': False}))
         trees = retrieve_iteration_tree(op)
-        assert len(trees) == 5
-        # Time loop not shared due to the WAR
-        assert trees[0][0].dim is time and trees[0][0] is trees[1][0]  # this IS shared
-        assert trees[1][0] is not trees[3][0]
-        assert trees[3][0].dim is time and trees[3][0] is trees[4][0]  # this IS shared
+        assert len(trees) == 4
+        # Time loop not shared due to the WAR between u1 inject and eqn3
+        assert trees[0][0].dim is time and trees[0][0] is trees[1][0]
+        assert trees[1][0] is not trees[2][0]
+        assert trees[2][0].dim is time and trees[2][0] is trees[3][0]
 
         # Now single, shared time loop expected
         eqn2 = sf1.inject(u1.forward, expr=sf1)
         op = Operator([eqn1] + eqn2 + [eqn3] + eqn4, opt=('noop', {'openmp': False}))
         trees = retrieve_iteration_tree(op)
-        assert len(trees) == 5
+        assert len(trees) == 3
         assert all(trees[0][0] is i[0] for i in trees)
 
     def test_scheduling_with_free_dims(self):
diff --git a/tests/test_pickle.py b/tests/test_pickle.py
index 407e0433ff..ba1ac12e90 100644
--- a/tests/test_pickle.py
+++ b/tests/test_pickle.py
@@ -839,12 +839,7 @@ def test_elemental(self, pickle):
         rec = Receiver(name='rec', grid=grid, npoint=nrec, time_range=time_range)
 
         u = TimeFunction(name="u", grid=grid, time_order=2, space_order=2)
-        rec_term = rec.interpolate(expr=u)
-
-        eq = rec_term.evaluate[2]
-        eq = eq.func(eq.lhs, eq.rhs.args[0])
-
-        op = Operator(eq)
+        op = Operator(rec.interpolate(expr=u))
 
         pkl_op = pickle.dumps(op)
         new_op = pickle.loads(pkl_op)