Skip to content

Commit 036803e

Browse files
committed
Refactor metadata propagation in ReduceFnRunner to support extensible PipelineMetadata
1 parent 1f83bf0 commit 036803e

File tree

9 files changed

+418
-20
lines changed

9 files changed

+418
-20
lines changed
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.beam.runners.core;
19+
20+
import com.google.auto.value.AutoValue;
21+
import java.io.IOException;
22+
import java.io.InputStream;
23+
import java.io.OutputStream;
24+
import org.apache.beam.model.fnexecution.v1.BeamFnApi;
25+
import org.apache.beam.sdk.coders.AtomicCoder;
26+
import org.apache.beam.sdk.coders.ByteArrayCoder;
27+
import org.apache.beam.sdk.coders.NullableCoder;
28+
import org.apache.beam.sdk.values.CausedByDrain;
29+
30+
/**
31+
* Encapsulates metadata that propagates with elements in the pipeline.
32+
*
33+
* <p>This metadata is sent along with elements. It currently includes fields like {@link
34+
* CausedByDrain}, and is designed to be extensible to support future metadata fields such as
35+
* OpenTelemetry context or CDC (Change Data Capture) kind.
36+
*
37+
* <p>The purpose of this class is to group targeted metadata fields together. This makes it easier
38+
* to define combination strategies (e.g., when accumulating state in {@code ReduceFnRunner}) when
39+
* multiple elements are merged or grouped, without having to extend method signatures or state
40+
* handling for every new metadata field.
41+
*/
42+
@AutoValue
43+
public abstract class CombinedMetadata {
44+
public abstract CausedByDrain causedByDrain();
45+
46+
public static CombinedMetadata create(CausedByDrain causedByDrain) {
47+
return new AutoValue_CombinedMetadata(causedByDrain);
48+
}
49+
50+
public static CombinedMetadata createDefault() {
51+
return create(CausedByDrain.NORMAL);
52+
}
53+
54+
public static class Coder extends AtomicCoder<CombinedMetadata> {
55+
private static final Coder INSTANCE = new Coder();
56+
57+
public static Coder of() {
58+
return INSTANCE;
59+
}
60+
61+
@Override
62+
public void encode(CombinedMetadata value, OutputStream outStream) throws IOException {
63+
if (value == null) {
64+
NullableCoder.of(ByteArrayCoder.of()).encode(null, outStream);
65+
return;
66+
}
67+
BeamFnApi.Elements.ElementMetadata.Builder builder =
68+
BeamFnApi.Elements.ElementMetadata.newBuilder();
69+
builder.setDrain(
70+
value.causedByDrain() == CausedByDrain.CAUSED_BY_DRAIN
71+
? BeamFnApi.Elements.DrainMode.Enum.DRAINING
72+
: BeamFnApi.Elements.DrainMode.Enum.NOT_DRAINING);
73+
74+
NullableCoder.of(ByteArrayCoder.of()).encode(builder.build().toByteArray(), outStream);
75+
}
76+
77+
@Override
78+
public CombinedMetadata decode(InputStream inStream) throws IOException {
79+
byte[] bytes = NullableCoder.of(ByteArrayCoder.of()).decode(inStream);
80+
if (bytes == null) {
81+
return CombinedMetadata.createDefault();
82+
}
83+
BeamFnApi.Elements.ElementMetadata proto =
84+
BeamFnApi.Elements.ElementMetadata.parseFrom(bytes);
85+
86+
CausedByDrain causedByDrain =
87+
proto.getDrain() == BeamFnApi.Elements.DrainMode.Enum.DRAINING
88+
? CausedByDrain.CAUSED_BY_DRAIN
89+
: CausedByDrain.NORMAL;
90+
91+
return CombinedMetadata.create(causedByDrain);
92+
}
93+
}
94+
}
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.beam.runners.core;
19+
20+
import org.apache.beam.sdk.values.CausedByDrain;
21+
22+
/** Combiner for CombinedMetadata. */
23+
class CombinedMetadataCombiner implements MetadataCombiner<CombinedMetadata> {
24+
private static final CombinedMetadataCombiner INSTANCE = new CombinedMetadataCombiner();
25+
26+
public static CombinedMetadataCombiner of() {
27+
return INSTANCE;
28+
}
29+
30+
private final CausedByDrainCombiner causedByDrainCombiner = CausedByDrainCombiner.of();
31+
32+
@Override
33+
public CombinedMetadata createAccumulator() {
34+
return CombinedMetadata.create(causedByDrainCombiner.createAccumulator());
35+
}
36+
37+
@Override
38+
public CombinedMetadata addInput(CombinedMetadata accumulator, CombinedMetadata input) {
39+
return CombinedMetadata.create(
40+
causedByDrainCombiner.addInput(accumulator.causedByDrain(), input.causedByDrain()));
41+
}
42+
43+
/** Combiner for CausedByDrain metadata. */
44+
static class CausedByDrainCombiner implements MetadataCombiner<CausedByDrain> {
45+
private static final CausedByDrainCombiner INSTANCE = new CausedByDrainCombiner();
46+
47+
public static CausedByDrainCombiner of() {
48+
return INSTANCE;
49+
}
50+
51+
@Override
52+
public CausedByDrain createAccumulator() {
53+
return CausedByDrain.NORMAL;
54+
}
55+
56+
@Override
57+
public CausedByDrain addInput(CausedByDrain current, CausedByDrain input) {
58+
if (current == CausedByDrain.CAUSED_BY_DRAIN || input == CausedByDrain.CAUSED_BY_DRAIN) {
59+
return CausedByDrain.CAUSED_BY_DRAIN;
60+
}
61+
return CausedByDrain.NORMAL;
62+
}
63+
}
64+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.beam.runners.core;
19+
20+
/** Interface for combining pipeline metadata. */
21+
interface MetadataCombiner<T> {
22+
T createAccumulator();
23+
24+
T addInput(T accumulator, T input);
25+
}

runners/core-java/src/main/java/org/apache/beam/runners/core/ReduceFnRunner.java

Lines changed: 41 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
import org.apache.beam.sdk.metrics.Metrics;
4040
import org.apache.beam.sdk.options.PipelineOptions;
4141
import org.apache.beam.sdk.state.TimeDomain;
42+
import org.apache.beam.sdk.state.ValueState;
4243
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
4344
import org.apache.beam.sdk.transforms.windowing.PaneInfo;
4445
import org.apache.beam.sdk.transforms.windowing.PaneInfo.Timing;
@@ -107,6 +108,10 @@ public class ReduceFnRunner<K, InputT, OutputT, W extends BoundedWindow> {
107108
* <li>It uses discarding or accumulation mode according to the {@link WindowingStrategy}.
108109
* </ul>
109110
*/
111+
static final StateTag<ValueState<CombinedMetadata>> METADATA_TAG =
112+
StateTags.makeSystemTagInternal(
113+
StateTags.value("combinedMetadata", CombinedMetadata.Coder.of()));
114+
110115
private final WindowingStrategy<Object, W> windowingStrategy;
111116

112117
private final WindowedValueReceiver<KV<K, OutputT>> outputter;
@@ -376,7 +381,7 @@ public void processElements(Iterable<WindowedValue<InputT>> values) throws Excep
376381
emit(
377382
contextFactory.base(window, StateStyle.DIRECT),
378383
contextFactory.base(window, StateStyle.RENAMED),
379-
CausedByDrain.NORMAL);
384+
CombinedMetadata.createDefault());
380385
}
381386

382387
// We're all done with merging and emitting elements so can compress the activeWindow state.
@@ -590,6 +595,17 @@ private void processElement(Map<W, W> windowToMergeResult, WindowedValue<InputT>
590595
value.getTimestamp(),
591596
StateStyle.DIRECT,
592597
value.causedByDrain());
598+
599+
ValueState<CombinedMetadata> metadataState = directContext.state().access(METADATA_TAG);
600+
CombinedMetadata currentMetadata = metadataState.read();
601+
if (currentMetadata == null) {
602+
currentMetadata = CombinedMetadata.createDefault();
603+
}
604+
CombinedMetadata inputMetadata = CombinedMetadata.create(value.causedByDrain());
605+
CombinedMetadata newMetadata =
606+
CombinedMetadataCombiner.of().addInput(currentMetadata, inputMetadata);
607+
metadataState.write(newMetadata);
608+
593609
if (triggerRunner.isClosed(directContext.state())) {
594610
// This window has already been closed.
595611
droppedDueToClosedWindow.inc();
@@ -792,7 +808,7 @@ public void onTimers(Iterable<TimerData> timers) throws Exception {
792808
renamedContext,
793809
true /* isFinished */,
794810
windowActivation.isEndOfWindow,
795-
windowActivation.causedByDrain);
811+
CombinedMetadata.create(windowActivation.causedByDrain));
796812
checkState(newHold == null, "Hold placed at %s despite isFinished being true.", newHold);
797813
}
798814

@@ -810,7 +826,10 @@ public void onTimers(Iterable<TimerData> timers) throws Exception {
810826
if (windowActivation.windowIsActiveAndOpen()
811827
&& triggerRunner.shouldFire(
812828
directContext.window(), directContext.timers(), directContext.state())) {
813-
emit(directContext, renamedContext, windowActivation.causedByDrain);
829+
emit(
830+
directContext,
831+
renamedContext,
832+
CombinedMetadata.create(windowActivation.causedByDrain));
814833
}
815834

816835
if (windowActivation.isEndOfWindow) {
@@ -874,6 +893,7 @@ private void clearAllState(
874893
triggerRunner.clearState(
875894
directContext.window(), directContext.timers(), directContext.state());
876895
paneInfoTracker.clear(directContext.state());
896+
directContext.state().access(METADATA_TAG).clear();
877897
} else {
878898
// If !windowIsActiveAndOpen then !activeWindows.isActive (1) or triggerRunner.isClosed (2).
879899
// For (1), if !activeWindows.isActive then the window must be merging and has been
@@ -934,8 +954,9 @@ private void prefetchEmit(
934954
private void emit(
935955
ReduceFn<K, InputT, OutputT, W>.Context directContext,
936956
ReduceFn<K, InputT, OutputT, W>.Context renamedContext,
937-
CausedByDrain causedByDrain)
957+
CombinedMetadata metadata)
938958
throws Exception {
959+
939960
checkState(
940961
triggerRunner.shouldFire(
941962
directContext.window(), directContext.timers(), directContext.state()));
@@ -950,13 +971,14 @@ private void emit(
950971
// Run onTrigger to produce the actual pane contents.
951972
// As a side effect it will clear all element holds, but not necessarily any
952973
// end-of-window or garbage collection holds.
953-
onTrigger(directContext, renamedContext, isFinished, false /*isEndOfWindow*/, causedByDrain);
974+
onTrigger(directContext, renamedContext, isFinished, false /*isEndOfWindow*/, metadata);
954975

955976
// Now that we've triggered, the pane is empty.
956977
nonEmptyPanes.clearPane(renamedContext.state());
957978

958979
// Cleanup buffered data if appropriate
959980
if (shouldDiscard) {
981+
directContext.state().access(METADATA_TAG).clear();
960982
// Cleanup flavor C: The user does not want any buffered data to persist between panes.
961983
reduceFn.clearState(renamedContext);
962984
}
@@ -1009,8 +1031,19 @@ private void prefetchOnTrigger(
10091031
ReduceFn<K, InputT, OutputT, W>.Context renamedContext,
10101032
final boolean isFinished,
10111033
boolean isEndOfWindow,
1012-
CausedByDrain causedByDrain)
1034+
CombinedMetadata metadata)
10131035
throws Exception {
1036+
ValueState<CombinedMetadata> metadataState = directContext.state().access(METADATA_TAG);
1037+
CombinedMetadata aggregatedMetadata = metadataState.read();
1038+
if (aggregatedMetadata == null) {
1039+
aggregatedMetadata = CombinedMetadata.createDefault();
1040+
}
1041+
CombinedMetadata fullyAggregatedMetadata =
1042+
CombinedMetadataCombiner.of().addInput(aggregatedMetadata, metadata);
1043+
final CausedByDrain aggregatedCausedByDrain = fullyAggregatedMetadata.causedByDrain();
1044+
if (isFinished) {
1045+
metadataState.clear();
1046+
}
10141047
// Extract the window hold, and as a side effect clear it.
10151048
final WatermarkHold.OldAndNewHolds pair =
10161049
watermarkHold.extractAndRelease(renamedContext, isFinished).read();
@@ -1081,12 +1114,12 @@ private void prefetchOnTrigger(
10811114
.setValue(KV.of(key, toOutput))
10821115
.setTimestamp(outputTimestamp)
10831116
.setWindows(windows)
1084-
.setCausedByDrain(causedByDrain)
1117+
.setCausedByDrain(aggregatedCausedByDrain)
10851118
.setPaneInfo(paneInfo)
10861119
.setReceiver(outputter)
10871120
.output();
10881121
},
1089-
causedByDrain);
1122+
aggregatedCausedByDrain);
10901123

10911124
reduceFn.onTrigger(renamedTriggerContext);
10921125
}

0 commit comments

Comments
 (0)