documentdb · SarthakDalmia1 · Jun 18, 2026
@@ -0,0 +1,95 @@
+"""
+$text query operator combined with non-text query predicates (implicit and
+explicit $and).
+
+Existing $text coverage exercises the operator in isolation and with a single
+co-located equality predicate. This file covers a richer set of compound
+filters: $text intersected with an equality, a range ($gt), an `$in`, an array
+equality, a `$ne`, an explicit `$and`, and a predicate that excludes every text
+match. In every case the result is the intersection of the text match and the
+non-text predicate.
+
+Oracle: MongoDB 7.0 (functional-tests CI baseline). The engine under test
+matches native behavior on every case; no engine divergences are tracked here.
+"""
+
+import pytest
+
+from documentdb_tests.compatibility.tests.core.operator.query.utils.query_test_case import (
+    QueryTestCase,
+)
+from documentdb_tests.framework.assertions import assertSuccess
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# Shared fixture for every case below. Ids 1 and 2 contain "coffee", ids 1 and 3
+# contain "tea", and id 4 contains neither term. category, rating and tags
+# differ between documents so a non-text predicate can split the text matches.
+DOCS = [
+    {"_id": 1, "content": "coffee and tea", "category": "drinks", "rating": 5, "tags": ["hot"]},
+    {"_id": 2, "content": "coffee beans roasted", "category": "food", "rating": 3, "tags": ["beans"]},
+    {"_id": 3, "content": "green tea leaves", "category": "drinks", "rating": 4, "tags": ["green"]},
+    {"_id": 4, "content": "python programming", "category": "tech", "rating": 5, "tags": ["code"]},
+]
+
+# Property [Compound Intersection]: $text composes with non-text predicates as a
+# conjunction; only documents matching both the text search and the non-text
+# predicate are returned.
+#
+# Every case is built so that the non-text predicate removes at least one
+# document the text search alone would return. A predicate that filters nothing
+# would let the case pass even if the engine ignored it.
+TEXT_COMPOUND_TESTS: list[QueryTestCase] = [
+    QueryTestCase(
+        id="text_and_equality",
+        # "coffee" matches ids 1 and 2; only id 1 is in category "drinks".
+        filter={"$text": {"$search": "coffee"}, "category": "drinks"},
+        expected=[{"_id": 1}],
+        msg="$text intersected with an equality predicate returns the common match.",
+    ),
+    QueryTestCase(
+        id="text_and_range_gt",
+        # "coffee" matches ids 1 and 2; only id 1 has rating > 4.
+        filter={"$text": {"$search": "coffee"}, "rating": {"$gt": 4}},
+        expected=[{"_id": 1}],
+        msg="$text intersected with a $gt range keeps only the high-rated match.",
+    ),
+    QueryTestCase(
+        id="text_or_terms_and_in",
+        # "coffee tea" matches ids 1, 2 and 3; id 2 is "food" and is dropped by $in.
+        filter={"$text": {"$search": "coffee tea"}, "category": {"$in": ["drinks"]}},
+        expected=[{"_id": 1}, {"_id": 3}],
+        msg="$text OR-of-terms intersected with an $in keeps the drinks documents.",
+    ),
+    QueryTestCase(
+        id="text_explicit_and_with_range",
+        # "coffee" matches ids 1 (rating 5) and 2 (rating 3). The bound is 4 so the
+        # range drops id 2; a bound of 3 would be satisfied by every document.
+        filter={"$and": [{"$text": {"$search": "coffee"}}, {"rating": {"$gte": 4}}]},
+        expected=[{"_id": 1}],
+        msg="$text inside an explicit $and intersects with a $gte range predicate.",
+    ),
+    QueryTestCase(
+        id="text_and_array_equality",
+        # "coffee" matches ids 1 and 2; only id 2 has "beans" in its tags array.
+        filter={"$text": {"$search": "coffee"}, "tags": "beans"},
+        expected=[{"_id": 2}],
+        msg="$text intersected with an array-membership equality returns the match.",
+    ),
+    QueryTestCase(
+        id="text_and_not_equal",
+        # The search term must be one that the food document (id 2) matches,
+        # otherwise $ne has nothing to exclude: "coffee" matches ids 1 and 2.
+        filter={"$text": {"$search": "coffee"}, "category": {"$ne": "food"}},
+        expected=[{"_id": 1}],
+        msg="$text intersected with a $ne predicate excludes the food document.",
+    ),
+    QueryTestCase(
+        id="text_and_predicate_excludes_all",
+        # "coffee" matches ids 1 and 2; neither is in category "tech".
+        filter={"$text": {"$search": "coffee"}, "category": "tech"},
+        expected=[],
+        msg="When the scalar predicate excludes every text match the result is empty.",
+    ),
+]
+
+
+@pytest.mark.parametrize("test", pytest_params(TEXT_COMPOUND_TESTS))
+def test_text_compound_predicates(collection, test: QueryTestCase):
+    """Run one compound $text filter and check the returned _ids against the case."""
+    collection.create_index([("content", "text")])
+    # Insert shallow copies so the module-level DOCS dicts are not handed to the driver.
+    collection.insert_many([dict(doc) for doc in DOCS])
+    find_command = {
+        "find": collection.name,
+        "filter": test.filter,
+        # Project only _id and sort on it so the comparison sees ids in a fixed order.
+        "projection": {"_id": 1},
+        "sort": {"_id": 1},
+    }
+    outcome = execute_command(collection, find_command)
+    assertSuccess(outcome, test.expected, msg=test.msg)
@@ -0,0 +1,117 @@
+"""
+Placement and validation rules for `$meta: "textScore"` with the $text operator.
+
+Existing coverage asserts that a projected textScore is returned and that
+results can be ordered by it. This file covers the placement contract around
+the metadata: the score may be sorted on without being projected, it ranks
+documents by match frequency (assertions are on ordering, never on the
+engine-specific score value), and requesting the score in a projection or a
+sort without any `$text` query in the filter is rejected with the documented
+metadata-not-available error.
+
+Oracle: MongoDB 7.0 (functional-tests CI baseline). The engine under test
+matches native behavior on every case; no engine divergences are tracked here.
+"""
+
+import pytest
+
+from documentdb_tests.framework.assertions import assertFailureCode, assertProperties, assertSuccess
+from documentdb_tests.framework.error_codes import QUERY_METADATA_NOT_AVAILABLE_ERROR
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.property_checks import Exists, IsType, Len
+
+# Module-level pytest marker: tags every test in this module with "find".
+pytestmark = pytest.mark.find
+
+
+def _create_text_index(collection):
+    """Create a text index on the ``content`` field of ``collection``."""
+    text_index_keys = [("content", "text")]
+    collection.create_index(text_index_keys)
+
+
+def test_text_score_projection_returns_double(collection):
+    """Projecting ``$meta: "textScore"`` yields a double-typed field on the matched document."""
+    _create_text_index(collection)
+    collection.insert_one({"_id": 1, "content": "coffee and more coffee"})
+    find_with_score = {
+        "find": collection.name,
+        "filter": {"$text": {"$search": "coffee"}},
+        "projection": {"score": {"$meta": "textScore"}},
+    }
+    response = execute_command(collection, find_with_score)
+    # Only the shape of the first batch is checked (length, _id present, score
+    # typed as double); the score's numeric value is never compared.
+    expected_properties = {
+        "cursor.firstBatch": Len(1),
+        "cursor.firstBatch.0._id": Exists(),
+        "cursor.firstBatch.0.score": IsType("double"),
+    }
+    assertProperties(
+        response,
+        expected_properties,
+        raw_res=True,
+        msg="A projected textScore should be a double on the matched document.",
+    )
+
+
+def test_text_score_sort_without_projection_ranks_by_frequency(collection):
+    """A textScore sort with no textScore projection orders results by term frequency."""
+    _create_text_index(collection)
+    # "coffee" occurs three times in _id 2, twice in _id 3 and once in _id 1.
+    corpus = [
+        {"_id": 1, "content": "coffee"},
+        {"_id": 2, "content": "coffee coffee coffee"},
+        {"_id": 3, "content": "coffee coffee"},
+    ]
+    collection.insert_many(corpus)
+    ranked_find = {
+        "find": collection.name,
+        "filter": {"$text": {"$search": "coffee"}},
+        "sort": {"score": {"$meta": "textScore"}},
+        # The projection asks for _id only, so the score itself is not returned.
+        "projection": {"_id": 1},
+    }
+    response = execute_command(collection, ranked_find)
+    # Only the order is asserted; the absolute textScore value is engine-specific.
+    expected_order = [{"_id": 2}, {"_id": 3}, {"_id": 1}]
+    assertSuccess(
+        response,
+        expected_order,
+        msg="textScore sort orders the most-frequent match first, even unprojected.",
+    )
+
+
+def test_text_score_projection_without_text_query_errors(collection):
+    """A textScore projection with an empty filter fails as metadata-not-available."""
+    _create_text_index(collection)
+    collection.insert_one({"_id": 1, "content": "coffee"})
+    # The text index and a document exist; the only thing missing is a $text
+    # clause in the filter.
+    projection_without_text = {
+        "find": collection.name,
+        "filter": {},
+        "projection": {"score": {"$meta": "textScore"}},
+    }
+    response = execute_command(collection, projection_without_text)
+    assertFailureCode(
+        response,
+        QUERY_METADATA_NOT_AVAILABLE_ERROR,
+        msg="textScore projection requires a $text query in the filter.",
+    )
+
+
+def test_text_score_sort_without_text_query_errors(collection):
+    """A textScore sort issued with an empty filter fails as metadata-not-available."""
+    _create_text_index(collection)
+    collection.insert_one({"_id": 1, "content": "coffee"})
+    # NOTE(review): the command projects textScore as well as sorting on it, so
+    # the failure may come from the projection rather than the sort. If the sort
+    # path should be isolated, consider dropping the projection -- confirm the
+    # sort-only behavior against the oracle first.
+    sort_without_text = {
+        "find": collection.name,
+        "filter": {},
+        "sort": {"score": {"$meta": "textScore"}},
+        "projection": {"score": {"$meta": "textScore"}},
+    }
+    response = execute_command(collection, sort_without_text)
+    assertFailureCode(
+        response,
+        QUERY_METADATA_NOT_AVAILABLE_ERROR,
+        msg="textScore sort requires a $text query in the filter.",
+    )
@@ -0,0 +1,132 @@
+"""$merge stage — pipeline integration with other stages (composition coverage).
+
+Existing $merge coverage focuses on whenMatched/whenNotMatched semantics, the
+``on`` field, and write-path behavior. This file mirrors the sibling
+``test_stages_combination_out`` / ``test_stages_combination_sort`` pattern for
+$merge: it verifies that $merge correctly consumes the output of a preceding
+stage — $match filters, $project reshapes, $group aggregates into the ``_id``
+key, $sort + $limit selects a top-k subset, $addFields enriches before a
+default whenMatched merge into an existing target, and $unwind + $group
+re-keys before writing.
+
+Oracle: MongoDB 7.0 (functional-tests CI baseline). The engine under test
+matches native behavior on every case; no engine divergences are tracked for
+this surface.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from documentdb_tests.compatibility.tests.core.operator.stages.merge.utils.merge_common import (
+    TARGET,
+    MergeTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# Module-level pytest marker: tags every test in this module with "aggregate".
+pytestmark = pytest.mark.aggregate
+
+# Source documents shared by most cases: two documents in group "a" (val 10 and
+# 30, status "on") and one in group "b" (val 20, status "off").
+SOURCE = [
+    {"_id": 1, "g": "a", "val": 10, "status": "on"},
+    {"_id": 2, "g": "b", "val": 20, "status": "off"},
+    {"_id": 3, "g": "a", "val": 30, "status": "on"},
+]
+
+# Property [Pipeline Integration]: $merge writes the output of the preceding
+# stage to the target collection, preserving the transformation that stage
+# produced.
+#
+# Each ``expected`` list is written in ascending _id order because the test
+# below reads the target back with a sort on _id.
+MERGE_COMBINATION_TESTS: list[MergeTestCase] = [
+    MergeTestCase(
+        "match_then_merge",
+        docs=SOURCE,
+        target_docs=[],
+        # Only ids 1 and 3 have status "on".
+        pipeline=[{"$match": {"status": "on"}}, {"$merge": {"into": TARGET}}],
+        expected=[
+            {"_id": 1, "g": "a", "val": 10, "status": "on"},
+            {"_id": 3, "g": "a", "val": 30, "status": "on"},
+        ],
+        msg="$merge writes only the documents that pass a preceding $match.",
+    ),
+    MergeTestCase(
+        "project_then_merge",
+        docs=SOURCE,
+        target_docs=[],
+        # The projection names only "val"; the expected documents keep _id and val.
+        pipeline=[{"$project": {"val": 1}}, {"$merge": {"into": TARGET}}],
+        expected=[
+            {"_id": 1, "val": 10},
+            {"_id": 2, "val": 20},
+            {"_id": 3, "val": 30},
+        ],
+        msg="$merge writes the reshaped documents produced by a preceding $project.",
+    ),
+    MergeTestCase(
+        "group_then_merge",
+        docs=SOURCE,
+        target_docs=[],
+        # Group "a" sums 10 + 30; group "b" has the single value 20.
+        pipeline=[
+            {"$group": {"_id": "$g", "total": {"$sum": "$val"}}},
+            {"$merge": {"into": TARGET}},
+        ],
+        expected=[
+            {"_id": "a", "total": 40},
+            {"_id": "b", "total": 20},
+        ],
+        msg="$merge writes $group results keyed by the group _id.",
+    ),
+    MergeTestCase(
+        "sort_limit_then_merge",
+        docs=SOURCE,
+        target_docs=[],
+        # Descending val with a limit of 2 selects val 30 (id 3) and val 20 (id 2).
+        pipeline=[
+            {"$sort": {"val": -1}},
+            {"$limit": 2},
+            {"$merge": {"into": TARGET}},
+        ],
+        expected=[
+            {"_id": 2, "g": "b", "val": 20, "status": "off"},
+            {"_id": 3, "g": "a", "val": 30, "status": "on"},
+        ],
+        msg="$merge writes the top-k subset selected by a preceding $sort + $limit.",
+    ),
+    MergeTestCase(
+        "addfields_then_merge_into_existing",
+        docs=SOURCE,
+        # The target already holds _id 1 with a field the source does not have.
+        target_docs=[{"_id": 1, "note": "kept"}],
+        # No whenMatched option is given, so the stage's default applies to _id 1.
+        pipeline=[
+            {"$addFields": {"doubled": {"$multiply": ["$val", 2]}}},
+            {"$merge": {"into": TARGET}},
+        ],
+        expected=[
+            {"_id": 1, "note": "kept", "g": "a", "val": 10, "status": "on", "doubled": 20},
+            {"_id": 2, "g": "b", "val": 20, "status": "off", "doubled": 40},
+            {"_id": 3, "g": "a", "val": 30, "status": "on", "doubled": 60},
+        ],
+        msg="Default whenMatched merge keeps target fields and adds $addFields output.",
+    ),
+    MergeTestCase(
+        "unwind_group_then_merge",
+        # Uses its own documents: tag "x" appears in both, tag "y" in one.
+        docs=[{"_id": 1, "tags": ["x", "y"]}, {"_id": 2, "tags": ["x"]}],
+        target_docs=[],
+        pipeline=[
+            {"$unwind": "$tags"},
+            {"$group": {"_id": "$tags", "n": {"$sum": 1}}},
+            {"$merge": {"into": TARGET}},
+        ],
+        expected=[
+            {"_id": "x", "n": 2},
+            {"_id": "y", "n": 1},
+        ],
+        msg="$merge writes counts produced by $unwind + $group re-keyed by tag.",
+    ),
+]
+
+
+@pytest.mark.parametrize("test_case", pytest_params(MERGE_COMBINATION_TESTS))
+def test_stages_combination_merge(collection, test_case: MergeTestCase):
+    """Run a pipeline ending in $merge, then compare the target's contents with the case."""
+    # presumably prepare() seeds the collections and returns the target's name;
+    # the value is used below as the "find" argument -- verify against merge_common.
+    target = test_case.prepare(collection)
+    merge_command = test_case.build_command(collection, target)
+    # The aggregate reply is discarded; the outcome is checked by reading the target.
+    execute_command(collection, merge_command)
+    read_back = {"find": target, "filter": {}, "sort": {"_id": 1}}
+    result = execute_command(collection, read_back)
+    assertResult(result, expected=test_case.expected, msg=test_case.msg)