Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
"""
$text query operator combined with non-text query predicates (implicit and
explicit $and).

Existing $text coverage exercises the operator in isolation and with a single
co-located equality predicate. This file covers a richer set of compound
filters: $text intersected with an equality, a range ($gt), an `$in`, an array
equality, a `$ne`, an explicit `$and`, and a predicate that excludes every text
match. In every case the result is the intersection of the text match and the
non-text predicate.

Oracle: MongoDB 7.0 (functional-tests CI baseline). The engine under test
matches native behavior on every case; no engine divergences are tracked here.
"""

import pytest

from documentdb_tests.compatibility.tests.core.operator.query.utils.query_test_case import (
QueryTestCase,
)
from documentdb_tests.framework.assertions import assertSuccess
from documentdb_tests.framework.executor import execute_command
from documentdb_tests.framework.parametrize import pytest_params

# Fixture documents shared by every compound-filter case in this module.
# "content" holds the searchable text; "category", "rating" and "tags" are the
# fields the non-text predicates filter on.
DOCS = [
    {
        "_id": 1,
        "content": "coffee and tea",
        "category": "drinks",
        "rating": 5,
        "tags": ["hot"],
    },
    {
        "_id": 2,
        "content": "coffee beans roasted",
        "category": "food",
        "rating": 3,
        "tags": ["beans"],
    },
    {
        "_id": 3,
        "content": "green tea leaves",
        "category": "drinks",
        "rating": 4,
        "tags": ["green"],
    },
    {
        "_id": 4,
        "content": "python programming",
        "category": "tech",
        "rating": 5,
        "tags": ["code"],
    },
]

# Property [Compound Intersection]: $text composes with non-text predicates as a
# conjunction; only documents matching both the text search and the non-text
# predicate are returned.
#
# Every case except the last is built so that the non-text predicate removes at
# least one text match while keeping at least one. A predicate that filters
# nothing would pass even if the engine ignored it.
TEXT_COMPOUND_TESTS: list[QueryTestCase] = [
    # "coffee" matches _id 1 (drinks) and 2 (food); the equality drops 2.
    QueryTestCase(
        id="text_and_equality",
        filter={"$text": {"$search": "coffee"}, "category": "drinks"},
        expected=[{"_id": 1}],
        msg="$text intersected with an equality predicate returns the common match.",
    ),
    # "coffee" matches ratings 5 and 3; $gt 4 drops the rating-3 document.
    QueryTestCase(
        id="text_and_range_gt",
        filter={"$text": {"$search": "coffee"}, "rating": {"$gt": 4}},
        expected=[{"_id": 1}],
        msg="$text intersected with a $gt range keeps only the high-rated match.",
    ),
    # "coffee tea" matches _id 1, 2 and 3; $in ["drinks"] drops the food doc.
    QueryTestCase(
        id="text_or_terms_and_in",
        filter={"$text": {"$search": "coffee tea"}, "category": {"$in": ["drinks"]}},
        expected=[{"_id": 1}, {"_id": 3}],
        msg="$text OR-of-terms intersected with an $in keeps the drinks documents.",
    ),
    # "coffee tea" matches ratings 5, 3 and 4. $gte 4 keeps the boundary value
    # (_id 3, rating 4) and drops _id 2 (rating 3), so the range is observable.
    QueryTestCase(
        id="text_explicit_and_with_range",
        filter={"$and": [{"$text": {"$search": "coffee tea"}}, {"rating": {"$gte": 4}}]},
        expected=[{"_id": 1}, {"_id": 3}],
        msg="$text inside an explicit $and intersects with a $gte range predicate.",
    ),
    # "coffee" matches _id 1 (tags ["hot"]) and 2 (tags ["beans"]).
    QueryTestCase(
        id="text_and_array_equality",
        filter={"$text": {"$search": "coffee"}, "tags": "beans"},
        expected=[{"_id": 2}],
        msg="$text intersected with an array-membership equality returns the match.",
    ),
    # The search must match the food document (_id 2, via "coffee") for $ne to
    # have anything to exclude; "tea" alone only matches drinks documents.
    QueryTestCase(
        id="text_and_not_equal",
        filter={"$text": {"$search": "coffee tea"}, "category": {"$ne": "food"}},
        expected=[{"_id": 1}, {"_id": 3}],
        msg="$text intersected with a $ne predicate excludes the food document.",
    ),
    # No "coffee" document is in the tech category, so nothing survives.
    QueryTestCase(
        id="text_and_predicate_excludes_all",
        filter={"$text": {"$search": "coffee"}, "category": "tech"},
        expected=[],
        msg="When the scalar predicate excludes every text match the result is empty.",
    ),
]


# Tagged with the same ``find`` marker the sibling textScore module applies via
# ``pytestmark``, so marker-based selection (``-m find``) includes these cases.
@pytest.mark.find
@pytest.mark.parametrize("test", pytest_params(TEXT_COMPOUND_TESTS))
def test_text_compound_predicates(collection, test: QueryTestCase):
    """$text intersects with co-located non-text predicates as a conjunction.

    Creates a text index on ``content``, loads ``DOCS``, runs ``find`` with the
    case's filter, and compares the returned ``_id`` values with
    ``test.expected``.

    Args:
        collection: Target collection supplied by the test fixture.
        test: The compound-filter case (filter, expected documents, message).
    """
    collection.create_index([("content", "text")])
    # Insert shallow copies rather than the module-level DOCS dicts themselves.
    collection.insert_many([dict(d) for d in DOCS])
    result = execute_command(
        collection,
        {
            "find": collection.name,
            "filter": test.filter,
            # Project only _id and sort on it so the expected lists are
            # deterministic and independent of text-score ordering.
            "projection": {"_id": 1},
            "sort": {"_id": 1},
        },
    )
    assertSuccess(result, test.expected, msg=test.msg)
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
"""
Placement and validation rules for `$meta: "textScore"` with the $text operator.

Existing coverage asserts that a projected textScore is returned and that
results can be ordered by it. This file covers the placement contract around
the metadata: the score may be sorted on without being projected, it ranks
documents by match frequency (assertions are on ordering, never on the
engine-specific score value), and requesting the score in a projection or a
sort without any `$text` query in the filter is rejected with the documented
metadata-not-available error.

Oracle: MongoDB 7.0 (functional-tests CI baseline). The engine under test
matches native behavior on every case; no engine divergences are tracked here.
"""

import pytest

from documentdb_tests.framework.assertions import assertFailureCode, assertProperties, assertSuccess
from documentdb_tests.framework.error_codes import QUERY_METADATA_NOT_AVAILABLE_ERROR
from documentdb_tests.framework.executor import execute_command
from documentdb_tests.framework.property_checks import Exists, IsType, Len

pytestmark = pytest.mark.find


def _create_text_index(collection):
    """Create a text index on the ``content`` field of ``collection``."""
    content_text_keys = [("content", "text")]
    collection.create_index(content_text_keys)


def test_text_score_projection_returns_double(collection):
    """Projecting ``$meta: "textScore"`` yields a double on the matched document.

    Only the type of the score is asserted, not its value.
    """
    _create_text_index(collection)
    collection.insert_one({"_id": 1, "content": "coffee and more coffee"})

    find_command = {
        "find": collection.name,
        "filter": {"$text": {"$search": "coffee"}},
        "projection": {"score": {"$meta": "textScore"}},
    }
    result = execute_command(collection, find_command)

    # Checks run against the raw command reply, addressed by dotted path.
    expected_properties = {
        "cursor.firstBatch": Len(1),
        "cursor.firstBatch.0._id": Exists(),
        "cursor.firstBatch.0.score": IsType("double"),
    }
    assertProperties(
        result,
        expected_properties,
        raw_res=True,
        msg="A projected textScore should be a double on the matched document.",
    )


def test_text_score_sort_without_projection_ranks_by_frequency(collection):
    """A textScore sort ranks by match frequency even when the score is not projected."""
    _create_text_index(collection)

    # Three documents that differ only in how often the search term occurs.
    seeded_docs = [
        {"_id": 1, "content": "coffee"},
        {"_id": 2, "content": "coffee coffee coffee"},
        {"_id": 3, "content": "coffee coffee"},
    ]
    collection.insert_many(seeded_docs)

    find_command = {
        "find": collection.name,
        "filter": {"$text": {"$search": "coffee"}},
        "sort": {"score": {"$meta": "textScore"}},
        "projection": {"_id": 1},
    }
    result = execute_command(collection, find_command)

    # Only the order is asserted; the score value itself is engine-specific.
    most_to_least_frequent = [{"_id": 2}, {"_id": 3}, {"_id": 1}]
    assertSuccess(
        result,
        most_to_least_frequent,
        msg="textScore sort orders the most-frequent match first, even unprojected.",
    )


def test_text_score_projection_without_text_query_errors(collection):
    """A textScore projection with no $text in the filter is rejected.

    The text index exists and a document is present, so the only missing
    ingredient is the ``$text`` predicate.
    """
    _create_text_index(collection)
    collection.insert_one({"_id": 1, "content": "coffee"})

    find_command = {
        "find": collection.name,
        "filter": {},
        "projection": {"score": {"$meta": "textScore"}},
    }
    result = execute_command(collection, find_command)

    assertFailureCode(
        result,
        QUERY_METADATA_NOT_AVAILABLE_ERROR,
        msg="textScore projection requires a $text query in the filter.",
    )


def test_text_score_sort_without_text_query_errors(collection):
    """Sorting by textScore without a $text query fails with the metadata-not-available code.

    The command deliberately carries no textScore projection. A projection
    without ``$text`` is rejected on its own, so including one would let this
    test pass even if the sort were never validated. Leaving it out makes the
    sort the only consumer of the text-score metadata.
    """
    _create_text_index(collection)
    collection.insert_one({"_id": 1, "content": "coffee"})
    result = execute_command(
        collection,
        {
            "find": collection.name,
            "filter": {},
            "sort": {"score": {"$meta": "textScore"}},
        },
    )
    assertFailureCode(
        result,
        QUERY_METADATA_NOT_AVAILABLE_ERROR,
        msg="textScore sort requires a $text query in the filter.",
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
"""$merge stage — pipeline integration with other stages (composition coverage).

Existing $merge coverage focuses on whenMatched/whenNotMatched semantics, the
``on`` field, and write-path behavior. This file mirrors the sibling
``test_stages_combination_out`` / ``test_stages_combination_sort`` pattern for
$merge: it verifies that $merge correctly consumes the output of a preceding
stage — $match filters, $project reshapes, $group aggregates into the ``_id``
key, $sort + $limit selects a top-k subset, $addFields enriches before a
default whenMatched merge into an existing target, and $unwind + $group
re-keys before writing.

Oracle: MongoDB 7.0 (functional-tests CI baseline). The engine under test
matches native behavior on every case; no engine divergences are tracked for
this surface.
"""

from __future__ import annotations

import pytest

from documentdb_tests.compatibility.tests.core.operator.stages.merge.utils.merge_common import (
TARGET,
MergeTestCase,
)
from documentdb_tests.framework.assertions import assertResult
from documentdb_tests.framework.executor import execute_command
from documentdb_tests.framework.parametrize import pytest_params

pytestmark = pytest.mark.aggregate

# Source documents used by most cases below: two in group "a" with status "on"
# and one in group "b" with status "off".
SOURCE = [
    {
        "_id": 1,
        "g": "a",
        "val": 10,
        "status": "on",
    },
    {
        "_id": 2,
        "g": "b",
        "val": 20,
        "status": "off",
    },
    {
        "_id": 3,
        "g": "a",
        "val": 30,
        "status": "on",
    },
]

def _merge_into_target():
    """Return a fresh ``$merge`` stage that writes into ``TARGET`` with default options."""
    return {"$merge": {"into": TARGET}}


# Property [Pipeline Integration]: whatever the preceding stage emits is what
# $merge writes to the target collection. Each ``expected`` list is written in
# ascending ``_id`` order.
MERGE_COMBINATION_TESTS: list[MergeTestCase] = [
    # Filtering: only the two status == "on" documents reach $merge.
    MergeTestCase(
        "match_then_merge",
        docs=SOURCE,
        target_docs=[],
        pipeline=[
            {"$match": {"status": "on"}},
            _merge_into_target(),
        ],
        expected=[
            {"_id": 1, "g": "a", "val": 10, "status": "on"},
            {"_id": 3, "g": "a", "val": 30, "status": "on"},
        ],
        msg="$merge writes only the documents that pass a preceding $match.",
    ),
    # Reshaping: the inclusion projection keeps _id and val only.
    MergeTestCase(
        "project_then_merge",
        docs=SOURCE,
        target_docs=[],
        pipeline=[
            {"$project": {"val": 1}},
            _merge_into_target(),
        ],
        expected=[
            {"_id": 1, "val": 10},
            {"_id": 2, "val": 20},
            {"_id": 3, "val": 30},
        ],
        msg="$merge writes the reshaped documents produced by a preceding $project.",
    ),
    # Aggregating: group "a" sums 10 + 30, group "b" is 20.
    MergeTestCase(
        "group_then_merge",
        docs=SOURCE,
        target_docs=[],
        pipeline=[
            {"$group": {"_id": "$g", "total": {"$sum": "$val"}}},
            _merge_into_target(),
        ],
        expected=[
            {"_id": "a", "total": 40},
            {"_id": "b", "total": 20},
        ],
        msg="$merge writes $group results keyed by the group _id.",
    ),
    # Top-k: the two largest val values belong to _id 3 and _id 2.
    MergeTestCase(
        "sort_limit_then_merge",
        docs=SOURCE,
        target_docs=[],
        pipeline=[
            {"$sort": {"val": -1}},
            {"$limit": 2},
            _merge_into_target(),
        ],
        expected=[
            {"_id": 2, "g": "b", "val": 20, "status": "off"},
            {"_id": 3, "g": "a", "val": 30, "status": "on"},
        ],
        msg="$merge writes the top-k subset selected by a preceding $sort + $limit.",
    ),
    # Pre-populated target: _id 1 already exists with a "note" field.
    MergeTestCase(
        "addfields_then_merge_into_existing",
        docs=SOURCE,
        target_docs=[{"_id": 1, "note": "kept"}],
        pipeline=[
            {"$addFields": {"doubled": {"$multiply": ["$val", 2]}}},
            _merge_into_target(),
        ],
        expected=[
            {"_id": 1, "note": "kept", "g": "a", "val": 10, "status": "on", "doubled": 20},
            {"_id": 2, "g": "b", "val": 20, "status": "off", "doubled": 40},
            {"_id": 3, "g": "a", "val": 30, "status": "on", "doubled": 60},
        ],
        msg="Default whenMatched merge keeps target fields and adds $addFields output.",
    ),
    # Re-keying: this case uses its own source documents with array "tags".
    MergeTestCase(
        "unwind_group_then_merge",
        docs=[{"_id": 1, "tags": ["x", "y"]}, {"_id": 2, "tags": ["x"]}],
        target_docs=[],
        pipeline=[
            {"$unwind": "$tags"},
            {"$group": {"_id": "$tags", "n": {"$sum": 1}}},
            _merge_into_target(),
        ],
        expected=[
            {"_id": "x", "n": 2},
            {"_id": "y", "n": 1},
        ],
        msg="$merge writes counts produced by $unwind + $group re-keyed by tag.",
    ),
]


@pytest.mark.parametrize("test_case", pytest_params(MERGE_COMBINATION_TESTS))
def test_stages_combination_merge(collection, test_case: MergeTestCase):
    """Run a pipeline ending in $merge, then verify the target collection's contents."""
    # prepare() returns the name used as the target below; presumably it also
    # seeds the source and target collections (defined in merge_common).
    target = test_case.prepare(collection)

    merge_command = test_case.build_command(collection, target)
    execute_command(collection, merge_command)

    # Read the target back sorted by _id and compare with the expected documents.
    read_back_command = {"find": target, "filter": {}, "sort": {"_id": 1}}
    result = execute_command(collection, read_back_command)
    assertResult(result, expected=test_case.expected, msg=test_case.msg)
Loading