From 61661d9530633576532c3f50684639b4261ec512 Mon Sep 17 00:00:00 2001 From: Emanuel Lupi Date: Tue, 9 Dec 2025 13:44:23 -0300 Subject: [PATCH 1/4] Add tests for ArrayField 'overlap' and 'in' subquery lookups --- tests/model_fields_/test_arrayfield.py | 15 +++++++++++++++ tests/model_fields_/test_embedded_model_array.py | 5 +++++ 2 files changed, 20 insertions(+) diff --git a/tests/model_fields_/test_arrayfield.py b/tests/model_fields_/test_arrayfield.py index e334b21dc..8d40214da 100644 --- a/tests/model_fields_/test_arrayfield.py +++ b/tests/model_fields_/test_arrayfield.py @@ -634,6 +634,21 @@ def test_overlap_values(self): self.objs[:3], ) + def test_overlap_empty_values(self): + qs = NullableIntegerArrayModel.objects.filter(order__lt=-30) + self.assertCountEqual( + NullableIntegerArrayModel.objects.filter( + field__overlap=qs.values_list("field"), + ), + [], + ) + self.assertCountEqual( + NullableIntegerArrayModel.objects.filter( + field__overlap=qs.values("field"), + ), + [], + ) + def test_index(self): self.assertSequenceEqual( NullableIntegerArrayModel.objects.filter(field__0=2), self.objs[1:3] diff --git a/tests/model_fields_/test_embedded_model_array.py b/tests/model_fields_/test_embedded_model_array.py index 8453f6379..499205e9b 100644 --- a/tests/model_fields_/test_embedded_model_array.py +++ b/tests/model_fields_/test_embedded_model_array.py @@ -520,6 +520,11 @@ def test_subquery_in_lookup(self): result = Exhibit.objects.filter(sections__number__in=subquery) self.assertCountEqual(result, [self.wonders, self.new_discoveries, self.egypt]) + def test_subquery_empty_in_lookup(self): + subquery = Audit.objects.filter(section_number=10).values_list("section_number", flat=True) + result = Exhibit.objects.filter(sections__number__in=subquery) + self.assertCountEqual(result, []) + def test_array_as_rhs(self): result = Exhibit.objects.filter(main_section__number__in=models.F("sections__number")) self.assertCountEqual(result, [self.new_discoveries]) From e37a2daace53ab503153dd7aad8002fab5edb70b Mon Sep 17 00:00:00 2001 From: Emanuel Lupi Date: Tue, 9 Dec 2025 21:03:11 -0300 Subject: [PATCH 2/4] INTPYTHON-833 Remove $facet from array output subqueries --- django_mongodb_backend/fields/array.py | 42 +++++---------- .../fields/embedded_model_array.py | 54 +++++++------------ django_mongodb_backend/lookups.py | 39 +++++--------- tests/lookup_/tests.py | 25 +++------ 4 files changed, 52 insertions(+), 108 deletions(-) diff --git a/django_mongodb_backend/fields/array.py b/django_mongodb_backend/fields/array.py index 84164c4d1..bf781359f 100644 --- a/django_mongodb_backend/fields/array.py +++ b/django_mongodb_backend/fields/array.py @@ -310,37 +310,23 @@ class ArrayOverlap(ArrayRHSMixin, FieldGetDbPrepValueMixin, Lookup): def get_subquery_wrapping_pipeline(self, compiler, connection, field_name, expr): return [ + {"$project": {"subquery_results": expr.as_mql(compiler, connection, as_expr=True)}}, + {"$unwind": "$subquery_results"}, { - "$facet": { - "group": [ - {"$project": {"tmp_name": expr.as_mql(compiler, connection, as_expr=True)}}, - { - "$unwind": "$tmp_name", - }, - { - "$group": { - "_id": None, - "tmp_name": {"$addToSet": "$tmp_name"}, - } - }, - ] - } - }, - { - "$project": { - field_name: { - "$ifNull": [ - { - "$getField": { - "input": {"$arrayElemAt": ["$group", 0]}, - "field": "tmp_name", - } - }, - [], - ] - } + "$group": { + "_id": None, + "subquery_results": {"$addToSet": "$subquery_results"}, } }, + # Workaround for https://jira.mongodb.org/browse/SERVER-114196: + # $$NOW becomes unavailable after $unionWith, so it must be stored + # beforehand to ensure it remains accessible later in the pipeline. + {"$addFields": {"__now": "$$NOW"}}, + # Add an extra empty document to handle default values on empty + # results. + {"$unionWith": {"pipeline": [{"$documents": [{"subquery_results": []}]}]}}, + {"$limit": 1}, + {"$project": {field_name: "$subquery_results"}}, ] def as_mql_expr(self, compiler, connection): diff --git a/django_mongodb_backend/fields/embedded_model_array.py b/django_mongodb_backend/fields/embedded_model_array.py index 501b78428..ef46d9784 100644 --- a/django_mongodb_backend/fields/embedded_model_array.py +++ b/django_mongodb_backend/fields/embedded_model_array.py @@ -150,44 +150,28 @@ def get_subquery_wrapping_pipeline(self, compiler, connection, field_name, expr) # structure of EmbeddedModelArrayField on the RHS behaves similar to # ArrayField. return [ + {"$project": {"subquery_results": expr.as_mql(compiler, connection, as_expr=True)}}, + # Use an $unwind followed by a $group to concatenate all the values + # from the RHS subquery. + {"$unwind": "$subquery_results"}, + # The $group stage collects values into an array using $addToSet. + # The use of {_id: null} results in a single grouped array, but + # because arrays from multiple documents are aggregated, the result + # is a list of lists. { - "$facet": { - "gathered_data": [ - {"$project": {"tmp_name": expr.as_mql(compiler, connection, as_expr=True)}}, - # To concatenate all the values from the RHS subquery, - # use an $unwind followed by a $group. - { - "$unwind": "$tmp_name", - }, - # The $group stage collects values into an array using - # $addToSet. The use of {_id: null} results in a - # single grouped array. However, because arrays from - # multiple documents are aggregated, the result is a - # list of lists. - { - "$group": { - "_id": None, - "tmp_name": {"$addToSet": "$tmp_name"}, - } - }, - ] - } - }, - { - "$project": { - field_name: { - "$ifNull": [ - { - "$getField": { - "input": {"$arrayElemAt": ["$gathered_data", 0]}, - "field": "tmp_name", - } - }, - [], - ] - } + "$group": { + "_id": None, + "subquery_results": {"$addToSet": "$subquery_results"}, } }, + # Workaround for https://jira.mongodb.org/browse/SERVER-114196: + # $$NOW becomes unavailable after $unionWith, so it must be stored + # beforehand to ensure it remains accessible later in the pipeline. + {"$addFields": {"__now": "$$NOW"}}, + # Add a dummy document in case of an empty result. + {"$unionWith": {"pipeline": [{"$documents": [{"subquery_results": []}]}]}}, + {"$limit": 1}, + {"$project": {field_name: "$subquery_results"}}, ] diff --git a/django_mongodb_backend/lookups.py b/django_mongodb_backend/lookups.py index 6b59fb961..2b78434a4 100644 --- a/django_mongodb_backend/lookups.py +++ b/django_mongodb_backend/lookups.py @@ -56,34 +56,21 @@ def inner(self, compiler, connection): def get_subquery_wrapping_pipeline(self, compiler, connection, field_name, expr): # noqa: ARG001 return [ { - "$facet": { - "group": [ - { - "$group": { - "_id": None, - "tmp_name": { - "$addToSet": expr.as_mql(compiler, connection, as_expr=True) - }, - } - } - ] - } - }, - { - "$project": { - field_name: { - "$ifNull": [ - { - "$getField": { - "input": {"$arrayElemAt": ["$group", 0]}, - "field": "tmp_name", - } - }, - [], - ] - } + "$group": { + "_id": None, + # Use a temporal name in order to support field_name="_id". + "subquery_results": {"$addToSet": expr.as_mql(compiler, connection, as_expr=True)}, } }, + # Workaround for https://jira.mongodb.org/browse/SERVER-114196: + # $$NOW becomes unavailable after $unionWith, so it must be stored + # beforehand to ensure it remains accessible later in the pipeline. + {"$addFields": {"__now": "$$NOW"}}, + # Add an extra empty document to handle default values on empty + # results. + {"$unionWith": {"pipeline": [{"$documents": [{"subquery_results": []}]}]}}, + {"$limit": 1}, + {"$project": {field_name: "$subquery_results"}}, ] diff --git a/tests/lookup_/tests.py b/tests/lookup_/tests.py index b6ac8a322..94b63f833 100644 --- a/tests/lookup_/tests.py +++ b/tests/lookup_/tests.py @@ -137,28 +137,15 @@ def test_subquery_filter_constant(self): "let": {}, "pipeline": [ {"$match": {"num": {"$gt": 2}}}, + {"$group": {"_id": None, "subquery_results": {"$addToSet": "$num"}}}, + {"$addFields": {"__now": "$$NOW"}}, { - "$facet": { - "group": [ - {"$group": {"_id": None, "tmp_name": {"$addToSet": "$num"}}} - ] - } - }, - { - "$project": { - "num": { - "$ifNull": [ - { - "$getField": { - "input": {"$arrayElemAt": ["$group", 0]}, - "field": "tmp_name", - } - }, - [], - ] - } + "$unionWith": { + "pipeline": [{"$documents": [{"subquery_results": []}]}] } }, + {"$limit": 1}, + {"$project": {"num": "$subquery_results"}}, ], } }, From 35cf5aaec8b919a1dfce3a5f0f254a783f713504 Mon Sep 17 00:00:00 2001 From: Emanuel Lupi Date: Tue, 9 Dec 2025 21:03:21 -0300 Subject: [PATCH 3/4] INTPYTHON-833 Remove $facet when aggregation query has no HAVING clause --- django_mongodb_backend/aggregates.py | 3 +++ django_mongodb_backend/compiler.py | 23 ++++++++--------------- django_mongodb_backend/query.py | 18 ++++++++++++++++++ docs/releases/6.0.x.rst | 5 +++++ 4 files changed, 34 insertions(+), 15 deletions(-) diff --git a/django_mongodb_backend/aggregates.py b/django_mongodb_backend/aggregates.py index 7d8808d86..50feb5dd7 100644 --- a/django_mongodb_backend/aggregates.py +++ b/django_mongodb_backend/aggregates.py @@ -7,6 +7,7 @@ Variance, ) from django.db.models.expressions import Case, Value, When +from django.db.models.functions.comparison import Coalesce from django.db.models.lookups import IsNull from django.db.models.sql.where import WhereNode @@ -69,6 +70,8 @@ def count(self, compiler, connection, resolve_inner_expression=False): if resolve_inner_expression: return lhs_mql return {"$sum": lhs_mql} + # Normalize empty documents (introduced upstream) to an empty set fallback. + agg_expression = Coalesce(agg_expression, []) # If distinct=True or resolve_inner_expression=False, sum the size of the # set. return {"$size": agg_expression.as_mql(compiler, connection, as_expr=True)} diff --git a/django_mongodb_backend/compiler.py b/django_mongodb_backend/compiler.py index 8a5073dd7..c153b7486 100644 --- a/django_mongodb_backend/compiler.py +++ b/django_mongodb_backend/compiler.py @@ -38,6 +38,10 @@ def __init__(self, *args, **kwargs): self.subqueries = [] # Atlas search stage. self.search_pipeline = [] + # Does the aggregation have no GROUP BY fields and needs wrapping? + self.wrap_for_global_aggregation = False + # The MQL equivalent to a SQL HAVING clause. + self.having_match_mql = None def _get_group_alias_column(self, expr, annotation_group_idx): """Generate a dummy field for use in the ids fields in $group.""" @@ -234,21 +238,10 @@ def _build_aggregation_pipeline(self, ids, group): """Build the aggregation pipeline for grouping.""" pipeline = [] if not ids: - group["_id"] = None - pipeline.append({"$facet": {"group": [{"$group": group}]}}) - pipeline.append( - { - "$addFields": { - key: { - "$getField": { - "input": {"$arrayElemAt": ["$group", 0]}, - "field": key, - } - } - for key in group - } - } - ) + pipeline.append({"$group": {"_id": None, **group}}) + # Apply a global aggregation if there are no ids and no having + # clause. + self.wrap_for_global_aggregation = not bool(self.having) else: group["_id"] = ids pipeline.append({"$group": group}) diff --git a/django_mongodb_backend/query.py b/django_mongodb_backend/query.py index 5b4f0ec51..cdc314bab 100644 --- a/django_mongodb_backend/query.py +++ b/django_mongodb_backend/query.py @@ -56,6 +56,7 @@ def __init__(self, compiler): # $lookup stage that encapsulates the pipeline for performing a nested # subquery. self.subquery_lookup = None + self.wrap_for_global_aggregation = compiler.wrap_for_global_aggregation def __repr__(self): return f"" @@ -91,6 +92,23 @@ def get_pipeline(self): pipeline.append({"$match": self.match_mql}) if self.aggregation_pipeline: pipeline.extend(self.aggregation_pipeline) + if self.wrap_for_global_aggregation: + pipeline.extend( + [ + # Workaround for https://jira.mongodb.org/browse/SERVER-114196: + # $$NOW becomes unavailable after $unionWith, so it must be + # stored beforehand to ensure it remains accessible later + # in the pipeline. + {"$addFields": {"__now": "$$NOW"}}, + # Add an empty extra document to handle default values on + # empty results. + {"$unionWith": {"pipeline": [{"$documents": [{}]}]}}, + # Limiting to one document ensures the original result + # takes precedence when present, otherwise the injected + # empty document is used. + {"$limit": 1}, + ] + ) if self.project_fields: pipeline.append({"$project": self.project_fields}) if self.combinator_pipeline: diff --git a/docs/releases/6.0.x.rst b/docs/releases/6.0.x.rst index d78c00052..82304a967 100644 --- a/docs/releases/6.0.x.rst +++ b/docs/releases/6.0.x.rst @@ -17,6 +17,11 @@ Bug fixes - ... +Performance improvements +------------------------ + +- Removed usage of ``$facet`` from aggregate queries. + 6.0.0 ===== From d093214b38e29f8afc4cbbf73608c84dcf70c332 Mon Sep 17 00:00:00 2001 From: Emanuel Lupi Date: Fri, 12 Dec 2025 21:51:17 -0300 Subject: [PATCH 4/4] Update comment --- django_mongodb_backend/aggregates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/django_mongodb_backend/aggregates.py b/django_mongodb_backend/aggregates.py index 50feb5dd7..23c384cc3 100644 --- a/django_mongodb_backend/aggregates.py +++ b/django_mongodb_backend/aggregates.py @@ -70,7 +70,7 @@ def count(self, compiler, connection, resolve_inner_expression=False): if resolve_inner_expression: return lhs_mql return {"$sum": lhs_mql} - # Normalize empty documents (introduced upstream) to an empty set fallback. + # Normalize empty documents (introduced by aggregation wrapping) to an empty set fallback. agg_expression = Coalesce(agg_expression, []) # If distinct=True or resolve_inner_expression=False, sum the size of the # set.