Skip to content

Commit 6d2dbca

Browse files
fix-loop-duplicates: optimize query (#13445)
1 parent 6f1573d commit 6d2dbca

1 file changed

Lines changed: 7 additions & 4 deletions

File tree

dojo/finding/helper.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -628,13 +628,16 @@ def engagement_post_delete(sender, instance, **kwargs):
628628
def fix_loop_duplicates():
629629
"""Due to bugs in the past and even currently when under high parallel load, there can be transitive duplicates."""
630630
""" i.e. A -> B -> C. This can lead to problems when deleting findings, performing deduplication, etc """
631-
candidates = Finding.objects.filter(duplicate_finding__isnull=False, original_finding__isnull=False).order_by("-id")
631+
# Build base queryset without selecting full rows to minimize memory
632+
loop_qs = Finding.objects.filter(duplicate_finding__isnull=False, original_finding__isnull=False)
632633

633-
loop_count = len(candidates)
634+
# Use COUNT(*) at the DB instead of materializing the queryset
635+
loop_count = loop_qs.count()
634636

635637
if loop_count > 0:
636-
deduplicationLogger.info(f"Identified {len(candidates)} Findings with Loops")
637-
for find_id in candidates.values_list("id", flat=True):
638+
deduplicationLogger.info(f"Identified {loop_count} Findings with Loops")
639+
# Stream IDs only in descending order to avoid loading full Finding rows
640+
for find_id in loop_qs.order_by("-id").values_list("id", flat=True).iterator(chunk_size=1000):
638641
removeLoop(find_id, 50)
639642

640643
new_originals = Finding.objects.filter(duplicate_finding__isnull=True, duplicate=True)

0 commit comments

Comments
 (0)