Skip to content

Commit 8428317

Browse files
authored
Locations performance improvements (#14718)
* wip * comment * simplify * tests * linter fixes * comments * updates * remove celery stuff * refactor * test updates * linter * perf test updates * wip * testing * wip * perf test updates * wip rename * wip * cleanup * comments * rename * lint * clean on endpoints * remove cleaning until later * test updates * cache cleaned locations for tests * remove unnecessary guard * move clean call mmm * consolidate * restore clean mmm * fix * add test * linter * refactor * fixup * refactor * cleanup * coments/cleanup * product ref statuses * persist in txn * refactor * perf test updates * comments * linter * comments * comment * fixup
1 parent a5dd701 commit 8428317

12 files changed

Lines changed: 1553 additions & 388 deletions
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Generated by Django 5.2.12 on 2026-04-10 14:24


import django.core.validators
from django.db import migrations, models


class Migration(migrations.Migration):

    """Schema-only migration: redefine identity_hash on URL and URLEvent."""

    dependencies = [
        # Must apply after the unique-language migration in the dojo app.
        ('dojo', '0263_language_type_unique_language'),
    ]

    operations = [
        # URL.identity_hash: exactly 64 chars (max_length=64 plus
        # MinLengthValidator(64)), unique, and db_index=True — the index
        # presumably supports hash-based location lookups; confirm against
        # the Location query paths introduced in this commit.
        migrations.AlterField(
            model_name='url',
            name='identity_hash',
            field=models.CharField(db_index=True, editable=False, help_text='The hash of the location for uniqueness', max_length=64, unique=True, validators=[django.core.validators.MinLengthValidator(64)]),
        ),
        # URLEvent.identity_hash: same 64-char hash field, but intentionally
        # without the unique constraint or index on the event table.
        migrations.AlterField(
            model_name='urlevent',
            name='identity_hash',
            field=models.CharField(editable=False, help_text='The hash of the location for uniqueness', max_length=64, validators=[django.core.validators.MinLengthValidator(64)]),
        ),
    ]

dojo/importers/base_importer.py

Lines changed: 13 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,6 @@
1313

1414
import dojo.finding.helper as finding_helper
1515
import dojo.risk_acceptance.helper as ra_helper
16-
from dojo.celery_dispatch import dojo_dispatch_task
17-
from dojo.importers.location_manager import LocationManager, UnsavedLocation
1816
from dojo.importers.options import ImporterOptions
1917
from dojo.jira_link.helper import is_keep_in_sync_with_jira
2018
from dojo.location.models import Location
@@ -80,8 +78,6 @@ def __init__(
8078
and will raise a `NotImplemented` exception
8179
"""
8280
ImporterOptions.__init__(self, *args, **kwargs)
83-
if settings.V3_FEATURE_LOCATIONS:
84-
self.location_manager = LocationManager()
8581

8682
def check_child_implementation_exception(self):
8783
"""
@@ -391,36 +387,20 @@ def apply_import_tags(
391387
for tag in self.tags:
392388
self.add_tags_safe(finding, tag)
393389

394-
if settings.V3_FEATURE_LOCATIONS:
395-
# Add any tags to any locations of the findings imported if necessary
396-
if self.apply_tags_to_endpoints and self.tags:
397-
# Collect all endpoints linked to the affected findings
398-
locations_qs = Location.objects.filter(findings__finding__in=findings_to_tag).distinct()
399-
try:
400-
bulk_add_tags_to_instances(
401-
tag_or_tags=self.tags,
402-
instances=locations_qs,
403-
tag_field_name="tags",
404-
)
405-
except IntegrityError:
406-
for finding in findings_to_tag:
407-
for location in finding.locations.all():
408-
for tag in self.tags:
409-
self.add_tags_safe(location.location, tag)
410-
# Add any tags to any endpoints of the findings imported if necessary
411-
elif self.apply_tags_to_endpoints and self.tags:
412-
endpoints_qs = Endpoint.objects.filter(finding__in=findings_to_tag).distinct()
390+
# Add any tags to any locations/endpoints of the findings imported if necessary
391+
if self.apply_tags_to_endpoints and self.tags:
392+
locations_qs = self.location_handler.get_locations_for_tagging(findings_to_tag)
413393
try:
414394
bulk_add_tags_to_instances(
415395
tag_or_tags=self.tags,
416-
instances=endpoints_qs,
396+
instances=locations_qs,
417397
tag_field_name="tags",
418398
)
419399
except IntegrityError:
420400
for finding in findings_to_tag:
421-
for endpoint in finding.endpoints.all():
401+
for location in self.location_handler.get_location_tag_fallback(finding):
422402
for tag in self.tags:
423-
self.add_tags_safe(endpoint, tag)
403+
self.add_tags_safe(location, tag)
424404

425405
def update_import_history(
426406
self,
@@ -467,14 +447,8 @@ def update_import_history(
467447
import_settings["apply_tags_to_endpoints"] = self.apply_tags_to_endpoints
468448
import_settings["group_by"] = self.group_by
469449
import_settings["create_finding_groups_for_all_findings"] = self.create_finding_groups_for_all_findings
470-
if settings.V3_FEATURE_LOCATIONS:
471-
# Add the list of locations that were added exclusively at import time
472-
if len(self.endpoints_to_add) > 0:
473-
import_settings["locations"] = [str(location) for location in self.endpoints_to_add]
474-
# TODO: Delete this after the move to Locations
475-
# Add the list of endpoints that were added exclusively at import time
476-
elif len(self.endpoints_to_add) > 0:
477-
import_settings["endpoints"] = [str(endpoint) for endpoint in self.endpoints_to_add]
450+
if len(self.endpoints_to_add) > 0:
451+
import_settings.update(self.location_handler.serialize_extra_locations(self.endpoints_to_add))
478452
# Create the test import object
479453
test_import = Test_Import.objects.create(
480454
test=self.test,
@@ -796,50 +770,13 @@ def process_request_response_pairs(
796770
def process_locations(
797771
self,
798772
finding: Finding,
799-
locations_to_add: list[UnsavedLocation],
773+
extra_locations_to_add: list | None = None,
800774
) -> None:
801775
"""
802-
Process any locations to add to the finding. Locations could come from two places
803-
- Directly from the report
804-
- Supplied by the user from the import form
805-
These locations will be processed in to Location objects and associated with the
806-
finding and product
807-
"""
808-
# Save the unsaved locations
809-
self.location_manager.chunk_locations_and_disperse(finding, finding.unsaved_locations)
810-
# Check for any that were added in the form
811-
if len(locations_to_add) > 0:
812-
logger.debug("locations_to_add: %s", locations_to_add)
813-
self.location_manager.chunk_locations_and_disperse(finding, locations_to_add)
814-
815-
# TODO: Delete this after the move to Locations
816-
def process_endpoints(
817-
self,
818-
finding: Finding,
819-
endpoints_to_add: list[Endpoint],
820-
) -> None:
776+
Record locations/endpoints from the finding + any form-added extras.
777+
Flushed to DB by location_handler.persist().
821778
"""
822-
Process any endpoints to add to the finding. Endpoints could come from two places
823-
- Directly from the report
824-
- Supplied by the user from the import form
825-
These endpoints will be processed in to endpoints objects and associated with the
826-
finding and product
827-
"""
828-
if settings.V3_FEATURE_LOCATIONS:
829-
msg = "BaseImporter#process_endpoints() method is deprecated when V3_FEATURE_LOCATIONS is enabled"
830-
raise NotImplementedError(msg)
831-
832-
# Clean and record unsaved endpoints from the report
833-
self.endpoint_manager.clean_unsaved_endpoints(finding.unsaved_endpoints)
834-
for endpoint in finding.unsaved_endpoints:
835-
key = self.endpoint_manager.record_endpoint(endpoint)
836-
self.endpoint_manager.record_status_for_create(finding, key)
837-
# Record any endpoints added from the form
838-
if len(endpoints_to_add) > 0:
839-
logger.debug("endpoints_to_add: %s", endpoints_to_add)
840-
for endpoint in endpoints_to_add:
841-
key = self.endpoint_manager.record_endpoint(endpoint)
842-
self.endpoint_manager.record_status_for_create(finding, key)
779+
self.location_handler.record_for_finding(finding, extra_locations_to_add)
843780

844781
def sanitize_vulnerability_ids(self, finding) -> None:
845782
"""Remove undisired vulnerability id values"""
@@ -932,19 +869,7 @@ def mitigate_finding(
932869
# Remove risk acceptance if present (vulnerability is now fixed)
933870
# risk_unaccept will check if finding.risk_accepted is True before proceeding
934871
ra_helper.risk_unaccept(self.user, finding, perform_save=False, post_comments=False)
935-
if settings.V3_FEATURE_LOCATIONS:
936-
# Mitigate the location statuses
937-
dojo_dispatch_task(
938-
LocationManager.mitigate_location_status,
939-
finding.locations.all(),
940-
self.user,
941-
kwuser=self.user,
942-
sync=True,
943-
)
944-
else:
945-
# TODO: Delete this after the move to Locations
946-
# Accumulate endpoint statuses for bulk mitigate in persist()
947-
self.endpoint_manager.record_statuses_to_mitigate(finding.status_finding.all())
872+
self.location_handler.record_mitigations_for_finding(finding, self.user)
948873
# to avoid pushing a finding group multiple times, we push those outside of the loop
949874
if finding_groups_enabled and finding.finding_group:
950875
# don't try to dedupe findings that we are closing
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
"""
2+
Base class and handler for location/endpoint managers in the import pipeline.
3+
4+
BaseLocationManager defines the contract that both LocationManager (V3) and
5+
EndpointManager (legacy) must implement. LocationHandler is the facade that
6+
importers interact with — it picks the appropriate manager based on
7+
V3_FEATURE_LOCATIONS and delegates all calls through the shared interface.
8+
9+
This structure prevents drift between the two managers: adding an abstract
10+
method to BaseLocationManager forces both to implement it, and callers can
11+
only access methods exposed by LocationHandler.
12+
"""
13+
14+
from __future__ import annotations
15+
16+
from abc import ABC, abstractmethod
17+
from typing import TYPE_CHECKING
18+
19+
if TYPE_CHECKING:
20+
from dojo.models import Dojo_User, Finding, Product
21+
22+
23+
class BaseLocationManager(ABC):

    """
    Abstract base for import-pipeline managers that handle network identifiers
    (locations in V3, endpoints in legacy).

    Subclasses must implement every abstract method. The importer never calls
    subclass-specific methods directly — it goes through LocationHandler.

    The record_* methods accumulate work in memory; persist() flushes all
    accumulated operations to the database in one batch.
    """

    def __init__(self, product: Product) -> None:
        # Product that recorded locations/endpoints are associated with,
        # fixed at construction time.
        self._product = product

    @abstractmethod
    def clean_unsaved(self, finding: Finding) -> None:
        """Clean the unsaved locations/endpoints on this finding."""

    @abstractmethod
    def record_for_finding(self, finding: Finding, extra_locations: list | None = None) -> None:
        """Record items from the finding + any form-added extras for later batch creation."""

    @abstractmethod
    def update_status(self, existing_finding: Finding, new_finding: Finding, user: Dojo_User) -> None:
        """Accumulate status changes (mitigate/reactivate) based on old vs new finding."""

    @abstractmethod
    def record_reactivations_for_finding(self, finding: Finding) -> None:
        """Record items on this finding for reactivation."""

    @abstractmethod
    def record_mitigations_for_finding(self, finding: Finding, user: Dojo_User) -> None:
        """Record items on this finding for mitigation."""

    @abstractmethod
    def get_locations_for_tagging(self, findings: list[Finding]):
        """Return a queryset of taggable objects linked to the given findings."""

    @abstractmethod
    def get_location_tag_fallback(self, finding: Finding):
        """Return an iterable of taggable objects for per-instance tag fallback."""

    @abstractmethod
    def serialize_extra_locations(self, locations: list) -> dict:
        """Serialize extra locations/endpoints for import history settings."""

    @abstractmethod
    def persist(self) -> None:
        """Flush all accumulated operations to the database."""
71+
72+
73+
class LocationHandler:

    """
    Import-pipeline facade over the two BaseLocationManager implementations.

    On construction it inspects V3_FEATURE_LOCATIONS and wraps either the V3
    LocationManager or the legacy EndpointManager. Importers talk only to this
    facade — the wrapped manager is private, so manager-specific behavior
    cannot leak into shared import code.
    """

    def __init__(
        self,
        product: Product,
        *,
        v3_manager_class: type[BaseLocationManager] | None = None,
        v2_manager_class: type[BaseLocationManager] | None = None,
    ) -> None:
        # Deferred imports (noqa: PLC0415) — likely to avoid circular imports
        # between the importer modules; confirm before hoisting to module level.
        from django.conf import settings  # noqa: PLC0415

        from dojo.importers.endpoint_manager import EndpointManager  # noqa: PLC0415
        from dojo.importers.location_manager import LocationManager  # noqa: PLC0415

        self._product = product
        # The feature flag selects the concrete backend; the keyword-only
        # class overrides allow injection of test doubles.
        manager_cls = (
            (v3_manager_class or LocationManager)
            if settings.V3_FEATURE_LOCATIONS
            else (v2_manager_class or EndpointManager)
        )
        self._manager: BaseLocationManager = manager_cls(product)

    # --- Thin delegates, one per BaseLocationManager abstract method ---

    def clean_unsaved(self, finding: Finding) -> None:
        """Clean the unsaved locations/endpoints on this finding."""
        return self._manager.clean_unsaved(finding)

    def record_for_finding(self, finding: Finding, extra_locations: list | None = None) -> None:
        """Record the finding's items plus any form-added extras for batch creation."""
        return self._manager.record_for_finding(finding, extra_locations)

    def update_status(self, existing_finding: Finding, new_finding: Finding, user: Dojo_User) -> None:
        """Accumulate mitigate/reactivate status changes from old vs new finding."""
        return self._manager.update_status(existing_finding, new_finding, user)

    def record_reactivations_for_finding(self, finding: Finding) -> None:
        """Record this finding's items for reactivation."""
        return self._manager.record_reactivations_for_finding(finding)

    def record_mitigations_for_finding(self, finding: Finding, user: Dojo_User) -> None:
        """Record this finding's items for mitigation."""
        return self._manager.record_mitigations_for_finding(finding, user)

    def get_locations_for_tagging(self, findings: list[Finding]):
        """Return a queryset of taggable objects linked to the given findings."""
        return self._manager.get_locations_for_tagging(findings)

    def get_location_tag_fallback(self, finding: Finding):
        """Return an iterable of taggable objects for per-instance tag fallback."""
        return self._manager.get_location_tag_fallback(finding)

    def serialize_extra_locations(self, locations: list) -> dict:
        """Serialize extra locations/endpoints for import history settings."""
        return self._manager.serialize_extra_locations(locations)

    def persist(self) -> None:
        """Flush all accumulated operations to the database."""
        return self._manager.persist()

dojo/importers/default_importer.py

Lines changed: 7 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from dojo.celery_dispatch import dojo_dispatch_task
1010
from dojo.finding import helper as finding_helper
1111
from dojo.importers.base_importer import BaseImporter, Parser
12-
from dojo.importers.endpoint_manager import EndpointManager
12+
from dojo.importers.base_location_manager import LocationHandler
1313
from dojo.importers.options import ImporterOptions
1414
from dojo.jira_link.helper import is_keep_in_sync_with_jira
1515
from dojo.models import (
@@ -58,8 +58,7 @@ def __init__(self, *args, **kwargs):
5858
import_type=Test_Import.IMPORT_TYPE,
5959
**kwargs,
6060
)
61-
if not settings.V3_FEATURE_LOCATIONS:
62-
self.endpoint_manager = EndpointManager(self.engagement.product)
61+
self.location_handler = LocationHandler(self.engagement.product)
6362

6463
def create_test(
6564
self,
@@ -240,13 +239,7 @@ def process_findings(
240239
)
241240
# Process any request/response pairs
242241
self.process_request_response_pairs(finding)
243-
if settings.V3_FEATURE_LOCATIONS:
244-
# Process any locations on the finding, or added on the form
245-
self.process_locations(finding, self.endpoints_to_add)
246-
else:
247-
# TODO: Delete this after the move to Locations
248-
# Process any endpoints on the finding, or added on the form
249-
self.process_endpoints(finding, self.endpoints_to_add)
242+
self.process_locations(finding, self.endpoints_to_add)
250243
# Parsers must use unsaved_tags to store tags, so we can clean them.
251244
# Accumulate for bulk application after the loop (O(unique_tags) instead of O(N·T)).
252245
cleaned_tags = clean_tags(finding.unsaved_tags)
@@ -267,16 +260,13 @@ def process_findings(
267260
logger.debug("process_findings: computed push_to_jira=%s", push_to_jira)
268261
batch_finding_ids.append(finding.id)
269262

270-
# If batch is full or we're at the end, persist endpoints and dispatch
263+
# If batch is full or we're at the end, persist locations/endpoints and dispatch
271264
if len(batch_finding_ids) >= batch_max_size or is_final_finding:
272-
if not settings.V3_FEATURE_LOCATIONS:
273-
self.endpoint_manager.persist(user=self.user)
274-
265+
self.location_handler.persist()
275266
# Apply parser-supplied tags for this batch before post-processing starts,
276267
# so rules/deduplication tasks see the tags already on the findings.
277268
bulk_apply_parser_tags(findings_with_parser_tags)
278269
findings_with_parser_tags.clear()
279-
280270
finding_ids_batch = list(batch_finding_ids)
281271
batch_finding_ids.clear()
282272
logger.debug("process_findings: dispatching batch with push_to_jira=%s (batch_size=%d, is_final=%s)",
@@ -404,9 +394,8 @@ def close_old_findings(
404394
finding_groups_enabled=self.findings_groups_enabled,
405395
product_grading_option=False,
406396
)
407-
# Persist any accumulated endpoint status mitigations
408-
if not settings.V3_FEATURE_LOCATIONS:
409-
self.endpoint_manager.persist(user=self.user)
397+
# Persist any accumulated location/endpoint status changes
398+
self.location_handler.persist()
410399
# push finding groups to jira since we only only want to push whole groups
411400
# We dont check if the finding jira sync is applicable quite yet until we can get in the loop
412401
# but this is a way to at least make it that far

0 commit comments

Comments
 (0)