Skip to content

Commit 7acce56

Browse files
progress
1 parent d0389de commit 7acce56

1 file changed

Lines changed: 311 additions & 9 deletions

File tree

unittests/test_importers_deduplication.py

Lines changed: 311 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,15 @@ def setUp(self):
3636
testuser.is_superuser = True
3737
testuser.is_staff = True
3838
testuser.save()
39-
UserContactInfo.objects.create(user=testuser, block_execution=False)
39+
UserContactInfo.objects.create(user=testuser, block_execution=True)
4040

4141
# Authenticate API client as admin for import endpoints
4242
self.login_as_admin()
4343

4444
self.system_settings(enable_webhooks_notifications=False)
4545
self.system_settings(enable_product_grade=False)
4646
self.system_settings(enable_github=False)
47+
self.system_settings(enable_deduplication=True)
4748

4849
# Warm up ContentType cache for relevant models. This is needed if we want to be able to run the test in isolation
4950
# As part of the test suite the ContentType ids will already be cached and won't affect the query count.
@@ -52,20 +53,321 @@ def setUp(self):
5253
for model in [Development_Environment, Dojo_User, Endpoint, Endpoint_Status, Engagement, Finding, Product, Product_Type, User, Test]:
5354
ContentType.objects.get_for_model(model)
5455

55-
def test_one_import_no_duplicate_findings(self):
56+
# Internal helper methods for reusable test logic
57+
def _test_single_import_no_duplicates(self, filename, scan_type, scanner_name, expected_duplicates=0):
    """Import one scan into a freshly created context and check its duplicate count.

    Args:
        filename: Scan report passed to ``import_scan_with_params``.
        scan_type: Scan type string sent along with the import.
        scanner_name: Label used to build the "PT/P/E <scanner_name> Single"
            product type, product and engagement names.
        expected_duplicates: Number of findings of the new test that must be
            flagged ``duplicate=True`` (0 by default).
    """
    self.login_as_admin()

    # Context names are unique to this helper so the import starts from scratch
    context = {
        "product_type_name": f"PT {scanner_name} Single",
        "product_name": f"P {scanner_name} Single",
        "engagement_name": f"E {scanner_name} Single",
    }
    import_result = self.import_scan_with_params(
        filename,
        scan_type=scan_type,
        minimum_severity="Info",
        active=True,
        verified=True,
        engagement=None,
        auto_create_context=True,
        **context,
    )

    imported_test = Test.objects.get(id=import_result["test"])

    # Count the findings of this test that were marked as duplicates
    duplicates = Finding.objects.filter(test=imported_test, duplicate=True)
    self.assertEqual(expected_duplicates, duplicates.count())
80+
81+
def _test_full_then_subset_duplicates(self, full_filename, subset_filename, scan_type, scanner_name, expected_duplicates):
    """Import a full scan, then a second scan into the same engagement, and check duplicate counts.

    The first import auto-creates "PT/P/E <scanner_name> Full" and is asserted
    to contain no duplicates. The second import targets the engagement created
    by the first one. ``expected_duplicates`` is then asserted three times:
    for the second test, for the whole engagement and for the whole product.
    """
    # Options that are identical for both imports
    shared_options = {
        "scan_type": scan_type,
        "minimum_severity": "Info",
        "active": True,
        "verified": True,
    }

    # Step 1: full scan into a brand-new context
    full_result = self.import_scan_with_params(
        full_filename,
        engagement=None,
        product_type_name=f"PT {scanner_name} Full",
        product_name=f"P {scanner_name} Full",
        engagement_name=f"E {scanner_name} Full",
        auto_create_context=True,
        **shared_options,
    )
    full_test = Test.objects.get(id=full_result["test"])

    # The very first import must not contain any duplicates
    self.assertEqual(0, Finding.objects.filter(test=full_test, duplicate=True).count())

    # Step 2: subset scan into the engagement created by step 1
    engagement = full_test.engagement
    subset_result = self.import_scan_with_params(
        subset_filename,
        engagement=engagement.id,  # Same engagement ID
        product_type_name=None,  # Use existing
        product_name=None,  # Use existing
        engagement_name=None,  # Use existing
        auto_create_context=False,
        **shared_options,
    )
    subset_test = Test.objects.get(id=subset_result["test"])

    # Duplicates on the second test itself
    in_subset_test = Finding.objects.filter(test=subset_test, duplicate=True)
    self.assertEqual(expected_duplicates, in_subset_test.count())

    # Duplicates across the whole engagement
    in_engagement = Finding.objects.filter(test__engagement=engagement, duplicate=True)
    self.assertEqual(expected_duplicates, in_engagement.count())

    # Duplicates across the whole product
    in_product = Finding.objects.filter(test__engagement__product=engagement.product, duplicate=True)
    self.assertEqual(expected_duplicates, in_product.count())
132+
133+
def _test_different_products_no_duplicates(self, filename, scan_type, scanner_name, expected_duplicates=0):
    """Import the same scan into two separately created products and check duplicate counts.

    Both imports auto-create their own context ("... Product A" and
    "... Product B"). ``expected_duplicates`` (0 by default) is asserted for
    each of the two tests and for each of the two products.
    """
    # Options that are identical for both imports
    shared_options = {
        "scan_type": scan_type,
        "minimum_severity": "Info",
        "active": True,
        "verified": True,
        "engagement": None,
        "auto_create_context": True,
    }

    # Import into Product A
    result_a = self.import_scan_with_params(
        filename,
        product_type_name=f"PT {scanner_name} Product A",
        product_name=f"P {scanner_name} Product A",
        engagement_name=f"E {scanner_name} Product A",
        **shared_options,
    )
    test_a = Test.objects.get(id=result_a["test"])

    # Duplicate count of the first test, checked right after its import
    self.assertEqual(
        expected_duplicates,
        Finding.objects.filter(test=test_a, duplicate=True).count(),
    )

    # Import the same file into Product B (a different product)
    result_b = self.import_scan_with_params(
        filename,
        product_type_name=f"PT {scanner_name} Product B",
        product_name=f"P {scanner_name} Product B",
        engagement_name=f"E {scanner_name} Product B",
        **shared_options,
    )
    test_b = Test.objects.get(id=result_b["test"])

    # Duplicate count of the second test (different products don't deduplicate)
    self.assertEqual(
        expected_duplicates,
        Finding.objects.filter(test=test_b, duplicate=True).count(),
    )

    # Product-wide duplicate counts, first product then second product
    product_a = test_a.engagement.product
    self.assertEqual(
        expected_duplicates,
        Finding.objects.filter(test__engagement__product=product_a, duplicate=True).count(),
    )

    product_b = test_b.engagement.product
    self.assertEqual(
        expected_duplicates,
        Finding.objects.filter(test__engagement__product=product_b, duplicate=True).count(),
    )
184+
185+
def _test_same_product_different_engagements_duplicates(self, filename, scan_type, scanner_name, expected_duplicates):
    """Import the same scan into two engagements of one product and check duplicate counts.

    The first import auto-creates "PT/P <scanner_name> SameProd" with
    engagement "E <scanner_name> SameProd 1". The second import reuses the
    product name with engagement "E <scanner_name> SameProd 2".

    Args:
        filename: Scan report passed to ``import_scan_with_params`` (used twice).
        scan_type: Scan type string sent along with both imports.
        scanner_name: Label used to build the context names.
        expected_duplicates: Number of findings that must be flagged
            ``duplicate=True`` on the second test and in the product overall.
    """
    # First import: into Engagement 1
    response_json = self.import_scan_with_params(
        filename,
        scan_type=scan_type,
        minimum_severity="Info",
        active=True,
        verified=True,
        engagement=None,
        product_type_name=f"PT {scanner_name} SameProd",
        product_name=f"P {scanner_name} SameProd",
        engagement_name=f"E {scanner_name} SameProd 1",
        auto_create_context=True,
    )
    first_test = Test.objects.get(id=response_json["test"])

    # Second import: into Engagement 2 (same product)
    response_json = self.import_scan_with_params(
        filename,
        scan_type=scan_type,
        minimum_severity="Info",
        active=True,
        verified=True,
        engagement=None,
        product_type_name=None,  # Use existing
        product_name=f"P {scanner_name} SameProd",  # Same product
        engagement_name=f"E {scanner_name} SameProd 2",  # Different engagement
        auto_create_context=True,
    )
    # Keep the second test instead of discarding it, so the helper can verify
    # where the duplicates ended up (consistent with the sibling helpers).
    second_test = Test.objects.get(id=response_json["test"])

    # The second test should contain the expected duplicates
    second_test_dup_count = Finding.objects.filter(test=second_test, duplicate=True).count()
    self.assertEqual(expected_duplicates, second_test_dup_count)

    # Product should have expected duplicates total
    prod_dup_count = Finding.objects.filter(test__engagement__product=first_test.engagement.product, duplicate=True).count()
    self.assertEqual(expected_duplicates, prod_dup_count)
220+
221+
def _test_same_product_different_engagements_dedupe_on_engagements_no_duplicates(self, filename, scan_type, scanner_name, expected_duplicates=0):
    """Import the same scan into two engagements of one product with engagement-level dedupe enabled.

    After the first import, ``deduplication_on_engagement`` is switched on for
    the engagement it created. The same file is then imported into a second
    engagement of the same product. ``expected_duplicates`` (0 by default) is
    asserted for the second test and for the product as a whole.
    """
    # Options that are identical for both imports
    shared_options = {
        "scan_type": scan_type,
        "minimum_severity": "Info",
        "active": True,
        "verified": True,
        "engagement": None,
        "auto_create_context": True,
    }

    # Import into Engagement A
    result_a = self.import_scan_with_params(
        filename,
        product_type_name=f"PT {scanner_name} DedupeEng",
        product_name=f"P {scanner_name} DedupeEng",
        engagement_name=f"E {scanner_name} DedupeEng A",
        **shared_options,
    )
    test_a = Test.objects.get(id=result_a["test"])

    # Turn on engagement-scoped deduplication for Engagement A and persist it
    engagement_a = test_a.engagement
    engagement_a.deduplication_on_engagement = True
    engagement_a.save()

    # Import into Engagement B (same product, different engagement)
    result_b = self.import_scan_with_params(
        filename,
        product_type_name=None,  # Use existing
        product_name=f"P {scanner_name} DedupeEng",  # Same product
        engagement_name=f"E {scanner_name} DedupeEng B",  # Different engagement
        **shared_options,
    )
    test_b = Test.objects.get(id=result_b["test"])

    # Duplicates on the second test, given deduplication_on_engagement is True on Engagement A
    in_second_test = Finding.objects.filter(test=test_b, duplicate=True)
    self.assertEqual(expected_duplicates, in_second_test.count())

    # Duplicates across the whole product
    in_product = Finding.objects.filter(test__engagement__product=engagement_a.product, duplicate=True)
    self.assertEqual(expected_duplicates, in_product.count())
264+
265+
# Test cases for ZAP (LEGACY algorithm)
266+
def test_zap_single_import_no_duplicates(self):
    """A single ZAP import (LEGACY algorithm) must yield 0 duplicate findings."""
    self._test_single_import_no_duplicates(
        filename="scans/zap/5_zap_sample_one.xml",
        scan_type="ZAP Scan",
        scanner_name="ZAP",
    )
269+
270+
def test_zap_full_then_subset_duplicates(self):
    """Importing the ZAP scan twice into one engagement must yield 2 duplicates."""
    # No dedicated subset report exists yet, so the same file serves as both
    # the full and the subset scan: it is imported twice into one engagement.
    scan_file = "scans/zap/5_zap_sample_one.xml"
    self._test_full_then_subset_duplicates(
        full_filename=scan_file,
        subset_filename=scan_file,
        scan_type="ZAP Scan",
        scanner_name="ZAP",
        expected_duplicates=2,
    )
275+
276+
def test_zap_different_products_no_duplicates(self):
    """The same ZAP scan imported into two products must yield 0 duplicates."""
    self._test_different_products_no_duplicates(
        filename="scans/zap/5_zap_sample_one.xml",
        scan_type="ZAP Scan",
        scanner_name="ZAP",
    )
279+
280+
def test_zap_same_product_different_engagements_duplicates(self):
    """The same ZAP scan in two engagements of one product must yield 2 duplicates."""
    self._test_same_product_different_engagements_duplicates(
        filename="scans/zap/5_zap_sample_one.xml",
        scan_type="ZAP Scan",
        scanner_name="ZAP",
        expected_duplicates=2,
    )
283+
284+
def test_zap_same_product_different_engagements_dedupe_on_engagements_no_duplicates(self):
    """With dedupe_on_engagements, the ZAP scan in two engagements of one product must yield 0 duplicates."""
    self._test_same_product_different_engagements_dedupe_on_engagements_no_duplicates(
        filename="scans/zap/5_zap_sample_one.xml",
        scan_type="ZAP Scan",
        scanner_name="ZAP",
    )
287+
288+
# Test cases for Checkmarx (UNIQUE_ID_FROM_TOOL algorithm)
289+
def test_checkmarx_single_import_no_duplicates(self):
    """A single Checkmarx import (UNIQUE_ID_FROM_TOOL algorithm) must yield 0 duplicate findings."""
    self._test_single_import_no_duplicates(
        filename="scans/checkmarx/single_finding.xml",
        scan_type="Checkmarx Scan detailed",
        scanner_name="Checkmarx",
    )
292+
293+
def test_checkmarx_full_then_subset_duplicates(self):
    """Importing the Checkmarx scan twice into one engagement must yield 1 duplicate."""
    # The same file currently serves as both the full and the subset scan
    scan_file = "scans/checkmarx/single_finding.xml"
    self._test_full_then_subset_duplicates(
        full_filename=scan_file,
        subset_filename=scan_file,
        scan_type="Checkmarx Scan detailed",
        scanner_name="Checkmarx",
        expected_duplicates=1,
    )
297+
298+
def test_checkmarx_different_products_no_duplicates(self):
    """The same Checkmarx scan imported into two products must yield 0 duplicates."""
    self._test_different_products_no_duplicates(
        filename="scans/checkmarx/single_finding.xml",
        scan_type="Checkmarx Scan detailed",
        scanner_name="Checkmarx",
    )
301+
302+
def test_checkmarx_same_product_different_engagements_duplicates(self):
    """The same Checkmarx scan in two engagements of one product must yield 1 duplicate."""
    self._test_same_product_different_engagements_duplicates(
        filename="scans/checkmarx/single_finding.xml",
        scan_type="Checkmarx Scan detailed",
        scanner_name="Checkmarx",
        expected_duplicates=1,
    )
305+
306+
def test_checkmarx_same_product_different_engagements_dedupe_on_engagements_no_duplicates(self):
    """With dedupe_on_engagements, the Checkmarx scan in two engagements of one product must yield 0 duplicates."""
    self._test_same_product_different_engagements_dedupe_on_engagements_no_duplicates(
        filename="scans/checkmarx/single_finding.xml",
        scan_type="Checkmarx Scan detailed",
        scanner_name="Checkmarx",
    )
309+
310+
# Test cases for Trivy (HASH_CODE algorithm)
311+
def test_trivy_single_import_no_duplicates(self):
    """A single Trivy import (HASH_CODE algorithm) must yield 0 duplicate findings."""
    self._test_single_import_no_duplicates(
        filename="scans/trivy/issue_9092.json",
        scan_type="Trivy Scan",
        scanner_name="Trivy",
    )
314+
315+
def test_trivy_full_then_subset_duplicates(self):
    """Importing the Trivy scan twice into one engagement must yield 1 duplicate."""
    # The same file currently serves as both the full and the subset scan
    scan_file = "scans/trivy/issue_9092.json"
    self._test_full_then_subset_duplicates(
        full_filename=scan_file,
        subset_filename=scan_file,
        scan_type="Trivy Scan",
        scanner_name="Trivy",
        expected_duplicates=1,
    )
319+
320+
def test_trivy_different_products_no_duplicates(self):
    """The same Trivy scan imported into two products must yield 0 duplicates."""
    self._test_different_products_no_duplicates(
        filename="scans/trivy/issue_9092.json",
        scan_type="Trivy Scan",
        scanner_name="Trivy",
    )
323+
324+
def test_trivy_same_product_different_engagements_duplicates(self):
    """The same Trivy scan in two engagements of one product must yield 1 duplicate."""
    self._test_same_product_different_engagements_duplicates(
        filename="scans/trivy/issue_9092.json",
        scan_type="Trivy Scan",
        scanner_name="Trivy",
        expected_duplicates=1,
    )
327+
328+
def test_trivy_same_product_different_engagements_dedupe_on_engagements_no_duplicates(self):
    """With dedupe_on_engagements, the Trivy scan in two engagements of one product must yield 0 duplicates."""
    self._test_same_product_different_engagements_dedupe_on_engagements_no_duplicates(
        filename="scans/trivy/issue_9092.json",
        scan_type="Trivy Scan",
        scanner_name="Trivy",
    )
331+
332+
# Test cases for Veracode (UNIQUE_ID_FROM_TOOL_OR_HASH_CODE algorithm)
333+
def test_veracode_single_import_no_duplicates(self):
    """A single Veracode import (UNIQUE_ID_FROM_TOOL_OR_HASH_CODE algorithm) must yield 0 duplicate findings."""
    self._test_single_import_no_duplicates(
        filename="scans/veracode/one_finding.xml",
        scan_type="Veracode Scan",
        scanner_name="Veracode",
    )
336+
337+
def test_veracode_full_then_subset_duplicates(self):
    """Importing the Veracode scan twice into one engagement must yield 1 duplicate."""
    # The same file currently serves as both the full and the subset scan
    scan_file = "scans/veracode/one_finding.xml"
    self._test_full_then_subset_duplicates(
        full_filename=scan_file,
        subset_filename=scan_file,
        scan_type="Veracode Scan",
        scanner_name="Veracode",
        expected_duplicates=1,
    )
341+
342+
def test_veracode_different_products_no_duplicates(self):
    """The same Veracode scan imported into two products must yield 0 duplicates."""
    self._test_different_products_no_duplicates(
        filename="scans/veracode/one_finding.xml",
        scan_type="Veracode Scan",
        scanner_name="Veracode",
    )
345+
346+
def test_veracode_same_product_different_engagements_duplicates(self):
    """The same Veracode scan in two engagements of one product must yield 1 duplicate."""
    self._test_same_product_different_engagements_duplicates(
        filename="scans/veracode/one_finding.xml",
        scan_type="Veracode Scan",
        scanner_name="Veracode",
        expected_duplicates=1,
    )
349+
350+
def test_veracode_same_product_different_engagements_dedupe_on_engagements_no_duplicates(self):
    """With dedupe_on_engagements, the Veracode scan in two engagements of one product must yield 0 duplicates."""
    self._test_same_product_different_engagements_dedupe_on_engagements_no_duplicates(
        filename="scans/veracode/one_finding.xml",
        scan_type="Veracode Scan",
        scanner_name="Veracode",
    )
353+
354+
# Test cases for StackHawk (HASH_CODE algorithm)
355+
def test_stackhawk_single_import_no_duplicates(self):
    """A single StackHawk import (HASH_CODE algorithm) must yield 0 duplicate findings."""
    self._test_single_import_no_duplicates(
        filename="scans/stackhawk/stackhawk_many_vul_without_duplicated_findings.json",
        scan_type="StackHawk HawkScan",
        scanner_name="StackHawk",
    )
358+
359+
def test_stackhawk_full_then_subset_duplicates(self):
    """A full StackHawk scan followed by its subset in one engagement must yield 5 duplicates."""
    self._test_full_then_subset_duplicates(
        full_filename="scans/stackhawk/stackhawk_many_vul_without_duplicated_findings.json",
        subset_filename="scans/stackhawk/stackhawk_many_vul_without_duplicated_findings_subset.json",
        scan_type="StackHawk HawkScan",
        scanner_name="StackHawk",
        expected_duplicates=5,
    )
362+
363+
def test_stackhawk_different_products_no_duplicates(self):
    """The same StackHawk scan imported into two products must yield 0 duplicates."""
    self._test_different_products_no_duplicates(
        filename="scans/stackhawk/stackhawk_many_vul_without_duplicated_findings.json",
        scan_type="StackHawk HawkScan",
        scanner_name="StackHawk",
    )
366+
367+
def test_stackhawk_same_product_different_engagements_duplicates(self):
    """The same StackHawk scan in two engagements of one product must yield 6 duplicates."""
    self._test_same_product_different_engagements_duplicates(
        filename="scans/stackhawk/stackhawk_many_vul_without_duplicated_findings.json",
        scan_type="StackHawk HawkScan",
        scanner_name="StackHawk",
        expected_duplicates=6,
    )
370+
371+
def test_stackhawk_same_product_different_engagements_dedupe_on_engagements_no_duplicates(self):
    """With dedupe_on_engagements, the StackHawk scan in two engagements of one product must yield 0 duplicates."""
    self._test_same_product_different_engagements_dedupe_on_engagements_no_duplicates(
        filename="scans/stackhawk/stackhawk_many_vul_without_duplicated_findings.json",
        scan_type="StackHawk HawkScan",
        scanner_name="StackHawk",
    )

0 commit comments

Comments
 (0)