Skip to content

Commit 6266610

Browse files
fix(parsers): use unsaved_tags instead of tags= in Finding constructor for performance
Passing `tags=` directly to the `Finding()` constructor triggers expensive tagulous processing for every finding. Setting `finding.unsaved_tags` after construction instead bypasses this overhead and lets the import pipeline handle tags efficiently. Affected parsers: jfrog_xray_unified, dependency_check, cargo_audit, anchore_grype, threat_composer. Benchmark on 14,219 findings: 99s -> 7.97s (about 12x faster).
1 parent: 4a3ee14 · commit: 6266610

5 files changed

Lines changed: 96 additions & 133 deletions

File tree

dojo/tools/anchore_grype/parser.py

Lines changed: 19 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,6 @@
1212

1313

1414
class AnchoreGrypeParser:
15-
1615
"""
1716
Anchore Grype JSON report format generated with `-o json` option.
1817
@@ -66,7 +65,8 @@ def get_findings(self, file, test):
6665
rel_epss = related_vulnerability.get("epss")
6766
rel_vuln_id = related_vulnerability.get("id")
6867
vulnerability_ids = self.get_vulnerability_ids(
69-
vuln_id, related_vulnerabilities,
68+
vuln_id,
69+
related_vulnerabilities,
7070
)
7171

7272
matches = item["matchDetails"]
@@ -77,37 +77,25 @@ def get_findings(self, file, test):
7777
artifact_purl = artifact.get("purl")
7878
artifact_location = artifact.get("locations")
7979
file_path = None
80-
if (
81-
artifact_location
82-
and len(artifact_location) > 0
83-
and artifact_location[0].get("path")
84-
):
80+
if artifact_location and len(artifact_location) > 0 and artifact_location[0].get("path"):
8581
file_path = artifact_location[0].get("path")
8682

8783
finding_title = f"{vuln_id} in {artifact_name}:{artifact_version}"
8884

8985
finding_tags = None
9086
finding_description = ""
9187
if vuln_namespace:
92-
finding_description += (
93-
f"**Vulnerability Namespace:** {vuln_namespace}"
94-
)
88+
finding_description += f"**Vulnerability Namespace:** {vuln_namespace}"
9589
if vuln_description:
96-
finding_description += (
97-
f"\n**Vulnerability Description:** {vuln_description}"
98-
)
90+
finding_description += f"\n**Vulnerability Description:** {vuln_description}"
9991
if rel_description and rel_description != vuln_description:
10092
finding_description += f"\n**Related Vulnerability Description:** {rel_description}"
10193
if matches:
10294
if isinstance(item["matchDetails"], dict):
103-
finding_description += (
104-
f"\n**Matcher:** {matches['matcher']}"
105-
)
95+
finding_description += f"\n**Matcher:** {matches['matcher']}"
10696
finding_tags = [matches["matcher"].replace("-matcher", "")]
10797
elif len(matches) == 1:
108-
finding_description += (
109-
f"\n**Matcher:** {matches[0]['matcher']}"
110-
)
98+
finding_description += f"\n**Matcher:** {matches[0]['matcher']}"
11199
finding_tags = [
112100
matches[0]["matcher"].replace("-matcher", ""),
113101
]
@@ -138,30 +126,22 @@ def get_findings(self, file, test):
138126

139127
finding_references = ""
140128
if vuln_datasource:
141-
finding_references += (
142-
f"**Vulnerability Datasource:** {vuln_datasource}\n"
143-
)
129+
finding_references += f"**Vulnerability Datasource:** {vuln_datasource}\n"
144130
if vuln_urls:
145131
if len(vuln_urls) == 1:
146132
if vuln_urls[0] != vuln_datasource:
147-
finding_references += (
148-
f"**Vulnerability URL:** {vuln_urls[0]}\n"
149-
)
133+
finding_references += f"**Vulnerability URL:** {vuln_urls[0]}\n"
150134
else:
151135
finding_references += "**Vulnerability URLs:**\n"
152136
for url in vuln_urls:
153137
if url != vuln_datasource:
154138
finding_references += f"- {url}\n"
155139
if rel_datasource:
156-
finding_references += (
157-
f"**Related Vulnerability Datasource:** {rel_datasource}\n"
158-
)
140+
finding_references += f"**Related Vulnerability Datasource:** {rel_datasource}\n"
159141
if rel_urls:
160142
if len(rel_urls) == 1:
161143
if rel_urls[0] != vuln_datasource:
162-
finding_references += (
163-
f"**Related Vulnerability URL:** {rel_urls[0]}\n"
164-
)
144+
finding_references += f"**Related Vulnerability URL:** {rel_urls[0]}\n"
165145
else:
166146
finding_references += "**Related Vulnerability URLs:**\n"
167147
for url in rel_urls:
@@ -202,14 +182,14 @@ def get_findings(self, file, test):
202182
component_name=artifact_name,
203183
component_version=artifact_version.replace("\x00", ""),
204184
vuln_id_from_tool=vuln_id,
205-
tags=finding_tags,
206185
static_finding=True,
207186
dynamic_finding=False,
208187
nb_occurences=1,
209188
file_path=file_path,
210189
fix_available=fix_available,
211190
fix_version=fix_version,
212191
)
192+
dupes[dupe_key].unsaved_tags = finding_tags
213193
dupes[dupe_key].unsaved_vulnerability_ids = vulnerability_ids
214194
if settings.V3_FEATURE_LOCATIONS and artifact_purl:
215195
dupes[dupe_key].unsaved_locations.append(
@@ -229,7 +209,8 @@ def get_cvss(self, cvss):
229209
vector = cvss_item["vector"]
230210
cvss_objects = cvss_parser.parse_cvss_from_text(vector)
231211
if len(cvss_objects) > 0 and isinstance(
232-
cvss_objects[0], CVSS3,
212+
cvss_objects[0],
213+
CVSS3,
233214
):
234215
return vector
235216
return None
@@ -259,8 +240,11 @@ def get_vulnerability_ids(self, vuln_id, related_vulnerabilities):
259240
if vuln_id:
260241
vulnerability_ids.append(vuln_id)
261242
if related_vulnerabilities:
262-
vulnerability_ids.extend(related_vulnerability_id for related_vulnerability in related_vulnerabilities
263-
if (related_vulnerability_id := related_vulnerability.get("id")))
243+
vulnerability_ids.extend(
244+
related_vulnerability_id
245+
for related_vulnerability in related_vulnerabilities
246+
if (related_vulnerability_id := related_vulnerability.get("id"))
247+
)
264248
if vulnerability_ids:
265249
return vulnerability_ids
266250
return None

dojo/tools/cargo_audit/parser.py

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99

1010
class CargoAuditParser:
11-
1211
"""A class that can be used to parse the cargo audit JSON report file"""
1312

1413
def get_fields(self) -> list[str]:
@@ -80,24 +79,13 @@ def get_findings(self, filename, test):
8079
vuln_id = advisory.get("id")
8180
vulnerability_ids = [advisory.get("id")]
8281
categories = f"**Categories:** {', '.join(advisory['categories'])}" if "categories" in advisory else ""
83-
description = (
84-
categories
85-
+ f"\n**Description:** `{advisory.get('description')}`"
86-
)
82+
description = categories + f"\n**Description:** `{advisory.get('description')}`"
8783

88-
if (
89-
item["affected"] is not None
90-
and "functions" in item["affected"]
91-
):
84+
if item["affected"] is not None and "functions" in item["affected"]:
9285
affected_func = [
93-
f'{func}: {", ".join(versions)}'
94-
for func, versions in item["affected"][
95-
"functions"
96-
].items()
86+
f"{func}: {', '.join(versions)}" for func, versions in item["affected"]["functions"].items()
9787
]
98-
description += (
99-
f"\n**Affected functions**: {', '.join(affected_func)}"
100-
)
88+
description += f"\n**Affected functions**: {', '.join(affected_func)}"
10189

10290
references = f"{advisory.get('url')}\n" + "\n".join(
10391
advisory["references"],
@@ -130,7 +118,6 @@ def get_findings(self, filename, test):
130118
title=title,
131119
test=test,
132120
severity=severity,
133-
tags=tags,
134121
description=description,
135122
component_name=package_name,
136123
component_version=package_version,
@@ -140,6 +127,7 @@ def get_findings(self, filename, test):
140127
references=references,
141128
mitigation=mitigation,
142129
)
130+
finding.unsaved_tags = tags
143131
finding.unsaved_vulnerability_ids = vulnerability_ids
144132
if settings.V3_FEATURE_LOCATIONS and package_name:
145133
finding.unsaved_locations.append(

0 commit comments

Comments
 (0)