Skip to content

Commit e8f1e51

Browse files
authored
Merge pull request #14408 from DefectDojo/zip-handling-consolidation
Refactor zip handling with safe_open_zip and safe_read_all_zip
2 parents 1267c72 + 285d5db commit e8f1e51

6 files changed

Lines changed: 87 additions & 21 deletions

File tree

dojo/tools/blackduck/importer.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
from collections.abc import Iterable
88
from pathlib import Path
99

10+
from dojo.tools.utils import safe_open_zip
11+
1012
from .model import BlackduckFinding
1113

1214

@@ -48,7 +50,7 @@ def _process_zipfile(self, report):
4850
files = {}
4951
security_issues = {}
5052

51-
with zipfile.ZipFile(report) as zipf:
53+
with safe_open_zip(report) as zipf:
5254
for full_file_name in zipf.namelist():
5355
file_name = full_file_name.split("/")[-1]
5456
# Backwards compatibility, newer versions of Blackduck have a source file rather

dojo/tools/blackduck_component_risk/importer.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
import zipfile
55
from pathlib import Path
66

7+
from dojo.tools.utils import safe_open_zip
8+
79
logger = logging.getLogger(__name__)
810

911

@@ -42,7 +44,7 @@ def _process_zipfile(self, report: Path) -> (dict, dict, dict):
4244
components = {}
4345
source = {}
4446
try:
45-
with zipfile.ZipFile(report) as zipf:
47+
with safe_open_zip(report) as zipf:
4648
c_file = False
4749
s_file = False
4850
for full_file_name in zipf.namelist():

dojo/tools/fortify/fpr_parser.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
import logging
22
import re
3-
import zipfile
43
from xml.etree.ElementTree import Element
54

65
from defusedxml import ElementTree
76

87
from dojo.models import Finding, Test
98
from dojo.tools.fortify.fortify_data import DescriptionData, RuleData, SnippetData, VulnerabilityData
9+
from dojo.tools.utils import safe_read_all_zip
1010

1111
logger = logging.getLogger(__name__)
1212

@@ -26,13 +26,9 @@ def __init__(self):
2626
pass
2727

2828
def parse_fpr(self, filename, test):
29-
if str(filename.__class__) == "<class '_io.TextIOWrapper'>":
30-
input_zip = zipfile.ZipFile(filename.name, "r")
31-
else:
32-
input_zip = zipfile.ZipFile(filename, "r")
3329
# Read each file from the zip artifact into a dict with the format of
3430
# filename: file_content
35-
zip_data = {name: input_zip.read(name) for name in input_zip.namelist()}
31+
zip_data = safe_read_all_zip(filename)
3632
root, self.namespaces = self.identify_root(zip_data, "audit.fvdl", "No audit.fvdl file found in the zip")
3733
audit_log, self.namespaces_audit_log = self.identify_root(zip_data, "audit.xml")
3834
return self.convert_vulnerabilities_to_findings(root, audit_log, test)

dojo/tools/ms_defender/parser.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
import json
22
import logging
3-
import zipfile
43

54
from django.conf import settings
65

76
from dojo.models import Endpoint, Finding
7+
from dojo.tools.utils import safe_read_all_zip
88
from dojo.url.models import URL
99

1010
logger = logging.getLogger(__name__)
@@ -37,12 +37,7 @@ def get_findings(self, file, test):
3737
logger.warning("Error parsing JSON file %s: %s", file.name, e)
3838
return []
3939
elif str(file.name).endswith(".zip"):
40-
if str(file.__class__) == "<class '_io.TextIOWrapper'>":
41-
input_zip = zipfile.ZipFile(file.name, "r")
42-
else:
43-
input_zip = zipfile.ZipFile(file, "r")
44-
45-
zipdata = {name: input_zip.read(name) for name in input_zip.namelist()}
40+
zipdata = safe_read_all_zip(file)
4641
vulnerabilityfiles = []
4742
machinefiles = []
4843
for content in list(zipdata):

dojo/tools/sonarqube/parser.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
import json
22
import logging
3-
import zipfile
43

54
from lxml import etree
65

76
from dojo.tools.sonarqube.sonarqube_restapi_json import SonarQubeRESTAPIJSON
87
from dojo.tools.sonarqube.sonarqube_restapi_zip import SonarQubeRESTAPIZIP
98
from dojo.tools.sonarqube.soprasteria_html import SonarQubeSoprasteriaHTML
109
from dojo.tools.sonarqube.soprasteria_json import SonarQubeSoprasteriaJSON
10+
from dojo.tools.utils import safe_read_all_zip
1111

1212
logger = logging.getLogger(__name__)
1313

@@ -38,11 +38,7 @@ def get_findings(self, file, test):
3838
return SonarQubeRESTAPIJSON().get_json_items(json_content, test, self.mode)
3939
return []
4040
if file.name.endswith(".zip"):
41-
if str(file.__class__) == "<class '_io.TextIOWrapper'>":
42-
input_zip = zipfile.ZipFile(file.name, "r")
43-
else:
44-
input_zip = zipfile.ZipFile(file, "r")
45-
zipdata = {name: input_zip.read(name) for name in input_zip.namelist()}
41+
zipdata = safe_read_all_zip(file)
4642
return SonarQubeRESTAPIZIP().get_items(zipdata, test, self.mode)
4743
parser = etree.HTMLParser()
4844
tree = etree.parse(file, parser)

dojo/tools/utils.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,83 @@
1+
import io
12
import json
23
import logging
4+
import zipfile
35

46
logger = logging.getLogger(__name__)
57

8+
# Zip bomb protection limits
9+
MAX_ZIP_MEMBERS = 1000
10+
MAX_ZIP_MEMBER_SIZE = 512 * 1024 * 1024 # 512 MB per member (uncompressed)
11+
MAX_ZIP_TOTAL_SIZE = 1 * 1024 * 1024 * 1024 # 1 GB total (uncompressed)
12+
MAX_ZIP_RATIO = 100 # max compression ratio (uncompressed / compressed)
13+
14+
15+
def safe_open_zip(file):
16+
"""
17+
Open a zip file with protection against zip bomb attacks.
18+
19+
Validates member count, per-member uncompressed size, total uncompressed
20+
size, and compression ratios using the central-directory metadata before
21+
any data is extracted.
22+
23+
Accepts a file-like object or an io.TextIOWrapper (in which case
24+
file.name is used as the path).
25+
26+
Returns an open ZipFile. Use as a context manager or call .close()
27+
explicitly when done.
28+
29+
Raises ValueError if any limit is exceeded.
30+
"""
31+
zf = zipfile.ZipFile(file.name, "r") if isinstance(file, io.TextIOWrapper) else zipfile.ZipFile(file, "r")
32+
33+
infos = zf.infolist()
34+
35+
if len(infos) > MAX_ZIP_MEMBERS:
36+
zf.close()
37+
msg = f"Zip file contains {len(infos)} members, exceeding the limit of {MAX_ZIP_MEMBERS}."
38+
raise ValueError(msg)
39+
40+
total_size = 0
41+
for info in infos:
42+
if info.file_size > MAX_ZIP_MEMBER_SIZE:
43+
zf.close()
44+
msg = (
45+
f"Zip member '{info.filename}' has uncompressed size {info.file_size} bytes, "
46+
f"exceeding the per-member limit of {MAX_ZIP_MEMBER_SIZE} bytes."
47+
)
48+
raise ValueError(msg)
49+
if info.compress_size > 0 and (info.file_size / info.compress_size) > MAX_ZIP_RATIO:
50+
zf.close()
51+
ratio = info.file_size / info.compress_size
52+
msg = (
53+
f"Zip member '{info.filename}' has a compression ratio of "
54+
f"{ratio:.1f}:1, exceeding the limit of {MAX_ZIP_RATIO}:1."
55+
)
56+
raise ValueError(msg)
57+
total_size += info.file_size
58+
if total_size > MAX_ZIP_TOTAL_SIZE:
59+
zf.close()
60+
msg = f"Zip file total uncompressed size exceeds the limit of {MAX_ZIP_TOTAL_SIZE} bytes."
61+
raise ValueError(msg)
62+
63+
return zf
64+
65+
66+
def safe_read_all_zip(file):
67+
"""
68+
Open a zip file safely and read all members into a dict {name: bytes}.
69+
70+
Applies the same zip bomb protections as safe_open_zip before reading
71+
any data.
72+
73+
Raises ValueError if any limit is exceeded.
74+
"""
75+
zf = safe_open_zip(file)
76+
try:
77+
return {name: zf.read(name) for name in zf.namelist()}
78+
finally:
79+
zf.close()
80+
681

782
def get_npm_cwe(item_node):
883
"""

0 commit comments

Comments
 (0)