diff --git a/python/ql/lib/change-notes/2025-12-09-add-modelling-of-zstd-compression.md b/python/ql/lib/change-notes/2025-12-09-add-modelling-of-zstd-compression.md
new file mode 100644
index 000000000000..8ec42ca0db2e
--- /dev/null
+++ b/python/ql/lib/change-notes/2025-12-09-add-modelling-of-zstd-compression.md
@@ -0,0 +1,4 @@
+---
+category: minorAnalysis
+---
+* The `compression.zstd` library (added in Python 3.14) is now supported by the `py/decompression-bomb` query.
diff --git a/python/ql/src/experimental/semmle/python/security/DecompressionBomb.qll b/python/ql/src/experimental/semmle/python/security/DecompressionBomb.qll
index a2e50d0ade5d..87a47ebeb00c 100644
--- a/python/ql/src/experimental/semmle/python/security/DecompressionBomb.qll
+++ b/python/ql/src/experimental/semmle/python/security/DecompressionBomb.qll
@@ -364,6 +364,46 @@ module Lzma {
   }
 }
 
+/** Provides decompression-bomb sinks for the `compression.zstd` module (Python 3.14+). */
+module Zstd {
+  private API::Node zstdInstance() {
+    result = API::moduleImport("compression").getMember("zstd").getMember(["ZstdFile", "open"])
+  }
+
+  /**
+   * A decompression sink of `compression.zstd`: argument 0 (or keyword `filename`) of
+   * `zstd.open(sink)` or `zstd.ZstdFile(sink)`.
+   *
+   * Only calls where no string-literal `mode` reaches the call, or where such a literal
+   * contains `r`, are sinks.
+   * NOTE(review): Python documents this parameter as `file` — confirm the `filename` keyword.
+   */
+  class DecompressionSink extends DecompressionBomb::Sink {
+    DecompressionSink() {
+      exists(API::CallNode zstdCall | zstdCall = zstdInstance().getACall() |
+        this = zstdCall.getParameter(0, "filename").asSink() and
+        (
+          not exists(
+            zstdCall
+                .getParameter(1, "mode")
+                .getAValueReachingSink()
+                .asExpr()
+                .(StringLiteral)
+                .getText()
+          ) or
+          zstdCall
+              .getParameter(1, "mode")
+              .getAValueReachingSink()
+              .asExpr()
+              .(StringLiteral)
+              .getText()
+              .matches("%r%")
+        )
+      )
+    }
+  }
+}
+
 /**
  * `io.TextIOWrapper(ip, encoding='utf-8')` like following:
  * ```python
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected
b/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected index 87b07df086fa..17c28aa1d95d 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected @@ -36,15 +36,17 @@ edges | test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:45:17:45:25 | ControlFlowNode for file_path | provenance | | | test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:49:15:49:23 | ControlFlowNode for file_path | provenance | | | test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:50:19:50:27 | ControlFlowNode for file_path | provenance | | -| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:54:40:54:48 | ControlFlowNode for file_path | provenance | | -| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:56:23:56:31 | ControlFlowNode for file_path | provenance | | -| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:57:21:57:29 | ControlFlowNode for file_path | provenance | | +| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:54:15:54:23 | ControlFlowNode for file_path | provenance | | +| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:55:19:55:27 | ControlFlowNode for file_path | provenance | | | test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:59:40:59:48 | ControlFlowNode for file_path | provenance | | -| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:60:22:60:30 | ControlFlowNode for file_path | provenance | | -| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:61:21:61:29 | ControlFlowNode for file_path | provenance | | -| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:62:42:62:50 | ControlFlowNode for file_path | provenance | | -| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:63:23:63:31 | ControlFlowNode for file_path | provenance | | -| test.py:28:26:28:34 | 
ControlFlowNode for file_path | test.py:64:36:64:44 | ControlFlowNode for file_path | provenance | | +| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:61:23:61:31 | ControlFlowNode for file_path | provenance | | +| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:62:21:62:29 | ControlFlowNode for file_path | provenance | | +| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:64:40:64:48 | ControlFlowNode for file_path | provenance | | +| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:65:22:65:30 | ControlFlowNode for file_path | provenance | | +| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:66:21:66:29 | ControlFlowNode for file_path | provenance | | +| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:67:42:67:50 | ControlFlowNode for file_path | provenance | | +| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:68:23:68:31 | ControlFlowNode for file_path | provenance | | +| test.py:28:26:28:34 | ControlFlowNode for file_path | test.py:69:36:69:44 | ControlFlowNode for file_path | provenance | | nodes | test.py:10:16:10:24 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | | test.py:11:5:11:35 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | @@ -79,15 +81,17 @@ nodes | test.py:45:17:45:25 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | | test.py:49:15:49:23 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | | test.py:50:19:50:27 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | -| test.py:54:40:54:48 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | -| test.py:56:23:56:31 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | -| test.py:57:21:57:29 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | +| test.py:54:15:54:23 | 
ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | +| test.py:55:19:55:27 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | | test.py:59:40:59:48 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | -| test.py:60:22:60:30 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | -| test.py:61:21:61:29 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | -| test.py:62:42:62:50 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | -| test.py:63:23:63:31 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | -| test.py:64:36:64:44 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | +| test.py:61:23:61:31 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | +| test.py:62:21:62:29 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | +| test.py:64:40:64:48 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | +| test.py:65:22:65:30 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | +| test.py:66:21:66:29 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | +| test.py:67:42:67:50 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | +| test.py:68:23:68:31 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | +| test.py:69:36:69:44 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path | subpaths #select | test.py:11:5:11:52 | ControlFlowNode for Attribute() | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:11:5:11:52 | ControlFlowNode for Attribute() | This uncontrolled file extraction is $@. 
| test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | @@ -107,12 +111,14 @@ subpaths | test.py:45:17:45:25 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:45:17:45:25 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | | test.py:49:15:49:23 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:49:15:49:23 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | | test.py:50:19:50:27 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:50:19:50:27 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | -| test.py:54:40:54:48 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:54:40:54:48 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | -| test.py:56:23:56:31 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:56:23:56:31 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | -| test.py:57:21:57:29 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:57:21:57:29 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. 
| test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | +| test.py:54:15:54:23 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:54:15:54:23 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | +| test.py:55:19:55:27 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:55:19:55:27 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | | test.py:59:40:59:48 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:59:40:59:48 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | -| test.py:60:22:60:30 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:60:22:60:30 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | -| test.py:61:21:61:29 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:61:21:61:29 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | -| test.py:62:42:62:50 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:62:42:62:50 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. 
| test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | -| test.py:63:23:63:31 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:63:23:63:31 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | -| test.py:64:36:64:44 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:64:36:64:44 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | +| test.py:61:23:61:31 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:61:23:61:31 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | +| test.py:62:21:62:29 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:62:21:62:29 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | +| test.py:64:40:64:48 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:64:40:64:48 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | +| test.py:65:22:65:30 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:65:22:65:30 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. 
| test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | +| test.py:66:21:66:29 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:66:21:66:29 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | +| test.py:67:42:67:50 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:67:42:67:50 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | +| test.py:68:23:68:31 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:68:23:68:31 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | +| test.py:69:36:69:44 | ControlFlowNode for file_path | test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:69:36:69:44 | ControlFlowNode for file_path | This uncontrolled file extraction is $@. | test.py:10:16:10:24 | ControlFlowNode for file_path | depends on this user controlled data | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-409/test.py b/python/ql/test/experimental/query-tests/Security/CWE-409/test.py index 06113bf9fe46..feb6b1293bb2 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-409/test.py +++ b/python/ql/test/experimental/query-tests/Security/CWE-409/test.py @@ -49,6 +49,11 @@ async def bomb(file_path): gzip.open(file_path) # $ result=BAD gzip.GzipFile(file_path) # $ result=BAD + from compression import zstd + + zstd.open(file_path) # $ result=BAD + zstd.ZstdFile(file_path).read() # $ result=BAD + import pandas pandas.read_csv(filepath_or_buffer=file_path) # $ result=BAD