Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
b67694b
Python: Remove imprecise container steps
yoff Sep 17, 2024
facb3b6
Python: recover taint for % format strings
yoff Sep 17, 2024
93e7ab5
Python: adjust test expectations
yoff Nov 1, 2024
9a18003
Python: conversion step for `format_map`
yoff Nov 11, 2024
3275c81
Python: reset test expectations
yoff Nov 11, 2024
f669a4f
Python: Make sure all imprecise taint bubbles up
yoff Nov 11, 2024
0ecca91
Python: typo
yoff Nov 13, 2024
fa9426c
Python: extra tests for comprehension
yoff Nov 13, 2024
fa758d6
python: fix test
yoff Dec 3, 2024
e877929
Update test results
owen-mc May 21, 2026
ec13e1b
Add wildcard `ContentSet`s to avoid performance problems
owen-mc May 27, 2026
80c6f08
Fix TODO in `containerStep`
owen-mc May 28, 2026
812e8e6
Add change note
owen-mc May 28, 2026
df15a71
Add a `ContentSet` for any tuple or dictionary element
owen-mc May 28, 2026
aee33a0
Add missing code for `TAnyTupleOrDictionaryElement`
owen-mc May 29, 2026
b384404
Address review comment
owen-mc May 31, 2026
ad97b6d
Use access path for `str.join` model
owen-mc Jun 2, 2026
dede5bc
Track flow through `tuple()` with list with tainted elements
owen-mc Jun 2, 2026
c3ef1dd
Add MaD models for lxml and xml etree.fromstringlist
owen-mc Jun 2, 2026
f62ebef
Adjust expected test output
owen-mc Jun 2, 2026
20ce679
Accept changed edges in test output
owen-mc Jun 2, 2026
b27d08e
Update edges in expected test output
owen-mc Jun 2, 2026
04341c4
Tweak model for str.join
owen-mc Jun 2, 2026
5042fde
Remove imprecise model for `list()`
owen-mc Jun 2, 2026
6f2cc43
Remove imprecise model for `tuple()`
owen-mc Jun 2, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions python/ql/consistency-queries/DataFlowConsistency.ql
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ private module Input implements InputSig<Location, PythonDataFlow> {
// parameter, but dataflow-consistency queries should _not_ complain about there not
// being a post-update node for the synthetic `**kwargs` parameter.
n instanceof SynthDictSplatParameterNode
or
Private::Conversions::readStep(n, _, _)
}

predicate uniqueParameterNodePositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Python taint tracking is now more precise for values flowing through container contents, such as list, set, tuple, and dictionary elements. This may remove some false positive alerts.
Original file line number Diff line number Diff line change
Expand Up @@ -753,7 +753,7 @@ predicate jumpStepNotSharedWithTypeTracker(Node nodeFrom, Node nodeTo) {
* As of 2024-04-02 the type-tracking library only supports precise content, so there is
* no reason to include steps for list content right now.
*/
predicate storeStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) {
predicate storeStepCommon(Node nodeFrom, Content c, Node nodeTo) {
tupleStoreStep(nodeFrom, c, nodeTo)
or
dictStoreStep(nodeFrom, c, nodeTo)
Expand All @@ -767,29 +767,31 @@ predicate storeStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) {
* Holds if data can flow from `nodeFrom` to `nodeTo` via an assignment to
* content `c`.
*/
predicate storeStep(Node nodeFrom, ContentSet c, Node nodeTo) {
storeStepCommon(nodeFrom, c, nodeTo)
or
listStoreStep(nodeFrom, c, nodeTo)
or
setStoreStep(nodeFrom, c, nodeTo)
or
attributeStoreStep(nodeFrom, c, nodeTo)
or
matchStoreStep(nodeFrom, c, nodeTo)
or
any(Orm::AdditionalOrmSteps es).storeStep(nodeFrom, c, nodeTo)
predicate storeStep(Node nodeFrom, ContentSet cs, Node nodeTo) {
exists(Content c | cs = singleton(c) |
storeStepCommon(nodeFrom, c, nodeTo)
or
listStoreStep(nodeFrom, c, nodeTo)
or
setStoreStep(nodeFrom, c, nodeTo)
or
attributeStoreStep(nodeFrom, c, nodeTo)
or
matchStoreStep(nodeFrom, c, nodeTo)
or
any(Orm::AdditionalOrmSteps es).storeStep(nodeFrom, c, nodeTo)
or
synthStarArgsElementParameterNodeStoreStep(nodeFrom, c, nodeTo)
or
synthDictSplatArgumentNodeStoreStep(nodeFrom, c, nodeTo)
or
yieldStoreStep(nodeFrom, c, nodeTo)
or
VariableCapture::storeStep(nodeFrom, c, nodeTo)
)
or
FlowSummaryImpl::Private::Steps::summaryStoreStep(nodeFrom.(FlowSummaryNode).getSummaryNode(), c,
FlowSummaryImpl::Private::Steps::summaryStoreStep(nodeFrom.(FlowSummaryNode).getSummaryNode(), cs,
nodeTo.(FlowSummaryNode).getSummaryNode())
or
synthStarArgsElementParameterNodeStoreStep(nodeFrom, c, nodeTo)
or
synthDictSplatArgumentNodeStoreStep(nodeFrom, c, nodeTo)
or
yieldStoreStep(nodeFrom, c, nodeTo)
or
VariableCapture::storeStep(nodeFrom, c, nodeTo)
}

/**
Expand Down Expand Up @@ -985,7 +987,7 @@ predicate attributeStoreStep(Node nodeFrom, AttributeContent c, Node nodeTo) {
/**
* Subset of `readStep` that should be shared with type-tracking.
*/
predicate readStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) {
predicate readStepCommon(Node nodeFrom, Content c, Node nodeTo) {
subscriptReadStep(nodeFrom, c, nodeTo)
or
iterableUnpackingReadStep(nodeFrom, c, nodeTo)
Expand All @@ -994,21 +996,25 @@ predicate readStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) {
/**
* Holds if data can flow from `nodeFrom` to `nodeTo` via a read of content `c`.
*/
predicate readStep(Node nodeFrom, ContentSet c, Node nodeTo) {
readStepCommon(nodeFrom, c, nodeTo)
or
matchReadStep(nodeFrom, c, nodeTo)
or
forReadStep(nodeFrom, c, nodeTo)
or
attributeReadStep(nodeFrom, c, nodeTo)
predicate readStep(Node nodeFrom, ContentSet cs, Node nodeTo) {
exists(Content c | cs = singleton(c) |
readStepCommon(nodeFrom, c, nodeTo)
or
matchReadStep(nodeFrom, c, nodeTo)
or
forReadStep(nodeFrom, c, nodeTo)
or
attributeReadStep(nodeFrom, c, nodeTo)
or
synthDictSplatParameterNodeReadStep(nodeFrom, c, nodeTo)
or
VariableCapture::readStep(nodeFrom, c, nodeTo)
)
or
FlowSummaryImpl::Private::Steps::summaryReadStep(nodeFrom.(FlowSummaryNode).getSummaryNode(), c,
FlowSummaryImpl::Private::Steps::summaryReadStep(nodeFrom.(FlowSummaryNode).getSummaryNode(), cs,
nodeTo.(FlowSummaryNode).getSummaryNode())
or
synthDictSplatParameterNodeReadStep(nodeFrom, c, nodeTo)
or
VariableCapture::readStep(nodeFrom, c, nodeTo)
Conversions::readStep(nodeFrom, cs, nodeTo)
}

/** Data flows from a sequence to a subscript of the sequence. */
Expand Down Expand Up @@ -1064,23 +1070,68 @@ predicate attributeReadStep(Node nodeFrom, AttributeContent c, AttrRead nodeTo)
nodeTo.accesses(nodeFrom, c.getAttribute())
}

module Conversions {
private import semmle.python.Concepts

predicate decoderReadStep(Node nodeFrom, ContentSet c, Node nodeTo) {
exists(Decoding decoding |
nodeFrom = decoding.getAnInput() and
nodeTo = decoding.getOutput()
) and
c.isAnyTupleOrDictionaryElement()
}

predicate encoderReadStep(Node nodeFrom, ContentSet c, Node nodeTo) {
exists(Encoding encoding |
nodeFrom = encoding.getAnInput() and
nodeTo = encoding.getOutput()
) and
c.isAnyTupleOrDictionaryElement()
}

predicate formatReadStep(Node nodeFrom, ContentSet c, Node nodeTo) {
// % formatting
exists(BinaryExprNode fmt | fmt = nodeTo.asCfgNode() |
fmt.getOp() instanceof Mod and
fmt.getRight() = nodeFrom.asCfgNode()
) and
c.isAnyTupleElement()
or
// format_map
// see https://docs.python.org/3/library/stdtypes.html#str.format_map
nodeTo.(MethodCallNode).calls(_, "format_map") and
nodeTo.(MethodCallNode).getArg(0) = nodeFrom and
c.isAnyDictionaryElement()
}

predicate readStep(Node nodeFrom, ContentSet c, Node nodeTo) {
decoderReadStep(nodeFrom, c, nodeTo)
or
encoderReadStep(nodeFrom, c, nodeTo)
or
formatReadStep(nodeFrom, c, nodeTo)
}
}

/**
* Holds if values stored inside content `c` are cleared at node `n`. For example,
* any value stored inside `f` is cleared at the pre-update node associated with `x`
* in `x.f = newValue`.
*/
predicate clearsContent(Node n, ContentSet c) {
matchClearStep(n, c)
or
attributeClearStep(n, c)
or
dictClearStep(n, c)
or
FlowSummaryImpl::Private::Steps::summaryClearsContent(n.(FlowSummaryNode).getSummaryNode(), c)
or
dictSplatParameterNodeClearStep(n, c)
predicate clearsContent(Node n, ContentSet cs) {
exists(Content c | cs = singleton(c) |
matchClearStep(n, c)
or
attributeClearStep(n, c)
or
dictClearStep(n, c)
or
dictSplatParameterNodeClearStep(n, c)
or
VariableCapture::clearsContent(n, c)
)
or
VariableCapture::clearsContent(n, c)
FlowSummaryImpl::Private::Steps::summaryClearsContent(n.(FlowSummaryNode).getSummaryNode(), cs)
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -898,19 +898,78 @@ class CapturedVariableContent extends Content, TCapturedVariableContent {
override string getMaDRepresentation() { none() }
}

/**
* An entity that represents a set of `Content`s.
*
* Most `ContentSet`s are singletons (i.e. they consist of a single `Content`),
* but `AnyDictionaryElement` and `AnyTupleElement` act as wildcards on the
* read side: a read at such a `ContentSet` matches any specific dictionary
* key / tuple index store, as well as (for dictionaries) the
* "unknown-bucket" Content `DictionaryElementAnyContent`.
*
* Keeping these as wildcard `ContentSet`s (rather than enumerating one
* `ContentSet` per key/index) keeps the dataflow `readSetEx` relation small
* when implicit reads are used (e.g. at sinks via `defaultImplicitTaintRead`).
*/
private newtype TContentSet =
TSingletonContent(Content c) or
TAnyTupleElement() or
TAnyDictionaryElement() or
TAnyTupleOrDictionaryElement()
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Needs logic in getAReadContent.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oops. Added in a new commit.


/**
* An entity that represents a set of `Content`s.
*
* The set may be interpreted differently depending on whether it is
* stored into (`getAStoreContent`) or read from (`getAReadContent`).
*/
class ContentSet instanceof Content {
class ContentSet extends TContentSet {
/** Holds if this content set is the singleton `{c}`. */
predicate isSingleton(Content c) { this = TSingletonContent(c) }

/** Holds if this content set is the wildcard for all tuple elements. */
predicate isAnyTupleElement() { this = TAnyTupleElement() }

/** Holds if this content set is the wildcard for all dictionary elements. */
predicate isAnyDictionaryElement() { this = TAnyDictionaryElement() }

/** Holds if this content set is the wildcard for all tuple elements or dictionary elements. */
predicate isAnyTupleOrDictionaryElement() { this = TAnyTupleOrDictionaryElement() }

/** Gets a content that may be stored into when storing into this set. */
Content getAStoreContent() { result = this }
Content getAStoreContent() { this = TSingletonContent(result) }

/** Gets a content that may be read from when reading from this set. */
Content getAReadContent() { result = this }
Content getAReadContent() {
this = TSingletonContent(result)
or
// Wildcard expansion: a read at "any tuple element" matches a store at any
// specific tuple index. (Stores always target a specific index, so we don't
// need a `TupleElementAnyContent` Content kind here.)
this = TAnyTupleElement() and result instanceof TupleElementContent
or
this = TAnyDictionaryElement() and
(result instanceof DictionaryElementContent or result instanceof DictionaryElementAnyContent)
or
this = TAnyTupleOrDictionaryElement() and
(
result instanceof TupleElementContent or
result instanceof DictionaryElementContent or
result instanceof DictionaryElementAnyContent
)
}

/** Gets a textual representation of this content set. */
string toString() { result = super.toString() }
string toString() {
exists(Content c | this = TSingletonContent(c) | result = c.toString())
or
this = TAnyTupleElement() and result = "Any tuple element"
or
this = TAnyDictionaryElement() and result = "Any dictionary element"
or
this = TAnyTupleOrDictionaryElement() and result = "Any tuple or dictionary element"
}
}

/** Gets the singleton `ContentSet` wrapping the `Content` `c`. */
ContentSet singleton(Content c) { result = TSingletonContent(c) }
Original file line number Diff line number Diff line change
Expand Up @@ -66,21 +66,29 @@ module Input implements InputSig<Location, DataFlowImplSpecific::PythonDataFlow>
}

string encodeContent(ContentSet cs, string arg) {
cs = TListElementContent() and result = "ListElement" and arg = ""
or
cs = TSetElementContent() and result = "SetElement" and arg = ""
or
exists(int index |
cs = TTupleElementContent(index) and result = "TupleElement" and arg = index.toString()
exists(Content c | cs.isSingleton(c) |
c = TListElementContent() and result = "ListElement" and arg = ""
or
c = TSetElementContent() and result = "SetElement" and arg = ""
or
exists(int index |
c = TTupleElementContent(index) and result = "TupleElement" and arg = index.toString()
)
or
exists(string key |
c = TDictionaryElementContent(key) and result = "DictionaryElement" and arg = key
)
or
c = TDictionaryElementAnyContent() and result = "DictionaryElementAny" and arg = ""
or
exists(string attr | c = TAttributeContent(attr) and result = "Attribute" and arg = attr)
)
or
exists(string key |
cs = TDictionaryElementContent(key) and result = "DictionaryElement" and arg = key
)
cs.isAnyTupleElement() and result = "AnyTupleElement" and arg = ""
or
cs = TDictionaryElementAnyContent() and result = "DictionaryElementAny" and arg = ""
cs.isAnyDictionaryElement() and result = "AnyDictionaryElement" and arg = ""
or
exists(string attr | cs = TAttributeContent(attr) and result = "Attribute" and arg = attr)
cs.isAnyTupleOrDictionaryElement() and result = "AnyTupleOrDictionaryElement" and arg = ""
}

bindingset[token]
Expand Down Expand Up @@ -139,27 +147,29 @@ module Private {
predicate withContent = SC::withContent/1;

/** Gets a summary component that represents a list element. */
SummaryComponent listElement() { result = content(any(ListElementContent c)) }
SummaryComponent listElement() { result = content(singleton(any(ListElementContent c))) }

/** Gets a summary component that represents a set element. */
SummaryComponent setElement() { result = content(any(SetElementContent c)) }
SummaryComponent setElement() { result = content(singleton(any(SetElementContent c))) }

/** Gets a summary component that represents a tuple element. */
SummaryComponent tupleElement(int index) {
exists(TupleElementContent c | c.getIndex() = index and result = content(c))
exists(TupleElementContent c | c.getIndex() = index and result = content(singleton(c)))
}

/** Gets a summary component that represents a dictionary element. */
SummaryComponent dictionaryElement(string key) {
exists(DictionaryElementContent c | c.getKey() = key and result = content(c))
exists(DictionaryElementContent c | c.getKey() = key and result = content(singleton(c)))
}

/** Gets a summary component that represents a dictionary element at any key. */
SummaryComponent dictionaryElementAny() { result = content(any(DictionaryElementAnyContent c)) }
SummaryComponent dictionaryElementAny() {
result = content(singleton(any(DictionaryElementAnyContent c)))
}

/** Gets a summary component that represents an attribute element. */
SummaryComponent attribute(string attr) {
exists(AttributeContent c | c.getAttribute() = attr and result = content(c))
exists(AttributeContent c | c.getAttribute() = attr and result = content(singleton(c)))
}

/** Gets a summary component that represents the return value of a call. */
Expand Down
Loading
Loading