close
The Wayback Machine - https://web.archive.org/web/20220103132756/https://github.com/github/codeql/pull/7475/files
Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JS: Improve performance of ATM queries on large databases #7475

Draft
wants to merge 8 commits into
base: main
Choose a base branch
from
@@ -53,7 +53,11 @@
/**
* Holds if `n` appears to be a numeric value.
*/
predicate isNumeric(DataFlow::Node n) { isReadFrom(n, ".*index.*") }
// Performance optimisation: This predicate operates on a large set of
// starting nodes, so use binding hints to suggest computing that set last.
predicate isNumeric(DataFlow::Node n) {
getAnAccessedName(pragma[only_bind_into](n)).regexpMatch(".*index.*")

Check failure on line 59 in javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/StandardEndpointFilters.qll

Code scanning

Inefficient string comparison Error

Use matches("%index%") instead
}

/**
* Holds if `n` is an argument to a library without sinks.
@@ -462,15 +462,15 @@ module AccessPath {
ReachableBasicBlock bb, Root root, string path, int ranking, AccessPathKind type
) {
result =
rank[ranking](ControlFlowNode ref |
rank[ranking](ControlFlowNode ref, int i |
ref = getAccessTo(root, path, _) and
ref.getBasicBlock() = bb and
ref = bb.getNode(i) and
// Prunes the accesses where there does not exists a read and write within the same basicblock.
// This could be more precise, but doing it like this avoids massive joins.
hasRead(bb) and
hasWrite(bb)
|
ref order by any(int i | ref = bb.getNode(i))
ref order by i
) and
result = getAccessTo(root, path, type)
}
@@ -492,7 +492,7 @@ module AccessPath {
*/
pragma[noinline]
private predicate hasWrite(ReachableBasicBlock bb) {
bb = getAccessTo(_, _, AccessPathRead()).getBasicBlock()
bb = getAccessTo(_, _, AccessPathWrite()).getBasicBlock()
}

/**
@@ -565,9 +565,12 @@ module AccessPath {
)
or
// across basic blocks.
exists(Root root, string path |
exists(Root root, string path, ReachableBasicBlock readBlock |
read.asExpr() = getAccessTo(root, path, AccessPathRead()) and
getAWriteBlock(root, path).strictlyDominates(read.getBasicBlock())
readBlock = read.getBasicBlock() and
// Performance optimisation: check that `read` is in a *reachable* basic block
// before looking for a dominating write block.
getAWriteBlock(root, path).strictlyDominates(pragma[only_bind_out](readBlock))
)
}
}
@@ -20,7 +20,7 @@
* Gets an abstract value that may be assigned to this variable.
*/
pragma[nomagic]
AbstractValue getALocalValue() { result = getADef().getAnAssignedValue() }

Check warning on line 23 in javascript/ql/lib/semmle/javascript/dataflow/internal/VariableTypeInference.qll

Code scanning

Using implicit `this` Warning

Use of implicit this.

/**
* Gets a definition of this variable.
@@ -44,7 +44,7 @@
private class SsaDefinitionWithNonLocalFlow extends SsaExplicitDefinition {
CallWithNonLocalAnalyzedReturnFlow source;

SsaDefinitionWithNonLocalFlow() { source = getDef().getSource().flow() }

Check warning on line 47 in javascript/ql/lib/semmle/javascript/dataflow/internal/VariableTypeInference.qll

Code scanning

Using implicit `this` Warning

Use of implicit this.

CallWithNonLocalAnalyzedReturnFlow getSource() { result = source }
}
@@ -84,10 +84,10 @@
* cannot be analyzed completely.
*/
AbstractValue getAnAssignedValue() {
result = getAnRhsValue()

Check warning on line 87 in javascript/ql/lib/semmle/javascript/dataflow/internal/VariableTypeInference.qll

Code scanning

Using implicit `this` Warning

Use of implicit this.
or
exists(DataFlow::Incompleteness cause |
isIncomplete(cause) and result = TIndefiniteAbstractValue(cause)

Check warning on line 90 in javascript/ql/lib/semmle/javascript/dataflow/internal/VariableTypeInference.qll

Code scanning

Using implicit `this` Warning

Use of implicit this.
)
}

@@ -96,7 +96,7 @@
* may evaluate to.
*/
AbstractValue getAnRhsValue() {
result = getRhs().getALocalValue()

Check warning on line 99 in javascript/ql/lib/semmle/javascript/dataflow/internal/VariableTypeInference.qll

Code scanning

Using implicit `this` Warning

Use of implicit this.
or
this = any(ForInStmt fis).getIteratorExpr() and result = abstractValueOfType(TTString())
or
@@ -109,7 +109,7 @@
* this `VarDef`.
*/
DataFlow::AnalyzedNode getRhs() {
result = getSource().analyze() and getTarget() instanceof VarRef

Check warning on line 112 in javascript/ql/lib/semmle/javascript/dataflow/internal/VariableTypeInference.qll

Code scanning

Using implicit `this` Warning

Use of implicit this.

Check warning on line 112 in javascript/ql/lib/semmle/javascript/dataflow/internal/VariableTypeInference.qll

Code scanning

Using implicit `this` Warning

Use of implicit this.
or
result.asExpr() = this.(CompoundAssignExpr)
or
@@ -132,7 +132,7 @@
or
exists(ComprehensionBlock cb | this = cb.getIterator()) and cause = "yield"
or
getTarget() instanceof DestructuringPattern and cause = "heap"

Check warning on line 135 in javascript/ql/lib/semmle/javascript/dataflow/internal/VariableTypeInference.qll

Code scanning

Using implicit `this` Warning

Use of implicit this.
}

/**
@@ -199,9 +199,9 @@
*/
private class AnalyzedExplicitDefinition extends AnalyzedSsaDefinition, SsaExplicitDefinition {
override AbstractValue getAnRhsValue() {
result = getDef().(AnalyzedVarDef).getAnAssignedValue()

Check warning on line 202 in javascript/ql/lib/semmle/javascript/dataflow/internal/VariableTypeInference.qll

Code scanning

Using implicit `this` Warning

Use of implicit this.
or
result = getRhsNode().analyze().getALocalValue()

Check warning on line 204 in javascript/ql/lib/semmle/javascript/dataflow/internal/VariableTypeInference.qll

Code scanning

Using implicit `this` Warning

Use of implicit this.
}
}

@@ -209,7 +209,7 @@
* Flow analysis for SSA definitions corresponding to implicit variable initialization.
*/
private class AnalyzedImplicitInit extends AnalyzedSsaDefinition, SsaImplicitInit {
override AbstractValue getAnRhsValue() { result = getImplicitInitValue(getSourceVariable()) }

Check warning on line 212 in javascript/ql/lib/semmle/javascript/dataflow/internal/VariableTypeInference.qll

Code scanning

Using implicit `this` Warning

Use of implicit this.
}

/**
@@ -217,7 +217,7 @@
*/
private class AnalyzedVariableCapture extends AnalyzedSsaDefinition, SsaVariableCapture {
override AbstractValue getAnRhsValue() {
exists(LocalVariable v | v = getSourceVariable() |
exists(LocalVariable v | v = this.getSourceVariable() |
result = v.(AnalyzedCapturedVariable).getALocalValue()
or
result = any(AnalyzedExplicitDefinition def | def.getSourceVariable() = v).getAnRhsValue()
@@ -232,7 +232,7 @@
*/
private class AnalyzedPhiNode extends AnalyzedSsaDefinition, SsaPhiNode {
override AbstractValue getAnRhsValue() {
result = getAnInput().(AnalyzedSsaDefinition).getAnRhsValue()

Check warning on line 235 in javascript/ql/lib/semmle/javascript/dataflow/internal/VariableTypeInference.qll

Code scanning

Using implicit `this` Warning

Use of implicit this.
}
}

@@ -242,14 +242,14 @@
class AnalyzedRefinement extends AnalyzedSsaDefinition, SsaRefinementNode {
override AbstractValue getAnRhsValue() {
// default implementation: don't refine
result = getAnInputRhsValue()

Check warning on line 245 in javascript/ql/lib/semmle/javascript/dataflow/internal/VariableTypeInference.qll

Code scanning

Using implicit `this` Warning

Use of implicit this.
}

/**
* Gets an abstract value that one of the inputs of this refinement may evaluate to.
*/
AbstractValue getAnInputRhsValue() {
result = getAnInput().(AnalyzedSsaDefinition).getAnRhsValue()

Check warning on line 252 in javascript/ql/lib/semmle/javascript/dataflow/internal/VariableTypeInference.qll

Code scanning

Using implicit `this` Warning

Use of implicit this.
}
}

@@ -260,7 +260,7 @@
* into sets of more precise abstract values to enable them to be refined.
*/
class AnalyzedConditionGuard extends AnalyzedRefinement {
AnalyzedConditionGuard() { getGuard() instanceof ConditionGuardNode }

Check warning on line 263 in javascript/ql/lib/semmle/javascript/dataflow/internal/VariableTypeInference.qll

Code scanning

Using implicit `this` Warning

Use of implicit this.

override AbstractValue getAnInputRhsValue() {
exists(AbstractValue input | input = super.getAnInputRhsValue() |
@@ -278,13 +278,13 @@
* the beginning of `s` to those that are truthy.
*/
class AnalyzedPositiveConditionGuard extends AnalyzedRefinement {
AnalyzedPositiveConditionGuard() { getGuard().(ConditionGuardNode).getOutcome() = true }

Check warning on line 281 in javascript/ql/lib/semmle/javascript/dataflow/internal/VariableTypeInference.qll

Code scanning

Using implicit `this` Warning

Use of implicit this.

override AbstractValue getAnRhsValue() {
result = getAnInputRhsValue() and

Check warning on line 284 in javascript/ql/lib/semmle/javascript/dataflow/internal/VariableTypeInference.qll

Code scanning

Using implicit `this` Warning

Use of implicit this.
exists(RefinementContext ctxt |
ctxt = TVarRefinementContext(this, getSourceVariable(), result) and

Check warning on line 286 in javascript/ql/lib/semmle/javascript/dataflow/internal/VariableTypeInference.qll

Code scanning

Using implicit `this` Warning

Use of implicit this.
getRefinement().eval(ctxt).getABooleanValue() = true

Check warning on line 287 in javascript/ql/lib/semmle/javascript/dataflow/internal/VariableTypeInference.qll

Code scanning

Using implicit `this` Warning

Use of implicit this.
)
}
}
@@ -296,13 +296,13 @@
* the beginning of `t` to those that are falsy.
*/
class AnalyzedNegativeConditionGuard extends AnalyzedRefinement {
AnalyzedNegativeConditionGuard() { getGuard().(ConditionGuardNode).getOutcome() = false }

Check warning on line 299 in javascript/ql/lib/semmle/javascript/dataflow/internal/VariableTypeInference.qll

Code scanning

Using implicit `this` Warning

Use of implicit this.

override AbstractValue getAnRhsValue() {
result = getAnInputRhsValue() and

Check warning on line 302 in javascript/ql/lib/semmle/javascript/dataflow/internal/VariableTypeInference.qll

Code scanning

Using implicit `this` Warning

Use of implicit this.
exists(RefinementContext ctxt |
ctxt = TVarRefinementContext(this, getSourceVariable(), result) and

Check warning on line 304 in javascript/ql/lib/semmle/javascript/dataflow/internal/VariableTypeInference.qll

Code scanning

Using implicit `this` Warning

Use of implicit this.
getRefinement().eval(ctxt).getABooleanValue() = false

Check warning on line 305 in javascript/ql/lib/semmle/javascript/dataflow/internal/VariableTypeInference.qll

Code scanning

Using implicit `this` Warning

Use of implicit this.
)
}
}
@@ -391,7 +391,7 @@
* of the global object.
*/
private DataFlow::PropWrite getAnAssigningPropWrite() {
result.getPropertyName() = getVariableName() and

Check warning on line 394 in javascript/ql/lib/semmle/javascript/dataflow/internal/VariableTypeInference.qll

Code scanning

Using implicit `this` Warning

Use of implicit this.
result.getBase().analyze().getALocalValue() instanceof AbstractGlobalObject
}

@@ -402,7 +402,7 @@
override AbstractValue getALocalValue() {
result = super.getALocalValue()
or
result = getAnAssigningPropWrite().getRhs().analyze().getALocalValue()

Check warning on line 405 in javascript/ql/lib/semmle/javascript/dataflow/internal/VariableTypeInference.qll

Code scanning

Using implicit `this` Warning

Use of implicit this.
or
result = agv.getAnAssignedValue()
}
@@ -670,8 +670,8 @@
abstract DataFlow::InvokeNode getAnInvocation();

override predicate argumentPassing(Parameter p, Expr arg) {
exists(DataFlow::InvokeNode invk, int argIdx | invk = getAnInvocation() |

Check warning on line 673 in javascript/ql/lib/semmle/javascript/dataflow/internal/VariableTypeInference.qll

Code scanning

Using implicit `this` Warning

Use of implicit this.
p = getParameter(argIdx) and

Check warning on line 674 in javascript/ql/lib/semmle/javascript/dataflow/internal/VariableTypeInference.qll

Code scanning

Using implicit `this` Warning

Use of implicit this.
not p.isRestParameter() and
arg = invk.getArgument(argIdx).asExpr()
)
@@ -679,13 +679,13 @@

override predicate mayReceiveArgument(Parameter p) {
exists(int argIdx |
p = getParameter(argIdx) and

Check warning on line 682 in javascript/ql/lib/semmle/javascript/dataflow/internal/VariableTypeInference.qll

Code scanning

Using implicit `this` Warning

Use of implicit this.
getAnInvocation().getNumArgument() > argIdx

Check warning on line 683 in javascript/ql/lib/semmle/javascript/dataflow/internal/VariableTypeInference.qll

Code scanning

Using implicit `this` Warning

Use of implicit this.
)
or
// All parameters may receive an argument if invoked with a spread argument
p = getAParameter() and

Check warning on line 687 in javascript/ql/lib/semmle/javascript/dataflow/internal/VariableTypeInference.qll

Code scanning

Using implicit `this` Warning

Use of implicit this.
getAnInvocation().asExpr().(InvokeExpr).isSpreadArgument(_)

Check warning on line 688 in javascript/ql/lib/semmle/javascript/dataflow/internal/VariableTypeInference.qll

Code scanning

Using implicit `this` Warning

Use of implicit this.
}
}

@@ -56,9 +56,7 @@ predicate isGeneratedCodeFile(File f) { isGenerated(f.getATopLevel()) }
predicate isTestFile(File f) {
exists(Test t | t.getFile() = f)
or
exists(string stemExt | stemExt = "test" or stemExt = "spec" |
f = getTestFile(any(File orig), stemExt)
)
f = getATestFile(_)
or
f.getAbsolutePath().regexpMatch(".*/__(mocks|tests)__/.*")
}
@@ -40,14 +40,41 @@ class BDDTest extends Test, @call_expr {

/**
* Gets the test file for `f` with stem extension `stemExt`.
* That is, a file named file named `<base>.<stemExt>.<ext>` in the
* That is, a file named `<base>.<stemExt>.<ext>` in the
* same directory as `f` which is named `<base>.<ext>`.
*/
bindingset[stemExt]
File getTestFile(File f, string stemExt) {
result = f.getParentContainer().getFile(f.getStem() + "." + stemExt + "." + f.getExtension())
}

/**
* Gets a test file for `f`.
* That is, a file named `<base>.<stemExt>.<ext>` in the
* same directory as `f`, where `f` is named `<base>.<ext>` and
* `<stemExt>` is a well-known test file identifier, such as `test` or `spec`.
*/
File getATestFile(File f) {
result = f.getParentContainer().getFile(getATestFileName(f))
}

/**
* Gets a name of a test file for `f`.
* That is, `<base>.<stemExt>.<ext>` where
* `f` is named `<base>.<ext>` and `<stemExt>` is
* a well-known test file identifier, such as `test` or `spec`.
*/
// Helper predicate factored out for performance.
// This predicate is linear in the size of f, and forces
// callers to join only once against f rather than two separate joins
// when computing the stem and the extension.
// This loses some flexibility because callers cannot specify
// an arbitrary stemExt.
pragma[nomagic]
private string getATestFileName(File f) {
result = f.getStem() + "." + ["test", "spec"] + "." + f.getExtension()
}

/**
* A Jest test, that is, an invocation of a global function named
* `test` where the first argument is a string and the second
@@ -16,15 +16,23 @@ import javascript
*/
bindingset[regexp]
predicate isReadFrom(DataFlow::Node read, string regexp) {
getAnAccessedName(read).regexpMatch(regexp)
}

/**
* Gets the "name" accessed by `read`. The "name" is one of:
* - the name of the read variable, if `read` is a variable read
* - the name of the read property, if `read` is a property read
* - the suffix of the getter-method name, if `read` is a getter invocation, for example "Number" in "getNumber"
*/
string getAnAccessedName(DataFlow::Node read) {
exists(DataFlow::Node actualRead |
actualRead = read.asExpr().getUnderlyingValue().(LogOrExpr).getAnOperand().flow() or // unfold `x || y` once
actualRead = read
|
exists(string name | name.regexpMatch(regexp) |
actualRead.asExpr().getUnderlyingValue().(VarAccess).getName() = name or
actualRead.(DataFlow::PropRead).getPropertyName() = name or
actualRead.(DataFlow::InvokeNode).getCalleeName() = "get" + name
)
actualRead.asExpr().getUnderlyingValue().(VarAccess).getName() = result or
actualRead.(DataFlow::PropRead).getPropertyName() = result or
actualRead.(DataFlow::InvokeNode).getCalleeName() = "get" + result
)
}

@@ -50,8 +50,12 @@ module CorsMisconfigurationForCredentials {
|
routeHandler.getAResponseHeader(_) = origin and
routeHandler.getAResponseHeader(_) = credentials and
origin.definesExplicitly("access-control-allow-origin", this.asExpr()) and
credentials.definesExplicitly("access-control-allow-credentials", credentialsValue)
// Performance optimisation: start with the set of all route handlers
// rather than the set of all exprs.
pragma[only_bind_into](origin)
.definesExplicitly("access-control-allow-origin", this.asExpr()) and
pragma[only_bind_into](credentials)
.definesExplicitly("access-control-allow-credentials", credentialsValue)
|
credentialsValue.mayHaveBooleanValue(true) or
credentialsValue.mayHaveStringValue("true")
@@ -437,8 +437,27 @@ module DomBasedXss {
b = phi.getAnInput().getDefinition() and
count(phi.getAnInput()) = 2 and
not a = b and
sanitizer = DataFlow::valueNode(a.getDef().getSource()) and
sanitizer.getAnArgument().asExpr().(VarAccess).getVariable() = b.getSourceVariable()
/*
* Performance optimisation:
*
* When join-ordering and evaluating this conjunction,
* it is preferable to start with the relatively small set of
* `sanitizer` calls, then compute the set of SSA variables accessed
* as the arguments of those sanitizer calls, then reason about how
* those variables are used in phi nodes.
*
* Use directional binding pragmas to encourage this join order,
* starting with `sanitizer`.
*
* Without these pragmas, the join orderer may choose the opposite order:
* start with all `phi` nodes, then compute the set of SSA variables involved,
* then the (potentially large) set of accesses to those variables,
* then the set of accesses used as the argument of a sanitizer call.
*/

pragma[only_bind_out](sanitizer) = DataFlow::valueNode(a.getDef().getSource()) and
pragma[only_bind_out](sanitizer.getAnArgument().asExpr()) =
b.getSourceVariable().getAnAccess()
|
pred = DataFlow::ssaDefinitionNode(b) and
succ = DataFlow::ssaDefinitionNode(phi)