Skip to content

Commit

Permalink
LineageTaint: don't follow Return edges after a Call has been traversed
Browse files Browse the repository at this point in the history
Summary:
Blindly following Call then Return edges can yield a huge number of false positive paths as it makes
the pathfinding completely insensitive to the actual call stack (pathfinding could branch into
a callee through [Call] and return into a completely different caller through [Return]).

When processing a function call, Lineage generates [Summary] edges that can be used instead to
correctly follow function calls.

One still needs to follow [Return] edges at the beginning of paths and [Call] edges at the end, to
support sources returned from nested function calls and, similarly, calls to nested sinks.

Future work: be able to expand function calls up to a certain depth to see what happens
inside them, instead of simply having a Derive edge generated.

Reviewed By: rgrig

Differential Revision: D57157099

fbshipit-source-id: 6d4435b157ae6b8a237aab17b291c37f6870d926
  • Loading branch information
Thibault Suzanne authored and facebook-github-bot committed May 10, 2024
1 parent aad98ef commit bcb4b88
Showing 1 changed file with 46 additions and 27 deletions.
73 changes: 46 additions & 27 deletions infer/src/backend/LineageTaint.ml
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,16 @@ let find_callers caller_table procname =


(** Collects the reachable subgraph from a given source node in the lineage graph of one procedure.
Returns this subgraph and a list of nodes from other procedures to explore. *)
let collect_reachable_in_procedure caller_table ~init:subgraph procname graph node =
let rec dfs todo acc_subgraph interproc_todo =
Returns this subgraph and two lists of nodes from other procedures to explore, the first one
being reached through Return edges and the second one through Calls.
If [follow_return] is false then the Return list will be empty (ie. [Return] edges will be
ignored). *)
let collect_reachable_in_procedure ~follow_return caller_table ~init:subgraph procname graph node =
let rec dfs todo acc_subgraph return_todo call_todo =
match todo with
| [] ->
(acc_subgraph, interproc_todo)
(acc_subgraph, return_todo, call_todo)
| vertex :: next ->
let acc_subgraph', todo' =
Lineage.G.fold_succ_e
Expand All @@ -57,34 +61,48 @@ let collect_reachable_in_procedure caller_table ~init:subgraph procname graph no
else (Lineage.G.add_edge_e acc edge, Lineage.G.E.dst edge :: todo) )
graph vertex (acc_subgraph, next)
in
let interproc_todo' =
let callers = find_callers caller_table procname in
let return_todo' =
if follow_return then
let callers = find_callers caller_table procname in
match vertex with
| Lineage.Vertex.Return [] ->
List.fold callers ~init:return_todo ~f:(fun acc caller ->
(caller, Lineage.Vertex.ReturnOf procname) :: acc )
| Lineage.Vertex.Return (_ :: _) ->
L.die UserError
"Structures as returned values aren't supported yet. Re-run the lineage analysis \
with --lineage-field-depth=0."
| _ ->
return_todo
else return_todo
in
let call_todo' =
match vertex with
| Lineage.Vertex.Return [] ->
List.fold callers ~init:interproc_todo ~f:(fun acc caller ->
(caller, Lineage.Vertex.ReturnOf procname) :: acc )
| Lineage.Vertex.Return (_ :: _) ->
L.die UserError
"Structures as returned values aren't supported yet. Re-run the lineage analysis \
with --lineage-field-depth=0."
| Lineage.Vertex.ArgumentOf (callee, index) ->
(callee, Lineage.Vertex.Argument (index, [])) :: interproc_todo
(callee, Lineage.Vertex.Argument (index, [])) :: call_todo
| _ ->
interproc_todo
call_todo
in
dfs todo' acc_subgraph' interproc_todo'
dfs todo' acc_subgraph' return_todo' call_todo'
in
dfs [node] subgraph []
dfs [node] subgraph [] []


(** Collect the reachable lineage subgraphs from a node over all program procedures. Returns it as a
map from each procname to its subgraph. *)
map from each procname to its subgraph.
The result will not traverse paths that include a Call edge followed (even if not immediately) by a
Return edge (but will include the corresponding Summary edges). It works by first collecting all
the nodes reachable by traversing Return edges, then from this set the ones reachable through
Call edges but not Return. *)
let collect_reachable caller_table procname node =
let rec aux todo acc_graphs =
match todo with
| [] ->
let rec aux ~follow_return todo todo_later acc_graphs =
match (todo, todo_later) with
| [], [] ->
acc_graphs
| (procname, node) :: todo_next ->
| [], _ :: _ ->
aux ~follow_return:false todo_later [] acc_graphs
| (procname, node) :: todo_next, _ ->
let lineage_graph =
match Summary.OnDisk.get ~lazy_payloads:false procname with
| None ->
Expand All @@ -105,15 +123,16 @@ let collect_reachable caller_table procname node =
| Some g ->
g
in
let reachable_subgraph, interproc_todo =
collect_reachable_in_procedure caller_table procname lineage_graph node
let reachable_subgraph, return_todo, call_todo =
collect_reachable_in_procedure ~follow_return caller_table procname lineage_graph node
~init:init_reachable_subgraph
in
let acc_reachable_graphs' = Map.set acc_graphs ~key:procname ~data:reachable_subgraph in
let todo' = List.rev_append interproc_todo todo_next in
aux todo' acc_reachable_graphs'
let todo' = List.rev_append return_todo todo_next in
let todo_later' = List.rev_append call_todo todo_later in
aux ~follow_return todo' todo_later' acc_reachable_graphs'
in
aux [(procname, node)] (Map.empty (module Procname))
aux ~follow_return:true [(procname, node)] [] (Map.empty (module Procname))


(** Similar to collect_reachable for a subgraph from which you can reach a given node. *)
Expand Down

0 comments on commit bcb4b88

Please sign in to comment.