Skip to content

Commit

Permalink
create turbo-static for compile time graph analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
arlyon committed May 15, 2024
1 parent d49e885 commit b0c665b
Show file tree
Hide file tree
Showing 10 changed files with 1,114 additions and 43 deletions.
228 changes: 186 additions & 42 deletions Cargo.lock

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ resolver = "2"

members = [
"crates/node-file-trace",
"crates/tower-uds",
"crates/tower-uds",
"crates/turbo-static",
"crates/turbo-tasks*",
"crates/turbopack*",
"crates/turborepo*",
Expand Down
2 changes: 2 additions & 0 deletions crates/turbo-static/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
file
graph.cypherl
25 changes: 25 additions & 0 deletions crates/turbo-static/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
[package]
name = "turbo-static"
version = "0.1.0"
edition = "2021"

[dependencies]
bincode = "1.3.3"
clap = { workspace = true, features = ["derive"] }
crossbeam-channel = "0.5.12"
fjall = { version = "0.6.3", features = ["bloom"] }
ignore = "0.4.22"
itertools.workspace = true
lsp-server = "0.7.6"
lsp-types = "0.95.1"
proc-macro2 = { workspace = true, features = ["span-locations"] }
serde = { workspace = true, features = ["derive"] }
serde_json.workspace = true
serde_path_to_error = "0.1.16"
syn = { version = "2", features = ["parsing", "full", "visit", "extra-traits"] }
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
tracing.workspace = true
walkdir = "2.5.0"

[lints]
workspace = true
29 changes: 29 additions & 0 deletions crates/turbo-static/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Turbo Static

Leverages rust-analyzer to build a complete view into the static dependency graph for
your turbo tasks project.

## How it works

- find all occurrences of #[turbo_tasks::function] across all the packages you want to query
- for each of the tasks we find, query rust analyzer to see which tasks call them
- apply some very basic control flow analysis to determine whether the call is made 1 time, 0/1 times, or 0+ times,
corresponding to direct calls, conditionals, or for loops. nested conditionals collapse

## Usage

This uses an in memory persisted database to cache rust-analyzer queries.
To reset the cache, pass the `--reindex` flag. Running will produce a
`graph.cypherl` file which can be loaded into any cypher-compatible database.

```bash
# run neo4j
docker run \
--publish=7474:7474 --publish=7687:7687 \
--volume=$HOME/neo4j/data:/data \
neo4j
# run it passing in the root folders you want to analyze.
# the system will recursively parse all rust code looking
# for turbo tasks functions
cargo run --release -- ../../../turbo ../../../next.js
```
125 changes: 125 additions & 0 deletions crates/turbo-static/src/call_resolver.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
use fjall::PartitionCreateOptions;

use crate::{lsp_client::RAClient, Identifier, IdentifierReference};

/// Resolves which tasks call a given task by sending call-hierarchy requests
/// through an [`RAClient`], caching every answer in the `links` partition of a
/// fjall keyspace.
pub struct CallResolver<'a> {
/// Client used to send the LSP requests.
client: &'a mut RAClient,
/// Keyspace that owns the `links` partition; kept so the partition can be
/// deleted and re-opened.
fjall: &'a fjall::Keyspace,
/// Handle to the `links` partition that stores the cached results.
handle: fjall::PartitionHandle,
}

impl<'a> CallResolver<'a> {
pub fn new(client: &'a mut RAClient, fjall: &'a fjall::Keyspace) -> Self {
let handle = fjall
.open_partition("links", PartitionCreateOptions::default())
.unwrap();
Self {
client,
fjall,
handle,
}
}

pub fn cached(&self) -> usize {
self.handle.len().unwrap()
}

pub fn cleared(mut self) -> Self {
self.fjall.delete_partition(self.handle).unwrap();
self.handle = self
.fjall
.open_partition("links", PartitionCreateOptions::default())
.unwrap();
self
}

pub fn resolve(&mut self, ident: &Identifier) -> Vec<IdentifierReference> {
if let Some(data) = self.handle.get(ident.to_string()).unwrap() {
tracing::info!("skipping {}", ident);
return bincode::deserialize(&data).unwrap();
};

tracing::info!("checking {}", ident);

let mut count = 0;
let _response = loop {
let response = self.client.request(lsp_server::Request {
id: 1.into(),
method: "textDocument/prepareCallHierarchy".to_string(),
params: serde_json::to_value(&lsp_types::CallHierarchyPrepareParams {
text_document_position_params: lsp_types::TextDocumentPositionParams {
position: ident.range.start,
text_document: lsp_types::TextDocumentIdentifier {
uri: lsp_types::Url::from_file_path(&ident.path).unwrap(),
},
},
work_done_progress_params: lsp_types::WorkDoneProgressParams {
work_done_token: Some(lsp_types::ProgressToken::String(
"prepare".to_string(),
)),
},
})
.unwrap(),
});
if let Some(Some(value)) = response.result.as_ref().map(|r| r.as_array()) {
if !value.is_empty() {
break value.to_owned();
}
count += 1;
}

// textDocument/prepareCallHierarchy will sometimes return an empty array so try
// at most 5 times
if count > 5 {
tracing::warn!("discovered isolated task {}", ident);
break vec![];
}

std::thread::sleep(std::time::Duration::from_secs(1));
};

// callHierarchy/incomingCalls
let response = self.client.request(lsp_server::Request {
id: 1.into(),
method: "callHierarchy/incomingCalls".to_string(),
params: serde_json::to_value(lsp_types::CallHierarchyIncomingCallsParams {
partial_result_params: lsp_types::PartialResultParams::default(),
item: lsp_types::CallHierarchyItem {
name: ident.name.to_owned(),
kind: lsp_types::SymbolKind::FUNCTION,
data: None,
tags: None,
detail: None,
uri: lsp_types::Url::from_file_path(&ident.path).unwrap(),
range: ident.range,
selection_range: ident.range,
},
work_done_progress_params: lsp_types::WorkDoneProgressParams {
work_done_token: Some(lsp_types::ProgressToken::String("prepare".to_string())),
},
})
.unwrap(),
});

let links = if let Some(e) = response.error {
tracing::warn!("unable to resolve {}: {:?}", ident, e);
vec![]
} else {
let response: Result<Vec<lsp_types::CallHierarchyIncomingCall>, _> =
serde_path_to_error::deserialize(response.result.unwrap());

response
.unwrap()
.into_iter()
.map(|i| i.into())
.collect::<Vec<IdentifierReference>>()
};

let data = bincode::serialize(&links).unwrap();

tracing::debug!("links: {:?}", links);

self.handle.insert(ident.to_string(), data).unwrap();
links
}
}
99 changes: 99 additions & 0 deletions crates/turbo-static/src/identifier.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
use std::{fs, path::PathBuf};

use lsp_types::{CallHierarchyIncomingCall, CallHierarchyItem, Range};

/// A task that references another, with the ranges of the references.
///
/// Built from an LSP `CallHierarchyIncomingCall`: `identifier` is the calling
/// item and `references` are the call's `from_ranges`.
#[derive(Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize, Clone, Debug)]
pub struct IdentifierReference {
/// The task that contains the references.
pub identifier: Identifier,
/// The places where this identifier is used.
pub references: Vec<Range>, // the places where this identifier is used
}

/// Identifies a task by its file, and range in the file.
///
/// Its `Display` form is `path:line#name`, where `line` is `range.start.line`.
#[derive(Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize, Clone)]
pub struct Identifier {
/// Path of the file that contains the task, stored as a string.
pub path: String,
// technically you can derive this from the path and range (by reading the
// text at that range from the file) but it's easier to just store it
/// Name of the task as written in the source file.
pub name: String,
// post_transform_name: Option<String>,
/// Range of the task's name inside the file, expressed as an LSP range.
pub range: lsp_types::Range,
}

impl Identifier {
/// check the span matches and the text matches
///
/// `same_location` is used to check if the location of the identifier is
/// the same as the other
pub fn equals_ident(&self, other: &syn::Ident, match_location: bool) -> bool {
*other == self.name
&& (!match_location
|| (self.range.start.line == other.span().start().line as u32
&& self.range.start.character == other.span().start().column as u32))
}

fn get_name(item: &CallHierarchyItem) -> String {
// open file, find range inside, extract text
let file = fs::read_to_string(item.uri.path()).unwrap();
let start = item.selection_range.start;
let end = item.selection_range.end;
file.lines()
.nth(start.line as usize)
.unwrap()
.chars()
.skip(start.character as usize)
.take(end.character as usize - start.character as usize)
.collect()
}
}

impl From<(PathBuf, syn::Ident)> for Identifier {
    /// Builds an identifier from the file it was parsed from and its `syn`
    /// ident. The span's line numbers are shifted down by one to produce the
    /// stored range; columns are copied unchanged.
    fn from((path, ident): (PathBuf, syn::Ident)) -> Self {
        let span = ident.span();
        let (first, last) = (span.start(), span.end());

        let start = lsp_types::Position {
            line: first.line as u32 - 1,
            character: first.column as u32,
        };
        let end = lsp_types::Position {
            line: last.line as u32 - 1,
            character: last.column as u32,
        };

        Self {
            path: path.display().to_string(),
            name: ident.to_string(),
            // post_transform_name: None,
            range: Range { start, end },
        }
    }
}

impl From<CallHierarchyIncomingCall> for IdentifierReference {
    /// Converts an incoming call into a reference: the calling item becomes
    /// the identifier (its name is read back from the source file via
    /// `Identifier::get_name`) and the call's `from_ranges` become the
    /// references.
    fn from(item: CallHierarchyIncomingCall) -> Self {
        let name = Identifier::get_name(&item.from);

        // Store a real filesystem path rather than the percent-encoded URL
        // path, so the value can later be turned back into a file URL without
        // being encoded a second time. Fall back to the raw URL path if the
        // URI cannot be converted.
        let path = item
            .from
            .uri
            .to_file_path()
            .map(|p| p.display().to_string())
            .unwrap_or_else(|_| item.from.uri.path().to_owned());

        Self {
            identifier: Identifier {
                name,
                // post_transform_name: Some(item.from.name),
                path,
                range: item.from.selection_range,
            },
            references: item.from_ranges,
        }
    }
}

impl std::fmt::Debug for Identifier {
    /// Debug output is deliberately the same as the `Display` output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        <Self as std::fmt::Display>::fmt(self, f)
    }
}

impl std::fmt::Display for Identifier {
    /// Writes the identifier as `path:line#name`, where `line` is the start
    /// line of the stored range.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let line = self.range.start.line;
        write!(f, "{}:{}#{}", self.path, line, self.name)
    }
}

0 comments on commit b0c665b

Please sign in to comment.