perf(lsp): Cache semantic tokens for open documents (#23799)

VS Code will typically send a `textDocument/semanticTokens/full` request
followed by `textDocument/semanticTokens/range`, and occasionally
request semantic tokens even when we know nothing has changed. Semantic
tokens also get refreshed on each change. Computing semantic tokens is
relatively heavy in TSC, so we should avoid it as much as possible.

Caches the semantic tokens for open documents, to avoid making TSC do
unnecessary work. Results in a noticeable improvement in local
benchmarking.

before:
```
Starting Deno benchmark
-> Start benchmarking lsp
   - Simple Startup/Shutdown 
      (10 runs, mean: 383ms)
   - Big Document/Several Edits 
      (5 runs, mean: 1079ms)
   - Find/Replace
      (10 runs, mean: 59ms)
   - Code Lens
      (10 runs, mean: 440ms)
   - deco-cx/apps Multiple Edits + Navigation
      (5 runs, mean: 9921ms)
<- End benchmarking lsp
```

after:
```
Starting Deno benchmark
-> Start benchmarking lsp
   - Simple Startup/Shutdown 
      (10 runs, mean: 395ms)
   - Big Document/Several Edits 
      (5 runs, mean: 1024ms)
   - Find/Replace
      (10 runs, mean: 56ms)
   - Code Lens
      (10 runs, mean: 438ms)
   - deco-cx/apps Multiple Edits + Navigation
      (5 runs, mean: 8927ms)
<- End benchmarking lsp
```
This commit is contained in:
Nathan Whitaker 2024-05-14 18:51:48 -07:00 committed by GitHub
parent 1a788b58a0
commit 36d877be4a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 420 additions and 3 deletions

View File

@ -143,6 +143,16 @@ impl AssetOrDocument {
}
}
/// Returns a copy of the semantic tokens cached for this document, if any.
///
/// Assets never carry cached tokens, and neither do documents without
/// open data; in both cases `None` is returned.
pub fn maybe_semantic_tokens(&self) -> Option<lsp::SemanticTokens> {
  let doc = match self {
    AssetOrDocument::Document(doc) => doc,
    AssetOrDocument::Asset(_) => return None,
  };
  let open_data = doc.open_data.as_ref()?;
  // clone out of the lock so the guard is released before returning
  let cached = open_data.maybe_semantic_tokens.lock().clone();
  cached
}
pub fn text(&self) -> Arc<str> {
match self {
AssetOrDocument::Asset(a) => a.text(),
@ -249,6 +259,7 @@ fn get_maybe_test_module_fut(
/// Extra state a `Document` carries in its `open_data` field; it is only
/// present when the document was created with an LSP version.
pub struct DocumentOpenData {
/// The document version number this data was created with.
lsp_version: i32,
/// The parsed source for the document, when available.
maybe_parsed_source: Option<ParsedSourceResult>,
/// Cache of the full-document semantic tokens. Starts out empty (and is
/// reset to empty whenever a new `DocumentOpenData` is built), is filled by
/// `Document::cache_semantic_tokens_full` and read back through
/// `AssetOrDocument::maybe_semantic_tokens`.
maybe_semantic_tokens: Arc<Mutex<Option<lsp::SemanticTokens>>>,
}
#[derive(Debug)]
@ -330,6 +341,7 @@ impl Document {
open_data: maybe_lsp_version.map(|v| DocumentOpenData {
lsp_version: v,
maybe_parsed_source,
maybe_semantic_tokens: Default::default(),
}),
resolver,
specifier,
@ -421,6 +433,8 @@ impl Document {
open_data: self.open_data.as_ref().map(|d| DocumentOpenData {
lsp_version: d.lsp_version,
maybe_parsed_source,
// reset semantic tokens
maybe_semantic_tokens: Default::default(),
}),
resolver,
specifier: self.specifier.clone(),
@ -499,6 +513,7 @@ impl Document {
open_data: self.open_data.is_some().then_some(DocumentOpenData {
lsp_version: version,
maybe_parsed_source,
maybe_semantic_tokens: Default::default(),
}),
resolver: self.resolver.clone(),
}))
@ -652,6 +667,15 @@ impl Document {
) {
*self.maybe_navigation_tree.lock() = Some(navigation_tree);
}
/// Stores the semantic tokens computed for the whole document so later
/// requests can reuse them.
///
/// Does nothing when the document has no open data.
pub fn cache_semantic_tokens_full(
  &self,
  semantic_tokens: lsp::SemanticTokens,
) {
  let open_data = match self.open_data.as_ref() {
    Some(open_data) => open_data,
    None => return,
  };
  let mut slot = open_data.maybe_semantic_tokens.lock();
  *slot = Some(semantic_tokens);
}
}
fn resolve_media_type(

View File

@ -2529,6 +2529,16 @@ impl Inner {
.performance
.mark_with_args("lsp.semantic_tokens_full", &params);
let asset_or_doc = self.get_asset_or_document(&specifier)?;
if let Some(tokens) = asset_or_doc.maybe_semantic_tokens() {
let response = if !tokens.data.is_empty() {
Some(SemanticTokensResult::Tokens(tokens.clone()))
} else {
None
};
self.performance.measure(mark);
return Ok(response);
}
let line_index = asset_or_doc.line_index();
let semantic_classification = self
@ -2542,6 +2552,11 @@ impl Inner {
let semantic_tokens =
semantic_classification.to_semantic_tokens(line_index)?;
if let Some(doc) = asset_or_doc.document() {
doc.cache_semantic_tokens_full(semantic_tokens.clone());
}
let response = if !semantic_tokens.data.is_empty() {
Some(SemanticTokensResult::Tokens(semantic_tokens))
} else {
@ -2566,6 +2581,18 @@ impl Inner {
.performance
.mark_with_args("lsp.semantic_tokens_range", &params);
let asset_or_doc = self.get_asset_or_document(&specifier)?;
if let Some(tokens) = asset_or_doc.maybe_semantic_tokens() {
let tokens =
super::semantic_tokens::tokens_within_range(&tokens, params.range);
let response = if !tokens.data.is_empty() {
Some(SemanticTokensRangeResult::Tokens(tokens))
} else {
None
};
self.performance.measure(mark);
return Ok(response);
}
let line_index = asset_or_doc.line_index();
let semantic_classification = self

View File

@ -7,6 +7,7 @@
use std::ops::Index;
use std::ops::IndexMut;
use tower_lsp::lsp_types as lsp;
use tower_lsp::lsp_types::SemanticToken;
use tower_lsp::lsp_types::SemanticTokenModifier;
use tower_lsp::lsp_types::SemanticTokenType;
@ -247,6 +248,54 @@ impl SemanticTokensBuilder {
}
}
/// Extracts, from the delta-encoded `tokens` of a whole document, the
/// tokens whose start position falls within `range`.
///
/// A token is kept when its start position is `>= range.start` and
/// `<= range.end` (a token starting exactly at `range.end` is included).
/// Only the start position is considered; a token that begins before
/// `range.start` is dropped even if it extends into the range.
///
/// The returned tokens are re-encoded so that the first one is relative to
/// the start of the document (line 0, character 0) rather than to the token
/// that preceded it in `tokens`. The `result_id` of the result is always
/// `None`.
///
/// An empty result is returned when no token starts inside the range,
/// including when `range` is reversed (`range.start > range.end`).
pub fn tokens_within_range(
  tokens: &SemanticTokens,
  range: lsp::Range,
) -> SemanticTokens {
  // absolute position of the token currently being visited
  let mut line = 0;
  let mut character = 0;

  let mut first_token_line = 0;
  let mut first_token_char = 0;
  // both indices default to "one past the end", i.e. nothing selected
  let mut keep_start_idx = tokens.data.len();
  let mut keep_end_idx = keep_start_idx;
  for (i, token) in tokens.data.iter().enumerate() {
    // `delta_start` is relative to the previous token only when both
    // tokens are on the same line; otherwise it is relative to column 0
    if token.delta_line != 0 {
      character = 0;
    }
    line += token.delta_line;
    character += token.delta_start;
    let token_start = lsp::Position::new(line, character);
    if i < keep_start_idx && token_start >= range.start {
      keep_start_idx = i;
      first_token_line = line;
      first_token_char = character;
    }
    if token_start > range.end {
      // tokens are ordered by position, so nothing after this one can match
      keep_end_idx = i;
      break;
    }
  }
  // `>=` rather than `==`: with a reversed range (start > end) the loop can
  // stop at a token past `range.end` before any token reached `range.start`,
  // leaving `keep_start_idx > keep_end_idx`. Slicing with those bounds would
  // panic, so treat it as "no tokens in range".
  if keep_start_idx >= keep_end_idx {
    return SemanticTokens {
      result_id: None,
      data: Vec::new(),
    };
  }

  let mut data = tokens.data[keep_start_idx..keep_end_idx].to_vec();
  // we need to adjust the delta_line and delta_start on the first token
  // as it is relative to 0 now, not the previous token
  let first_token = &mut data[0];
  first_token.delta_line = first_token_line;
  first_token.delta_start = first_token_char;

  SemanticTokens {
    result_id: None,
    data,
  }
}
#[cfg(test)]
mod tests {
use super::*;
@ -352,4 +401,129 @@ mod tests {
]
);
}
#[test]
fn test_tokens_within_range() {
  // builds the expected delta-encoded token; modifiers are always 0 here
  let expect = |delta_line: u32, delta_start: u32, length: u32, token_type: u32| {
    SemanticToken {
      delta_line,
      delta_start,
      length,
      token_type,
      token_modifiers_bitset: 0,
    }
  };

  let mut builder = SemanticTokensBuilder::new();
  builder.push(1, 0, 5, 0, 0);
  builder.push(2, 1, 1, 1, 0);
  builder.push(2, 2, 3, 2, 0);
  builder.push(2, 5, 5, 3, 0);
  builder.push(3, 0, 4, 4, 0);
  builder.push(5, 2, 3, 5, 0);
  let tokens = builder.build(None);

  let range =
    lsp::Range::new(lsp::Position::new(2, 2), lsp::Position::new(4, 0));
  let result = tokens_within_range(&tokens, range);

  assert_eq!(
    result.data,
    vec![
      // line 2 char 2 (first token, so encoded relative to 0:0)
      expect(2, 2, 3, 2),
      // line 2 char 5
      expect(0, 3, 5, 3),
      // line 3 char 0
      expect(1, 0, 4, 4),
    ]
  );
}
#[test]
fn test_tokens_within_range_include_end() {
  // builds the expected delta-encoded token; modifiers are always 0 here
  let expect = |delta_line: u32, delta_start: u32, length: u32, token_type: u32| {
    SemanticToken {
      delta_line,
      delta_start,
      length,
      token_type,
      token_modifiers_bitset: 0,
    }
  };

  let mut builder = SemanticTokensBuilder::new();
  builder.push(1, 0, 1, 0, 0);
  builder.push(2, 1, 2, 1, 0);
  builder.push(2, 3, 3, 2, 0);
  builder.push(3, 0, 4, 3, 0);
  let tokens = builder.build(None);

  // the range ends after the last token's start, so that token is kept
  let range =
    lsp::Range::new(lsp::Position::new(2, 2), lsp::Position::new(3, 4));
  let result = tokens_within_range(&tokens, range);

  assert_eq!(
    result.data,
    vec![
      // line 2 char 3
      expect(2, 3, 3, 2),
      // line 3 char 0
      expect(1, 0, 4, 3),
    ]
  );
}
#[test]
fn test_tokens_within_range_empty() {
  let mut builder = SemanticTokensBuilder::new();
  builder.push(1, 0, 1, 0, 0);
  builder.push(2, 1, 2, 1, 0);
  builder.push(2, 3, 3, 2, 0);
  builder.push(3, 0, 4, 3, 0);
  let tokens = builder.build(None);

  // no token starts between 3:2 and 3:4
  let range =
    lsp::Range::new(lsp::Position::new(3, 2), lsp::Position::new(3, 4));

  let from_populated = tokens_within_range(&tokens, range);
  assert!(from_populated.data.is_empty());

  // an empty token list yields an empty result as well
  let from_empty = tokens_within_range(&SemanticTokens::default(), range);
  assert!(from_empty.data.is_empty());
}
}

View File

@ -12698,3 +12698,87 @@ fn lsp_ts_code_fix_any_param() {
panic!("failed to find 'Infer parameter types from usage' fix in fixes: {fixes:#?}");
}
// Checks that semantic tokens computed for a full-document request are
// cached and reused by a later range request. The number of
// `tsc.request.getEncodedSemanticClassifications` measures reported by the
// server is used to tell whether TSC was actually asked for tokens.
#[test]
fn lsp_semantic_token_caching() {
let context = TestContextBuilder::new().use_temp_cwd().build();
let temp_dir = context.temp_dir().path();
// `collect_perf` is required for `client.perf()` below
let mut client: LspClient = context
.new_lsp_command()
.collect_perf()
.set_root_dir(temp_dir.clone())
.build();
client.initialize_default();
let a = source_file(
temp_dir.join("a.ts"),
r#"
export const a = 1;
export const b = 2;
export const bar = () => "bar";
function foo(fun: (number, number, number) => number, c: number) {
const double = (x) => x * 2;
return fun(double(a), b, c);
}"#,
);
client.did_open_file(&a);
// requesting a range won't cache the tokens, so this will
// be computed
let res = client.write_request(
"textDocument/semanticTokens/range",
json!({
"textDocument": a.identifier(),
"range": {
"start": a.range_of("const bar").start,
"end": a.range_of("}").end,
}
}),
);
// first TSC request: the range request above
assert_eq!(
client
.perf()
.measure_count("tsc.request.getEncodedSemanticClassifications"),
1,
);
// requesting for the full doc should compute and cache the tokens
let _full = client.write_request(
"textDocument/semanticTokens/full",
json!({
"textDocument": a.identifier(),
}),
);
// second TSC request: the full request above
assert_eq!(
client
.perf()
.measure_count("tsc.request.getEncodedSemanticClassifications"),
2,
);
// use the cached tokens
// (same range as the first request, so the responses can be compared)
let res_cached = client.write_request(
"textDocument/semanticTokens/range",
json!({
"textDocument": a.identifier(),
"range": {
"start": a.range_of("const bar").start,
"end": a.range_of("}").end,
}
}),
);
// make sure we actually used the cache
assert_eq!(
client
.perf()
.measure_count("tsc.request.getEncodedSemanticClassifications"),
2,
);
// the cached answer must match what TSC computed for the same range
assert_eq!(res, res_cached);
}

View File

@ -470,6 +470,7 @@ pub struct LspClientBuilder {
use_diagnostic_sync: bool,
deno_dir: TempDir,
envs: HashMap<OsString, OsString>,
collect_perf: bool,
}
impl LspClientBuilder {
@ -488,6 +489,7 @@ impl LspClientBuilder {
use_diagnostic_sync: true,
deno_dir,
envs: Default::default(),
collect_perf: false,
}
}
@ -514,6 +516,15 @@ impl LspClientBuilder {
self
}
/// Turns on collection of performance records (the marks and measures
/// emitted by the lsp in its `performance` module).
///
/// This also enables `capture_stderr`, since the records are read from
/// the server's stderr output.
pub fn collect_perf(mut self) -> Self {
  self.collect_perf = true;
  self.capture_stderr = true;
  self
}
/// Whether to use the synchronization messages to better sync diagnostics
/// between the test client and server.
pub fn use_diagnostic_sync(mut self, value: bool) -> Self {
@ -577,10 +588,12 @@ impl LspClientBuilder {
let stdin = child.stdin.take().unwrap();
let writer = io::BufWriter::new(stdin);
let stderr_lines_rx = if self.capture_stderr {
let (stderr_lines_rx, perf_rx) = if self.capture_stderr {
let stderr = child.stderr.take().unwrap();
let print_stderr = self.print_stderr;
let (tx, rx) = mpsc::channel::<String>();
let (perf_tx, perf_rx) =
self.collect_perf.then(mpsc::channel::<PerfRecord>).unzip();
std::thread::spawn(move || {
let stderr = BufReader::new(stderr);
for line in stderr.lines() {
@ -589,6 +602,22 @@ impl LspClientBuilder {
if print_stderr {
eprintln!("{}", line);
}
if let Some(tx) = perf_tx.as_ref() {
// look for perf records
if line.starts_with('{') && line.ends_with("},") {
match serde_json::from_str::<PerfRecord>(
line.trim_end_matches(','),
) {
Ok(record) => {
tx.send(record).unwrap();
continue;
}
Err(err) => {
eprintln!("failed to parse perf record: {:#}", err);
}
}
}
}
tx.send(line).unwrap();
}
Err(err) => {
@ -597,9 +626,9 @@ impl LspClientBuilder {
}
}
});
Some(rx)
(Some(rx), perf_rx)
} else {
None
(None, None)
};
Ok(LspClient {
@ -613,10 +642,76 @@ impl LspClientBuilder {
stderr_lines_rx,
config: json!("{}"),
supports_workspace_configuration: false,
perf: perf_rx.map(Perf::new),
})
}
}
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "camelCase", tag = "type")]
/// A performance record, emitted by the `lsp::performance`
/// module.
///
/// (De)serialized as an internally tagged object: the `type` field holds
/// the camelCased variant name (`"mark"` or `"measure"`) and the remaining
/// fields are those of the variant's payload.
pub enum PerfRecord {
/// A mark record.
Mark(PerfMark),
/// A measure record.
Measure(PerfMeasure),
}
/// Payload of a `PerfRecord::Measure` record. Field names are
/// (de)serialized in camelCase.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "camelCase")]
pub struct PerfMeasure {
/// Name of the measure, e.g. `tsc.request.getEncodedSemanticClassifications`.
name: String,
/// Count reported by the server for this measure.
// NOTE(review): exact meaning is defined by the lsp `performance` module — confirm there
count: u32,
/// Duration reported by the server for this measure.
// NOTE(review): unit is not visible here (presumably milliseconds) — confirm
duration: f64,
}
/// Payload of a `PerfRecord::Mark` record. Field names are
/// (de)serialized in camelCase.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "camelCase")]
pub struct PerfMark {
/// Name of the mark.
name: String,
/// Optional count; `None` when the field is absent from the record.
#[serde(default)]
count: Option<u32>,
/// Optional arguments attached to the mark, kept as raw JSON; `None` when
/// the field is absent from the record.
#[serde(default)]
args: Option<Value>,
}
/// Accumulates the performance records received from the lsp server
/// under test.
#[derive(Debug)]
pub struct Perf {
/// Every record drained from `rx` so far, in arrival order.
records: Vec<PerfRecord>,
/// Number of `Measure` records drained so far, keyed by measure name.
measures_counts: HashMap<String, u32>,
/// Receiving end of the channel the perf records arrive on.
rx: mpsc::Receiver<PerfRecord>,
}
impl Perf {
fn new(rx: mpsc::Receiver<PerfRecord>) -> Self {
Self {
records: Default::default(),
measures_counts: Default::default(),
rx,
}
}
fn drain(&mut self) {
while let Ok(record) = self.rx.try_recv() {
if let PerfRecord::Measure(measure) = &record {
*self
.measures_counts
.entry(measure.name.clone())
.or_default() += 1;
}
self.records.push(record);
}
}
pub fn measures(&self) -> impl IntoIterator<Item = &PerfMeasure> {
self.records.iter().filter_map(|record| match record {
PerfRecord::Measure(measure) => Some(measure),
_ => None,
})
}
pub fn measure_count(&self, name: &str) -> u32 {
self.measures_counts.get(name).copied().unwrap_or_default()
}
}
pub struct LspClient {
child: Child,
reader: LspStdoutReader,
@ -628,6 +723,7 @@ pub struct LspClient {
stderr_lines_rx: Option<mpsc::Receiver<String>>,
config: serde_json::Value,
supports_workspace_configuration: bool,
perf: Option<Perf>,
}
impl Drop for LspClient {
@ -661,6 +757,15 @@ impl LspClient {
self.reader.pending_len()
}
/// Returns the collected performance records, first pulling in any
/// records that have arrived since the last call.
///
/// # Panics
///
/// Panics if the client was not built with `collect_perf()`.
pub fn perf(&mut self) -> &Perf {
  let collector = self
    .perf
    .as_mut()
    .expect("must setup with client_builder.collect_perf()");
  collector.drain();
  &*collector
}
#[track_caller]
pub fn wait_until_stderr_line(
&self,
@ -733,6 +838,9 @@ impl LspClient {
"tlsCertificate": null,
"unsafelyIgnoreCertificateErrors": null,
"unstable": false,
// setting this causes performance records to be logged
// to stderr
"internalDebug": self.perf.is_some(),
} }),
)
}