perf(lsp): Cache semantic tokens for open documents (#23799)

VS Code will typically send a `textDocument/semanticTokens/full` request followed by `textDocument/semanticTokens/range`, and occasionally requests semantic tokens even when we know nothing has changed. Semantic tokens also get refreshed on each change. Computing semantic tokens is relatively heavy in TSC, so we should avoid it as much as possible.

This change caches the semantic tokens for open documents, to avoid making TSC do unnecessary work. It results in a noticeable improvement in local benchmarking.

before:

```
Starting Deno benchmark
-> Start benchmarking lsp
   - Simple Startup/Shutdown (10 runs, mean: 383ms)
   - Big Document/Several Edits (5 runs, mean: 1079ms)
   - Find/Replace (10 runs, mean: 59ms)
   - Code Lens (10 runs, mean: 440ms)
   - deco-cx/apps Multiple Edits + Navigation (5 runs, mean: 9921ms)
<- End benchmarking lsp
```

after:

```
Starting Deno benchmark
-> Start benchmarking lsp
   - Simple Startup/Shutdown (10 runs, mean: 395ms)
   - Big Document/Several Edits (5 runs, mean: 1024ms)
   - Find/Replace (10 runs, mean: 56ms)
   - Code Lens (10 runs, mean: 438ms)
   - deco-cx/apps Multiple Edits + Navigation (5 runs, mean: 8927ms)
<- End benchmarking lsp
```
2024-11-21 20:38:55 +00:00 · 2024-05-14 18:51:48 -07:00 · 2024-05-14 18:51:48 -07:00 · 36d877be4a
commit 36d877be4a
parent 1a788b58a0
5 changed files with 420 additions and 3 deletions
--- a/cli/lsp/documents.rs
+++ b/cli/lsp/documents.rs
@ -143,6 +143,16 @@ impl AssetOrDocument {
    }
  }

+  pub fn maybe_semantic_tokens(&self) -> Option<lsp::SemanticTokens> {
+    match self {
+      AssetOrDocument::Asset(_) => None,
+      AssetOrDocument::Document(d) => d
+        .open_data
+        .as_ref()
+        .and_then(|d| d.maybe_semantic_tokens.lock().clone()),
+    }
+  }
+
  pub fn text(&self) -> Arc<str> {
    match self {
      AssetOrDocument::Asset(a) => a.text(),
@ -249,6 +259,7 @@ fn get_maybe_test_module_fut(
 pub struct DocumentOpenData {
  lsp_version: i32,
  maybe_parsed_source: Option<ParsedSourceResult>,
+  maybe_semantic_tokens: Arc<Mutex<Option<lsp::SemanticTokens>>>,
 }

 #[derive(Debug)]
@ -330,6 +341,7 @@ impl Document {
      open_data: maybe_lsp_version.map(|v| DocumentOpenData {
        lsp_version: v,
        maybe_parsed_source,
+        maybe_semantic_tokens: Default::default(),
      }),
      resolver,
      specifier,
@ -421,6 +433,8 @@ impl Document {
      open_data: self.open_data.as_ref().map(|d| DocumentOpenData {
        lsp_version: d.lsp_version,
        maybe_parsed_source,
+        // reset semantic tokens
+        maybe_semantic_tokens: Default::default(),
      }),
      resolver,
      specifier: self.specifier.clone(),
@ -499,6 +513,7 @@ impl Document {
      open_data: self.open_data.is_some().then_some(DocumentOpenData {
        lsp_version: version,
        maybe_parsed_source,
+        maybe_semantic_tokens: Default::default(),
      }),
      resolver: self.resolver.clone(),
    }))
@ -652,6 +667,15 @@ impl Document {
  ) {
    *self.maybe_navigation_tree.lock() = Some(navigation_tree);
  }
+
+  pub fn cache_semantic_tokens_full(
+    &self,
+    semantic_tokens: lsp::SemanticTokens,
+  ) {
+    if let Some(open_data) = self.open_data.as_ref() {
+      *open_data.maybe_semantic_tokens.lock() = Some(semantic_tokens);
+    }
+  }
 }

 fn resolve_media_type(
--- a/cli/lsp/language_server.rs
+++ b/cli/lsp/language_server.rs
@ -2529,6 +2529,16 @@ impl Inner {
      .performance
      .mark_with_args("lsp.semantic_tokens_full", &params);
    let asset_or_doc = self.get_asset_or_document(&specifier)?;
+    if let Some(tokens) = asset_or_doc.maybe_semantic_tokens() {
+      let response = if !tokens.data.is_empty() {
+        Some(SemanticTokensResult::Tokens(tokens.clone()))
+      } else {
+        None
+      };
+      self.performance.measure(mark);
+      return Ok(response);
+    }
+
    let line_index = asset_or_doc.line_index();

    let semantic_classification = self
@ -2542,6 +2552,11 @@ impl Inner {

    let semantic_tokens =
      semantic_classification.to_semantic_tokens(line_index)?;
+
+    if let Some(doc) = asset_or_doc.document() {
+      doc.cache_semantic_tokens_full(semantic_tokens.clone());
+    }
+
    let response = if !semantic_tokens.data.is_empty() {
      Some(SemanticTokensResult::Tokens(semantic_tokens))
    } else {
@ -2566,6 +2581,18 @@ impl Inner {
      .performance
      .mark_with_args("lsp.semantic_tokens_range", &params);
    let asset_or_doc = self.get_asset_or_document(&specifier)?;
+    if let Some(tokens) = asset_or_doc.maybe_semantic_tokens() {
+      let tokens =
+        super::semantic_tokens::tokens_within_range(&tokens, params.range);
+      let response = if !tokens.data.is_empty() {
+        Some(SemanticTokensRangeResult::Tokens(tokens))
+      } else {
+        None
+      };
+      self.performance.measure(mark);
+      return Ok(response);
+    }
+
    let line_index = asset_or_doc.line_index();

    let semantic_classification = self
--- a/cli/lsp/semantic_tokens.rs
+++ b/cli/lsp/semantic_tokens.rs
@ -7,6 +7,7 @@

 use std::ops::Index;
 use std::ops::IndexMut;
+use tower_lsp::lsp_types as lsp;
 use tower_lsp::lsp_types::SemanticToken;
 use tower_lsp::lsp_types::SemanticTokenModifier;
 use tower_lsp::lsp_types::SemanticTokenType;
@ -247,6 +248,54 @@ impl SemanticTokensBuilder {
  }
 }

+pub fn tokens_within_range(
+  tokens: &SemanticTokens,
+  range: lsp::Range,
+) -> SemanticTokens {
+  let mut line = 0;
+  let mut character = 0;
+
+  let mut first_token_line = 0;
+  let mut first_token_char = 0;
+  let mut keep_start_idx = tokens.data.len();
+  let mut keep_end_idx = keep_start_idx;
+  for (i, token) in tokens.data.iter().enumerate() {
+    if token.delta_line != 0 {
+      character = 0;
+    }
+    line += token.delta_line;
+    character += token.delta_start;
+    let token_start = lsp::Position::new(line, character);
+    if i < keep_start_idx && token_start >= range.start {
+      keep_start_idx = i;
+      first_token_line = line;
+      first_token_char = character;
+    }
+    if token_start > range.end {
+      keep_end_idx = i;
+      break;
+    }
+  }
+  if keep_end_idx == keep_start_idx {
+    return SemanticTokens {
+      result_id: None,
+      data: Vec::new(),
+    };
+  }
+
+  let mut data = tokens.data[keep_start_idx..keep_end_idx].to_vec();
+  // we need to adjust the delta_line and delta_start on the first token
+  // as it is relative to 0 now, not the previous token
+  let first_token = &mut data[0];
+  first_token.delta_line = first_token_line;
+  first_token.delta_start = first_token_char;
+
+  SemanticTokens {
+    result_id: None,
+    data,
+  }
+}
+
 #[cfg(test)]
 mod tests {
  use super::*;
@ -352,4 +401,129 @@ mod tests {
      ]
    );
  }
+
+  #[test]
+  fn test_tokens_within_range() {
+    let mut builder = SemanticTokensBuilder::new();
+    builder.push(1, 0, 5, 0, 0);
+    builder.push(2, 1, 1, 1, 0);
+    builder.push(2, 2, 3, 2, 0);
+    builder.push(2, 5, 5, 3, 0);
+    builder.push(3, 0, 4, 4, 0);
+    builder.push(5, 2, 3, 5, 0);
+    let tokens = builder.build(None);
+    let range = lsp::Range {
+      start: lsp::Position {
+        line: 2,
+        character: 2,
+      },
+      end: lsp::Position {
+        line: 4,
+        character: 0,
+      },
+    };
+
+    let result = tokens_within_range(&tokens, range);
+
+    assert_eq!(
+      result.data,
+      vec![
+        // line 2 char 2
+        SemanticToken {
+          delta_line: 2,
+          delta_start: 2,
+          length: 3,
+          token_type: 2,
+          token_modifiers_bitset: 0
+        },
+        // line 2 char 5
+        SemanticToken {
+          delta_line: 0,
+          delta_start: 3,
+          length: 5,
+          token_type: 3,
+          token_modifiers_bitset: 0
+        },
+        // line 3 char 0
+        SemanticToken {
+          delta_line: 1,
+          delta_start: 0,
+          length: 4,
+          token_type: 4,
+          token_modifiers_bitset: 0
+        }
+      ]
+    );
+  }
+
+  #[test]
+  fn test_tokens_within_range_include_end() {
+    let mut builder = SemanticTokensBuilder::new();
+    builder.push(1, 0, 1, 0, 0);
+    builder.push(2, 1, 2, 1, 0);
+    builder.push(2, 3, 3, 2, 0);
+    builder.push(3, 0, 4, 3, 0);
+    let tokens = builder.build(None);
+    let range = lsp::Range {
+      start: lsp::Position {
+        line: 2,
+        character: 2,
+      },
+      end: lsp::Position {
+        line: 3,
+        character: 4,
+      },
+    };
+    let result = tokens_within_range(&tokens, range);
+
+    assert_eq!(
+      result.data,
+      vec![
+        // line 2 char 3
+        SemanticToken {
+          delta_line: 2,
+          delta_start: 3,
+          length: 3,
+          token_type: 2,
+          token_modifiers_bitset: 0
+        },
+        // line 3 char 0
+        SemanticToken {
+          delta_line: 1,
+          delta_start: 0,
+          length: 4,
+          token_type: 3,
+          token_modifiers_bitset: 0
+        }
+      ]
+    );
+  }
+
+  #[test]
+  fn test_tokens_within_range_empty() {
+    let mut builder = SemanticTokensBuilder::new();
+    builder.push(1, 0, 1, 0, 0);
+    builder.push(2, 1, 2, 1, 0);
+    builder.push(2, 3, 3, 2, 0);
+    builder.push(3, 0, 4, 3, 0);
+    let tokens = builder.build(None);
+    let range = lsp::Range {
+      start: lsp::Position {
+        line: 3,
+        character: 2,
+      },
+      end: lsp::Position {
+        line: 3,
+        character: 4,
+      },
+    };
+    let result = tokens_within_range(&tokens, range);
+
+    assert_eq!(result.data, vec![]);
+
+    assert_eq!(
+      tokens_within_range(&SemanticTokens::default(), range).data,
+      vec![]
+    );
+  }
 }
--- a/tests/integration/lsp_tests.rs
+++ b/tests/integration/lsp_tests.rs
@ -12698,3 +12698,87 @@ fn lsp_ts_code_fix_any_param() {

  panic!("failed to find 'Infer parameter types from usage' fix in fixes: {fixes:#?}");
 }
+
+#[test]
+fn lsp_semantic_token_caching() {
+  let context = TestContextBuilder::new().use_temp_cwd().build();
+  let temp_dir = context.temp_dir().path();
+
+  let mut client: LspClient = context
+    .new_lsp_command()
+    .collect_perf()
+    .set_root_dir(temp_dir.clone())
+    .build();
+  client.initialize_default();
+
+  let a = source_file(
+    temp_dir.join("a.ts"),
+    r#"
+    export const a = 1;
+    export const b = 2;
+    export const bar = () => "bar";
+    function foo(fun: (number, number, number) => number, c: number) {
+      const double = (x) => x * 2;
+      return fun(double(a), b, c);
+    }"#,
+  );
+
+  client.did_open_file(&a);
+
+  // requesting a range won't cache the tokens, so this will
+  // be computed
+  let res = client.write_request(
+    "textDocument/semanticTokens/range",
+    json!({
+      "textDocument": a.identifier(),
+      "range": {
+        "start": a.range_of("const bar").start,
+        "end": a.range_of("}").end,
+      }
+    }),
+  );
+
+  assert_eq!(
+    client
+      .perf()
+      .measure_count("tsc.request.getEncodedSemanticClassifications"),
+    1,
+  );
+
+  // requesting for the full doc should compute and cache the tokens
+  let _full = client.write_request(
+    "textDocument/semanticTokens/full",
+    json!({
+      "textDocument": a.identifier(),
+    }),
+  );
+
+  assert_eq!(
+    client
+      .perf()
+      .measure_count("tsc.request.getEncodedSemanticClassifications"),
+    2,
+  );
+
+  // use the cached tokens
+  let res_cached = client.write_request(
+    "textDocument/semanticTokens/range",
+    json!({
+      "textDocument": a.identifier(),
+      "range": {
+        "start": a.range_of("const bar").start,
+        "end": a.range_of("}").end,
+      }
+    }),
+  );
+
+  // make sure we actually used the cache
+  assert_eq!(
+    client
+      .perf()
+      .measure_count("tsc.request.getEncodedSemanticClassifications"),
+    2,
+  );
+
+  assert_eq!(res, res_cached);
+}
--- a/tests/util/server/src/lsp.rs
+++ b/tests/util/server/src/lsp.rs
@ -470,6 +470,7 @@ pub struct LspClientBuilder {
  use_diagnostic_sync: bool,
  deno_dir: TempDir,
  envs: HashMap<OsString, OsString>,
+  collect_perf: bool,
 }

 impl LspClientBuilder {
@ -488,6 +489,7 @@ impl LspClientBuilder {
      use_diagnostic_sync: true,
      deno_dir,
      envs: Default::default(),
+      collect_perf: false,
    }
  }

@ -514,6 +516,15 @@ impl LspClientBuilder {
    self
  }

+  /// Whether to collect performance records (marks / measures, as emitted
+  /// by the lsp in the `performance` module).
+  /// Implies `capture_stderr`.
+  pub fn collect_perf(mut self) -> Self {
+    self.capture_stderr = true;
+    self.collect_perf = true;
+    self
+  }
+
  /// Whether to use the synchronization messages to better sync diagnostics
  /// between the test client and server.
  pub fn use_diagnostic_sync(mut self, value: bool) -> Self {
@ -577,10 +588,12 @@ impl LspClientBuilder {
    let stdin = child.stdin.take().unwrap();
    let writer = io::BufWriter::new(stdin);

-    let stderr_lines_rx = if self.capture_stderr {
+    let (stderr_lines_rx, perf_rx) = if self.capture_stderr {
      let stderr = child.stderr.take().unwrap();
      let print_stderr = self.print_stderr;
      let (tx, rx) = mpsc::channel::<String>();
+      let (perf_tx, perf_rx) =
+        self.collect_perf.then(mpsc::channel::<PerfRecord>).unzip();
      std::thread::spawn(move || {
        let stderr = BufReader::new(stderr);
        for line in stderr.lines() {
@ -589,6 +602,22 @@ impl LspClientBuilder {
              if print_stderr {
                eprintln!("{}", line);
              }
+              if let Some(tx) = perf_tx.as_ref() {
+                // look for perf records
+                if line.starts_with('{') && line.ends_with("},") {
+                  match serde_json::from_str::<PerfRecord>(
+                    line.trim_end_matches(','),
+                  ) {
+                    Ok(record) => {
+                      tx.send(record).unwrap();
+                      continue;
+                    }
+                    Err(err) => {
+                      eprintln!("failed to parse perf record: {:#}", err);
+                    }
+                  }
+                }
+              }
              tx.send(line).unwrap();
            }
            Err(err) => {
@ -597,9 +626,9 @@ impl LspClientBuilder {
          }
        }
      });
-      Some(rx)
+      (Some(rx), perf_rx)
    } else {
-      None
+      (None, None)
    };

    Ok(LspClient {
@ -613,10 +642,76 @@ impl LspClientBuilder {
      stderr_lines_rx,
      config: json!("{}"),
      supports_workspace_configuration: false,
+      perf: perf_rx.map(Perf::new),
    })
  }
 }

+#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
+#[serde(rename_all = "camelCase", tag = "type")]
+/// A performance record, emitted by the `lsp::performance`
+/// module.
+pub enum PerfRecord {
+  Mark(PerfMark),
+  Measure(PerfMeasure),
+}
+
+#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
+#[serde(rename_all = "camelCase")]
+pub struct PerfMeasure {
+  name: String,
+  count: u32,
+  duration: f64,
+}
+
+#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
+#[serde(rename_all = "camelCase")]
+pub struct PerfMark {
+  name: String,
+  #[serde(default)]
+  count: Option<u32>,
+  #[serde(default)]
+  args: Option<Value>,
+}
+
+#[derive(Debug)]
+pub struct Perf {
+  records: Vec<PerfRecord>,
+  measures_counts: HashMap<String, u32>,
+  rx: mpsc::Receiver<PerfRecord>,
+}
+
+impl Perf {
+  fn new(rx: mpsc::Receiver<PerfRecord>) -> Self {
+    Self {
+      records: Default::default(),
+      measures_counts: Default::default(),
+      rx,
+    }
+  }
+  fn drain(&mut self) {
+    while let Ok(record) = self.rx.try_recv() {
+      if let PerfRecord::Measure(measure) = &record {
+        *self
+          .measures_counts
+          .entry(measure.name.clone())
+          .or_default() += 1;
+      }
+      self.records.push(record);
+    }
+  }
+  pub fn measures(&self) -> impl IntoIterator<Item = &PerfMeasure> {
+    self.records.iter().filter_map(|record| match record {
+      PerfRecord::Measure(measure) => Some(measure),
+      _ => None,
+    })
+  }
+
+  pub fn measure_count(&self, name: &str) -> u32 {
+    self.measures_counts.get(name).copied().unwrap_or_default()
+  }
+}
+
 pub struct LspClient {
  child: Child,
  reader: LspStdoutReader,
@ -628,6 +723,7 @@ pub struct LspClient {
  stderr_lines_rx: Option<mpsc::Receiver<String>>,
  config: serde_json::Value,
  supports_workspace_configuration: bool,
+  perf: Option<Perf>,
 }

 impl Drop for LspClient {
@ -661,6 +757,15 @@ impl LspClient {
    self.reader.pending_len()
  }

+  pub fn perf(&mut self) -> &Perf {
+    let perf = self
+      .perf
+      .as_mut()
+      .expect("must setup with client_builder.collect_perf()");
+    perf.drain();
+    perf
+  }
+
  #[track_caller]
  pub fn wait_until_stderr_line(
    &self,
@ -733,6 +838,9 @@ impl LspClient {
        "tlsCertificate": null,
        "unsafelyIgnoreCertificateErrors": null,
        "unstable": false,
+        // setting this causes performance records to be logged
+        // to stderr
+        "internalDebug": self.perf.is_some(),
      } }),
    )
  }