Skip to content

Commit

Permalink
fix(server): support for unicode graphemes
Browse files Browse the repository at this point in the history
  • Loading branch information
Jamalam360 committed Apr 21, 2023
1 parent efee547 commit 83f5555
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 30 deletions.
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion TEST_CASES.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ These are manual tests that should be performed to ensure the server is working
- Check that the unused rule diagnostic works, with and without the `pestIdeTools.alwaysUsedRuleNames` configuration.
- Check go to definition and find references works correctly.
- Check that renaming rules works as expected.
- Check the CJK characters example works. Attempt to hover over a CJK rule.
- Check the CJK characters example works. Attempt to hover over a CJK rule and confirm that the server does not crash.

1 change: 1 addition & 0 deletions language-server/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
tokio = { version = "1.13", features = ["full"] }
tower-lsp = "0.19"
unicode-segmentation = "1.10"
47 changes: 28 additions & 19 deletions language-server/src/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use tower_lsp::lsp_types::{
};

use pest_meta::parser::{self, Rule};
use unicode_segmentation::UnicodeSegmentation;

pub type Documents = HashMap<Url, TextDocumentItem>;
pub type Diagnostics = HashMap<Url, PublishDiagnosticsParams>;
Expand Down Expand Up @@ -98,35 +99,35 @@ pub trait FindWordRange {

impl FindWordRange for &str {
fn get_word_range_at_idx(self, search_idx: usize) -> std::ops::Range<usize> {
fn is_identifier(c: &char) -> bool {
fn is_identifier(c: char) -> bool {
!(c.is_whitespace()
|| *c == '*'
|| *c == '+'
|| *c == '?'
|| *c == '!'
|| *c == '&'
|| *c == '~'
|| *c == '{'
|| *c == '}'
|| *c == '['
|| *c == ']'
|| *c == '('
|| *c == ')')
|| c == '*'
|| c == '+'
|| c == '?'
|| c == '!'
|| c == '&'
|| c == '~'
|| c == '{'
|| c == '}'
|| c == '['
|| c == ']'
|| c == '('
|| c == ')')
}

let next = self[search_idx..]
.chars()
let next = str_range(&self, &(search_idx..self.len()))
.graphemes(true)
.enumerate()
.find(|(_index, char)| !is_identifier(char))
.find(|(_index, char)| !is_identifier(char.chars().next().unwrap_or(' ')))
.map(|(index, _char)| index)
.map(|index| search_idx + index)
.unwrap_or(self.len());

let preceding = self[0..search_idx]
.chars()
let preceding = str_range(&self, &(0..search_idx))
.graphemes(true)
.rev()
.enumerate()
.find(|(_index, char)| !is_identifier(char))
.find(|(_index, char)| !is_identifier(char.chars().next().unwrap_or(' ')))
.map(|(index, _char)| index)
.map(|index| search_idx - index)
.unwrap_or(0);
Expand All @@ -135,6 +136,14 @@ impl FindWordRange for &str {
}
}

/// Builds an owned string from the grapheme clusters of `s` selected by `range`.
///
/// `range` is measured in extended grapheme clusters (user-perceived characters),
/// not in bytes or `char`s. Positions past the end of `s` are simply ignored, so an
/// out-of-bounds range yields a shorter (possibly empty) string instead of panicking.
pub fn str_range(s: &str, range: &std::ops::Range<usize>) -> String {
    let mut selected = String::new();
    let clusters = s.graphemes(true).skip(range.start).take(range.len());
    for cluster in clusters {
        selected.push_str(cluster);
    }
    selected
}

pub trait IntoDiagnostics {
fn into_diagnostics(self) -> Vec<Diagnostic>;
}
Expand Down
20 changes: 11 additions & 9 deletions language-server/src/lsp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ use crate::{
analysis::{Analysis, RuleAnalysis},
config::Config,
helpers::{
create_empty_diagnostics, Diagnostics, Documents, FindWordRange, IntoDiagnostics,
IntoRangeWithLine,
create_empty_diagnostics, str_range, Diagnostics, Documents, FindWordRange,
IntoDiagnostics, IntoRangeWithLine,
},
};
use crate::{builtins::get_builtin_description, update_checker::check_for_updates};
Expand Down Expand Up @@ -338,7 +338,7 @@ impl PestLanguageServerImpl {
.nth(text_document_position.position.line as usize)
.unwrap_or("");
let range = line.get_word_range_at_idx(text_document_position.position.character as usize);
let partial_identifier = &line[range];
let partial_identifier = &str_range(line, &range);

if let Some(analysis) = self.analyses.get(&document.uri) {
return Ok(Some(CompletionResponse::Array(
Expand Down Expand Up @@ -374,7 +374,7 @@ impl PestLanguageServerImpl {
.unwrap_or("");
let range =
line.get_word_range_at_idx(text_document_position_params.position.character as usize);
let identifier = &line[range.clone()];
let identifier = &str_range(line, &range);

if let Some(description) = get_builtin_description(identifier) {
return Ok(Some(Hover {
Expand Down Expand Up @@ -413,8 +413,10 @@ impl PestLanguageServerImpl {
.lines()
.nth(text_document_position.position.line as usize)
.unwrap_or("");
let old_identifier =
&line[line.get_word_range_at_idx(text_document_position.position.character as usize)];
let old_identifier = &str_range(
line,
&line.get_word_range_at_idx(text_document_position.position.character as usize),
);
let mut edits = Vec::new();

if let Some(occurrences) = self
Expand Down Expand Up @@ -463,7 +465,7 @@ impl PestLanguageServerImpl {

let range =
line.get_word_range_at_idx(text_document_position_params.position.character as usize);
let identifier = &line[range];
let identifier = &str_range(line, &range);

if let Some(location) = self
.get_rule_analysis(&document.uri, identifier)
Expand Down Expand Up @@ -498,7 +500,7 @@ impl PestLanguageServerImpl {
.unwrap_or("");
let range =
line.get_word_range_at_idx(text_document_position_params.position.character as usize);
let identifier = &line[range];
let identifier = &str_range(line, &range);

if let Some(location) = self
.get_rule_analysis(&document.uri, identifier)
Expand Down Expand Up @@ -529,7 +531,7 @@ impl PestLanguageServerImpl {
.nth(text_document_position.position.line as usize)
.unwrap_or("");
let range = line.get_word_range_at_idx(text_document_position.position.character as usize);
let identifier = &line[range];
let identifier = &str_range(line, &range);

Ok(self
.get_rule_analysis(&document.uri, identifier)
Expand Down
3 changes: 2 additions & 1 deletion vscode/tests/cjk.pest
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// A test for CJK/non-ASCII characters

root = { inner+ ~ "中文" }
root = { inner+ ~ "中文" ~ ANY }
/// No idea what these characters mean, just random ones.
inner = { "文" | "中" }
>

0 comments on commit 83f5555

Please sign in to comment.