102 lines
2.7 KiB
Rust
102 lines
2.7 KiB
Rust
|
use lua_patterns::LuaPattern;
|
||
|
use pcre2::bytes::Regex;
|
||
|
|
||
|
use crate::syntax::types::{
|
||
|
Rule,
|
||
|
RegexOrPattern,
|
||
|
PatternTypes,
|
||
|
Syntax
|
||
|
};
|
||
|
|
||
|
use crate::tokenizer::{
|
||
|
state::State,
|
||
|
types::{
|
||
|
TokenKind,
|
||
|
Token
|
||
|
}
|
||
|
};
|
||
|
|
||
|
pub struct Tokenizer<'t> {
|
||
|
pub syntax: Syntax<'t>,
|
||
|
syntax_tree: Vec<Token<'t>>,
|
||
|
state: State
|
||
|
}
|
||
|
|
||
|
impl <'t>Tokenizer<'t> {
|
||
|
pub fn new(syntax: Syntax<'t>) -> Self {
|
||
|
Tokenizer {
|
||
|
syntax,
|
||
|
syntax_tree: Vec::new(),
|
||
|
state: State::new()
|
||
|
}
|
||
|
}
|
||
|
|
||
|
fn push_token(&mut self, token_type: TokenKind<'t>, value: String) {
|
||
|
match &self.syntax_tree.last() {
|
||
|
Some(last_token) => {
|
||
|
let mut pattern = LuaPattern::new("^%s*$");
|
||
|
|
||
|
if last_token.token_type == token_type || pattern.matches(&last_token.value) {
|
||
|
self.syntax_tree.last_mut().unwrap().value.push_str(&value);
|
||
|
}
|
||
|
},
|
||
|
|
||
|
None => {
|
||
|
self.syntax_tree.push(Token {
|
||
|
token_type,
|
||
|
value
|
||
|
});
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/// 'offset' - Where to start matching
|
||
|
/// 'at_start' - Forces the tokenizer to start matching at the start of text
|
||
|
/// 'close' - Whether the rule is a closing one
|
||
|
fn find_text(text: &str, rule: &mut Rule<'t>, offset: u8, at_start: bool, close: bool) {
|
||
|
let rule_index = if close { 2 } else { 1 };
|
||
|
let target_pattern: &RegexOrPattern = match rule.pattern {
|
||
|
PatternTypes::Type1(ref pattern) => &pattern,
|
||
|
PatternTypes::Type2(ref pattern) => &pattern[rule_index],
|
||
|
PatternTypes::Type3(ref pattern) => &pattern[rule_index]
|
||
|
};
|
||
|
|
||
|
let target_as_string;
|
||
|
|
||
|
match target_pattern {
|
||
|
RegexOrPattern::Regex(regex) => {
|
||
|
target_as_string = regex.as_str().to_string()
|
||
|
},
|
||
|
RegexOrPattern::Pattern(pattern) => {
|
||
|
target_as_string = pattern.as_string();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Check if the pattern contains a "^"
|
||
|
let anchor_check = LuaPattern::new("^%^").match_maybe(&target_as_string);
|
||
|
|
||
|
rule.whole_line = match anchor_check {
|
||
|
Some(_) => true,
|
||
|
None => false
|
||
|
};
|
||
|
|
||
|
if rule.whole_line {
|
||
|
match &mut rule.pattern {
|
||
|
PatternTypes::Type1(RegexOrPattern::Regex(regex)) => {
|
||
|
*regex = Regex::new(&target_as_string).unwrap();
|
||
|
},
|
||
|
PatternTypes::Type1(RegexOrPattern::Pattern(pattern)) => {
|
||
|
*pattern = LuaPattern::new(&target_as_string);
|
||
|
},
|
||
|
_ => {}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
pub fn tokenize() {
|
||
|
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|