use lua_patterns::LuaPattern;
use pcre2::bytes::Regex;
use crate::syntax::types::{Rule, RegexOrPattern, PatternTypes, Syntax};
use crate::tokenizer::{
    state::State,
    types::{TokenKind, Token},
};

/// Incremental tokenizer: consumes text against a [`Syntax`] definition and
/// accumulates the resulting tokens in `syntax_tree`.
pub struct Tokenizer<'t> {
    pub syntax: Syntax<'t>,
    /// Tokens produced so far, in source order.
    syntax_tree: Vec<Token<'t>>,
    /// Internal tokenizing state (e.g. which multi-line rule is open).
    state: State,
}

impl<'t> Tokenizer<'t> {
    /// Creates a tokenizer for the given syntax with an empty token tree
    /// and a fresh state.
    pub fn new(syntax: Syntax<'t>) -> Self {
        Tokenizer {
            syntax,
            syntax_tree: Vec::new(),
            state: State::new(),
        }
    }

    /// Appends a token to the tree.
    ///
    /// If the previous token has the same kind, or its value is purely
    /// whitespace (Lua pattern `^%s*$`), the new value is merged into it
    /// instead of starting a new token; otherwise a new token is pushed.
    fn push_token(&mut self, token_type: TokenKind<'t>, value: String) {
        // Decide first, mutate after: keeping the immutable `last()` borrow
        // out of the mutation path avoids a borrow conflict with `last_mut()`.
        let merge_with_last = match self.syntax_tree.last() {
            Some(last) => {
                last.token_type == token_type
                    || LuaPattern::new("^%s*$").matches(&last.value)
            }
            None => false,
        };

        if merge_with_last {
            // `merge_with_last` is only true when the tree is non-empty.
            self.syntax_tree
                .last_mut()
                .expect("tree is non-empty when merging")
                .value
                .push_str(&value);
        } else {
            self.syntax_tree.push(Token { token_type, value });
        }
    }

    /// Prepares `rule` for matching against `text`.
    ///
    /// * `text` - The text to match against. NOTE(review): currently unused.
    /// * `offset` - Where to start matching. NOTE(review): currently unused.
    /// * `at_start` - Forces matching at the start of text. NOTE(review): currently unused.
    /// * `close` - Whether the rule is a closing one; selects which
    ///   sub-pattern of a `Type2`/`Type3` rule is used (index 2 when closing,
    ///   index 1 otherwise).
    ///
    /// Sets `rule.whole_line` when the selected pattern starts with a `^`
    /// anchor, and for `Type1` rules recompiles the pattern in place.
    fn find_text(text: &str, rule: &mut Rule<'t>, offset: u8, at_start: bool, close: bool) {
        let rule_index = if close { 2 } else { 1 };

        // Pick the sub-pattern this call should operate on.
        let target_pattern: &RegexOrPattern = match rule.pattern {
            PatternTypes::Type1(ref pattern) => pattern,
            PatternTypes::Type2(ref pattern) => &pattern[rule_index],
            PatternTypes::Type3(ref pattern) => &pattern[rule_index],
        };

        let target_as_string = match target_pattern {
            RegexOrPattern::Regex(regex) => regex.as_str().to_string(),
            RegexOrPattern::Pattern(pattern) => pattern.as_string(),
        };

        // A leading "^" anchors the rule to the start of the line.
        rule.whole_line = LuaPattern::new("^%^")
            .match_maybe(&target_as_string)
            .is_some();

        if rule.whole_line {
            // Recompile the pattern from its textual form. Only Type1 rules
            // are rebuilt; multi-pattern rules are left untouched.
            match &mut rule.pattern {
                PatternTypes::Type1(RegexOrPattern::Regex(regex)) => {
                    *regex = Regex::new(&target_as_string).unwrap();
                }
                PatternTypes::Type1(RegexOrPattern::Pattern(pattern)) => {
                    // NOTE(review): `LuaPattern::new` borrows the local
                    // `target_as_string`; confirm the crate offers an owning
                    // constructor, otherwise this cannot outlive this call.
                    *pattern = LuaPattern::new(&target_as_string);
                }
                _ => {}
            }
        }
    }

    /// Entry point for tokenizing input text. NOTE(review): not yet
    /// implemented in this revision.
    pub fn tokenize() {}
}