// litelighter/src/tokenizer/tokenizer.rs

use lua_patterns::LuaPattern;
use pcre2::bytes::Regex;

use crate::syntax::types::{
    Rule,
    RegexOrPattern,
    PatternTypes,
    Syntax
};
use crate::tokenizer::{
    state::State,
    types::{
        TokenKind,
        Token
    }
};
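
/// Applies the rules of a `Syntax` to source text, collecting the resulting
/// tokens in `syntax_tree` and merging adjacent tokens where possible.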
pub struct Tokenizer<'t> {
    pub syntax: Syntax<'t>,
    syntax_tree: Vec<Token<'t>>,
    state: State
}
impl <'t>Tokenizer<'t> {
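    /// Creates a tokenizer for the given syntax definition.
    ///
    /// ```ignore
    /// // A minimal sketch: assumes a `Syntax` value built elsewhere, since
    /// // its fields are defined in `crate::syntax::types`.
    /// let tokenizer = Tokenizer::new(syntax);
    /// ```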
    pub fn new(syntax: Syntax<'t>) -> Self {
        Tokenizer {
            syntax,
            syntax_tree: Vec::new(),
            state: State::new()
        }
    }
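
    /// Appends `value` to the last token when the two can be merged (same
    /// kind, or the previous value is only whitespace); otherwise pushes a
    /// new token.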
    fn push_token(&mut self, token_type: TokenKind<'t>, value: String) {
        if let Some(last_token) = self.syntax_tree.last_mut() {
            // "^%s*$" matches values that are empty or whitespace-only.
            let mut pattern = LuaPattern::new("^%s*$");
            if last_token.token_type == token_type || pattern.matches(&last_token.value) {
                // A merged whitespace run takes on the kind of the new token.
                last_token.token_type = token_type;
                last_token.value.push_str(&value);
                return;
            }
        }
        self.syntax_tree.push(Token {
            token_type,
            value
        });
    }
    /// `offset` - where to start matching.
    /// `at_start` - forces matching to anchor at the start of the text.
    /// `close` - whether to match the rule's closing pattern.
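    ///
    /// Detects a leading `^` anchor: it sets `rule.whole_line` and is
    /// stripped from single patterns so anchoring stays under the caller's
    /// control.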
    fn find_text(text: &str, rule: &mut Rule<'t>, offset: usize, at_start: bool, close: bool) {
        // Multi-part patterns are stored as [open, close, ...]; pick the
        // closing part when requested (zero-based, unlike Lua).
        let rule_index = if close { 1 } else { 0 };
        let target_pattern: &RegexOrPattern = match rule.pattern {
            PatternTypes::Type1(ref pattern) => pattern,
            PatternTypes::Type2(ref patterns) => &patterns[rule_index],
            PatternTypes::Type3(ref patterns) => &patterns[rule_index]
        };
        let target_as_string = match target_pattern {
            RegexOrPattern::Regex(regex) => regex.as_str().to_string(),
            RegexOrPattern::Pattern(pattern) => pattern.as_string()
        };
        // A leading "^" (escaped as "%^" in the Lua pattern) marks the rule
        // as anchored to the start of the line.
        rule.whole_line = LuaPattern::new("^%^").match_maybe(&target_as_string).is_some();
        if rule.whole_line {
            // Recompile without the "^" so anchoring is applied explicitly
            // via `whole_line` rather than baked into the pattern.
            let unanchored = &target_as_string[1..];
            match &mut rule.pattern {
                PatternTypes::Type1(RegexOrPattern::Regex(regex)) => {
                    *regex = Regex::new(unanchored).unwrap();
                },
                PatternTypes::Type1(RegexOrPattern::Pattern(pattern)) => {
                    *pattern = LuaPattern::new(unanchored);
                },
                // TODO: recompile the matched part of Type2/Type3 patterns too.
                _ => {}
            }
        }
        // TODO: perform the match in `text` at `offset` (honoring `at_start`)
        // and return its bounds.
    }
    pub fn tokenize() {
        // TODO: walk the input with `find_text`, pushing results through
        // `push_token` and tracking `state` across lines.
    }
}
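
#[cfg(test)]
mod tests {
    use lua_patterns::LuaPattern;

    // A minimal sketch exercising only the Lua patterns used above; no
    // `Syntax` value (whose construction lives elsewhere) is required.

    // `push_token` treats an empty or whitespace-only previous value as
    // mergeable with the next token.
    #[test]
    fn whitespace_only_values_are_mergeable() {
        let mut pattern = LuaPattern::new("^%s*$");
        assert!(pattern.matches("    "));
        assert!(!pattern.matches("fn main"));
    }

    // `find_text` detects a leading "^" anchor with the pattern "^%^".
    #[test]
    fn leading_caret_is_detected() {
        assert!(LuaPattern::new("^%^").matches("^%s+"));
        assert!(!LuaPattern::new("^%^").matches("%s+"));
    }
}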