From 207c25ad2f8b737d8a4d5fd5e416cadcc621a93b Mon Sep 17 00:00:00 2001 From: Steve Donovan Date: Tue, 30 May 2017 14:20:12 +0200 Subject: [PATCH] 0.2.0 candidate: static verification of Lua string patterns --- Cargo.toml | 4 ++- readme.md | 13 +++++--- src/lib.rs | 84 ++++++++++++++++++++++++++++++++++++++++-------- src/lua-str.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 169 insertions(+), 21 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 7652a1a..17f77a9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lua-patterns" -version = "0.1.1" +version = "0.2.0" authors = ["steve donovan "] description = "Binding to Lua String Patterns" build = "build.rs" @@ -10,6 +10,8 @@ documentation = "https://docs.rs/lua-patterns" keywords = ["string","matching","lua"] +categories = ["parsing","api-bindings"] + [build-dependencies] gcc="0.3" diff --git a/readme.md b/readme.md index 7f056c9..35b0545 100644 --- a/readme.md +++ b/readme.md @@ -37,7 +37,10 @@ assert_eq!(r.start, 6); assert_eq!(r.end, 9); ``` This not in itself impressive, since it can be done with the string `find` -method, but once we start using patterns it gets more exciting, especially +method. (`new` will panic if you feed it a bad pattern, so use `new_try` if +you want more control.) + +Once we start using patterns it gets more exciting, especially with _captures_: ```rust @@ -202,8 +205,8 @@ let patt = LuaPatternBuilder::new() let mut m = LuaPattern::from_bytes(&patt); // picks up "DE2424BE" ``` +> Static verification: this version attempts to verify string patterns. If you +> want errors, use `new_try` and `from_bytes_try`, otherwise the constructors panic. +> If a match panics after successful verification, it is a __BUG__ - please +> report the offending pattern. -> **PANICKING** Currently this library will behave badly and panic -> if the Lua pattern is malformed. There is no compilation step, -> unlike regexps, but I intend to provide a static validation -> to convert panics into errors, as good practice demands. diff --git a/src/lib.rs b/src/lib.rs index 9741acc..fbad099 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,6 +10,8 @@ //! [the README](https://github.com/stevedonovan/lua-patterns/blob/master/readme.md) //! for more discussion. //! +//! [LuaPattern](struct.LuaPattern.html) implements the public API. +//! //! ## Examples //! //! ```rust @@ -52,6 +54,24 @@ struct LuaMatch { static LUA_MAXCAPTURES: usize = 32; +use std::fmt; +use std::error::Error; + +#[derive(Debug,PartialEq)] +pub struct PatternError(pub String); + +impl fmt::Display for PatternError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f,"{}",self.0) + } +} + +impl Error for PatternError { + fn description(&self) -> &str { + &self.0 + } +} + #[link(name = "lua-str", kind="static")] extern { fn str_match ( @@ -59,6 +79,10 @@ extern { err_msg: *mut *mut c_char, mm: *mut LuaMatch ) -> c_int; + + fn str_check ( + p: *const u8, lp: c_uint + ) -> *const i8; } /// Represents a Lua string pattern and the results of a match @@ -69,17 +93,34 @@ pub struct LuaPattern<'a> { } impl <'a> LuaPattern<'a> { - /// Create a new Lua pattern from a string - pub fn new(patt: &'a str) -> LuaPattern<'a> { - LuaPattern::from_bytes(patt.as_bytes()) - } - - /// Create a new Lua pattern from a slice of bytes - pub fn from_bytes (bytes: &'a [u8]) -> LuaPattern<'a> { + /// Maybe create a new Lua pattern from a slice of bytes + pub fn from_bytes_try (bytes: &'a [u8]) -> Result,PatternError> { let mut matches: Vec = Vec::with_capacity(LUA_MAXCAPTURES); + unsafe { + let res = str_check(bytes.as_ptr(),bytes.len() as c_uint); + if ! res.is_null() { + let sres = CStr::from_ptr(res).to_str().unwrap().to_string(); + return Err(PatternError(sres)); + } + } unsafe {matches.set_len(LUA_MAXCAPTURES);} - LuaPattern{patt: bytes, matches: matches, n_match: 0} + Ok(LuaPattern{patt: bytes, matches: matches, n_match: 0}) } + + /// Maybe create a new Lua pattern from a string + pub fn new_try(patt: &'a str) -> Result,PatternError> { + LuaPattern::from_bytes_try(patt.as_bytes()) + } + + /// Create a new Lua pattern from a string, panicking if bad + pub fn new(patt: &'a str) -> LuaPattern<'a> { + LuaPattern::new_try(patt).expect("bad pattern") + } + + /// Create a new Lua pattern from a slice of bytes, panicking if bad + pub fn from_bytes (bytes: &'a [u8]) -> LuaPattern<'a> { + LuaPattern::from_bytes_try(bytes).expect("bad pattern") + } /// Match a slice of bytes with a pattern /// @@ -101,7 +142,7 @@ impl <'a> LuaPattern<'a> { err_msg, self.matches.as_mut_ptr()) as usize; let ep = *err_msg; if ! ep.is_null() { - panic!(format!("lua-pattern {:?}",CStr::from_ptr(ep))); + panic!(format!("REPORT AS BUG: lua-pattern {:?}",CStr::from_ptr(ep))); } } @@ -687,8 +728,6 @@ mod tests { assert_eq!(iter.next().unwrap().get(1), "one"); assert_eq!(iter.next().unwrap().get(1), "two"); assert_eq!(iter.next().unwrap().get(1), "three"); - - } #[test] @@ -719,8 +758,25 @@ mod tests { let mut m = LuaPattern::new("(%S+)%s*=%s*(%S+);%s*"); let res = m.gsub("a=2; b=3; c = 4;", "'%2':%1 "); assert_eq!(res,"'2':a '3':b '4':c "); - - - + } + + #[test] + fn bad_patterns() { + let bad = [ + ("bonzo %","malformed pattern (ends with '%')"), + ("bonzo (dog%(","unfinished capture"), + ("alles [%a%[","malformed pattern (missing ']')"), + ("bonzo (dog (cat)","unfinished capture"), + ("frodo %f[%A","malformed pattern (missing ']')"), + ("frodo (1) (2(3)%2)%1","invalid capture index %2"), + ]; + for p in bad.iter() { + let res = LuaPattern::new_try(p.0); + if let Err(e) = res { + assert_eq!(e, PatternError(p.1.into())); + } else { + panic!("false positive"); + } + } } } diff --git a/src/lua-str.c b/src/lua-str.c index 0ca62f0..42d3b6d 100644 --- a/src/lua-str.c +++ b/src/lua-str.c @@ -280,7 +280,7 @@ static const char *match (MatchState *ms, const char *s, const char *p) { const char *ep; char previous; p += 2; if (*p != '[') - throw_error(ms,"missing '[' after '%f' in pattern"); + throw_error(ms,"missing '[' after '%%f' in pattern"); ep = classend(ms, p); /* points to what is next */ previous = (s == ms->src_init) ? '\0' : *(s - 1); if (!matchbracketclass(uchar(previous), p, ep - 1) && @@ -412,3 +412,90 @@ int str_match (const char *s, unsigned int ls, const char *p, unsigned int lp, c return 0; } +static void str_match_check(MatchState *ms, const char *p) { + char ch; + int level_stack[LUA_MAXCAPTURES]; + int stack_idx = 0; + int current_level = 0; + while (p < ms->p_end && (ch=*p++)) { + switch (ch) { + case L_ESC: { + switch ((ch=*p++)) { + case 'b': { + p++; + if (p >= ms->p_end) throw_error(ms,"malformed pattern " + "(missing arguments to '%b')"); + } break; + case 'f': { + if (*p != '[') throw_error(ms,"missing '[' after '%%f' in pattern"); + --p; // so we see [...] + } break; + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + case '8': case '9': { + int l = uchar(ch) - '1'; // + if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED) + throw_error(ms,"invalid capture index %%%d", l + 1); + --p; + } break; + } + } break; + case '[': { + do { /* look for a `]' */ + if (p == ms->p_end) + throw_error(ms,"malformed pattern (missing ']')"); + if (*(p++) == L_ESC && p < ms->p_end) + p++; /* skip escapes (e.g. `%]') */ + } while (*p != ']'); + } break; + case '(': { + if (*p != ')') { /* not a position capture */ + level_stack[stack_idx++] = ms->level; + ms->capture[ms->level].len = CAP_UNFINISHED; + ms->level ++; /* level counts total number of captures */ + if (ms->level >= LUA_MAXCAPTURES) throw_error(ms,"too many captures"); + } else { + ++p; + } + } break; + case ')': { + if (stack_idx == 0) + throw_error(ms, "no open capture"); + ms->capture[level_stack[--stack_idx]].len = CAP_POSITION; + } break; + default: { + + } + } + } + if (stack_idx > 0) { + throw_error(ms,"unfinished capture"); + } +} + +const char *str_check (const char *p, unsigned int lp) { + MatchState ms; + int anchor = (*p == '^'); + if (anchor) { + p++; /* skip anchor character */ + } + + memset(ms.msg_buff,0,sizeof(ms.msg_buff)); + + if (setjmp(ms.jump_buf) != 0) { + return strdup(ms.msg_buff); + } + + ms.level = 0; + ms.matchdepth = MAXCCALLS; + ms.p_end = p + lp; + + if ( *(ms.p_end-1) == '%') { + throw_error(&ms,"malformed pattern (ends with '%')"); + } + + str_match_check(&ms,p); + + return NULL; +} +