0.2.0 candidate: static verification of Lua string patterns

This commit is contained in:
Steve Donovan 2017-05-30 14:20:12 +02:00
parent 0b755ed997
commit 207c25ad2f
4 changed files with 169 additions and 21 deletions

View File

@ -1,6 +1,6 @@
[package]
name = "lua-patterns"
version = "0.1.1"
version = "0.2.0"
authors = ["steve donovan <steve.j.donovan@gmail.com>"]
description = "Binding to Lua String Patterns"
build = "build.rs"
@ -10,6 +10,8 @@ documentation = "https://docs.rs/lua-patterns"
keywords = ["string","matching","lua"]
categories = ["parsing","api-bindings"]
[build-dependencies]
gcc="0.3"

View File

@ -37,7 +37,10 @@ assert_eq!(r.start, 6);
assert_eq!(r.end, 9);
```
This is not in itself impressive, since it can be done with the string `find`
method, but once we start using patterns it gets more exciting, especially
method. (`new` will panic if you feed it a bad pattern, so use `new_try` if
you want more control.)
Once we start using patterns it gets more exciting, especially
with _captures_:
```rust
@ -202,8 +205,8 @@ let patt = LuaPatternBuilder::new()
let mut m = LuaPattern::from_bytes(&patt);
// picks up "DE2424BE"
```
> Static verification: this version attempts to verify string patterns. If you
> want errors, use `new_try` and `from_bytes_try`, otherwise the constructors panic.
> If a match panics after successful verification, it is a __BUG__ - please
> report the offending pattern.
> **PANICKING** Currently this library will behave badly and panic
> if the Lua pattern is malformed. There is no compilation step,
> unlike regexps, but I intend to provide a static validation
> to convert panics into errors, as good practice demands.

View File

@ -10,6 +10,8 @@
//! [the README](https://github.com/stevedonovan/lua-patterns/blob/master/readme.md)
//! for more discussion.
//!
//! [LuaPattern](struct.LuaPattern.html) implements the public API.
//!
//! ## Examples
//!
//! ```rust
@ -52,6 +54,24 @@ struct LuaMatch {
static LUA_MAXCAPTURES: usize = 32;
use std::fmt;
use std::error::Error;
/// Error describing why a Lua string pattern was rejected.
///
/// The wrapped `String` is the human-readable message; it is what both
/// `Display` and `Error::description` expose.
#[derive(Debug,PartialEq)]
pub struct PatternError(pub String);

impl fmt::Display for PatternError {
    /// Writes the wrapped message verbatim (formatter width/fill are not applied).
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(&self.0)
    }
}

impl Error for PatternError {
    /// Returns the wrapped message as a string slice.
    fn description(&self) -> &str {
        self.0.as_str()
    }
}
#[link(name = "lua-str", kind="static")]
extern {
fn str_match (
@ -59,6 +79,10 @@ extern {
err_msg: *mut *mut c_char,
mm: *mut LuaMatch
) -> c_int;
fn str_check (
p: *const u8, lp: c_uint
) -> *const i8;
}
/// Represents a Lua string pattern and the results of a match
@ -69,17 +93,34 @@ pub struct LuaPattern<'a> {
}
impl <'a> LuaPattern<'a> {
/// Create a new Lua pattern from a string
pub fn new(patt: &'a str) -> LuaPattern<'a> {
LuaPattern::from_bytes(patt.as_bytes())
}
/// Create a new Lua pattern from a slice of bytes
pub fn from_bytes (bytes: &'a [u8]) -> LuaPattern<'a> {
/// Maybe create a new Lua pattern from a slice of bytes
pub fn from_bytes_try (bytes: &'a [u8]) -> Result<LuaPattern<'a>,PatternError> {
let mut matches: Vec<LuaMatch> = Vec::with_capacity(LUA_MAXCAPTURES);
unsafe {
let res = str_check(bytes.as_ptr(),bytes.len() as c_uint);
if ! res.is_null() {
let sres = CStr::from_ptr(res).to_str().unwrap().to_string();
return Err(PatternError(sres));
}
}
unsafe {matches.set_len(LUA_MAXCAPTURES);}
LuaPattern{patt: bytes, matches: matches, n_match: 0}
Ok(LuaPattern{patt: bytes, matches: matches, n_match: 0})
}
/// Try to create a new Lua pattern from a string.
///
/// The string is handed to `from_bytes_try` as raw bytes, so the result is
/// whatever that constructor returns: the pattern on success, or a
/// `PatternError` carrying the verification message.
pub fn new_try(patt: &'a str) -> Result<LuaPattern<'a>,PatternError> {
    let bytes = patt.as_bytes();
    LuaPattern::from_bytes_try(bytes)
}
/// Create a new Lua pattern from a string.
///
/// # Panics
///
/// Panics with the message "bad pattern" (followed by the error) when
/// `new_try` rejects the pattern. Use `new_try` to receive the error instead.
pub fn new(patt: &'a str) -> LuaPattern<'a> {
    let checked = LuaPattern::new_try(patt);
    checked.expect("bad pattern")
}
/// Create a new Lua pattern from a slice of bytes.
///
/// # Panics
///
/// Panics with the message "bad pattern" (followed by the error) when
/// `from_bytes_try` rejects the pattern. Use `from_bytes_try` to receive
/// the error instead.
pub fn from_bytes (bytes: &'a [u8]) -> LuaPattern<'a> {
    let checked = LuaPattern::from_bytes_try(bytes);
    checked.expect("bad pattern")
}
/// Match a slice of bytes with a pattern
///
@ -101,7 +142,7 @@ impl <'a> LuaPattern<'a> {
err_msg, self.matches.as_mut_ptr()) as usize;
let ep = *err_msg;
if ! ep.is_null() {
panic!(format!("lua-pattern {:?}",CStr::from_ptr(ep)));
panic!(format!("REPORT AS BUG: lua-pattern {:?}",CStr::from_ptr(ep)));
}
}
@ -687,8 +728,6 @@ mod tests {
assert_eq!(iter.next().unwrap().get(1), "one");
assert_eq!(iter.next().unwrap().get(1), "two");
assert_eq!(iter.next().unwrap().get(1), "three");
}
#[test]
@ -719,8 +758,25 @@ mod tests {
let mut m = LuaPattern::new("(%S+)%s*=%s*(%S+);%s*");
let res = m.gsub("a=2; b=3; c = 4;", "'%2':%1 ");
assert_eq!(res,"'2':a '3':b '4':c ");
}
/// Every malformed pattern in the table must be rejected by `new_try`
/// with exactly the listed message.
#[test]
fn bad_patterns() {
    // (pattern, expected verification message)
    let cases = [
        ("bonzo %","malformed pattern (ends with '%')"),
        ("bonzo (dog%(","unfinished capture"),
        ("alles [%a%[","malformed pattern (missing ']')"),
        ("bonzo (dog (cat)","unfinished capture"),
        ("frodo %f[%A","malformed pattern (missing ']')"),
        ("frodo (1) (2(3)%2)%1","invalid capture index %2"),
    ];
    for &(patt, expected) in cases.iter() {
        match LuaPattern::new_try(patt) {
            Err(err) => assert_eq!(err, PatternError(expected.into())),
            // a bad pattern that verifies cleanly is a checker failure
            Ok(_) => panic!("false positive"),
        }
    }
}
}

View File

@ -280,7 +280,7 @@ static const char *match (MatchState *ms, const char *s, const char *p) {
const char *ep; char previous;
p += 2;
if (*p != '[')
throw_error(ms,"missing '[' after '%f' in pattern");
throw_error(ms,"missing '[' after '%%f' in pattern");
ep = classend(ms, p); /* points to what is next */
previous = (s == ms->src_init) ? '\0' : *(s - 1);
if (!matchbracketclass(uchar(previous), p, ep - 1) &&
@ -412,3 +412,90 @@ int str_match (const char *s, unsigned int ls, const char *p, unsigned int lp, c
return 0;
}
/*
 * Statically verify the pattern text in [p, ms->p_end).
 *
 * Walks the pattern once, without matching anything, and reports the first
 * structural problem through throw_error() (which does not return here: it
 * jumps back to the setjmp in str_check). Checked are: dangling escapes,
 * '%b' and '%f' arguments, back-references to missing or still-open
 * captures, unterminated sets and unbalanced captures.
 *
 * The pattern is NOT NUL-terminated by the Rust caller, so every read of *p
 * is guarded by a comparison against ms->p_end. The whole length is scanned;
 * an embedded NUL byte is treated as an ordinary pattern byte.
 *
 * Messages go through a printf-style formatter, so a literal '%' must be
 * written as "%%".
 */
static void str_match_check(MatchState *ms, const char *p) {
    char ch;
    int level_stack[LUA_MAXCAPTURES]; /* indices of captures that are still open */
    int stack_idx = 0;
    while (p < ms->p_end) {
        ch = *p++;
        switch (ch) {
        case L_ESC: {
            /* an escape needs a following character */
            if (p == ms->p_end)
                throw_error(ms, "malformed pattern (ends with '%%')");
            switch ((ch = *p++)) {
            case 'b': {
                /* '%bxy' takes two delimiter bytes; skip both so that e.g.
                   the ')' of '%b()' is not mistaken for a capture close */
                if (ms->p_end - p < 2)
                    throw_error(ms, "malformed pattern "
                                    "(missing arguments to '%%b')");
                p += 2;
            } break;
            case 'f': {
                if (p == ms->p_end || *p != '[')
                    throw_error(ms, "missing '[' after '%%f' in pattern");
                /* p already points at '['; the outer loop checks the set */
            } break;
            case '0': case '1': case '2': case '3':
            case '4': case '5': case '6': case '7':
            case '8': case '9': {
                int l = uchar(ch) - '1'; /* '%1' refers to capture index 0 */
                if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED)
                    throw_error(ms, "invalid capture index %%%d", l + 1);
            } break;
            default:
                break; /* any other escaped character is fine */
            }
        } break;
        case '[': {
            /* a leading '^' only negates the set; it is not a member */
            if (p < ms->p_end && *p == '^')
                p++;
            do { /* look for a `]' */
                if (p == ms->p_end)
                    throw_error(ms, "malformed pattern (missing ']')");
                if (*(p++) == L_ESC && p < ms->p_end)
                    p++; /* skip escapes (e.g. `%]') */
            } while (p == ms->p_end || *p != ']');
            p++; /* consume the closing ']' */
        } break;
        case '(': {
            if (p < ms->p_end && *p == ')') {
                /* position capture: complete at once, but it still occupies
                   a capture slot, so later indices stay aligned */
                ms->capture[ms->level].len = CAP_POSITION;
                ++p;
            } else {
                level_stack[stack_idx++] = ms->level;
                ms->capture[ms->level].len = CAP_UNFINISHED;
            }
            ms->level++; /* level counts total number of captures */
            if (ms->level >= LUA_MAXCAPTURES)
                throw_error(ms, "too many captures");
        } break;
        case ')': {
            if (stack_idx == 0)
                throw_error(ms, "no open capture");
            /* any value other than CAP_UNFINISHED marks the capture closed */
            ms->capture[level_stack[--stack_idx]].len = CAP_POSITION;
        } break;
        default:
            break;
        }
    }
    if (stack_idx > 0) {
        throw_error(ms, "unfinished capture");
    }
}

/*
 * Verify a Lua pattern without running a match.
 *
 * p  - pattern bytes (need not be NUL-terminated)
 * lp - number of bytes in the pattern
 *
 * Returns NULL when the pattern is well formed; otherwise a strdup()ed
 * copy of the error message.
 */
const char *str_check (const char *p, unsigned int lp) {
    MatchState ms;
    /* an empty pattern is valid; never read p[0] when lp == 0 */
    if (lp > 0 && *p == '^') {
        p++;  /* skip anchor character */
        lp--; /* keep p_end on the real end of the pattern */
    }
    memset(ms.msg_buff, 0, sizeof(ms.msg_buff));
    if (setjmp(ms.jump_buf) != 0) {
        return strdup(ms.msg_buff);
    }
    ms.level = 0;
    ms.matchdepth = MAXCCALLS;
    ms.p_end = p + lp;
    str_match_check(&ms, p);
    return NULL;
}