0.2.0 candidate: static verification of Lua string patterns
This commit is contained in:
parent
0b755ed997
commit
207c25ad2f
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "lua-patterns"
|
||||
version = "0.1.1"
|
||||
version = "0.2.0"
|
||||
authors = ["steve donovan <steve.j.donovan@gmail.com>"]
|
||||
description = "Binding to Lua String Patterns"
|
||||
build = "build.rs"
|
||||
|
@ -10,6 +10,8 @@ documentation = "https://docs.rs/lua-patterns"
|
|||
|
||||
keywords = ["string","matching","lua"]
|
||||
|
||||
categories = ["parsing","api-bindings"]
|
||||
|
||||
[build-dependencies]
|
||||
gcc="0.3"
|
||||
|
||||
|
|
13
readme.md
13
readme.md
|
@ -37,7 +37,10 @@ assert_eq!(r.start, 6);
|
|||
assert_eq!(r.end, 9);
|
||||
```
|
||||
This is not in itself impressive, since it can be done with the string `find`
|
||||
method, but once we start using patterns it gets more exciting, especially
|
||||
method. (`new` will panic if you feed it a bad pattern, so use `new_try` if
|
||||
you want more control.)
|
||||
|
||||
Once we start using patterns it gets more exciting, especially
|
||||
with _captures_:
|
||||
|
||||
```rust
|
||||
|
@ -202,8 +205,8 @@ let patt = LuaPatternBuilder::new()
|
|||
let mut m = LuaPattern::from_bytes(&patt);
|
||||
// picks up "DE2424BE"
|
||||
```
|
||||
> Static verification: this version attempts to verify string patterns. If you
|
||||
> want errors, use `new_try` and `from_bytes_try`, otherwise the constructors panic.
|
||||
> If a match panics after successful verification, it is a __BUG__ - please
|
||||
> report the offending pattern.
|
||||
|
||||
> **PANICKING** Currently this library will behave badly and panic
|
||||
> if the Lua pattern is malformed. There is no compilation step,
|
||||
> unlike regexps, but I intend to provide a static validation
|
||||
> to convert panics into errors, as good practice demands.
|
||||
|
|
84
src/lib.rs
84
src/lib.rs
|
@ -10,6 +10,8 @@
|
|||
//! [the README](https://github.com/stevedonovan/lua-patterns/blob/master/readme.md)
|
||||
//! for more discussion.
|
||||
//!
|
||||
//! [LuaPattern](struct.LuaPattern.html) implements the public API.
|
||||
//!
|
||||
//! ## Examples
|
||||
//!
|
||||
//! ```rust
|
||||
|
@ -52,6 +54,24 @@ struct LuaMatch {
|
|||
|
||||
static LUA_MAXCAPTURES: usize = 32;
|
||||
|
||||
use std::fmt;
|
||||
use std::error::Error;
|
||||
|
||||
/// Error produced when a Lua string pattern fails static verification.
///
/// The wrapped `String` is the human-readable message describing why the
/// pattern was rejected (for example `"unfinished capture"`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PatternError(pub String);

impl fmt::Display for PatternError {
    /// Writes the wrapped message verbatim.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(&self.0)
    }
}

impl Error for PatternError {
    /// Returns the wrapped message; kept so existing callers of
    /// `description()` keep receiving the pattern error text.
    fn description(&self) -> &str {
        &self.0
    }
}
|
||||
|
||||
#[link(name = "lua-str", kind="static")]
|
||||
extern {
|
||||
fn str_match (
|
||||
|
@ -59,6 +79,10 @@ extern {
|
|||
err_msg: *mut *mut c_char,
|
||||
mm: *mut LuaMatch
|
||||
) -> c_int;
|
||||
|
||||
fn str_check (
|
||||
p: *const u8, lp: c_uint
|
||||
) -> *const i8;
|
||||
}
|
||||
|
||||
/// Represents a Lua string pattern and the results of a match
|
||||
|
@ -69,17 +93,34 @@ pub struct LuaPattern<'a> {
|
|||
}
|
||||
|
||||
impl <'a> LuaPattern<'a> {
|
||||
/// Create a new Lua pattern from a string
|
||||
pub fn new(patt: &'a str) -> LuaPattern<'a> {
|
||||
LuaPattern::from_bytes(patt.as_bytes())
|
||||
}
|
||||
|
||||
/// Create a new Lua pattern from a slice of bytes
|
||||
pub fn from_bytes (bytes: &'a [u8]) -> LuaPattern<'a> {
|
||||
/// Maybe create a new Lua pattern from a slice of bytes
|
||||
pub fn from_bytes_try (bytes: &'a [u8]) -> Result<LuaPattern<'a>,PatternError> {
|
||||
let mut matches: Vec<LuaMatch> = Vec::with_capacity(LUA_MAXCAPTURES);
|
||||
unsafe {
|
||||
let res = str_check(bytes.as_ptr(),bytes.len() as c_uint);
|
||||
if ! res.is_null() {
|
||||
let sres = CStr::from_ptr(res).to_str().unwrap().to_string();
|
||||
return Err(PatternError(sres));
|
||||
}
|
||||
}
|
||||
unsafe {matches.set_len(LUA_MAXCAPTURES);}
|
||||
LuaPattern{patt: bytes, matches: matches, n_match: 0}
|
||||
Ok(LuaPattern{patt: bytes, matches: matches, n_match: 0})
|
||||
}
|
||||
|
||||
/// Maybe create a new Lua pattern from a string
|
||||
pub fn new_try(patt: &'a str) -> Result<LuaPattern<'a>,PatternError> {
|
||||
LuaPattern::from_bytes_try(patt.as_bytes())
|
||||
}
|
||||
|
||||
/// Create a new Lua pattern from a string, panicking if bad
|
||||
pub fn new(patt: &'a str) -> LuaPattern<'a> {
|
||||
LuaPattern::new_try(patt).expect("bad pattern")
|
||||
}
|
||||
|
||||
/// Create a new Lua pattern from a slice of bytes, panicking if bad
|
||||
pub fn from_bytes (bytes: &'a [u8]) -> LuaPattern<'a> {
|
||||
LuaPattern::from_bytes_try(bytes).expect("bad pattern")
|
||||
}
|
||||
|
||||
/// Match a slice of bytes with a pattern
|
||||
///
|
||||
|
@ -101,7 +142,7 @@ impl <'a> LuaPattern<'a> {
|
|||
err_msg, self.matches.as_mut_ptr()) as usize;
|
||||
let ep = *err_msg;
|
||||
if ! ep.is_null() {
|
||||
panic!(format!("lua-pattern {:?}",CStr::from_ptr(ep)));
|
||||
panic!(format!("REPORT AS BUG: lua-pattern {:?}",CStr::from_ptr(ep)));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -687,8 +728,6 @@ mod tests {
|
|||
assert_eq!(iter.next().unwrap().get(1), "one");
|
||||
assert_eq!(iter.next().unwrap().get(1), "two");
|
||||
assert_eq!(iter.next().unwrap().get(1), "three");
|
||||
|
||||
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -719,8 +758,25 @@ mod tests {
|
|||
let mut m = LuaPattern::new("(%S+)%s*=%s*(%S+);%s*");
|
||||
let res = m.gsub("a=2; b=3; c = 4;", "'%2':%1 ");
|
||||
assert_eq!(res,"'2':a '3':b '4':c ");
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
/// Every malformed pattern must be rejected by `new_try` with the expected
/// message; a failure reports which pattern misbehaved.
#[test]
fn bad_patterns() {
    let bad = [
        ("bonzo %","malformed pattern (ends with '%')"),
        ("bonzo (dog%(","unfinished capture"),
        ("alles [%a%[","malformed pattern (missing ']')"),
        ("bonzo (dog (cat)","unfinished capture"),
        ("frodo %f[%A","malformed pattern (missing ']')"),
        ("frodo (1) (2(3)%2)%1","invalid capture index %2"),
    ];
    for &(patt, msg) in bad.iter() {
        // `.err()` is `None` when the pattern was wrongly accepted, so one
        // assertion covers both "accepted" and "rejected with wrong message".
        assert_eq!(
            LuaPattern::new_try(patt).err(),
            Some(PatternError(msg.into())),
            "unexpected verification result for pattern {:?}", patt
        );
    }
}
|
||||
}
|
||||
|
|
|
@ -280,7 +280,7 @@ static const char *match (MatchState *ms, const char *s, const char *p) {
|
|||
const char *ep; char previous;
|
||||
p += 2;
|
||||
if (*p != '[')
|
||||
throw_error(ms,"missing '[' after '%f' in pattern");
|
||||
throw_error(ms,"missing '[' after '%%f' in pattern");
|
||||
ep = classend(ms, p); /* points to what is next */
|
||||
previous = (s == ms->src_init) ? '\0' : *(s - 1);
|
||||
if (!matchbracketclass(uchar(previous), p, ep - 1) &&
|
||||
|
@ -412,3 +412,90 @@ int str_match (const char *s, unsigned int ls, const char *p, unsigned int lp, c
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Walk a pattern once, without a subject string, and report structural
 * errors (dangling '%', bad '%b' / '%f', unterminated sets, unbalanced or
 * too many captures, invalid back-references) through throw_error().
 *
 * ms  match state; level must be 0 and p_end must point one past the last
 *     pattern byte.  capture[].len is used only to track open/closed state.
 * p   first pattern byte to examine (after any '^' anchor).
 *
 * The pattern is not required to be NUL-terminated, so every read of *p is
 * guarded by a comparison against ms->p_end.  throw_error() is expected not
 * to return (the caller sets up a setjmp target in ms).
 */
static void str_match_check(MatchState *ms, const char *p) {
  char ch;
  int level_stack[LUA_MAXCAPTURES];  /* indices of captures still open */
  int stack_idx = 0;
  while (p < ms->p_end && (ch = *p++)) {
    switch (ch) {
      case L_ESC: {
        if (p >= ms->p_end)  /* nothing follows the escape character */
          throw_error(ms, "malformed pattern (ends with '%%')");
        switch ((ch = *p++)) {
          case 'b': {
            /* '%bxy' takes two delimiter bytes; skip BOTH so that neither
               is re-read as pattern syntax (e.g. the ')' in '%b()') */
            if (ms->p_end - p < 2)
              throw_error(ms, "malformed pattern "
                              "(missing arguments to '%%b')");
            p += 2;
          } break;
          case 'f': {
            if (p >= ms->p_end || *p != '[')
              throw_error(ms, "missing '[' after '%%f' in pattern");
            /* p stays on '[' so the set is validated by the next iteration */
          } break;
          case '0': case '1': case '2': case '3':
          case '4': case '5': case '6': case '7':
          case '8': case '9': {
            int l = uchar(ch) - '1';  /* '%1' refers to capture index 0 */
            if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED)
              throw_error(ms, "invalid capture index %%%d", l + 1);
          } break;
          default:  /* any other escaped byte is consumed as a literal/class */
            break;
        }
      } break;
      case '[': {
        if (p < ms->p_end && *p == '^')
          p++;  /* complement marker is not part of the set's contents */
        do {  /* look for a `]' */
          if (p >= ms->p_end)
            throw_error(ms, "malformed pattern (missing ']')");
          if (*(p++) == L_ESC && p < ms->p_end)
            p++;  /* skip escapes (e.g. `%]') */
        } while (p >= ms->p_end || *p != ']');  /* at end: loop once more to throw */
        p++;  /* consume the closing `]' */
      } break;
      case '(': {
        if (p < ms->p_end && *p == ')') {
          ++p;  /* position capture `()': nothing to balance */
        } else {
          level_stack[stack_idx++] = ms->level;
          ms->capture[ms->level].len = CAP_UNFINISHED;
          ms->level++;  /* level counts total number of captures */
          if (ms->level >= LUA_MAXCAPTURES) throw_error(ms, "too many captures");
        }
      } break;
      case ')': {
        if (stack_idx == 0)
          throw_error(ms, "no open capture");
        /* any value other than CAP_UNFINISHED marks the capture as closed */
        ms->capture[level_stack[--stack_idx]].len = CAP_POSITION;
      } break;
      default:
        break;
    }
  }
  if (stack_idx > 0) {
    throw_error(ms, "unfinished capture");
  }
}
|
||||
|
||||
const char *str_check (const char *p, unsigned int lp) {
|
||||
MatchState ms;
|
||||
int anchor = (*p == '^');
|
||||
if (anchor) {
|
||||
p++; /* skip anchor character */
|
||||
}
|
||||
|
||||
memset(ms.msg_buff,0,sizeof(ms.msg_buff));
|
||||
|
||||
if (setjmp(ms.jump_buf) != 0) {
|
||||
return strdup(ms.msg_buff);
|
||||
}
|
||||
|
||||
ms.level = 0;
|
||||
ms.matchdepth = MAXCCALLS;
|
||||
ms.p_end = p + lp;
|
||||
|
||||
if ( *(ms.p_end-1) == '%') {
|
||||
throw_error(&ms,"malformed pattern (ends with '%')");
|
||||
}
|
||||
|
||||
str_match_check(&ms,p);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue