0.2.0 candidate: static verification of Lua string patterns
This commit is contained in:
parent
0b755ed997
commit
207c25ad2f
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "lua-patterns"
|
name = "lua-patterns"
|
||||||
version = "0.1.1"
|
version = "0.2.0"
|
||||||
authors = ["steve donovan <steve.j.donovan@gmail.com>"]
|
authors = ["steve donovan <steve.j.donovan@gmail.com>"]
|
||||||
description = "Binding to Lua String Patterns"
|
description = "Binding to Lua String Patterns"
|
||||||
build = "build.rs"
|
build = "build.rs"
|
||||||
|
@ -10,6 +10,8 @@ documentation = "https://docs.rs/lua-patterns"
|
||||||
|
|
||||||
keywords = ["string","matching","lua"]
|
keywords = ["string","matching","lua"]
|
||||||
|
|
||||||
|
categories = ["parsing","api-bindings"]
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
gcc="0.3"
|
gcc="0.3"
|
||||||
|
|
||||||
|
|
13
readme.md
13
readme.md
|
@ -37,7 +37,10 @@ assert_eq!(r.start, 6);
|
||||||
assert_eq!(r.end, 9);
|
assert_eq!(r.end, 9);
|
||||||
```
|
```
|
||||||
This is not in itself impressive, since it can be done with the string `find`
|
This is not in itself impressive, since it can be done with the string `find`
|
||||||
method, but once we start using patterns it gets more exciting, especially
|
method. (`new` will panic if you feed it a bad pattern, so use `new_try` if
|
||||||
|
you want more control.)
|
||||||
|
|
||||||
|
Once we start using patterns it gets more exciting, especially
|
||||||
with _captures_:
|
with _captures_:
|
||||||
|
|
||||||
```rust
|
```rust
|
||||||
|
@ -202,8 +205,8 @@ let patt = LuaPatternBuilder::new()
|
||||||
let mut m = LuaPattern::from_bytes(&patt);
|
let mut m = LuaPattern::from_bytes(&patt);
|
||||||
// picks up "DE2424BE"
|
// picks up "DE2424BE"
|
||||||
```
|
```
|
||||||
|
> Static verification: this version attempts to verify string patterns. If you
|
||||||
|
> want errors, use `new_try` and `from_bytes_try`, otherwise the constructors panic.
|
||||||
|
> If a match panics after successful verification, it is a __BUG__ - please
|
||||||
|
> report the offending pattern.
|
||||||
|
|
||||||
> **PANICKING** Currently this library will behave badly and panic
|
|
||||||
> if the Lua pattern is malformed. There is no compilation step,
|
|
||||||
> unlike regexps, but I intend to provide a static validation
|
|
||||||
> to convert panics into errors, as good practice demands.
|
|
||||||
|
|
80
src/lib.rs
80
src/lib.rs
|
@ -10,6 +10,8 @@
|
||||||
//! [the README](https://github.com/stevedonovan/lua-patterns/blob/master/readme.md)
|
//! [the README](https://github.com/stevedonovan/lua-patterns/blob/master/readme.md)
|
||||||
//! for more discussion.
|
//! for more discussion.
|
||||||
//!
|
//!
|
||||||
|
//! [LuaPattern](struct.LuaPattern.html) implements the public API.
|
||||||
|
//!
|
||||||
//! ## Examples
|
//! ## Examples
|
||||||
//!
|
//!
|
||||||
//! ```rust
|
//! ```rust
|
||||||
|
@ -52,6 +54,24 @@ struct LuaMatch {
|
||||||
|
|
||||||
static LUA_MAXCAPTURES: usize = 32;
|
static LUA_MAXCAPTURES: usize = 32;
|
||||||
|
|
||||||
|
use std::fmt;
|
||||||
|
use std::error::Error;
|
||||||
|
|
||||||
|
#[derive(Debug,PartialEq)]
|
||||||
|
pub struct PatternError(pub String);
|
||||||
|
|
||||||
|
impl fmt::Display for PatternError {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
write!(f,"{}",self.0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Error for PatternError {
|
||||||
|
fn description(&self) -> &str {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[link(name = "lua-str", kind="static")]
|
#[link(name = "lua-str", kind="static")]
|
||||||
extern {
|
extern {
|
||||||
fn str_match (
|
fn str_match (
|
||||||
|
@ -59,6 +79,10 @@ extern {
|
||||||
err_msg: *mut *mut c_char,
|
err_msg: *mut *mut c_char,
|
||||||
mm: *mut LuaMatch
|
mm: *mut LuaMatch
|
||||||
) -> c_int;
|
) -> c_int;
|
||||||
|
|
||||||
|
fn str_check (
|
||||||
|
p: *const u8, lp: c_uint
|
||||||
|
) -> *const i8;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Represents a Lua string pattern and the results of a match
|
/// Represents a Lua string pattern and the results of a match
|
||||||
|
@ -69,16 +93,33 @@ pub struct LuaPattern<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl <'a> LuaPattern<'a> {
|
impl <'a> LuaPattern<'a> {
|
||||||
/// Create a new Lua pattern from a string
|
/// Try to create a new Lua pattern from a slice of bytes, returning a `PatternError` if the pattern is malformed
|
||||||
pub fn new(patt: &'a str) -> LuaPattern<'a> {
|
pub fn from_bytes_try (bytes: &'a [u8]) -> Result<LuaPattern<'a>,PatternError> {
|
||||||
LuaPattern::from_bytes(patt.as_bytes())
|
let mut matches: Vec<LuaMatch> = Vec::with_capacity(LUA_MAXCAPTURES);
|
||||||
|
unsafe {
|
||||||
|
let res = str_check(bytes.as_ptr(),bytes.len() as c_uint);
|
||||||
|
if ! res.is_null() {
|
||||||
|
let sres = CStr::from_ptr(res).to_str().unwrap().to_string();
|
||||||
|
return Err(PatternError(sres));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
unsafe {matches.set_len(LUA_MAXCAPTURES);}
|
||||||
|
Ok(LuaPattern{patt: bytes, matches: matches, n_match: 0})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Create a new Lua pattern from a slice of bytes
|
/// Try to create a new Lua pattern from a string, returning a `PatternError` if the pattern is malformed
|
||||||
|
pub fn new_try(patt: &'a str) -> Result<LuaPattern<'a>,PatternError> {
|
||||||
|
LuaPattern::from_bytes_try(patt.as_bytes())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a new Lua pattern from a string, panicking if bad
|
||||||
|
pub fn new(patt: &'a str) -> LuaPattern<'a> {
|
||||||
|
LuaPattern::new_try(patt).expect("bad pattern")
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a new Lua pattern from a slice of bytes, panicking if bad
|
||||||
pub fn from_bytes (bytes: &'a [u8]) -> LuaPattern<'a> {
|
pub fn from_bytes (bytes: &'a [u8]) -> LuaPattern<'a> {
|
||||||
let mut matches: Vec<LuaMatch> = Vec::with_capacity(LUA_MAXCAPTURES);
|
LuaPattern::from_bytes_try(bytes).expect("bad pattern")
|
||||||
unsafe {matches.set_len(LUA_MAXCAPTURES);}
|
|
||||||
LuaPattern{patt: bytes, matches: matches, n_match: 0}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Match a slice of bytes with a pattern
|
/// Match a slice of bytes with a pattern
|
||||||
|
@ -101,7 +142,7 @@ impl <'a> LuaPattern<'a> {
|
||||||
err_msg, self.matches.as_mut_ptr()) as usize;
|
err_msg, self.matches.as_mut_ptr()) as usize;
|
||||||
let ep = *err_msg;
|
let ep = *err_msg;
|
||||||
if ! ep.is_null() {
|
if ! ep.is_null() {
|
||||||
panic!(format!("lua-pattern {:?}",CStr::from_ptr(ep)));
|
panic!(format!("REPORT AS BUG: lua-pattern {:?}",CStr::from_ptr(ep)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -687,8 +728,6 @@ mod tests {
|
||||||
assert_eq!(iter.next().unwrap().get(1), "one");
|
assert_eq!(iter.next().unwrap().get(1), "one");
|
||||||
assert_eq!(iter.next().unwrap().get(1), "two");
|
assert_eq!(iter.next().unwrap().get(1), "two");
|
||||||
assert_eq!(iter.next().unwrap().get(1), "three");
|
assert_eq!(iter.next().unwrap().get(1), "three");
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -719,8 +758,25 @@ mod tests {
|
||||||
let mut m = LuaPattern::new("(%S+)%s*=%s*(%S+);%s*");
|
let mut m = LuaPattern::new("(%S+)%s*=%s*(%S+);%s*");
|
||||||
let res = m.gsub("a=2; b=3; c = 4;", "'%2':%1 ");
|
let res = m.gsub("a=2; b=3; c = 4;", "'%2':%1 ");
|
||||||
assert_eq!(res,"'2':a '3':b '4':c ");
|
assert_eq!(res,"'2':a '3':b '4':c ");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn bad_patterns() {
|
||||||
|
let bad = [
|
||||||
|
("bonzo %","malformed pattern (ends with '%')"),
|
||||||
|
("bonzo (dog%(","unfinished capture"),
|
||||||
|
("alles [%a%[","malformed pattern (missing ']')"),
|
||||||
|
("bonzo (dog (cat)","unfinished capture"),
|
||||||
|
("frodo %f[%A","malformed pattern (missing ']')"),
|
||||||
|
("frodo (1) (2(3)%2)%1","invalid capture index %2"),
|
||||||
|
];
|
||||||
|
for p in bad.iter() {
|
||||||
|
let res = LuaPattern::new_try(p.0);
|
||||||
|
if let Err(e) = res {
|
||||||
|
assert_eq!(e, PatternError(p.1.into()));
|
||||||
|
} else {
|
||||||
|
panic!("false positive");
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -280,7 +280,7 @@ static const char *match (MatchState *ms, const char *s, const char *p) {
|
||||||
const char *ep; char previous;
|
const char *ep; char previous;
|
||||||
p += 2;
|
p += 2;
|
||||||
if (*p != '[')
|
if (*p != '[')
|
||||||
throw_error(ms,"missing '[' after '%f' in pattern");
|
throw_error(ms,"missing '[' after '%%f' in pattern");
|
||||||
ep = classend(ms, p); /* points to what is next */
|
ep = classend(ms, p); /* points to what is next */
|
||||||
previous = (s == ms->src_init) ? '\0' : *(s - 1);
|
previous = (s == ms->src_init) ? '\0' : *(s - 1);
|
||||||
if (!matchbracketclass(uchar(previous), p, ep - 1) &&
|
if (!matchbracketclass(uchar(previous), p, ep - 1) &&
|
||||||
|
@ -412,3 +412,90 @@ int str_match (const char *s, unsigned int ls, const char *p, unsigned int lp, c
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void str_match_check(MatchState *ms, const char *p) {
|
||||||
|
char ch;
|
||||||
|
int level_stack[LUA_MAXCAPTURES];
|
||||||
|
int stack_idx = 0;
|
||||||
|
int current_level = 0;
|
||||||
|
while (p < ms->p_end && (ch=*p++)) {
|
||||||
|
switch (ch) {
|
||||||
|
case L_ESC: {
|
||||||
|
switch ((ch=*p++)) {
|
||||||
|
case 'b': {
|
||||||
|
p++;
|
||||||
|
if (p >= ms->p_end) throw_error(ms,"malformed pattern "
|
||||||
|
"(missing arguments to '%b')");
|
||||||
|
} break;
|
||||||
|
case 'f': {
|
||||||
|
if (*p != '[') throw_error(ms,"missing '[' after '%%f' in pattern");
|
||||||
|
--p; // so we see [...]
|
||||||
|
} break;
|
||||||
|
case '0': case '1': case '2': case '3':
|
||||||
|
case '4': case '5': case '6': case '7':
|
||||||
|
case '8': case '9': {
|
||||||
|
int l = uchar(ch) - '1'; //
|
||||||
|
if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED)
|
||||||
|
throw_error(ms,"invalid capture index %%%d", l + 1);
|
||||||
|
--p;
|
||||||
|
} break;
|
||||||
|
}
|
||||||
|
} break;
|
||||||
|
case '[': {
|
||||||
|
do { /* look for a `]' */
|
||||||
|
if (p == ms->p_end)
|
||||||
|
throw_error(ms,"malformed pattern (missing ']')");
|
||||||
|
if (*(p++) == L_ESC && p < ms->p_end)
|
||||||
|
p++; /* skip escapes (e.g. `%]') */
|
||||||
|
} while (*p != ']');
|
||||||
|
} break;
|
||||||
|
case '(': {
|
||||||
|
if (*p != ')') { /* not a position capture */
|
||||||
|
level_stack[stack_idx++] = ms->level;
|
||||||
|
ms->capture[ms->level].len = CAP_UNFINISHED;
|
||||||
|
ms->level ++; /* level counts total number of captures */
|
||||||
|
if (ms->level >= LUA_MAXCAPTURES) throw_error(ms,"too many captures");
|
||||||
|
} else {
|
||||||
|
++p;
|
||||||
|
}
|
||||||
|
} break;
|
||||||
|
case ')': {
|
||||||
|
if (stack_idx == 0)
|
||||||
|
throw_error(ms, "no open capture");
|
||||||
|
ms->capture[level_stack[--stack_idx]].len = CAP_POSITION;
|
||||||
|
} break;
|
||||||
|
default: {
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (stack_idx > 0) {
|
||||||
|
throw_error(ms,"unfinished capture");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *str_check (const char *p, unsigned int lp) {
|
||||||
|
MatchState ms;
|
||||||
|
int anchor = (*p == '^');
|
||||||
|
if (anchor) {
|
||||||
|
p++; /* skip anchor character */
|
||||||
|
}
|
||||||
|
|
||||||
|
memset(ms.msg_buff,0,sizeof(ms.msg_buff));
|
||||||
|
|
||||||
|
if (setjmp(ms.jump_buf) != 0) {
|
||||||
|
return strdup(ms.msg_buff);
|
||||||
|
}
|
||||||
|
|
||||||
|
ms.level = 0;
|
||||||
|
ms.matchdepth = MAXCCALLS;
|
||||||
|
ms.p_end = p + lp;
|
||||||
|
|
||||||
|
if ( *(ms.p_end-1) == '%') {
|
||||||
|
throw_error(&ms,"malformed pattern (ends with '%')");
|
||||||
|
}
|
||||||
|
|
||||||
|
str_match_check(&ms,p);
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue