feat(glob): new Glob implementation based on Peglob #33605

|vim.glob.to_lpeg()| uses a new LPeg-based implementation (Peglob) that
provides ~50% speedup for complex patterns. The implementation restores
support for nested braces and follows LSP 3.17 specification with
additional constraints for improved correctness and resistance to
backtracking edge cases.
This commit is contained in:
Brynne Taylor
2025-05-22 15:24:49 +08:00
committed by GitHub
parent 172a90c245
commit 322a6d305d
4 changed files with 464 additions and 129 deletions

View File

@@ -3236,30 +3236,51 @@ vim.fs.root({source}, {marker}) *vim.fs.root()*
==============================================================================
Lua module: vim.glob *vim.glob*
Glob-to-LPeg Converter (Peglob) This module converts glob patterns to LPeg
patterns according to the LSP 3.17 specification:
https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern
Glob grammar overview:
• `*` to match zero or more characters in a path segment
• `?` to match on one character in a path segment
• `**` to match any number of path segments, including none
• `{}` to group conditions (e.g. `*.{ts,js}` matches TypeScript and JavaScript
files)
• `[]` to declare a range of characters to match in a path segment (e.g.,
`example.[0-9]` to match on `example.0`, `example.1`, …)
• `[!...]` to negate a range of characters to match in a path segment (e.g.,
`example.[!0-9]` to match on `example.a`, `example.b`, but not `example.0`)
Additional constraints:
• A Glob pattern must match an entire path, with partial matches considered
failures.
• The pattern only determines success or failure, without specifying which
parts correspond to which characters.
• A path segment is the portion of a path between two adjacent path separators
(`/`), or between the start/end of the path and the nearest separator.
• The `**` (globstar) pattern matches zero or more path segments, including
intervening separators (`/`). Within pattern strings, `**` must be delimited
by path separators (`/`) or pattern boundaries and cannot be adjacent to any
characters other than `/`. If `**` is not the final element, it must be
followed by `/`.
• `{}` (braced conditions) contains valid Glob patterns as branches, separated
by commas. Commas are exclusively used for separating branches and cannot
appear within a branch for any other purpose. Nested `{}` structures are
allowed, but `{}` must contain at least two branches—zero or one branch is
not permitted.
• In `[]` or `[!...]`, a character range consists of character intervals
(e.g., `a-z`) or individual characters (e.g., `w`). A range including `/`
won't match that character.
vim.glob.to_lpeg({pattern}) *vim.glob.to_lpeg()*
Parses a raw glob into an |lua-lpeg| pattern.
This uses glob semantics from LSP 3.17.0:
https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern
Glob patterns can have the following syntax:
• `*` to match one or more characters in a path segment
• `?` to match on one character in a path segment
• `**` to match any number of path segments, including none
• `{}` to group conditions (e.g. `*.{ts,js}` matches TypeScript and
JavaScript files)
• `[]` to declare a range of characters to match in a path segment (e.g.,
`example.[0-9]` to match on `example.0`, `example.1`, …)
• `[!...]` to negate a range of characters to match in a path segment
(e.g., `example.[!0-9]` to match on `example.a`, `example.b`, but not
`example.0`)
Parameters: ~
• {pattern} (`string`) The raw glob pattern
Return: ~
(`vim.lpeg.Pattern`) pattern An |lua-lpeg| representation of the
pattern
(`vim.lpeg.Pattern`) An |lua-lpeg| representation of the pattern
==============================================================================

View File

@@ -175,7 +175,11 @@ OPTIONS
PERFORMANCE
todo
|vim.glob.to_lpeg()| uses a new LPeg-based implementation (Peglob) that
provides ~50% speedup for complex patterns. The implementation restores
support for nested braces and follows LSP 3.17 specification with
additional constraints for improved correctness and resistance to
backtracking edge cases.
PLUGINS

View File

@@ -1,93 +1,375 @@
local lpeg = vim.lpeg
local P, S, V, R, B = lpeg.P, lpeg.S, lpeg.V, lpeg.R, lpeg.B
local C, Cc, Ct, Cf, Cmt = lpeg.C, lpeg.Cc, lpeg.Ct, lpeg.Cf, lpeg.Cmt
local M = {}
local pathsep = P('/')
--- Parses a raw glob into an |lua-lpeg| pattern.
--- @brief Glob-to-LPeg Converter (Peglob)
--- This module converts glob patterns to LPeg patterns according to the LSP 3.17 specification:
--- https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern
---
--- This uses glob semantics from LSP 3.17.0: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern
---
--- Glob patterns can have the following syntax:
--- - `*` to match one or more characters in a path segment
--- Glob grammar overview:
--- - `*` to match zero or more characters in a path segment
--- - `?` to match on one character in a path segment
--- - `**` to match any number of path segments, including none
--- - `{}` to group conditions (e.g. `*.{ts,js}` matches TypeScript and JavaScript files)
--- - `[]` to declare a range of characters to match in a path segment (e.g., `example.[0-9]` to match on `example.0`, `example.1`, …)
--- - `[!...]` to negate a range of characters to match in a path segment (e.g., `example.[!0-9]` to match on `example.a`, `example.b`, but not `example.0`)
--- - `[]` to declare a range of characters to match in a path segment
--- (e.g., `example.[0-9]` to match on `example.0`, `example.1`, …)
--- - `[!...]` to negate a range of characters to match in a path segment
--- (e.g., `example.[!0-9]` to match on `example.a`, `example.b`, but not `example.0`)
---
--- Additional constraints:
--- - A Glob pattern must match an entire path, with partial matches
--- considered failures.
--- - The pattern only determines success or failure, without specifying
--- which parts correspond to which characters.
--- - A *path segment* is the portion of a path between two adjacent path
--- separators (`/`), or between the start/end of the path and the nearest
--- separator.
--- - The `**` (*globstar*) pattern matches zero or more path segments,
--- including intervening separators (`/`). Within pattern strings, `**`
--- must be delimited by path separators (`/`) or pattern boundaries and
--- cannot be adjacent to any characters other than `/`. If `**` is not
--- the final element, it must be followed by `/`.
--- - `{}` (*braced conditions*) contains valid Glob patterns as branches,
--- separated by commas. Commas are exclusively used for separating
--- branches and cannot appear within a branch for any other purpose.
--- Nested `{}` structures are allowed, but `{}` must contain at least two
--- branches—zero or one branch is not permitted.
--- - In `[]` or `[!...]`, a *character range* consists of character
--- intervals (e.g., `a-z`) or individual characters (e.g., `w`). A range
--- including `/` won't match that character.
--- @diagnostic disable: missing-fields
local m = vim.lpeg
local mt = getmetatable(m.P(0))
local re = vim.re
local bit = require('bit')
local M = {}
-- Basic patterns for matching glob components.
-- These are building blocks shared by the helper functions and the grammar below.
-- `letter`: one character that is none of , * ? [ ] { } / \
local letter = m.P(1) - m.S(',*?[]{}/\\') -- Any character except special glob characters
-- `slash`: consumes '/' in the glob text and yields, as its capture value, an
-- LPeg pattern that matches a literal '/'.
local slash = m.P '/' * m.Cc(m.P '/') -- Path separator with capture
-- `notslash`: one character of any kind other than '/'.
local notslash = m.P(1) - m.P '/' -- Any character except path separator
-- `notcomma`: one character other than ',' and '\'.
local notcomma = m.P(1) - m.S(',\\') -- Any character except comma and backslash
--- Handle EOF, considering whether we're in a segment or not.
--- Succeeds only at the end of the glob text. It reads back the group capture
--- named 'inseg' and passes it to the function below, whose return value (an
--- LPeg pattern) becomes the capture:
--- - flag set: a lookahead for '/' (`#m.P '/'`)
--- - flag unset: the end-of-subject pattern (`m.P(-1)`)
--- @type vim.lpeg.Pattern
local eof = -1
* m.Cb('inseg')
/ function(flag)
if flag then
return #m.P '/'
else
return m.P(-1)
end
end
---@alias pat_table { F: string?, [1]: string, [2]: vim.lpeg.Pattern }
---@alias seg_part { [string]: any, [integer]: pat_table }
--- Opens a new segment record from the first pattern entry of a segment.
--- The record starts with no searched words (`n = 0`) and `e = true` as its
--- initial end marker.
---
--- @param p pat_table Initial segment pattern data; only `p[2]` is read
--- @return seg_part Segment structure with start pattern
local function start_seg(p)
  local seg = {}
  seg.s = p[2]
  seg.e = true
  seg.n = 0
  return seg
end
--- Appends a pattern entry to the list of words the segment has to search for.
--- The entry is stored at the next integer index and the counter `t.n` is bumped.
---
--- @param t seg_part Segment structure (modified in place)
--- @param p pat_table Pattern to look for
--- @return table Updated segment structure (the same table as `t`)
local function lookfor(t, p)
  local slot = t.n + 1
  t[slot] = p
  t.n = slot
  return t
end
--- Replaces the segment's end marker with "any run of non-separator characters".
---
--- @param t seg_part Segment structure (modified in place)
--- @return table Segment structure with end pattern
local function to_seg_end(t)
  local rest_of_segment = notslash ^ 0
  t.e = rest_of_segment
  return t
end
--- Constructs a segment matching pattern from collected components.
---
--- With no searched words (`t.n == 0`) the result is simply the start pattern,
--- followed by the end pattern when one is set. Otherwise a small grammar is
--- built: its start rule `s` is the start pattern followed by one rule
--- reference per searched word, and each word rule tries the word at the
--- current position or skips one non-separator character and retries.
---
--- @param t seg_part Segment structure with patterns
--- @return vim.lpeg.Pattern Complete segment match pattern
local function end_seg(t)
  --- @type table<any,any>
  local seg_grammar = { 's' }
  if t.n > 0 then
    seg_grammar.s = t.s
    for i = 1, t.n do
      -- Rule names are derived from the word's text. They are namespaced so
      -- that a word spelled exactly "s" cannot alias the start rule `s`;
      -- without the prefix, `*s` would make the start rule reference itself.
      local rname = 'word:' .. t[i][1]
      if not seg_grammar[rname] then
        -- Optimize search when deterministic first character is available
        if t[i].F then
          -- After skipping one character, fast-forward over every character
          -- that cannot start the word.
          seg_grammar[rname] = t[i][2] + notslash * (notslash - m.P(t[i].F)) ^ 0 * m.V(rname)
        else
          seg_grammar[rname] = t[i][2] + notslash * m.V(rname)
        end
      end
      seg_grammar.s = seg_grammar.s * m.V(rname)
    end
    if t.e then
      seg_grammar.s = seg_grammar.s * t.e
    end
    return m.P(seg_grammar)
  else
    seg_grammar.s = t.s
    if t.e then
      seg_grammar.s = seg_grammar.s * t.e
    end
    return seg_grammar.s
  end
end
--- Builds the pattern for a globstar prefix: either `p` matches right here, or
--- one whole path segment plus its trailing '/' is skipped and the attempt repeats.
---
--- @param p vim.lpeg.Pattern Pattern directly after `**/`
--- @return vim.lpeg.Pattern LPeg pattern for `**/p`
local function dseg(p)
  local skip_one_segment = notslash ^ 0 * m.P('/')
  return m.P({ p + skip_one_segment * m.V(1) })
end
--- @type (vim.lpeg.Pattern|table)
local g = nil
--- Multiplies conditions for braced expansion (Cartesian product).
---
--- Each operand is either a single string or a list of alternative strings.
--- string x string concatenates; string x list and list x string rewrite the
--- list in place; list x list produces a fresh list of every combination,
--- ordered by the first operand. An operand of any other type is ignored and
--- the other operand is returned unchanged.
---
--- @param a string|string[] First part
--- @param b string|string[] Second part
--- @return string|string[] Cartesian product of values
local function mul_cond(a, b)
  local kind_a, kind_b = type(a), type(b)

  if kind_a ~= 'string' and kind_a ~= 'table' then
    return b
  end
  if kind_b ~= 'string' and kind_b ~= 'table' then
    return a
  end

  if kind_a == 'string' then
    if kind_b == 'string' then
      return a .. b
    end
    -- Prefix every alternative of `b`, reusing its table.
    for i = 1, #b do
      b[i] = a .. b[i]
    end
    return b
  end

  if kind_b == 'string' then
    -- Suffix every alternative of `a`, reusing its table.
    for i = 1, #a do
      a[i] = a[i] .. b
    end
    return a
  end

  --- @type string[]
  local product = {}
  for i = 1, #a do
    local head = a[i]
    for j = 1, #b do
      product[#product + 1] = head .. b[j]
    end
  end
  return product
end
--- Combines alternatives in braced patterns.
---
--- Operands are single strings or lists of strings. Two strings form a new
--- two-element list; when a list is involved it is extended in place (a string
--- first operand goes to the front of `b`, everything else is appended to `a`).
--- Any other combination of operand types yields no return value.
---
--- @param a string|table First part
--- @param b string|table Second part
--- @return table #Combined alternatives
local function add_cond(a, b)
  local kind_a, kind_b = type(a), type(b)
  if kind_a == 'string' and kind_b == 'string' then
    return { a, b }
  elseif kind_a == 'string' and kind_b == 'table' then
    table.insert(b, 1, a)
    return b
  elseif kind_a == 'table' and kind_b == 'string' then
    table.insert(a, b)
    return a
  elseif kind_a == 'table' and kind_b == 'table' then
    for i = 1, #b do
      table.insert(a, b[i])
    end
    return a
  end
  -- Unsupported operand types fall through without returning a value.
  --- @diagnostic disable-next-line: missing-return
end
--- Expands patterns handling segment boundaries.
---
--- Every alternative in `a` is joined with the tail `b`, parsed by the main
--- grammar `g`, and replaced in `a` by the resulting pattern; the alternatives
--- are then combined with ordered choice. When `inseg` is set, a `#` prefix is
--- added so the sub-grammar run can detect the in-segment flag.
---
---@param a (any[]|vim.lpeg.Pattern[]) Array of patterns
---@param b string Tail string
---@param inseg boolean Whether inside a path segment
---@return vim.lpeg.Pattern #Expanded pattern
local function expand(a, b, inseg)
  local marker = inseg and '#' or ''
  for i = 1, #a do
    a[i] = g:match(marker .. a[i] .. b)
  end
  local choice = a[1]
  for i = 2, #a do
    choice = choice + a[i]
  end
  return choice
end
--- Converts a UTF-8 character to its Unicode codepoint.
---
--- Only the first character of the string is decoded. The lead byte selects
--- how many continuation bytes follow; each contributes its low six bits. An
--- empty string yields 0, and a string that ends early yields the value
--- accumulated so far.
---
--- @param utf8_str string UTF-8 character
--- @return number #Codepoint value
local function to_codepoint(utf8_str)
  local lead = utf8_str:byte(1)
  if not lead then
    return 0
  end
  if lead < 0x80 then
    return lead
  end

  local trailing, codepoint
  if lead < 0xE0 then
    trailing, codepoint = 1, bit.band(lead, 0x1F)
  elseif lead < 0xF0 then
    trailing, codepoint = 2, bit.band(lead, 0x0F)
  else
    trailing, codepoint = 3, bit.band(lead, 0x07)
  end

  local last = math.min(#utf8_str, 1 + trailing)
  for i = 2, last do
    local low_bits = bit.band(utf8_str:byte(i), 0x3F)
    codepoint = bit.bor(bit.lshift(codepoint, 6), low_bits)
  end
  return codepoint
end
--- Pattern for matching UTF-8 characters
-- `cont`: one byte in the range 128..191 (0x80..0xBF).
local cont = m.R('\128\191')
-- `any_utf8`: one character of one to four bytes, selected by the range of its
-- first byte: 0..127 alone, 194..223 plus one `cont`, 224..239 plus two,
-- 240..244 plus three.
local any_utf8 = m.R('\0\127')
+ m.R('\194\223') * cont
+ m.R('\224\239') * cont * cont
+ m.R('\240\244') * cont * cont * cont
--- Creates a character class pattern for glob ranges.
---
--- An empty member list means the brackets held nothing, so the text `[]` (or
--- `[!]`) is matched literally. Otherwise the members are combined by ordered
--- choice: intervals become code-point ranges, single characters become
--- literals. The result never matches `/`.
---
--- A negated class consumes one whole UTF-8 character, the same unit the
--- positive members match, so `[!a]` accepts a multibyte character in a single
--- step. A byte that is not part of valid UTF-8 is still consumed on its own.
---
--- @param inv string Inversion flag ('!' or '')
--- @param ranges (string|string[])[] Character ranges
--- @return vim.lpeg.Pattern #Character class pattern
local function class(inv, ranges)
  local negated = inv == '!'
  if #ranges == 0 then
    return m.P(negated and '[!]' or '[]')
  end

  local patt = m.P(false)
  for _, v in ipairs(ranges) do
    if type(v) == 'table' then
      patt = patt + m.utfR(to_codepoint(v[1]), to_codepoint(v[2]))
    else
      patt = patt + m.P(v)
    end
  end

  if negated then
    -- Prefer a full UTF-8 character; fall back to a single byte so paths that
    -- are not valid UTF-8 keep matching as before.
    patt = (any_utf8 + m.P(1)) - patt --[[@as vim.lpeg.Pattern]]
  end
  return patt - m.P('/')
end
-- Parse constraints for optimizing braced conditions
-- `noopt_condlist` succeeds when the text it is applied to contains a '/' or a
-- '**' somewhere: at each step it accepts '/' or '**', or else consumes one
-- character plus any run of characters other than '/' and '*' and tries again.
local noopt_condlist = re.compile [[
s <- '/' / '**' / . [^/*]* s
]]
-- `opt_tail` consumes characters that are neither '{' nor '/' and do not start
-- a '**', and succeeds only if the next character is then a '/'. The position
-- it returns is therefore the index of that '/'.
local opt_tail = re.compile [[
s <- (!'**' [^{/])* &'/'
]]
-- stylua: ignore start
--- @nodoc
--- @diagnostic disable
--- Main grammar for glob pattern matching
--- The grammar is applied to the glob text itself; its capture value is the
--- LPeg pattern that will later be applied to paths. Captures are combined
--- with `mt.__mul`, the pattern metatable's multiplication metamethod.
g = {
'Glob',
-- An optional leading '#' sets the group capture 'inseg' to true, otherwise it
-- is set to false; `eof` reads it back. `expand` adds that '#' when it
-- re-parses branch text.
Glob = (m.P'#' * m.Cg(m.Cc(true), 'inseg') + m.Cg(m.Cc(false), 'inseg')) *
m.Cf(m.V'Element'^-1 * (slash * m.V'Element')^0 * (slash^-1 * eof), mt.__mul),
-- Elements handle segments, globstar patterns
Element = m.V'DSeg' + m.V'DSEnd' + m.Cf(m.V'Segment' * (slash * m.V'Segment')^0 * (slash * eof + eof^-1), mt.__mul),
-- Globstar patterns
-- DSeg: '**/' followed by an element (or the end), wrapped by `dseg`.
-- DSEnd: '**' at the very end of the glob, yielding "any characters".
DSeg = m.P'**/' * ((m.V'Element' + eof) / dseg),
DSEnd = m.P'**' * -1 * m.Cc(m.P(1)^0),
-- Segment handling with word and star patterns
-- A segment record is opened with `start_seg`, extended by `lookfor` for each
-- '*' + word pair, optionally closed with `to_seg_end` for a trailing '*', and
-- turned into a pattern by `end_seg`. The last alternative is a lone '*'.
Segment = (m.V'Word' / start_seg + m.Cc({ '', true }) / start_seg * (m.V'Star' * m.V'Word' % lookfor)) *
(m.V'Star' * m.V'Word' % lookfor)^0 * (m.V'Star' * m.V'CheckBnd' % to_seg_end)^-1 / end_seg
+ m.V'Star' * m.V'CheckBnd' * m.Cc(notslash^0),
CheckBnd = #m.P'/' + -1, -- Boundary constraint
-- Word patterns for fixed-length matching
-- A word is collected into a table: field `F` (optional first character, see
-- FIRST), [1] the matched glob text, [2] the pattern built from it.
Word = -m.P'*' * m.Ct( m.V('FIRST')^-1 * m.C(m.V'WordAux') ),
WordAux = m.V'Branch' + m.Cf(m.V'Simple'^1 * m.V'Branch'^-1, mt.__mul),
Simple = m.Cg( m.V'Token' * (m.V'Token' % mt.__mul)^0 * (m.V'Boundary' % mt.__mul)^-1),
Boundary = #m.P'/' * m.Cc(#m.P'/') + eof,
Token = m.V'Ques' + m.V'Class' + m.V'Escape' + m.V'Literal',
Star = m.P'*',
-- '?' yields one non-separator character.
Ques = m.P'?' * m.Cc(notslash),
-- A backslash followed by any single character yields that character as a literal.
Escape = m.P'\\' * m.C(1) / m.P,
Literal = m.C(letter^1) / m.P,
-- Branch handling for braced conditions
-- The match-time function receives the whole glob `s`, the position `i` after
-- the condition list, and the two captures described below. Its first return
-- value is the position where parsing resumes; the remaining values are
-- passed on to `expand`.
Branch = m.Cmt(m.C(m.V'CondList'), function(s, i, p1, p2)
-- Optimize brace expansion when possible
-- p1: string form of condition list, p2: transformed lua table
if noopt_condlist:match(p1) then
-- Cannot optimize, match till the end
return #s + 1, p2, s:sub(i)
end
-- Find point to cut for optimization
local cut = opt_tail:match(s, i)
if cut then
-- Can optimize: match till cut point
-- true flag tells expand to transform EOF matches to &'/' predicates
return cut, p2, s:sub(i, cut - 1), true
else
-- Cannot optimize
return #s + 1, p2, s:sub(i)
end
end) / expand,
-- Brace expansion handling
-- CondList requires at least two comma-separated conditions and folds them
-- with `add_cond`; the pieces inside one condition are folded with `mul_cond`.
-- An empty condition is captured as an empty string.
CondList = m.Cf(m.P'{' * m.V'Cond' * (m.P',' * m.V'Cond')^1 * m.P'}', add_cond),
Cond = m.Cf((m.C((notcomma + m.P'\\' * 1 - m.S'{}')^1) + m.V'CondList')^1, mul_cond) + m.C(true),
-- Character class handling
-- Captures the optional '!' and a table of members, where an interval is a
-- two-element table and a single character is a string, then calls `class`.
Class = m.P'[' * m.C(m.P'!'^-1) * m.Ct(
(m.Ct(m.C(any_utf8) * m.P'-' * m.C(any_utf8 - m.P']')) + m.C(any_utf8 - m.P']'))^0
) * m.P']' / class,
-- Deterministic first character extraction for optimization
-- Consumes nothing; when the next character is a `letter`, it is stored in the
-- group capture 'F'.
FIRST = m.Cg(m.P(function(s, i)
if letter:match(s, i) then return true, s:sub(i, i)
else return false end
end), 'F')
}
-- stylua: ignore end
--- @diagnostic enable
--- @nodoc
-- Compile the grammar table into a pattern; `g` is a pattern from here on.
g = m.P(g)
--- Parses a raw glob into an |lua-lpeg| pattern.
---
--- The glob is parsed by the module grammar `g`. If the grammar rejects it, an
--- error with the message 'Invalid glob' is raised.
---
---@param pattern string The raw glob pattern
---@return vim.lpeg.Pattern #An |lua-lpeg| representation of the pattern
function M.to_lpeg(pattern)
  local lpeg_pattern = g:match(pattern) --[[@as vim.lpeg.Pattern?]]
  assert(lpeg_pattern, 'Invalid glob')
  return lpeg_pattern
end

View File

@@ -18,6 +18,7 @@ describe('glob', function()
eq(true, match('', ''))
eq(false, match('', 'a'))
eq(true, match('a', 'a'))
eq(true, match('.', '.'))
eq(true, match('/', '/'))
eq(true, match('abc', 'abc'))
eq(false, match('abc', 'abcdef'))
@@ -35,7 +36,8 @@ describe('glob', function()
end)
it('should match * wildcards', function()
eq(false, match('*', ''))
eq(true, match('*', ''))
eq(true, match('*', ' '))
eq(true, match('*', 'a'))
eq(false, match('*', '/'))
eq(false, match('*', '/a'))
@@ -43,6 +45,7 @@ describe('glob', function()
eq(true, match('*', 'aaa'))
eq(true, match('*a', 'aa'))
eq(true, match('*a', 'abca'))
eq(true, match('*.ts', '.ts'))
eq(true, match('*.txt', 'file.txt'))
eq(false, match('*.txt', 'file.txtxt'))
eq(false, match('*.txt', 'dir/file.txt'))
@@ -62,18 +65,13 @@ describe('glob', function()
eq(false, match('dir/*/file.txt', 'dir/file.txt'))
eq(true, match('dir/*/file.txt', 'dir/subdir/file.txt'))
eq(false, match('dir/*/file.txt', 'dir/subdir/subdir/file.txt'))
-- The spec does not describe this, but VSCode only interprets ** when it's by
-- itself in a path segment, and otherwise interprets ** as consecutive * directives.
-- see: https://github.com/microsoft/vscode/blob/eef30e7165e19b33daa1e15e92fa34ff4a5df0d3/src/vs/base/common/glob.ts#L112
eq(true, match('a**', 'abc')) -- '**' should parse as two '*'s when not by itself in a path segment
eq(true, match('**c', 'abc'))
eq(false, match('a**', 'ab')) -- each '*' should still represent at least one character
eq(false, match('**c', 'bc'))
eq(true, match('a**', 'abcd'))
eq(true, match('**d', 'abcd'))
eq(false, match('a**', 'abc/d'))
eq(false, match('**d', 'abc/d'))
eq(true, match('a*b*c*d*e*', 'axbxcxdxe'))
eq(true, match('a*b*c*d*e*', 'axbxcxdxexxx'))
eq(true, match('a*b?c*x', 'abxbbxdbxebxczzx'))
eq(false, match('a*b?c*x', 'abxbbxdbxebxczzy'))
eq(true, match('a*b*[cy]*d*e*', 'axbxcxdxexxx'))
eq(true, match('a*b*[cy]*d*e*', 'axbxyxdxexxx'))
eq(true, match('a*b*[cy]*d*e*', 'axbxxxyxdxexxx'))
end)
it('should match ? wildcards', function()
@@ -84,6 +82,11 @@ describe('glob', function()
eq(true, match('??', 'ab'))
eq(true, match('a?c', 'abc'))
eq(false, match('a?c', 'a/c'))
eq(false, match('a/', 'a/.b'))
eq(true, match('?/?', 'a/b'))
eq(true, match('/??', '/ab'))
eq(true, match('/?b', '/ab'))
eq(false, match('foo?bar', 'foo/bar'))
end)
it('should match ** wildcards', function()
@@ -99,7 +102,7 @@ describe('glob', function()
eq(true, match('/**', '/'))
eq(true, match('/**', '/a/b/c'))
eq(true, match('**/', '')) -- **/ absorbs trailing /
eq(true, match('**/', '/a/b/c'))
eq(false, match('**/', '/a/b/c'))
eq(true, match('**/**', ''))
eq(true, match('**/**', 'a'))
eq(false, match('a/**', ''))
@@ -134,20 +137,9 @@ describe('glob', function()
end)
it('should match {} groups', function()
eq(true, match('{}', ''))
eq(false, match('{}', 'a'))
eq(true, match('a{}', 'a'))
eq(true, match('{}a', 'a'))
eq(true, match('{,}', ''))
eq(true, match('{a,}', ''))
eq(true, match('{a,}', 'a'))
eq(true, match('{a}', 'a'))
eq(false, match('{a}', 'aa'))
eq(false, match('{a}', 'ab'))
eq(true, match('{a?c}', 'abc'))
eq(false, match('{ab}', 'a'))
eq(false, match('{ab}', 'b'))
eq(true, match('{ab}', 'ab'))
eq(true, match('{a,b}', 'a'))
eq(true, match('{a,b}', 'b'))
eq(false, match('{a,b}', 'ab'))
@@ -155,7 +147,22 @@ describe('glob', function()
eq(false, match('{ab,cd}', 'a'))
eq(true, match('{ab,cd}', 'cd'))
eq(true, match('{a,b,c}', 'c'))
eq(false, match('{a,{b,c}}', 'c')) -- {} cannot nest
eq(true, match('{a,{b,c}}', 'c'))
eq(true, match('a{,/}*.txt', 'a.txt'))
eq(true, match('a{,/}*.txt', 'ab.txt'))
eq(true, match('a{,/}*.txt', 'a/b.txt'))
eq(true, match('a{,/}*.txt', 'a/ab.txt'))
eq(true, match('a/{a{a,b},b}', 'a/aa'))
eq(true, match('a/{a{a,b},b}', 'a/ab'))
eq(false, match('a/{a{a,b},b}', 'a/ac'))
eq(true, match('a/{a{a,b},b}', 'a/b'))
eq(false, match('a/{a{a,b},b}', 'a/c'))
eq(true, match('foo{bar,b*z}', 'foobar'))
eq(true, match('foo{bar,b*z}', 'foobuzz'))
eq(true, match('foo{bar,b*z}', 'foobarz'))
eq(true, match('{a,b}/c/{d,e}/**/*est.ts', 'a/c/d/one/two/three.test.ts'))
eq(true, match('{a,{d,e}b}/c', 'a/c'))
eq(true, match('{**/a,**/b}', 'b'))
end)
it('should match [] groups', function()
@@ -181,6 +188,13 @@ describe('glob', function()
eq(true, match('[a-zA-Z0-9]', 'Z'))
eq(true, match('[a-zA-Z0-9]', '9'))
eq(false, match('[a-zA-Z0-9]', '&'))
eq(true, match('[?]', '?'))
eq(false, match('[?]', 'a'))
eq(true, match('[*]', '*'))
eq(false, match('[*]', 'a'))
eq(true, match('[\\!]', '!'))
eq(true, match('a\\*b', 'a*b'))
eq(false, match('a\\*b', 'axb'))
end)
it('should match [!...] groups', function()
@@ -202,8 +216,7 @@ describe('glob', function()
it('should handle long patterns', function()
-- lpeg has a recursion limit of 200 by default, make sure the grammar does not trigger it on
-- strings longer than that
local fill_200 =
'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
local fill_200 = ('a'):rep(200)
eq(200, fill_200:len())
local long_lit = fill_200 .. 'a'
eq(false, match(long_lit, 'b'))
@@ -212,6 +225,21 @@ describe('glob', function()
eq(true, match(long_pat, fill_200 .. 'a/b/c/d.c'))
end)
-- Unicode coverage: multibyte literals in paths and brace groups, `*` next to
-- multibyte text, and character classes with multibyte members.
it('should match unicode patterns', function()
  eq(true, match('😎/¢£.{ts,tsx,js,jsx}', '😎/¢£.ts'))
  eq(true, match('😎/¢£.{ts,tsx,js,jsx}', '😎/¢£.tsx'))
  eq(true, match('😎/¢£.{ts,tsx,js,jsx}', '😎/¢£.js'))
  eq(true, match('😎/¢£.{ts,tsx,js,jsx}', '😎/¢£.jsx'))
  eq(false, match('😎/¢£.{ts,tsx,js,jsx}', '😎/¢£.jsxxxxxxxx'))
  eq(true, match('*é*', 'café noir'))
  eq(true, match('caf*noir', 'café noir'))
  eq(true, match('caf*noir', 'cafeenoir'))
  -- One assertion per class member: ë, £ and a.
  eq(true, match('F[ë£a]', 'Fë'))
  eq(true, match('F[ë£a]', 'F£'))
  eq(true, match('F[ë£a]', 'Fa'))
end)
it('should match complex patterns', function()
eq(false, match('**/*.{c,h}', ''))
eq(false, match('**/*.{c,h}', 'c'))