mirror of
https://github.com/neovim/neovim.git
synced 2026-01-04 02:17:09 +10:00
feat(glob): new Glob implementation based on Peglob #33605
|vim.glob.to_lpeg()| uses a new LPeg-based implementation (Peglob) that provides ~50% speedup for complex patterns. The implementation restores support for nested braces and follows LSP 3.17 specification with additional constraints for improved correctness and resistance to backtracking edge cases.
This commit is contained in:
@@ -3236,30 +3236,51 @@ vim.fs.root({source}, {marker}) *vim.fs.root()*
|
||||
==============================================================================
|
||||
Lua module: vim.glob *vim.glob*
|
||||
|
||||
Glob-to-LPeg Converter (Peglob) This module converts glob patterns to LPeg
|
||||
patterns according to the LSP 3.17 specification:
|
||||
https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern
|
||||
|
||||
Glob grammar overview:
|
||||
• `*` to match zero or more characters in a path segment
|
||||
• `?` to match on one character in a path segment
|
||||
• `**` to match any number of path segments, including none
|
||||
• `{}` to group conditions (e.g. `*.{ts,js}` matches TypeScript and JavaScript
|
||||
files)
|
||||
• `[]` to declare a range of characters to match in a path segment (e.g.,
|
||||
`example.[0-9]` to match on `example.0`, `example.1`, …)
|
||||
• `[!...]` to negate a range of characters to match in a path segment (e.g.,
|
||||
`example.[!0-9]` to match on `example.a`, `example.b`, but not `example.0`)
|
||||
|
||||
Additional constraints:
|
||||
• A Glob pattern must match an entire path, with partial matches considered
|
||||
failures.
|
||||
• The pattern only determines success or failure, without specifying which
|
||||
parts correspond to which characters.
|
||||
• A path segment is the portion of a path between two adjacent path separators
|
||||
(`/`), or between the start/end of the path and the nearest separator.
|
||||
• The `**` (globstar) pattern matches zero or more path segments, including
|
||||
intervening separators (`/`). Within pattern strings, `**` must be delimited
|
||||
by path separators (`/`) or pattern boundaries and cannot be adjacent to any
|
||||
characters other than `/`. If `**` is not the final element, it must be
|
||||
followed by `/`.
|
||||
• `{}` (braced conditions) contains valid Glob patterns as branches, separated
|
||||
by commas. Commas are exclusively used for separating branches and cannot
|
||||
appear within a branch for any other purpose. Nested `{}` structures are
|
||||
allowed, but `{}` must contain at least two branches—zero or one branch is
|
||||
not permitted.
|
||||
• In `[]` or `[!...]`, a character range consists of character intervals
|
||||
(e.g., `a-z`) or individual characters (e.g., `w`). A range including `/`
|
||||
won’t match that character.
|
||||
|
||||
|
||||
vim.glob.to_lpeg({pattern}) *vim.glob.to_lpeg()*
|
||||
Parses a raw glob into an |lua-lpeg| pattern.
|
||||
|
||||
This uses glob semantics from LSP 3.17.0:
|
||||
https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern
|
||||
|
||||
Glob patterns can have the following syntax:
|
||||
• `*` to match one or more characters in a path segment
|
||||
• `?` to match on one character in a path segment
|
||||
• `**` to match any number of path segments, including none
|
||||
• `{}` to group conditions (e.g. `*.{ts,js}` matches TypeScript and
|
||||
JavaScript files)
|
||||
• `[]` to declare a range of characters to match in a path segment (e.g.,
|
||||
`example.[0-9]` to match on `example.0`, `example.1`, …)
|
||||
• `[!...]` to negate a range of characters to match in a path segment
|
||||
(e.g., `example.[!0-9]` to match on `example.a`, `example.b`, but not
|
||||
`example.0`)
|
||||
|
||||
Parameters: ~
|
||||
• {pattern} (`string`) The raw glob pattern
|
||||
|
||||
Return: ~
|
||||
(`vim.lpeg.Pattern`) pattern An |lua-lpeg| representation of the
|
||||
pattern
|
||||
(`vim.lpeg.Pattern`) An |lua-lpeg| representation of the pattern
|
||||
|
||||
|
||||
==============================================================================
|
||||
|
||||
@@ -175,7 +175,11 @@ OPTIONS
|
||||
|
||||
PERFORMANCE
|
||||
|
||||
• todo
|
||||
• |vim.glob.to_lpeg()| uses a new LPeg-based implementation (Peglob) that
|
||||
provides ~50% speedup for complex patterns. The implementation restores
|
||||
support for nested braces and follows LSP 3.17 specification with
|
||||
additional constraints for improved correctness and resistance to
|
||||
backtracking edge cases.
|
||||
|
||||
PLUGINS
|
||||
|
||||
|
||||
@@ -1,93 +1,375 @@
|
||||
local lpeg = vim.lpeg
|
||||
local P, S, V, R, B = lpeg.P, lpeg.S, lpeg.V, lpeg.R, lpeg.B
|
||||
local C, Cc, Ct, Cf, Cmt = lpeg.C, lpeg.Cc, lpeg.Ct, lpeg.Cf, lpeg.Cmt
|
||||
|
||||
local M = {}
|
||||
|
||||
local pathsep = P('/')
|
||||
|
||||
--- Parses a raw glob into an |lua-lpeg| pattern.
|
||||
--- @brief Glob-to-LPeg Converter (Peglob)
|
||||
--- This module converts glob patterns to LPeg patterns according to the LSP 3.17 specification:
|
||||
--- https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern
|
||||
---
|
||||
--- This uses glob semantics from LSP 3.17.0: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern
|
||||
---
|
||||
--- Glob patterns can have the following syntax:
|
||||
--- - `*` to match one or more characters in a path segment
|
||||
--- Glob grammar overview:
|
||||
--- - `*` to match zero or more characters in a path segment
|
||||
--- - `?` to match on one character in a path segment
|
||||
--- - `**` to match any number of path segments, including none
|
||||
--- - `{}` to group conditions (e.g. `*.{ts,js}` matches TypeScript and JavaScript files)
|
||||
--- - `[]` to declare a range of characters to match in a path segment (e.g., `example.[0-9]` to match on `example.0`, `example.1`, …)
|
||||
--- - `[!...]` to negate a range of characters to match in a path segment (e.g., `example.[!0-9]` to match on `example.a`, `example.b`, but not `example.0`)
|
||||
--- - `[]` to declare a range of characters to match in a path segment
|
||||
--- (e.g., `example.[0-9]` to match on `example.0`, `example.1`, …)
|
||||
--- - `[!...]` to negate a range of characters to match in a path segment
|
||||
--- (e.g., `example.[!0-9]` to match on `example.a`, `example.b`, but not `example.0`)
|
||||
---
|
||||
--- Additional constraints:
|
||||
--- - A Glob pattern must match an entire path, with partial matches
|
||||
--- considered failures.
|
||||
--- - The pattern only determines success or failure, without specifying
|
||||
--- which parts correspond to which characters.
|
||||
--- - A *path segment* is the portion of a path between two adjacent path
|
||||
--- separators (`/`), or between the start/end of the path and the nearest
|
||||
--- separator.
|
||||
--- - The `**` (*globstar*) pattern matches zero or more path segments,
|
||||
--- including intervening separators (`/`). Within pattern strings, `**`
|
||||
--- must be delimited by path separators (`/`) or pattern boundaries and
|
||||
--- cannot be adjacent to any characters other than `/`. If `**` is not
|
||||
--- the final element, it must be followed by `/`.
|
||||
--- - `{}` (*braced conditions*) contains valid Glob patterns as branches,
|
||||
--- separated by commas. Commas are exclusively used for separating
|
||||
--- branches and cannot appear within a branch for any other purpose.
|
||||
--- Nested `{}` structures are allowed, but `{}` must contain at least two
|
||||
--- branches—zero or one branch is not permitted.
|
||||
--- - In `[]` or `[!...]`, a *character range* consists of character
|
||||
--- intervals (e.g., `a-z`) or individual characters (e.g., `w`). A range
|
||||
--- including `/` won’t match that character.
|
||||
|
||||
--- @diagnostic disable: missing-fields
|
||||
|
||||
local m = vim.lpeg
|
||||
local mt = getmetatable(m.P(0))
|
||||
local re = vim.re
|
||||
local bit = require('bit')
|
||||
|
||||
local M = {}
|
||||
|
||||
-- Primitive matchers shared by the glob grammar
-- A single character (byte) that is not glob syntax: , * ? [ ] { } / \
local letter = m.P(1) - m.S(',*?[]{}/\\')
-- Consumes a '/' in the glob text and yields a pattern matching '/' as a constant capture
local slash = m.P('/') * m.Cc(m.P('/'))
-- A single character (byte) other than the path separator
local notslash = m.P(1) - m.P('/')
-- A single character (byte) other than ',' and '\'
local notcomma = m.P(1) - m.S(',\\')
|
||||
|
||||
--- End-of-glob marker.
--- Matches only at the end of the glob text, then looks up the `inseg` group
--- capture: when it is truthy the produced pattern is a lookahead for '/',
--- otherwise the produced pattern matches end of input.
--- @type vim.lpeg.Pattern
local eof = m.P(-1)
  * m.Cb('inseg')
  / function(in_segment)
    if not in_segment then
      return m.P(-1)
    end
    return #m.P('/')
  end
|
||||
|
||||
---@alias pat_table { F: string?, [1]: string, [2]: vim.lpeg.Pattern }
|
||||
---@alias seg_part { [string]: any, [integer]: pat_table }
|
||||
|
||||
--- Opens a new segment record seeded with the segment's leading pattern.
---
--- @param p pat_table Leading word of the segment; only `p[2]` is read
--- @return seg_part Record with `s` = `p[2]`, `e` = `true` and `n` = 0
local function start_seg(p)
  local segment = {}
  segment.s = p[2]
  segment.e = true
  segment.n = 0
  return segment
end
|
||||
|
||||
--- Appends a word to the list of words a segment has to search for.
---
--- @param t seg_part Segment record; `t.n` holds the number of words queued so far
--- @param p pat_table Word to queue
--- @return table The same record `t`, with `p` stored at the next index and `t.n` incremented
local function lookfor(t, p)
  local slot = t.n + 1
  t[slot] = p
  t.n = slot
  return t
end
|
||||
|
||||
--- Marks a segment as ending with "anything up to the next '/'".
---
--- @param t seg_part Segment record
--- @return table The same record `t`, with `t.e` replaced by `notslash ^ 0`
local function to_seg_end(t)
  local rest_of_segment = notslash ^ 0
  t.e = rest_of_segment
  return t
end
|
||||
|
||||
--- Constructs a segment matching pattern from collected components
---
--- With no queued words (`t.n == 0`) the result is simply `t.s` followed by
--- `t.e`. Otherwise a grammar is built whose start rule `s` is `t.s` followed by
--- one rule reference per queued word and finally `t.e`. Each word rule either
--- matches the word at the current position or skips non-'/' characters and
--- retries.
---
--- @param t seg_part Segment structure with patterns
--- @return vim.lpeg.Pattern Complete segment match pattern
local function end_seg(t)
  local tail = t.e

  if t.n == 0 then
    local whole = t.s
    if tail then
      whole = whole * tail
    end
    return whole
  end

  --- @type table<any,any>
  local seg_grammar = { 's' }
  seg_grammar.s = t.s
  for i = 1, t.n do
    local word = t[i]
    -- Every queued word gets its own rule, named by position. Naming rules by
    -- the word text is unsafe: a word spelled "s" would be mistaken for the
    -- start rule `s` (making `s` refer to itself), and two words with the same
    -- text but different patterns would silently share the first one's rule.
    local rname = 'w' .. i
    local retry = m.V(rname)
    if word.F then
      -- Optimize search when deterministic first character is available:
      -- after skipping one character, skip ahead to the next occurrence of it
      seg_grammar[rname] = word[2] + notslash * (notslash - m.P(word.F)) ^ 0 * retry
    else
      seg_grammar[rname] = word[2] + notslash * retry
    end
    seg_grammar.s = seg_grammar.s * m.V(rname)
  end
  if tail then
    seg_grammar.s = seg_grammar.s * tail
  end
  return m.P(seg_grammar)
end
|
||||
|
||||
--- Builds the matcher for a `**/` prefix: try `p` at the current position,
--- otherwise skip one path segment (through its '/') and try again.
---
--- @param p vim.lpeg.Pattern Pattern directly after `**/`
--- @return vim.lpeg.Pattern LPeg pattern for `**/p`
local function dseg(p)
  local skip_segment = notslash ^ 0 * m.P('/')
  return m.P({ p + skip_segment * m.V(1) })
end
|
||||
|
||||
--- @type (vim.lpeg.Pattern|table)
|
||||
local g = nil
|
||||
|
||||
--- Concatenates two braced-expansion operands (Cartesian product).
---
--- A string is a single alternative and a table is a list of alternatives.
--- When exactly one operand is a table, that table is updated in place and
--- returned; two tables produce a new table. An operand that is neither a
--- string nor a table is ignored and the other operand is returned unchanged.
---
--- @param a string|string[] First part
--- @param b string|string[] Second part
--- @return string|string[] Cartesian product of values
local function mul_cond(a, b)
  local kind_a, kind_b = type(a), type(b)

  if kind_a ~= 'string' and kind_a ~= 'table' then
    return b
  end
  if kind_b ~= 'string' and kind_b ~= 'table' then
    return a
  end

  if kind_a == 'string' then
    if kind_b == 'string' then
      return a .. b
    end
    -- string x list: prefix every alternative of `b`
    for k = 1, #b do
      b[k] = a .. b[k]
    end
    return b
  end

  if kind_b == 'string' then
    -- list x string: suffix every alternative of `a`
    for k = 1, #a do
      a[k] = a[k] .. b
    end
    return a
  end

  -- list x list: every combination, `a` varying slowest
  --- @type string[]
  local product = {}
  local width = #b
  for row = 1, #a do
    local base = (row - 1) * width
    for col = 1, width do
      product[base + col] = a[row] .. b[col]
    end
  end
  return product
end
|
||||
|
||||
--- Merges two operands into one list of alternatives.
---
--- Two strings give a new two-element list. When one operand is already a list
--- it is extended in place and returned (`a` is placed before `b`'s entries).
--- Any other combination of argument types returns nothing.
---
--- @param a string|table First part
--- @param b string|table Second part
--- @return table #Combined alternatives
local function add_cond(a, b)
  local kind_a, kind_b = type(a), type(b)

  if kind_b == 'string' then
    if kind_a == 'string' then
      return { a, b }
    end
    if kind_a == 'table' then
      a[#a + 1] = b
      return a
    end
  elseif kind_b == 'table' then
    if kind_a == 'string' then
      table.insert(b, 1, a)
      return b
    end
    if kind_a == 'table' then
      local offset = #a
      for k = 1, #b do
        a[offset + k] = b[k]
      end
      return a
    end
  end
  --- @diagnostic disable-next-line: missing-return
end
|
||||
|
||||
--- Expands patterns handling segment boundaries
--- `#` prefix is added for sub-grammar to detect in-segment flag
---
--- Each entry of `a` is joined with the tail `b`, parsed with the main grammar
--- `g`, and replaced in `a` by the resulting pattern. The result is the ordered
--- choice of all parsed branches, in list order.
---
--- Raises `Invalid glob` when a branch (with its tail) does not parse. Without
--- this check the failed branch would be stored as `nil`, leaving a hole in `a`
--- so that the branch is either dropped silently or fails later with an
--- unrelated error.
---
---@param a (any[]|vim.lpeg.Pattern[]) Array of patterns
---@param b string Tail string
---@param inseg boolean Whether inside a path segment
---@return vim.lpeg.Pattern #Expanded pattern
local function expand(a, b, inseg)
  local prefix = inseg and '#' or ''
  local res
  for i = 1, #a do
    local branch = g:match(prefix .. a[i] .. b)
    if not branch then
      -- Level 0 keeps the message identical to the assert in `to_lpeg`
      error('Invalid glob', 0)
    end
    a[i] = branch
    if res then
      res = res + branch
    else
      res = branch
    end
  end
  return res
end
|
||||
|
||||
--- Decodes the first UTF-8 encoded character of a string.
---
--- The lead byte selects how many continuation bytes follow (0 to 3) and
--- supplies the high payload bits; each continuation byte adds six more bits.
--- Nothing is validated: decoding stops early when the string is shorter than
--- the lead byte announces, and an empty string gives 0.
---
--- @param utf8_str string UTF-8 character
--- @return number #Codepoint value
local function to_codepoint(utf8_str)
  local lead = utf8_str:byte(1)
  if not lead then
    return 0
  end
  if lead < 0x80 then
    return lead
  end

  local value, trailing
  if lead < 0xE0 then
    value, trailing = lead % 0x20, 1 -- keep the low 5 bits
  elseif lead < 0xF0 then
    value, trailing = lead % 0x10, 2 -- keep the low 4 bits
  else
    value, trailing = lead % 0x08, 3 -- keep the low 3 bits
  end

  local last = math.min(#utf8_str, 1 + trailing)
  for pos = 2, last do
    -- shift in the low 6 bits of each continuation byte
    value = value * 0x40 + utf8_str:byte(pos) % 0x40
  end
  return value
end
|
||||
|
||||
--- Patterns for matching UTF-8 encoded characters
-- One continuation byte (0x80-0xBF)
local cont = m.R('\128\191')
-- One encoded character, selected by its lead byte: 1, 2, 3 or 4 bytes long
local any_utf8
do
  local one_byte = m.R('\0\127')
  local two_bytes = m.R('\194\223') * cont
  local three_bytes = m.R('\224\239') * cont * cont
  local four_bytes = m.R('\240\244') * cont * cont * cont
  any_utf8 = one_byte + two_bytes + three_bytes + four_bytes
end
|
||||
|
||||
--- Creates a character class pattern for glob ranges
---
--- An empty class (`[]` or `[!]`) has no members and is matched as that
--- literal text. Otherwise the members are combined into one choice; a
--- two-element table is an interval between two codepoints and a string is a
--- single character. The result never matches '/'.
---
--- A negated class consumes one whole UTF-8 character that is not a member,
--- so `[!a]` matches a multi-byte character in full instead of only its first
--- byte. Bytes that do not form a valid UTF-8 character are consumed one at a
--- time.
---
--- @param inv string Inversion flag ('!' or '')
--- @param ranges (string|string[])[] Character ranges
--- @return vim.lpeg.Pattern #Character class pattern
local function class(inv, ranges)
  local negated = inv == '!'

  if #ranges == 0 then
    if negated then
      return m.P('[!]')
    end
    return m.P('[]')
  end

  local members = m.P(false)
  for _, item in ipairs(ranges) do
    if type(item) == 'table' then
      members = members + m.utfR(to_codepoint(item[1]), to_codepoint(item[2]))
    else
      members = members + m.P(item)
    end
  end

  if negated then
    members = (any_utf8 + m.P(1)) - members --[[@as vim.lpeg.Pattern]]
  end
  return members - m.P('/')
end
|
||||
|
||||
-- Parse constraints for optimizing braced conditions
-- `noopt_condlist` succeeds when the text it is matched against contains a '/'
-- or a '**'. `Branch` uses it on the text of a condition list to decide that
-- the list cannot be optimized.
|
||||
local noopt_condlist = re.compile [[
|
||||
s <- '/' / '**' / . [^/*]* s
|
||||
]]
|
||||
|
||||
-- `opt_tail` consumes characters other than '{' and '/' (stopping where a '**'
-- starts) and succeeds only if the next character is then a '/'. `Branch` uses
-- the returned position as the point where the tail handed to `expand` is cut.
local opt_tail = re.compile [[
|
||||
s <- (!'**' [^{/])* &'/'
|
||||
]]
|
||||
|
||||
-- stylua: ignore start
|
||||
--- @nodoc
|
||||
--- @diagnostic disable
|
||||
--- Main grammar for glob pattern matching
--- Matching this grammar against a glob string yields, as its capture, the
--- LPeg pattern that matches paths. Pieces are combined with `mt.__mul`, the
--- multiplication metamethod of LPeg patterns (sequence).
|
||||
g = {
|
||||
'Glob',
|
||||
-- An optional leading '#' sets the `inseg` group capture to true (otherwise
-- false); `eof` reads that value back. The rest is a '/'-separated list of
-- elements folded into one pattern.
Glob = (m.P'#' * m.Cg(m.Cc(true), 'inseg') + m.Cg(m.Cc(false), 'inseg')) *
|
||||
m.Cf(m.V'Element'^-1 * (slash * m.V'Element')^0 * (slash^-1 * eof), mt.__mul),
|
||||
-- Elements handle segments, globstar patterns
|
||||
Element = m.V'DSeg' + m.V'DSEnd' + m.Cf(m.V'Segment' * (slash * m.V'Segment')^0 * (slash * eof + eof^-1), mt.__mul),
|
||||
-- Globstar patterns
-- DSeg: '**/' followed by an element or the end of the glob, wrapped by `dseg`
-- DSEnd: '**' at the very end of the glob, which matches everything that remains
|
||||
DSeg = m.P'**/' * ((m.V'Element' + eof) / dseg),
|
||||
DSEnd = m.P'**' * -1 * m.Cc(m.P(1)^0),
|
||||
-- Segment handling with word and star patterns
-- The first word (or a placeholder when the segment starts with '*') opens a
-- record via `start_seg`; each following '*' + word is queued with `lookfor`;
-- a trailing '*' at a boundary is recorded with `to_seg_end`; `end_seg` turns
-- the record into a pattern. A segment that is only '*' yields `notslash^0`.
|
||||
Segment = (m.V'Word' / start_seg + m.Cc({ '', true }) / start_seg * (m.V'Star' * m.V'Word' % lookfor)) *
|
||||
(m.V'Star' * m.V'Word' % lookfor)^0 * (m.V'Star' * m.V'CheckBnd' % to_seg_end)^-1 / end_seg
|
||||
+ m.V'Star' * m.V'CheckBnd' * m.Cc(notslash^0),
|
||||
CheckBnd = #m.P'/' + -1, -- Boundary constraint
|
||||
|
||||
-- Word patterns for fixed-length matching
-- A word captures a table: optional field `F` (see FIRST), the matched glob
-- text at [1] and the pattern built from it at [2].
|
||||
Word = -m.P'*' * m.Ct( m.V('FIRST')^-1 * m.C(m.V'WordAux') ),
|
||||
WordAux = m.V'Branch' + m.Cf(m.V'Simple'^1 * m.V'Branch'^-1, mt.__mul),
|
||||
Simple = m.Cg( m.V'Token' * (m.V'Token' % mt.__mul)^0 * (m.V'Boundary' % mt.__mul)^-1),
|
||||
Boundary = #m.P'/' * m.Cc(#m.P'/') + eof,
|
||||
Token = m.V'Ques' + m.V'Class' + m.V'Escape' + m.V'Literal',
|
||||
Star = m.P'*',
|
||||
Ques = m.P'?' * m.Cc(notslash),
|
||||
Escape = m.P'\\' * m.C(1) / m.P,
|
||||
Literal = m.C(letter^1) / m.P,
|
||||
|
||||
-- Branch handling for braced conditions
-- The match-time callback receives the whole glob `s`, the position `i` just
-- after the condition list, and the list's captures. It decides how much of the
-- remaining glob is handed to `expand` together with the alternatives.
|
||||
Branch = m.Cmt(m.C(m.V'CondList'), function(s, i, p1, p2)
|
||||
-- Optimize brace expansion when possible
|
||||
-- p1: string form of condition list, p2: transformed lua table
|
||||
if noopt_condlist:match(p1) then
|
||||
-- Cannot optimize, match till the end
|
||||
return #s + 1, p2, s:sub(i)
|
||||
end
|
||||
-- Find point to cut for optimization
|
||||
local cut = opt_tail:match(s, i)
|
||||
if cut then
|
||||
-- Can optimize: match till cut point
|
||||
-- true flag tells expand to transform EOF matches to &'/' predicates
|
||||
return cut, p2, s:sub(i, cut - 1), true
|
||||
else
|
||||
-- Cannot optimize
|
||||
return #s + 1, p2, s:sub(i)
|
||||
end
|
||||
end) / expand,
|
||||
-- Brace expansion handling
-- CondList needs at least two comma-separated conditions; they are merged with
-- `add_cond`. Within one condition, adjacent pieces (plain text or nested
-- lists) are combined with `mul_cond`; an empty condition captures ''.
|
||||
CondList = m.Cf(m.P'{' * m.V'Cond' * (m.P',' * m.V'Cond')^1 * m.P'}', add_cond),
|
||||
Cond = m.Cf((m.C((notcomma + m.P'\\' * 1 - m.S'{}')^1) + m.V'CondList')^1, mul_cond) + m.C(true),
|
||||
|
||||
-- Character class handling
-- Captures the optional '!' and a list whose entries are either a
-- two-element table (interval `x-y`) or a single character, then calls `class`.
|
||||
Class = m.P'[' * m.C(m.P'!'^-1) * m.Ct(
|
||||
(m.Ct(m.C(any_utf8) * m.P'-' * m.C(any_utf8 - m.P']')) + m.C(any_utf8 - m.P']'))^0
|
||||
) * m.P']' / class,
|
||||
|
||||
-- Deterministic first character extraction for optimization
-- Consumes nothing; when the character at the current position is a `letter`,
-- it is captured under the group name `F`.
|
||||
FIRST = m.Cg(m.P(function(s, i)
|
||||
if letter:match(s, i) then return true, s:sub(i, i)
|
||||
else return false end
|
||||
end), 'F')
|
||||
}
|
||||
-- stylua: ignore end
|
||||
--- @diagnostic enable
|
||||
|
||||
--- @nodoc
|
||||
g = m.P(g)
|
||||
|
||||
--- Parses a raw glob into an |lua-lpeg| pattern.
|
||||
---
|
||||
---@param pattern string The raw glob pattern
|
||||
---@return vim.lpeg.Pattern pattern An |lua-lpeg| representation of the pattern
|
||||
---@return vim.lpeg.Pattern #An |lua-lpeg| representation of the pattern
|
||||
function M.to_lpeg(pattern)
|
||||
local function class(inv, ranges)
|
||||
local patt = R(unpack(vim.tbl_map(table.concat, ranges)))
|
||||
if inv == '!' then
|
||||
patt = P(1) - patt
|
||||
end
|
||||
return patt
|
||||
end
|
||||
|
||||
local function condlist(conds, after)
|
||||
return vim.iter(conds):fold(P(false), function(acc, cond)
|
||||
return acc + cond * after
|
||||
end)
|
||||
end
|
||||
|
||||
local function mul(acc, m)
|
||||
return acc * m
|
||||
end
|
||||
|
||||
local function star(stars, after)
|
||||
return (-after * (P(1) - pathsep)) ^ #stars * after
|
||||
end
|
||||
|
||||
local function dstar(after)
|
||||
return (-after * P(1)) ^ 0 * after
|
||||
end
|
||||
|
||||
-- luacheck: push ignore s
|
||||
local function cut(_s, idx, match)
|
||||
return idx, match
|
||||
end
|
||||
-- luacheck: pop
|
||||
|
||||
--- @diagnostic disable-next-line: missing-fields
|
||||
local p = P({
|
||||
'Pattern',
|
||||
Pattern = V('Elem') ^ -1 * V('End'),
|
||||
Elem = Cmt(
|
||||
Cf(
|
||||
(V('DStar') + V('Star') + V('Ques') + V('Class') + V('CondList') + V('Literal'))
|
||||
* (V('Elem') + V('End')),
|
||||
mul
|
||||
),
|
||||
cut
|
||||
),
|
||||
DStar = (B(pathsep) + -B(P(1)))
|
||||
* P('**')
|
||||
* (pathsep * (V('Elem') + V('End')) + V('End'))
|
||||
/ dstar,
|
||||
Star = C(P('*') ^ 1) * (V('Elem') + V('End')) / star,
|
||||
Ques = P('?') * Cc(P(1) - pathsep),
|
||||
Class = P('[')
|
||||
* C(P('!') ^ -1)
|
||||
* Ct(Ct(C(P(1)) * P('-') * C(P(1) - P(']'))) ^ 1 * P(']'))
|
||||
/ class,
|
||||
CondList = P('{') * Ct(V('Cond') * (P(',') * V('Cond')) ^ 0) * P('}') * V('Pattern') / condlist,
|
||||
-- TODO: '*' inside a {} condition is interpreted literally but should probably have the same
|
||||
-- wildcard semantics it usually has.
|
||||
-- Fixing this is non-trivial because '*' should match non-greedily up to "the rest of the
|
||||
-- pattern" which in all other cases is the entire succeeding part of the pattern, but at the end of a {}
|
||||
-- condition means "everything after the {}" where several other options separated by ',' may
|
||||
-- exist in between that should not be matched by '*'.
|
||||
Cond = Cmt(Cf((V('Ques') + V('Class') + V('Literal') - S(',}')) ^ 1, mul), cut) + Cc(P(0)),
|
||||
Literal = P(1) / P,
|
||||
End = P(-1) * Cc(P(-1)),
|
||||
})
|
||||
|
||||
local lpeg_pattern = p:match(pattern) --[[@as vim.lpeg.Pattern?]]
|
||||
local lpeg_pattern = g:match(pattern) --[[@as vim.lpeg.Pattern?]]
|
||||
assert(lpeg_pattern, 'Invalid glob')
|
||||
return lpeg_pattern
|
||||
end
|
||||
|
||||
@@ -18,6 +18,7 @@ describe('glob', function()
|
||||
eq(true, match('', ''))
|
||||
eq(false, match('', 'a'))
|
||||
eq(true, match('a', 'a'))
|
||||
eq(true, match('.', '.'))
|
||||
eq(true, match('/', '/'))
|
||||
eq(true, match('abc', 'abc'))
|
||||
eq(false, match('abc', 'abcdef'))
|
||||
@@ -35,7 +36,8 @@ describe('glob', function()
|
||||
end)
|
||||
|
||||
it('should match * wildcards', function()
|
||||
eq(false, match('*', ''))
|
||||
eq(true, match('*', ''))
|
||||
eq(true, match('*', ' '))
|
||||
eq(true, match('*', 'a'))
|
||||
eq(false, match('*', '/'))
|
||||
eq(false, match('*', '/a'))
|
||||
@@ -43,6 +45,7 @@ describe('glob', function()
|
||||
eq(true, match('*', 'aaa'))
|
||||
eq(true, match('*a', 'aa'))
|
||||
eq(true, match('*a', 'abca'))
|
||||
eq(true, match('*.ts', '.ts'))
|
||||
eq(true, match('*.txt', 'file.txt'))
|
||||
eq(false, match('*.txt', 'file.txtxt'))
|
||||
eq(false, match('*.txt', 'dir/file.txt'))
|
||||
@@ -62,18 +65,13 @@ describe('glob', function()
|
||||
eq(false, match('dir/*/file.txt', 'dir/file.txt'))
|
||||
eq(true, match('dir/*/file.txt', 'dir/subdir/file.txt'))
|
||||
eq(false, match('dir/*/file.txt', 'dir/subdir/subdir/file.txt'))
|
||||
|
||||
-- The spec does not describe this, but VSCode only interprets ** when it's by
|
||||
-- itself in a path segment, and otherwise interprets ** as consecutive * directives.
|
||||
-- see: https://github.com/microsoft/vscode/blob/eef30e7165e19b33daa1e15e92fa34ff4a5df0d3/src/vs/base/common/glob.ts#L112
|
||||
eq(true, match('a**', 'abc')) -- '**' should parse as two '*'s when not by itself in a path segment
|
||||
eq(true, match('**c', 'abc'))
|
||||
eq(false, match('a**', 'ab')) -- each '*' should still represent at least one character
|
||||
eq(false, match('**c', 'bc'))
|
||||
eq(true, match('a**', 'abcd'))
|
||||
eq(true, match('**d', 'abcd'))
|
||||
eq(false, match('a**', 'abc/d'))
|
||||
eq(false, match('**d', 'abc/d'))
|
||||
eq(true, match('a*b*c*d*e*', 'axbxcxdxe'))
|
||||
eq(true, match('a*b*c*d*e*', 'axbxcxdxexxx'))
|
||||
eq(true, match('a*b?c*x', 'abxbbxdbxebxczzx'))
|
||||
eq(false, match('a*b?c*x', 'abxbbxdbxebxczzy'))
|
||||
eq(true, match('a*b*[cy]*d*e*', 'axbxcxdxexxx'))
|
||||
eq(true, match('a*b*[cy]*d*e*', 'axbxyxdxexxx'))
|
||||
eq(true, match('a*b*[cy]*d*e*', 'axbxxxyxdxexxx'))
|
||||
end)
|
||||
|
||||
it('should match ? wildcards', function()
|
||||
@@ -84,6 +82,11 @@ describe('glob', function()
|
||||
eq(true, match('??', 'ab'))
|
||||
eq(true, match('a?c', 'abc'))
|
||||
eq(false, match('a?c', 'a/c'))
|
||||
eq(false, match('a/', 'a/.b'))
|
||||
eq(true, match('?/?', 'a/b'))
|
||||
eq(true, match('/??', '/ab'))
|
||||
eq(true, match('/?b', '/ab'))
|
||||
eq(false, match('foo?bar', 'foo/bar'))
|
||||
end)
|
||||
|
||||
it('should match ** wildcards', function()
|
||||
@@ -99,7 +102,7 @@ describe('glob', function()
|
||||
eq(true, match('/**', '/'))
|
||||
eq(true, match('/**', '/a/b/c'))
|
||||
eq(true, match('**/', '')) -- **/ absorbs trailing /
|
||||
eq(true, match('**/', '/a/b/c'))
|
||||
eq(false, match('**/', '/a/b/c'))
|
||||
eq(true, match('**/**', ''))
|
||||
eq(true, match('**/**', 'a'))
|
||||
eq(false, match('a/**', ''))
|
||||
@@ -134,20 +137,9 @@ describe('glob', function()
|
||||
end)
|
||||
|
||||
it('should match {} groups', function()
|
||||
eq(true, match('{}', ''))
|
||||
eq(false, match('{}', 'a'))
|
||||
eq(true, match('a{}', 'a'))
|
||||
eq(true, match('{}a', 'a'))
|
||||
eq(true, match('{,}', ''))
|
||||
eq(true, match('{a,}', ''))
|
||||
eq(true, match('{a,}', 'a'))
|
||||
eq(true, match('{a}', 'a'))
|
||||
eq(false, match('{a}', 'aa'))
|
||||
eq(false, match('{a}', 'ab'))
|
||||
eq(true, match('{a?c}', 'abc'))
|
||||
eq(false, match('{ab}', 'a'))
|
||||
eq(false, match('{ab}', 'b'))
|
||||
eq(true, match('{ab}', 'ab'))
|
||||
eq(true, match('{a,b}', 'a'))
|
||||
eq(true, match('{a,b}', 'b'))
|
||||
eq(false, match('{a,b}', 'ab'))
|
||||
@@ -155,7 +147,22 @@ describe('glob', function()
|
||||
eq(false, match('{ab,cd}', 'a'))
|
||||
eq(true, match('{ab,cd}', 'cd'))
|
||||
eq(true, match('{a,b,c}', 'c'))
|
||||
eq(false, match('{a,{b,c}}', 'c')) -- {} cannot nest
|
||||
eq(true, match('{a,{b,c}}', 'c'))
|
||||
eq(true, match('a{,/}*.txt', 'a.txt'))
|
||||
eq(true, match('a{,/}*.txt', 'ab.txt'))
|
||||
eq(true, match('a{,/}*.txt', 'a/b.txt'))
|
||||
eq(true, match('a{,/}*.txt', 'a/ab.txt'))
|
||||
eq(true, match('a/{a{a,b},b}', 'a/aa'))
|
||||
eq(true, match('a/{a{a,b},b}', 'a/ab'))
|
||||
eq(false, match('a/{a{a,b},b}', 'a/ac'))
|
||||
eq(true, match('a/{a{a,b},b}', 'a/b'))
|
||||
eq(false, match('a/{a{a,b},b}', 'a/c'))
|
||||
eq(true, match('foo{bar,b*z}', 'foobar'))
|
||||
eq(true, match('foo{bar,b*z}', 'foobuzz'))
|
||||
eq(true, match('foo{bar,b*z}', 'foobarz'))
|
||||
eq(true, match('{a,b}/c/{d,e}/**/*est.ts', 'a/c/d/one/two/three.test.ts'))
|
||||
eq(true, match('{a,{d,e}b}/c', 'a/c'))
|
||||
eq(true, match('{**/a,**/b}', 'b'))
|
||||
end)
|
||||
|
||||
it('should match [] groups', function()
|
||||
@@ -181,6 +188,13 @@ describe('glob', function()
|
||||
eq(true, match('[a-zA-Z0-9]', 'Z'))
|
||||
eq(true, match('[a-zA-Z0-9]', '9'))
|
||||
eq(false, match('[a-zA-Z0-9]', '&'))
|
||||
eq(true, match('[?]', '?'))
|
||||
eq(false, match('[?]', 'a'))
|
||||
eq(true, match('[*]', '*'))
|
||||
eq(false, match('[*]', 'a'))
|
||||
eq(true, match('[\\!]', '!'))
|
||||
eq(true, match('a\\*b', 'a*b'))
|
||||
eq(false, match('a\\*b', 'axb'))
|
||||
end)
|
||||
|
||||
it('should match [!...] groups', function()
|
||||
@@ -202,8 +216,7 @@ describe('glob', function()
|
||||
it('should handle long patterns', function()
|
||||
-- lpeg has a recursion limit of 200 by default, make sure the grammar does not trigger it on
|
||||
-- strings longer than that
|
||||
local fill_200 =
|
||||
'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
|
||||
local fill_200 = ('a'):rep(200)
|
||||
eq(200, fill_200:len())
|
||||
local long_lit = fill_200 .. 'a'
|
||||
eq(false, match(long_lit, 'b'))
|
||||
@@ -212,6 +225,21 @@ describe('glob', function()
|
||||
eq(true, match(long_pat, fill_200 .. 'a/b/c/d.c'))
|
||||
end)
|
||||
|
||||
-- New test for unicode patterns from assets
|
||||
it('should match unicode patterns', function()
|
||||
eq(true, match('😎/¢£.{ts,tsx,js,jsx}', '😎/¢£.ts'))
|
||||
eq(true, match('😎/¢£.{ts,tsx,js,jsx}', '😎/¢£.tsx'))
|
||||
eq(true, match('😎/¢£.{ts,tsx,js,jsx}', '😎/¢£.js'))
|
||||
eq(true, match('😎/¢£.{ts,tsx,js,jsx}', '😎/¢£.jsx'))
|
||||
eq(false, match('😎/¢£.{ts,tsx,js,jsx}', '😎/¢£.jsxxxxxxxx'))
|
||||
eq(true, match('*é*', 'café noir'))
|
||||
eq(true, match('caf*noir', 'café noir'))
|
||||
eq(true, match('caf*noir', 'cafeenoir'))
|
||||
eq(true, match('F[ë£a]', 'Fë'))
|
||||
eq(true, match('F[ë£a]', 'F£'))
|
||||
eq(true, match('F[ë£a]', 'Fa'))
|
||||
end)
|
||||
|
||||
it('should match complex patterns', function()
|
||||
eq(false, match('**/*.{c,h}', ''))
|
||||
eq(false, match('**/*.{c,h}', 'c'))
|
||||
|
||||
Reference in New Issue
Block a user