feat(glob): new Glob implementation based on Peglob #33605

|vim.glob.to_lpeg()| uses a new LPeg-based implementation (Peglob) that provides ~50% speedup for complex patterns. The implementation restores support for nested braces and follows LSP 3.17 specification with additional constraints for improved correctness and resistance to backtracking edge cases.
2026-01-04 02:17:09 +10:00 · 2025-05-22 15:24:49 +08:00
parent 172a90c245
commit 322a6d305d
4 changed files with 464 additions and 129 deletions
--- a/runtime/doc/lua.txt
+++ b/runtime/doc/lua.txt
@@ -3236,30 +3236,51 @@ vim.fs.root({source}, {marker})                                *vim.fs.root()*
 ==============================================================================
 Lua module: vim.glob                                                *vim.glob*

+Glob-to-LPeg Converter (Peglob) This module converts glob patterns to LPeg
+patterns according to the LSP 3.17 specification:
+https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern
+
+Glob grammar overview:
+• `*` to match zero or more characters in a path segment
+• `?` to match on one character in a path segment
+• `**` to match any number of path segments, including none
+• `{}` to group conditions (e.g. `*.{ts,js}` matches TypeScript and JavaScript
+  files)
+• `[]` to declare a range of characters to match in a path segment (e.g.,
+  `example.[0-9]` to match on `example.0`, `example.1`, …)
+• `[!...]` to negate a range of characters to match in a path segment (e.g.,
+  `example.[!0-9]` to match on `example.a`, `example.b`, but not `example.0`)
+
+Additional constraints:
+• A Glob pattern must match an entire path, with partial matches considered
+  failures.
+• The pattern only determines success or failure, without specifying which
+  parts correspond to which characters.
+• A path segment is the portion of a path between two adjacent path separators
+  (`/`), or between the start/end of the path and the nearest separator.
+• The `**` (globstar) pattern matches zero or more path segments, including
+  intervening separators (`/`). Within pattern strings, `**` must be delimited
+  by path separators (`/`) or pattern boundaries and cannot be adjacent to any
+  characters other than `/`. If `**` is not the final element, it must be
+  followed by `/`.
+• `{}` (braced conditions) contains valid Glob patterns as branches, separated
+  by commas. Commas are exclusively used for separating branches and cannot
+  appear within a branch for any other purpose. Nested `{}` structures are
+  allowed, but `{}` must contain at least two branches—zero or one branch is
+  not permitted.
+• In `[]` or `[!...]`, a character range consists of character intervals
+  (e.g., `a-z`) or individual characters (e.g., `w`). A range including `/`
+  won’t match that character.
+
+
 vim.glob.to_lpeg({pattern})                               *vim.glob.to_lpeg()*
    Parses a raw glob into an |lua-lpeg| pattern.

-    This uses glob semantics from LSP 3.17.0:
-    https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern
-
-    Glob patterns can have the following syntax:
-    • `*` to match one or more characters in a path segment
-    • `?` to match on one character in a path segment
-    • `**` to match any number of path segments, including none
-    • `{}` to group conditions (e.g. `*.{ts,js}` matches TypeScript and
-      JavaScript files)
-    • `[]` to declare a range of characters to match in a path segment (e.g.,
-      `example.[0-9]` to match on `example.0`, `example.1`, …)
-    • `[!...]` to negate a range of characters to match in a path segment
-      (e.g., `example.[!0-9]` to match on `example.a`, `example.b`, but not
-      `example.0`)
-
    Parameters: ~
      • {pattern}  (`string`) The raw glob pattern

    Return: ~
-        (`vim.lpeg.Pattern`) pattern An |lua-lpeg| representation of the
-        pattern
+        (`vim.lpeg.Pattern`) An |lua-lpeg| representation of the pattern


 ==============================================================================
--- a/runtime/doc/news.txt
+++ b/runtime/doc/news.txt
@@ -175,7 +175,11 @@ OPTIONS

 PERFORMANCE

-• todo
+• |vim.glob.to_lpeg()| uses a new LPeg-based implementation (Peglob) that
+  provides ~50% speedup for complex patterns. The implementation restores
+  support for nested braces and follows LSP 3.17 specification with
+  additional constraints for improved correctness and resistance to
+  backtracking edge cases.

 PLUGINS

--- a/runtime/lua/vim/glob.lua
+++ b/runtime/lua/vim/glob.lua
@@ -1,93 +1,375 @@
-local lpeg = vim.lpeg
-local P, S, V, R, B = lpeg.P, lpeg.S, lpeg.V, lpeg.R, lpeg.B
-local C, Cc, Ct, Cf, Cmt = lpeg.C, lpeg.Cc, lpeg.Ct, lpeg.Cf, lpeg.Cmt
-
-local M = {}
-
-local pathsep = P('/')
-
--- Parses a raw glob into an |lua-lpeg| pattern.
+--- @brief Glob-to-LPeg Converter (Peglob)
+--- This module converts glob patterns to LPeg patterns according to the LSP 3.17 specification:
+--- https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern
 ---
--- This uses glob semantics from LSP 3.17.0: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern
---
--- Glob patterns can have the following syntax:
--- - `*` to match one or more characters in a path segment
+--- Glob grammar overview:
+--- - `*` to match zero or more characters in a path segment
 --- - `?` to match on one character in a path segment
 --- - `**` to match any number of path segments, including none
 --- - `{}` to group conditions (e.g. `*.{ts,js}` matches TypeScript and JavaScript files)
--- - `[]` to declare a range of characters to match in a path segment (e.g., `example.[0-9]` to match on `example.0`, `example.1`, …)
--- - `[!...]` to negate a range of characters to match in a path segment (e.g., `example.[!0-9]` to match on `example.a`, `example.b`, but not `example.0`)
+--- - `[]` to declare a range of characters to match in a path segment
+---   (e.g., `example.[0-9]` to match on `example.0`, `example.1`, …)
+--- - `[!...]` to negate a range of characters to match in a path segment
+---   (e.g., `example.[!0-9]` to match on `example.a`, `example.b`, but not `example.0`)
+---
+--- Additional constraints:
+--- - A Glob pattern must match an entire path, with partial matches
+---   considered failures.
+--- - The pattern only determines success or failure, without specifying
+---   which parts correspond to which characters.
+--- - A *path segment* is the portion of a path between two adjacent path
+---   separators (`/`), or between the start/end of the path and the nearest
+---   separator.
+--- - The `**` (*globstar*) pattern matches zero or more path segments,
+---   including intervening separators (`/`). Within pattern strings, `**`
+---   must be delimited by path separators (`/`) or pattern boundaries and
+---   cannot be adjacent to any characters other than `/`. If `**` is not
+---   the final element, it must be followed by `/`.
+--- - `{}` (*braced conditions*) contains valid Glob patterns as branches,
+---   separated by commas. Commas are exclusively used for separating
+---   branches and cannot appear within a branch for any other purpose.
+---   Nested `{}` structures are allowed, but `{}` must contain at least two
+---   branches—zero or one branch is not permitted.
+--- - In `[]` or `[!...]`, a *character range* consists of character
+---   intervals (e.g., `a-z`) or individual characters (e.g., `w`). A range
+---   including `/` won’t match that character.
+
+--- @diagnostic disable: missing-fields
+
+local m = vim.lpeg
+local mt = getmetatable(m.P(0))
+local re = vim.re
+local bit = require('bit')
+
+local M = {}
+
+-- Basic building blocks shared by the glob grammar and its helper functions
+local letter = m.P(1) - m.S(',*?[]{}/\\') -- One character with no special meaning in a glob (not , * ? [ ] { } / or \)
+local slash = m.P '/' * m.Cc(m.P '/') -- Matches `/` in the glob text and captures a pattern that matches `/`
+local notslash = m.P(1) - m.P '/' -- Any single character except the path separator
+local notcomma = m.P(1) - m.S(',\\') -- Any single character except comma and backslash
+
+--- End-of-glob marker used by the grammar.
+---
+--- Succeeds only at the end of the glob text and produces, as its capture,
+--- the pattern to use at that point: a lookahead for `/` when the `inseg`
+--- group holds a true value, otherwise an end-of-subject check.
+--- @type vim.lpeg.Pattern
+local eof = m.P(-1) * m.Cb('inseg') / function(in_segment)
+  -- `#m.P('/')` is a pattern object (always truthy), so and/or is safe here.
+  return in_segment and #m.P('/') or m.P(-1)
+end
+
+---@alias pat_table { F: string?, [1]: string, [2]: vim.lpeg.Pattern }
+---@alias seg_part { [string]: any, [integer]: pat_table }
+
+--- Opens a new segment record from the element that begins the segment.
+---
+--- @param p pat_table Initial segment pattern data; only `p[2]` is used
+--- @return seg_part Record with start pattern `s`, end marker `e = true` and no collected words (`n = 0`)
+local function start_seg(p)
+  local seg = { n = 0, e = true }
+  seg.s = p[2]
+  return seg
+end
+
+--- Appends one more word to the list of words a segment has to find, in order.
+---
+--- @param t seg_part Segment structure (modified in place)
+--- @param p pat_table Pattern to look for
+--- @return table The same segment structure, with `p` stored at the new index `t.n`
+local function lookfor(t, p)
+  local slot = t.n + 1
+  t[slot] = p
+  t.n = slot
+  return t
+end
+
+--- Replaces the segment's end marker with "any run of non-`/` characters";
+--- the grammar applies this when a segment ends in `*`.
+---
+--- @param t seg_part Segment structure (modified in place)
+--- @return table The same structure, with `e` set to the end pattern
+local function to_seg_end(t)
+  local rest_of_segment = notslash ^ 0
+  t.e = rest_of_segment
+  return t
+end
+
+--- Constructs a segment matching pattern from collected components
+---
+--- With no collected words (`t.n == 0`) the result is just the start pattern
+--- followed by the end marker. Otherwise a small grammar is built: every
+--- distinct word gets one rule that either matches the word at the current
+--- position or skips a non-`/` character and retries, and the start rule `s`
+--- chains the start pattern, those rules in order, and the end marker.
+---
+--- Word rules are keyed by the word's glob text with a `w:` prefix. Without
+--- the prefix a word spelled exactly `s` (as in `*s` or `a*s`) would share
+--- its key with the start rule, so it would never get a rule of its own and
+--- the start rule would end up referring to itself.
+---
+--- @param t seg_part Segment structure with patterns
+--- @return vim.lpeg.Pattern Complete segment match pattern
+local function end_seg(t)
+  local start = t.s
+  if t.n > 0 then
+    --- @type table<any,any>
+    local seg_grammar = { 's' }
+    for i = 1, t.n do
+      local word = t[i]
+      -- Prefixed so the key can never equal the start rule name 's'.
+      local rname = 'w:' .. word[1]
+      if not seg_grammar[rname] then
+        if word.F then
+          -- Optimize search when deterministic first character is available:
+          -- after a failed attempt, also skip every following character that
+          -- cannot start the word before retrying.
+          seg_grammar[rname] = word[2] + notslash * (notslash - m.P(word.F)) ^ 0 * m.V(rname)
+        else
+          seg_grammar[rname] = word[2] + notslash * m.V(rname)
+        end
+      end
+      start = start * m.V(rname)
+    end
+    if t.e then
+      start = start * t.e
+    end
+    seg_grammar.s = start
+    return m.P(seg_grammar)
+  end
+
+  -- No words to search for: no grammar is needed.
+  if t.e then
+    start = start * t.e
+  end
+  return start
+end
+
+--- Builds the pattern for a `**/` prefix: try `p` at the current position,
+--- otherwise skip one path segment together with its `/` and try again.
+---
+--- @param p vim.lpeg.Pattern Pattern directly after `**/`
+--- @return vim.lpeg.Pattern LPeg pattern for `**/p`
+local function dseg(p)
+  local skip_one_segment = notslash ^ 0 * m.P('/')
+  -- Single-rule grammar; `m.V(1)` refers back to this rule.
+  return m.P({ p + skip_one_segment * m.V(1) })
+end
+
+--- @type (vim.lpeg.Pattern|table)
+local g = nil
+
+--- Joins two adjacent pieces of a braced condition (Cartesian product).
+---
+--- A string is a single alternative, a table is a list of alternatives.
+--- When exactly one operand is a table, that table is updated in place and
+--- returned; two tables produce a new list. If `a` is neither a string nor
+--- a table, `b` is returned as is; otherwise, if `b` is neither, `a` is
+--- returned as is.
+---
+--- @param a string|string[] First part
+--- @param b string|string[] Second part
+--- @return string|string[] Cartesian product of values
+local function mul_cond(a, b)
+  local kind_a, kind_b = type(a), type(b)
+  if kind_a ~= 'string' and kind_a ~= 'table' then
+    return b
+  end
+  if kind_b ~= 'string' and kind_b ~= 'table' then
+    return a
+  end
+
+  if kind_a == 'string' then
+    if kind_b == 'string' then
+      return a .. b
+    end
+    -- Fixed head, several tails: prefix each tail.
+    for j = 1, #b do
+      b[j] = a .. b[j]
+    end
+    return b
+  end
+
+  if kind_b == 'string' then
+    -- Several heads, fixed tail: suffix each head.
+    for i = 1, #a do
+      a[i] = a[i] .. b
+    end
+    return a
+  end
+
+  -- Both sides are lists: every head combined with every tail, heads first.
+  --- @type string[]
+  local product = {}
+  for i = 1, #a do
+    local head = a[i]
+    for j = 1, #b do
+      product[#product + 1] = head .. b[j]
+    end
+  end
+  return product
+end
+
+--- Merges two sets of alternatives of a braced pattern into one list.
+---
+--- Strings count as single alternatives, tables as lists. Whenever one of
+--- the operands is already a list it is extended in place and returned;
+--- order is preserved (`a` before `b`).
+---
+--- @param a string|table First part
+--- @param b string|table Second part
+--- @return table #Combined alternatives
+local function add_cond(a, b)
+  local a_is_text, b_is_text = type(a) == 'string', type(b) == 'string'
+  local a_is_list, b_is_list = type(a) == 'table', type(b) == 'table'
+
+  if a_is_text and b_is_text then
+    return { a, b }
+  elseif a_is_text and b_is_list then
+    table.insert(b, 1, a)
+    return b
+  elseif a_is_list and b_is_text then
+    a[#a + 1] = b
+    return a
+  elseif a_is_list and b_is_list then
+    for i = 1, #b do
+      a[#a + 1] = b[i]
+    end
+    return a
+  end
+  -- Any other combination falls through and returns nothing.
+  --- @diagnostic disable-next-line: missing-return
+end
+
+--- Compiles every alternative of a braced condition together with the glob
+--- text that follows it, and returns the ordered choice of the results.
+---
+--- Each alternative is concatenated with `b` and parsed by the main grammar
+--- `g`. When `inseg` is set the text is prefixed with `#`, which the grammar
+--- reads as "start with the in-segment flag on". The entries of `a` are
+--- overwritten with the compiled patterns.
+---
+---@param a (any[]|vim.lpeg.Pattern[]) Array of patterns
+---@param b string Tail string
+---@param inseg boolean Whether inside a path segment
+---@return vim.lpeg.Pattern #Expanded pattern
+local function expand(a, b, inseg)
+  local marker = inseg and '#' or ''
+  for idx = 1, #a do
+    a[idx] = g:match(marker .. a[idx] .. b)
+  end
+
+  -- Fold the compiled alternatives into one ordered choice.
+  local choice = a[1]
+  for idx = 2, #a do
+    choice = choice + a[idx]
+  end
+  return choice
+end
+
+--- Decodes the first UTF-8 encoded character of a string.
+---
+--- The lead byte selects how many following bytes are folded in (0 to 3);
+--- each of them contributes its low six bits. No validation is performed,
+--- and decoding stops early if the string is shorter than the lead byte
+--- announces. An empty string yields 0.
+---
+--- @param utf8_str string UTF-8 character
+--- @return number #Codepoint value
+local function to_codepoint(utf8_str)
+  local lead = utf8_str:byte(1)
+  if not lead then
+    return 0
+  end
+  if lead < 0x80 then
+    return lead
+  end
+
+  local extra, codepoint
+  if lead < 0xE0 then
+    extra, codepoint = 1, bit.band(lead, 0x1F)
+  elseif lead < 0xF0 then
+    extra, codepoint = 2, bit.band(lead, 0x0F)
+  else
+    extra, codepoint = 3, bit.band(lead, 0x07)
+  end
+
+  local last = math.min(#utf8_str, 1 + extra)
+  for i = 2, last do
+    codepoint = bit.bor(bit.lshift(codepoint, 6), bit.band(utf8_str:byte(i), 0x3F))
+  end
+  return codepoint
+end
+
+--- Patterns for UTF-8 text: `cont` is one continuation byte (128-191), `any_utf8` is one encoded character of 1 to 4 bytes
+local cont = m.R('\128\191')
+local any_utf8 = m.R('\0\127')
+  + m.R('\194\223') * cont
+  + m.R('\224\239') * cont * cont
+  + m.R('\240\244') * cont * cont * cont
+
+--- Creates a character class pattern for glob ranges
+---
+--- An empty class is matched as literal text (`[]` or `[!]`). Otherwise the
+--- result is the choice of all intervals and single characters, optionally
+--- negated, and it never matches `/`.
+---
+--- A negated class consumes one whole UTF-8 character, in line with the
+--- positive form whose members are parsed and matched per character. With a
+--- plain one-byte negation, `[!a]` would only eat the lead byte of a
+--- multi-byte character such as `é` and the overall match would then fail.
+---
+--- @param inv string Inversion flag ('!' or '')
+--- @param ranges (string|string[])[] Character ranges
+--- @return vim.lpeg.Pattern #Character class pattern
+local function class(inv, ranges)
+  if #ranges == 0 then
+    return m.P(inv == '!' and '[!]' or '[]')
+  end
+
+  local patt = m.P(false)
+  for _, v in ipairs(ranges) do
+    if type(v) == 'table' then
+      -- Interval: both ends are single UTF-8 characters.
+      patt = patt + m.utfR(to_codepoint(v[1]), to_codepoint(v[2]))
+    else
+      patt = patt + m.P(v)
+    end
+  end
+
+  if inv == '!' then
+    -- Prefer a full UTF-8 character; fall back to a single byte so that
+    -- input which is not valid UTF-8 is still matched as before.
+    patt = (any_utf8 + m.P(1)) - patt --[[@as vim.lpeg.Pattern]]
+  end
+  return patt - m.P '/'
+end
+
+-- Parse constraints for optimizing braced conditions (both are consulted by the Branch rule of the grammar)
+local noopt_condlist = re.compile [[
+  s <- '/' / '**' / . [^/*]* s
+]] -- succeeds when the text contains a `/` or a `**`
+
+local opt_tail = re.compile [[
+  s <- (!'**' [^{/])* &'/'
+]] -- advances to the next `/`; fails if a `{`, a `**` or the end of the text comes first
+
+-- stylua: ignore start
+--- @nodoc
+--- @diagnostic disable
+--- Main grammar: parses glob text and produces, as its capture, the LPeg pattern that matches paths
+g = {
+  'Glob',
+  Glob     = (m.P'#' * m.Cg(m.Cc(true), 'inseg') + m.Cg(m.Cc(false), 'inseg')) *
+             m.Cf(m.V'Element'^-1 * (slash * m.V'Element')^0 * (slash^-1 * eof), mt.__mul),
+  -- Element: a `**/...` form, a final `**`, or a `/`-separated run of segments folded into one pattern with `*` (mt.__mul)
+  Element  = m.V'DSeg' + m.V'DSEnd' + m.Cf(m.V'Segment' * (slash * m.V'Segment')^0 * (slash * eof + eof^-1), mt.__mul),
+  -- Globstar patterns: `**/` followed by an element (or the end of the glob) goes through dseg; a final `**` yields `m.P(1)^0`
+  DSeg     = m.P'**/' * ((m.V'Element' + eof) / dseg),
+  DSEnd    = m.P'**' * -1 * m.Cc(m.P(1)^0),
+  -- Segment: words separated by `*`, collected via start_seg/lookfor/to_seg_end and compiled by end_seg; a `*` directly followed by a boundary yields `notslash^0`
+  Segment  = (m.V'Word' / start_seg + m.Cc({ '', true }) / start_seg * (m.V'Star' * m.V'Word' % lookfor)) *
+              (m.V'Star' * m.V'Word' % lookfor)^0 * (m.V'Star' * m.V'CheckBnd' % to_seg_end)^-1 / end_seg
+             + m.V'Star' * m.V'CheckBnd' * m.Cc(notslash^0),
+  CheckBnd = #m.P'/' + -1,  -- Boundary constraint: the next character is `/` or the glob text ends
+
+  -- Word patterns for fixed-length matching; a Word is captured as a table: optional field F (see FIRST), [1] = its glob text, [2] = its compiled pattern
+  Word     = -m.P'*' * m.Ct( m.V('FIRST')^-1 * m.C(m.V'WordAux') ),
+  WordAux  = m.V'Branch' + m.Cf(m.V'Simple'^1 * m.V'Branch'^-1, mt.__mul),
+  Simple   = m.Cg( m.V'Token' * (m.V'Token' % mt.__mul)^0 * (m.V'Boundary' % mt.__mul)^-1),
+  Boundary = #m.P'/' * m.Cc(#m.P'/') + eof,
+  Token    = m.V'Ques' + m.V'Class' + m.V'Escape' + m.V'Literal',
+  Star     = m.P'*',
+  Ques     = m.P'?' * m.Cc(notslash), -- `?` yields one `notslash`
+  Escape   = m.P'\\' * m.C(1) / m.P, -- backslash: the next character is taken literally
+  Literal  = m.C(letter^1) / m.P, -- run of ordinary characters, matched verbatim
+
+  -- Branch handling for braced conditions: the callback decides how much of the following text is expanded together with the alternatives, then `expand` compiles them
+  Branch   = m.Cmt(m.C(m.V'CondList'), function(s, i, p1, p2)
+                                         -- Optimize brace expansion when possible
+                                         -- p1: string form of condition list, p2: transformed lua table
+                                         if noopt_condlist:match(p1) then
+                                           -- Cannot optimize, match till the end
+                                           return #s + 1, p2, s:sub(i)
+                                         end
+                                         -- Find point to cut for optimization
+                                         local cut = opt_tail:match(s, i)
+                                         if cut then
+                                           -- Can optimize: match till cut point
+                                           -- true flag tells expand to transform EOF matches to &'/' predicates
+                                           return cut, p2, s:sub(i, cut - 1), true
+                                         else
+                                           -- Cannot optimize
+                                           return #s + 1, p2, s:sub(i)
+                                         end
+                                       end) / expand,
+  -- Brace expansion handling: a CondList needs at least two comma-separated Conds; add_cond collects alternatives, mul_cond joins adjacent pieces (including nested lists); an empty Cond is captured as ''
+  CondList = m.Cf(m.P'{' * m.V'Cond' * (m.P',' * m.V'Cond')^1 * m.P'}', add_cond),
+  Cond     = m.Cf((m.C((notcomma + m.P'\\' * 1 - m.S'{}')^1) + m.V'CondList')^1, mul_cond) + m.C(true),
+
+  -- Character class handling: optional `!`, then any mix of `x-y` intervals (captured as pairs) and single characters, handed to class()
+  Class    = m.P'[' * m.C(m.P'!'^-1) * m.Ct(
+              (m.Ct(m.C(any_utf8) * m.P'-' * m.C(any_utf8 - m.P']')) + m.C(any_utf8 - m.P']'))^0
+            ) * m.P']' / class,
+
+  -- Deterministic first character extraction for optimization: when a Word starts with a plain `letter`, that character is stored in the named group F
+  FIRST    = m.Cg(m.P(function(s, i)
+                        if letter:match(s, i) then return true, s:sub(i, i)
+                        else return false end
+                      end), 'F')
+}
+-- stylua: ignore end
+--- @diagnostic enable
+
+--- @nodoc
+g = m.P(g)
+
+--- Parses a raw glob into an |lua-lpeg| pattern.
 ---
 ---@param pattern string The raw glob pattern
---@return vim.lpeg.Pattern pattern An |lua-lpeg| representation of the pattern
+---@return vim.lpeg.Pattern #An |lua-lpeg| representation of the pattern
 function M.to_lpeg(pattern)
-  local function class(inv, ranges)
-    local patt = R(unpack(vim.tbl_map(table.concat, ranges)))
-    if inv == '!' then
-      patt = P(1) - patt
-    end
-    return patt
-  end
-
-  local function condlist(conds, after)
-    return vim.iter(conds):fold(P(false), function(acc, cond)
-      return acc + cond * after
-    end)
-  end
-
-  local function mul(acc, m)
-    return acc * m
-  end
-
-  local function star(stars, after)
-    return (-after * (P(1) - pathsep)) ^ #stars * after
-  end
-
-  local function dstar(after)
-    return (-after * P(1)) ^ 0 * after
-  end
-
-  -- luacheck: push ignore s
-  local function cut(_s, idx, match)
-    return idx, match
-  end
-  -- luacheck: pop
-
-  --- @diagnostic disable-next-line: missing-fields
-  local p = P({
-    'Pattern',
-    Pattern = V('Elem') ^ -1 * V('End'),
-    Elem = Cmt(
-      Cf(
-        (V('DStar') + V('Star') + V('Ques') + V('Class') + V('CondList') + V('Literal'))
-          * (V('Elem') + V('End')),
-        mul
-      ),
-      cut
-    ),
-    DStar = (B(pathsep) + -B(P(1)))
-      * P('**')
-      * (pathsep * (V('Elem') + V('End')) + V('End'))
-      / dstar,
-    Star = C(P('*') ^ 1) * (V('Elem') + V('End')) / star,
-    Ques = P('?') * Cc(P(1) - pathsep),
-    Class = P('[')
-      * C(P('!') ^ -1)
-      * Ct(Ct(C(P(1)) * P('-') * C(P(1) - P(']'))) ^ 1 * P(']'))
-      / class,
-    CondList = P('{') * Ct(V('Cond') * (P(',') * V('Cond')) ^ 0) * P('}') * V('Pattern') / condlist,
-    -- TODO: '*' inside a {} condition is interpreted literally but should probably have the same
-    -- wildcard semantics it usually has.
-    -- Fixing this is non-trivial because '*' should match non-greedily up to "the rest of the
-    -- pattern" which in all other cases is the entire succeeding part of the pattern, but at the end of a {}
-    -- condition means "everything after the {}" where several other options separated by ',' may
-    -- exist in between that should not be matched by '*'.
-    Cond = Cmt(Cf((V('Ques') + V('Class') + V('Literal') - S(',}')) ^ 1, mul), cut) + Cc(P(0)),
-    Literal = P(1) / P,
-    End = P(-1) * Cc(P(-1)),
-  })
-
-  local lpeg_pattern = p:match(pattern) --[[@as vim.lpeg.Pattern?]]
+  local lpeg_pattern = g:match(pattern) --[[@as vim.lpeg.Pattern?]]
  assert(lpeg_pattern, 'Invalid glob')
  return lpeg_pattern
 end
--- a/test/functional/lua/glob_spec.lua
+++ b/test/functional/lua/glob_spec.lua
@@ -18,6 +18,7 @@ describe('glob', function()
      eq(true, match('', ''))
      eq(false, match('', 'a'))
      eq(true, match('a', 'a'))
+      eq(true, match('.', '.'))
      eq(true, match('/', '/'))
      eq(true, match('abc', 'abc'))
      eq(false, match('abc', 'abcdef'))
@@ -35,7 +36,8 @@ describe('glob', function()
    end)

    it('should match * wildcards', function()
-      eq(false, match('*', ''))
+      eq(true, match('*', ''))
+      eq(true, match('*', '   '))
      eq(true, match('*', 'a'))
      eq(false, match('*', '/'))
      eq(false, match('*', '/a'))
@@ -43,6 +45,7 @@ describe('glob', function()
      eq(true, match('*', 'aaa'))
      eq(true, match('*a', 'aa'))
      eq(true, match('*a', 'abca'))
+      eq(true, match('*.ts', '.ts'))
      eq(true, match('*.txt', 'file.txt'))
      eq(false, match('*.txt', 'file.txtxt'))
      eq(false, match('*.txt', 'dir/file.txt'))
@@ -62,18 +65,13 @@ describe('glob', function()
      eq(false, match('dir/*/file.txt', 'dir/file.txt'))
      eq(true, match('dir/*/file.txt', 'dir/subdir/file.txt'))
      eq(false, match('dir/*/file.txt', 'dir/subdir/subdir/file.txt'))
-
-      -- The spec does not describe this, but VSCode only interprets ** when it's by
-      -- itself in a path segment, and otherwise interprets ** as consecutive * directives.
-      -- see: https://github.com/microsoft/vscode/blob/eef30e7165e19b33daa1e15e92fa34ff4a5df0d3/src/vs/base/common/glob.ts#L112
-      eq(true, match('a**', 'abc')) -- '**' should parse as two '*'s when not by itself in a path segment
-      eq(true, match('**c', 'abc'))
-      eq(false, match('a**', 'ab')) -- each '*' should still represent at least one character
-      eq(false, match('**c', 'bc'))
-      eq(true, match('a**', 'abcd'))
-      eq(true, match('**d', 'abcd'))
-      eq(false, match('a**', 'abc/d'))
-      eq(false, match('**d', 'abc/d'))
+      eq(true, match('a*b*c*d*e*', 'axbxcxdxe'))
+      eq(true, match('a*b*c*d*e*', 'axbxcxdxexxx'))
+      eq(true, match('a*b?c*x', 'abxbbxdbxebxczzx'))
+      eq(false, match('a*b?c*x', 'abxbbxdbxebxczzy'))
+      eq(true, match('a*b*[cy]*d*e*', 'axbxcxdxexxx'))
+      eq(true, match('a*b*[cy]*d*e*', 'axbxyxdxexxx'))
+      eq(true, match('a*b*[cy]*d*e*', 'axbxxxyxdxexxx'))
    end)

    it('should match ? wildcards', function()
@@ -84,6 +82,11 @@ describe('glob', function()
      eq(true, match('??', 'ab'))
      eq(true, match('a?c', 'abc'))
      eq(false, match('a?c', 'a/c'))
+      eq(false, match('a/', 'a/.b'))
+      eq(true, match('?/?', 'a/b'))
+      eq(true, match('/??', '/ab'))
+      eq(true, match('/?b', '/ab'))
+      eq(false, match('foo?bar', 'foo/bar'))
    end)

    it('should match ** wildcards', function()
@@ -99,7 +102,7 @@ describe('glob', function()
      eq(true, match('/**', '/'))
      eq(true, match('/**', '/a/b/c'))
      eq(true, match('**/', '')) -- **/ absorbs trailing /
-      eq(true, match('**/', '/a/b/c'))
+      eq(false, match('**/', '/a/b/c'))
      eq(true, match('**/**', ''))
      eq(true, match('**/**', 'a'))
      eq(false, match('a/**', ''))
@@ -134,20 +137,9 @@ describe('glob', function()
    end)

    it('should match {} groups', function()
-      eq(true, match('{}', ''))
-      eq(false, match('{}', 'a'))
-      eq(true, match('a{}', 'a'))
-      eq(true, match('{}a', 'a'))
      eq(true, match('{,}', ''))
      eq(true, match('{a,}', ''))
      eq(true, match('{a,}', 'a'))
-      eq(true, match('{a}', 'a'))
-      eq(false, match('{a}', 'aa'))
-      eq(false, match('{a}', 'ab'))
-      eq(true, match('{a?c}', 'abc'))
-      eq(false, match('{ab}', 'a'))
-      eq(false, match('{ab}', 'b'))
-      eq(true, match('{ab}', 'ab'))
      eq(true, match('{a,b}', 'a'))
      eq(true, match('{a,b}', 'b'))
      eq(false, match('{a,b}', 'ab'))
@@ -155,7 +147,22 @@ describe('glob', function()
      eq(false, match('{ab,cd}', 'a'))
      eq(true, match('{ab,cd}', 'cd'))
      eq(true, match('{a,b,c}', 'c'))
-      eq(false, match('{a,{b,c}}', 'c')) -- {} cannot nest
+      eq(true, match('{a,{b,c}}', 'c'))
+      eq(true, match('a{,/}*.txt', 'a.txt'))
+      eq(true, match('a{,/}*.txt', 'ab.txt'))
+      eq(true, match('a{,/}*.txt', 'a/b.txt'))
+      eq(true, match('a{,/}*.txt', 'a/ab.txt'))
+      eq(true, match('a/{a{a,b},b}', 'a/aa'))
+      eq(true, match('a/{a{a,b},b}', 'a/ab'))
+      eq(false, match('a/{a{a,b},b}', 'a/ac'))
+      eq(true, match('a/{a{a,b},b}', 'a/b'))
+      eq(false, match('a/{a{a,b},b}', 'a/c'))
+      eq(true, match('foo{bar,b*z}', 'foobar'))
+      eq(true, match('foo{bar,b*z}', 'foobuzz'))
+      eq(true, match('foo{bar,b*z}', 'foobarz'))
+      eq(true, match('{a,b}/c/{d,e}/**/*est.ts', 'a/c/d/one/two/three.test.ts'))
+      eq(true, match('{a,{d,e}b}/c', 'a/c'))
+      eq(true, match('{**/a,**/b}', 'b'))
    end)

    it('should match [] groups', function()
@@ -181,6 +188,13 @@ describe('glob', function()
      eq(true, match('[a-zA-Z0-9]', 'Z'))
      eq(true, match('[a-zA-Z0-9]', '9'))
      eq(false, match('[a-zA-Z0-9]', '&'))
+      eq(true, match('[?]', '?'))
+      eq(false, match('[?]', 'a'))
+      eq(true, match('[*]', '*'))
+      eq(false, match('[*]', 'a'))
+      eq(true, match('[\\!]', '!'))
+      eq(true, match('a\\*b', 'a*b'))
+      eq(false, match('a\\*b', 'axb'))
    end)

    it('should match [!...] groups', function()
@@ -202,8 +216,7 @@ describe('glob', function()
    it('should handle long patterns', function()
      -- lpeg has a recursion limit of 200 by default, make sure the grammar does trigger it on
      -- strings longer than that
-      local fill_200 =
-        'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
+      local fill_200 = ('a'):rep(200)
      eq(200, fill_200:len())
      local long_lit = fill_200 .. 'a'
      eq(false, match(long_lit, 'b'))
@@ -212,6 +225,21 @@ describe('glob', function()
      eq(true, match(long_pat, fill_200 .. 'a/b/c/d.c'))
    end)

+    -- New test for unicode patterns from assets
+    it('should match unicode patterns', function()
+      eq(true, match('😎/¢£.{ts,tsx,js,jsx}', '😎/¢£.ts'))
+      eq(true, match('😎/¢£.{ts,tsx,js,jsx}', '😎/¢£.tsx'))
+      eq(true, match('😎/¢£.{ts,tsx,js,jsx}', '😎/¢£.js'))
+      eq(true, match('😎/¢£.{ts,tsx,js,jsx}', '😎/¢£.jsx'))
+      eq(false, match('😎/¢£.{ts,tsx,js,jsx}', '😎/¢£.jsxxxxxxxx'))
+      eq(true, match('*é*', 'café noir'))
+      eq(true, match('caf*noir', 'café noir'))
+      eq(true, match('caf*noir', 'cafeenoir'))
+      eq(true, match('F[ë£a]', 'Fë'))
+      eq(true, match('F[ë£a]', 'F£'))
+      eq(true, match('F[ë£a]', 'Fa'))
+    end)
+
    it('should match complex patterns', function()
      eq(false, match('**/*.{c,h}', ''))
      eq(false, match('**/*.{c,h}', 'c'))