mirror of
https://github.com/neovim/neovim.git
synced 2026-01-04 02:17:09 +10:00
feat(glob): new Glob implementation based on Peglob #33605
|vim.glob.to_lpeg()| uses a new LPeg-based implementation (Peglob) that provides a ~50% speedup for complex patterns. The implementation restores support for nested braces and follows the LSP 3.17 specification, with additional constraints for improved correctness and resistance to backtracking edge cases.
This commit is contained in:
@@ -18,6 +18,7 @@ describe('glob', function()
|
||||
eq(true, match('', ''))
|
||||
eq(false, match('', 'a'))
|
||||
eq(true, match('a', 'a'))
|
||||
eq(true, match('.', '.'))
|
||||
eq(true, match('/', '/'))
|
||||
eq(true, match('abc', 'abc'))
|
||||
eq(false, match('abc', 'abcdef'))
|
||||
@@ -35,7 +36,8 @@ describe('glob', function()
|
||||
end)
|
||||
|
||||
it('should match * wildcards', function()
|
||||
eq(false, match('*', ''))
|
||||
eq(true, match('*', ''))
|
||||
eq(true, match('*', ' '))
|
||||
eq(true, match('*', 'a'))
|
||||
eq(false, match('*', '/'))
|
||||
eq(false, match('*', '/a'))
|
||||
@@ -43,6 +45,7 @@ describe('glob', function()
|
||||
eq(true, match('*', 'aaa'))
|
||||
eq(true, match('*a', 'aa'))
|
||||
eq(true, match('*a', 'abca'))
|
||||
eq(true, match('*.ts', '.ts'))
|
||||
eq(true, match('*.txt', 'file.txt'))
|
||||
eq(false, match('*.txt', 'file.txtxt'))
|
||||
eq(false, match('*.txt', 'dir/file.txt'))
|
||||
@@ -62,18 +65,13 @@ describe('glob', function()
|
||||
eq(false, match('dir/*/file.txt', 'dir/file.txt'))
|
||||
eq(true, match('dir/*/file.txt', 'dir/subdir/file.txt'))
|
||||
eq(false, match('dir/*/file.txt', 'dir/subdir/subdir/file.txt'))
|
||||
|
||||
-- The spec does not describe this, but VSCode only interprets ** when it's by
|
||||
-- itself in a path segment, and otherwise interprets ** as consecutive * directives.
|
||||
-- see: https://github.com/microsoft/vscode/blob/eef30e7165e19b33daa1e15e92fa34ff4a5df0d3/src/vs/base/common/glob.ts#L112
|
||||
eq(true, match('a**', 'abc')) -- '**' should parse as two '*'s when not by itself in a path segment
|
||||
eq(true, match('**c', 'abc'))
|
||||
eq(false, match('a**', 'ab')) -- each '*' should still represent at least one character
|
||||
eq(false, match('**c', 'bc'))
|
||||
eq(true, match('a**', 'abcd'))
|
||||
eq(true, match('**d', 'abcd'))
|
||||
eq(false, match('a**', 'abc/d'))
|
||||
eq(false, match('**d', 'abc/d'))
|
||||
eq(true, match('a*b*c*d*e*', 'axbxcxdxe'))
|
||||
eq(true, match('a*b*c*d*e*', 'axbxcxdxexxx'))
|
||||
eq(true, match('a*b?c*x', 'abxbbxdbxebxczzx'))
|
||||
eq(false, match('a*b?c*x', 'abxbbxdbxebxczzy'))
|
||||
eq(true, match('a*b*[cy]*d*e*', 'axbxcxdxexxx'))
|
||||
eq(true, match('a*b*[cy]*d*e*', 'axbxyxdxexxx'))
|
||||
eq(true, match('a*b*[cy]*d*e*', 'axbxxxyxdxexxx'))
|
||||
end)
|
||||
|
||||
it('should match ? wildcards', function()
|
||||
@@ -84,6 +82,11 @@ describe('glob', function()
|
||||
eq(true, match('??', 'ab'))
|
||||
eq(true, match('a?c', 'abc'))
|
||||
eq(false, match('a?c', 'a/c'))
|
||||
eq(false, match('a/', 'a/.b'))
|
||||
eq(true, match('?/?', 'a/b'))
|
||||
eq(true, match('/??', '/ab'))
|
||||
eq(true, match('/?b', '/ab'))
|
||||
eq(false, match('foo?bar', 'foo/bar'))
|
||||
end)
|
||||
|
||||
it('should match ** wildcards', function()
|
||||
@@ -99,7 +102,7 @@ describe('glob', function()
|
||||
eq(true, match('/**', '/'))
|
||||
eq(true, match('/**', '/a/b/c'))
|
||||
eq(true, match('**/', '')) -- **/ absorbs trailing /
|
||||
eq(true, match('**/', '/a/b/c'))
|
||||
eq(false, match('**/', '/a/b/c'))
|
||||
eq(true, match('**/**', ''))
|
||||
eq(true, match('**/**', 'a'))
|
||||
eq(false, match('a/**', ''))
|
||||
@@ -134,20 +137,9 @@ describe('glob', function()
|
||||
end)
|
||||
|
||||
it('should match {} groups', function()
|
||||
eq(true, match('{}', ''))
|
||||
eq(false, match('{}', 'a'))
|
||||
eq(true, match('a{}', 'a'))
|
||||
eq(true, match('{}a', 'a'))
|
||||
eq(true, match('{,}', ''))
|
||||
eq(true, match('{a,}', ''))
|
||||
eq(true, match('{a,}', 'a'))
|
||||
eq(true, match('{a}', 'a'))
|
||||
eq(false, match('{a}', 'aa'))
|
||||
eq(false, match('{a}', 'ab'))
|
||||
eq(true, match('{a?c}', 'abc'))
|
||||
eq(false, match('{ab}', 'a'))
|
||||
eq(false, match('{ab}', 'b'))
|
||||
eq(true, match('{ab}', 'ab'))
|
||||
eq(true, match('{a,b}', 'a'))
|
||||
eq(true, match('{a,b}', 'b'))
|
||||
eq(false, match('{a,b}', 'ab'))
|
||||
@@ -155,7 +147,22 @@ describe('glob', function()
|
||||
eq(false, match('{ab,cd}', 'a'))
|
||||
eq(true, match('{ab,cd}', 'cd'))
|
||||
eq(true, match('{a,b,c}', 'c'))
|
||||
eq(false, match('{a,{b,c}}', 'c')) -- {} cannot nest
|
||||
eq(true, match('{a,{b,c}}', 'c'))
|
||||
eq(true, match('a{,/}*.txt', 'a.txt'))
|
||||
eq(true, match('a{,/}*.txt', 'ab.txt'))
|
||||
eq(true, match('a{,/}*.txt', 'a/b.txt'))
|
||||
eq(true, match('a{,/}*.txt', 'a/ab.txt'))
|
||||
eq(true, match('a/{a{a,b},b}', 'a/aa'))
|
||||
eq(true, match('a/{a{a,b},b}', 'a/ab'))
|
||||
eq(false, match('a/{a{a,b},b}', 'a/ac'))
|
||||
eq(true, match('a/{a{a,b},b}', 'a/b'))
|
||||
eq(false, match('a/{a{a,b},b}', 'a/c'))
|
||||
eq(true, match('foo{bar,b*z}', 'foobar'))
|
||||
eq(true, match('foo{bar,b*z}', 'foobuzz'))
|
||||
eq(true, match('foo{bar,b*z}', 'foobarz'))
|
||||
eq(true, match('{a,b}/c/{d,e}/**/*est.ts', 'a/c/d/one/two/three.test.ts'))
|
||||
eq(true, match('{a,{d,e}b}/c', 'a/c'))
|
||||
eq(true, match('{**/a,**/b}', 'b'))
|
||||
end)
|
||||
|
||||
it('should match [] groups', function()
|
||||
@@ -181,6 +188,13 @@ describe('glob', function()
|
||||
eq(true, match('[a-zA-Z0-9]', 'Z'))
|
||||
eq(true, match('[a-zA-Z0-9]', '9'))
|
||||
eq(false, match('[a-zA-Z0-9]', '&'))
|
||||
eq(true, match('[?]', '?'))
|
||||
eq(false, match('[?]', 'a'))
|
||||
eq(true, match('[*]', '*'))
|
||||
eq(false, match('[*]', 'a'))
|
||||
eq(true, match('[\\!]', '!'))
|
||||
eq(true, match('a\\*b', 'a*b'))
|
||||
eq(false, match('a\\*b', 'axb'))
|
||||
end)
|
||||
|
||||
it('should match [!...] groups', function()
|
||||
@@ -202,8 +216,7 @@ describe('glob', function()
|
||||
it('should handle long patterns', function()
|
||||
-- lpeg has a recursion limit of 200 by default, make sure the grammar does not trigger it on
|
||||
-- strings longer than that
|
||||
local fill_200 =
|
||||
'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
|
||||
local fill_200 = ('a'):rep(200)
|
||||
eq(200, fill_200:len())
|
||||
local long_lit = fill_200 .. 'a'
|
||||
eq(false, match(long_lit, 'b'))
|
||||
@@ -212,6 +225,21 @@ describe('glob', function()
|
||||
eq(true, match(long_pat, fill_200 .. 'a/b/c/d.c'))
|
||||
end)
|
||||
|
||||
-- New test for unicode patterns from assets
|
||||
it('should match unicode patterns', function()
|
||||
eq(true, match('😎/¢£.{ts,tsx,js,jsx}', '😎/¢£.ts'))
|
||||
eq(true, match('😎/¢£.{ts,tsx,js,jsx}', '😎/¢£.tsx'))
|
||||
eq(true, match('😎/¢£.{ts,tsx,js,jsx}', '😎/¢£.js'))
|
||||
eq(true, match('😎/¢£.{ts,tsx,js,jsx}', '😎/¢£.jsx'))
|
||||
eq(false, match('😎/¢£.{ts,tsx,js,jsx}', '😎/¢£.jsxxxxxxxx'))
|
||||
eq(true, match('*é*', 'café noir'))
|
||||
eq(true, match('caf*noir', 'café noir'))
|
||||
eq(true, match('caf*noir', 'cafeenoir'))
|
||||
eq(true, match('F[ë£a]', 'Fë'))
|
||||
eq(true, match('F[ë£a]', 'F£'))
|
||||
eq(true, match('F[ë£a]', 'Fa'))
|
||||
end)
|
||||
|
||||
it('should match complex patterns', function()
|
||||
eq(false, match('**/*.{c,h}', ''))
|
||||
eq(false, match('**/*.{c,h}', 'c'))
|
||||
|
||||
Reference in New Issue
Block a user