feat(treesitter)!: incremental injection parsing

Problem:

Treesitter highlighting is slow for large files with lots of injections.

Solution:

Only parse injections we are going to render during a redraw cycle.

---

- `LanguageTree:parse()` no longer parses injections by default; an
  explicit range argument must now be passed for injections to be parsed.

- `TSHighlighter` now parses injections incrementally during on_win
  callbacks for the line range being rendered.

- Plugins which require certain injections to be parsed must run
  `parser:parse({ start_row, end_row })` before using the tree.
This commit is contained in:
Lewis Russell
2023-08-10 14:21:56 +01:00
committed by Lewis Russell
parent 5a25dcc5a4
commit 2ca076e45f
11 changed files with 279 additions and 117 deletions

View File

@@ -147,11 +147,14 @@ local function normalise_erow(bufnr, erow)
return math.min(erow or max_erow, max_erow)
end
-- TODO(lewis6991): Set up a decor provider so injection folds can be parsed
-- as the window is redrawn
---@param bufnr integer
---@param info TS.FoldInfo
---@param srow integer?
---@param erow integer?
local function get_folds_levels(bufnr, info, srow, erow)
---@param parse_injections? boolean
local function get_folds_levels(bufnr, info, srow, erow, parse_injections)
srow = srow or 0
erow = normalise_erow(bufnr, erow)
@@ -162,7 +165,7 @@ local function get_folds_levels(bufnr, info, srow, erow)
local parser = ts.get_parser(bufnr)
parser:parse()
parser:parse(parse_injections and { srow, erow } or nil)
parser:for_each_tree(function(tree, ltree)
local query = ts.query.get(ltree:lang(), 'folds')

View File

@@ -1,6 +1,6 @@
---@meta
---@class TSNode
---@class TSNode: userdata
---@field id fun(self: TSNode): integer
---@field tree fun(self: TSNode): TSTree
---@field range fun(self: TSNode, include_bytes: false?): integer, integer, integer, integer
@@ -51,7 +51,7 @@ function TSNode:_rawquery(query, captures, start, end_, opts) end
---@field parse fun(self: TSParser, tree: TSTree?, source: integer|string, include_bytes: boolean?): TSTree, integer[]
---@field reset fun(self: TSParser)
---@field included_ranges fun(self: TSParser, include_bytes: boolean?): integer[]
---@field set_included_ranges fun(self: TSParser, ranges: Range6[])
---@field set_included_ranges fun(self: TSParser, ranges: (Range6|TSNode)[])
---@field set_timeout fun(self: TSParser, timeout: integer)
---@field timeout fun(self: TSParser): integer
---@field _set_logger fun(self: TSParser, lex: boolean, parse: boolean, cb: TSLoggerCallback)
@@ -61,7 +61,8 @@ function TSNode:_rawquery(query, captures, start, end_, opts) end
---@field root fun(self: TSTree): TSNode
---@field edit fun(self: TSTree, _: integer, _: integer, _: integer, _: integer, _: integer, _: integer, _: integer, _: integer, _:integer)
---@field copy fun(self: TSTree): TSTree
---@field included_ranges fun(self: TSTree, include_bytes: boolean?): integer[]
---@field included_ranges fun(self: TSTree, include_bytes: true): Range6[]
---@field included_ranges fun(self: TSTree, include_bytes: false): Range4[]
---@return integer
vim._ts_get_language_version = function() end

View File

@@ -2,6 +2,10 @@ local api = vim.api
local M = {}
---@class Range2
---@field [1] integer start row
---@field [2] integer end row
---@class Range4
---@field [1] integer start row
---@field [2] integer start column
@@ -16,7 +20,7 @@ local M = {}
---@field [5] integer end column
---@field [6] integer end bytes
---@alias Range Range4|Range6
---@alias Range Range2|Range4|Range6
---@private
---@param a_row integer
@@ -111,6 +115,9 @@ end
---Unpacks a range into start row, start column, end row and end column.
---
---A two-element range only carries rows, so both columns are reported as 0.
---For a six-element range the end row and end column are read from
---indices 4 and 5; otherwise they are read from indices 3 and 4.
---@param r Range
---@return integer, integer, integer, integer
function M.unpack4(r)
  local len = #r

  if len == 2 then
    return r[1], 0, r[2], 0
  end

  if len == 6 then
    return r[1], r[2], r[4], r[5]
  end

  return r[1], r[2], r[3], r[4]
end

View File

@@ -99,7 +99,7 @@ function TSTreeView:new(bufnr, lang)
-- For each child tree (injected language), find the root of the tree and locate the node within
-- the primary tree that contains that root. Add a mapping from the node in the primary tree to
-- the root in the child tree to the {injections} table.
local root = parser:parse()[1]:root()
local root = parser:parse(true)[1]:root()
local injections = {} ---@type table<integer,table>
parser:for_each_child(function(child, lang_)
child:for_each_tree(function(tree)

View File

@@ -1,5 +1,6 @@
local api = vim.api
local query = vim.treesitter.query
local Range = require('vim.treesitter._range')
---@alias TSHlIter fun(): integer, TSNode, TSMetadata
@@ -14,6 +15,7 @@ local query = vim.treesitter.query
---@field _highlight_states table<TSTree,TSHighlightState>
---@field _queries table<string,TSHighlighterQuery>
---@field tree LanguageTree
---@field redraw_count integer
local TSHighlighter = rawget(vim.treesitter, 'TSHighlighter') or {}
TSHighlighter.__index = TSHighlighter
@@ -139,6 +141,7 @@ function TSHighlighter.new(tree, opts)
return self
end
--- @nodoc
--- Removes all internal references to the highlighter
function TSHighlighter:destroy()
if TSHighlighter.active[self.bufnr] then
@@ -186,7 +189,7 @@ function TSHighlighter:on_detach()
end
---@package
---@param changes Range6[][]
---@param changes Range6[]
function TSHighlighter:on_changedtree(changes)
for _, ch in ipairs(changes) do
api.nvim__buf_redraw_range(self.bufnr, ch[1], ch[4] + 1)
@@ -245,7 +248,7 @@ local function on_line_impl(self, buf, line, is_spell_nav)
end
local range = vim.treesitter.get_range(node, buf, metadata[capture])
local start_row, start_col, _, end_row, end_col, _ = unpack(range)
local start_row, start_col, end_row, end_col = Range.unpack4(range)
local hl = highlighter_query.hl_cache[capture]
local capture_name = highlighter_query:query().captures[capture]
@@ -309,32 +312,23 @@ function TSHighlighter._on_spell_nav(_, _, buf, srow, _, erow, _)
end
end
---@private
---@param buf integer
function TSHighlighter._on_buf(_, buf)
  -- Nothing to do for buffers without an active highlighter.
  local highlighter = TSHighlighter.active[buf]
  if not highlighter then
    return
  end

  highlighter.tree:parse()
end
---@private
---@param _win integer
---@param buf integer
---@param _topline integer
function TSHighlighter._on_win(_, _win, buf, _topline)
---@param topline integer
---@param botline integer
function TSHighlighter._on_win(_, _win, buf, topline, botline)
  local highlighter = TSHighlighter.active[buf]

  if highlighter then
    -- Parse with the row range this callback was handed before the
    -- per-redraw highlight state is rebuilt.
    highlighter.tree:parse({ topline, botline })
    highlighter:reset_highlight_state()
    highlighter.redraw_count = highlighter.redraw_count + 1
    return true
  end

  -- No active highlighter for this buffer.
  return false
end
api.nvim_set_decoration_provider(ns, {
on_buf = TSHighlighter._on_buf,
on_win = TSHighlighter._on_win,
on_line = TSHighlighter._on_line,
_on_spell_nav = TSHighlighter._on_spell_nav,

View File

@@ -18,7 +18,7 @@
--- Whenever you need to access the current syntax tree, parse the buffer:
---
--- <pre>lua
--- local tree = parser:parse()
--- local tree = parser:parse({ start_row, end_row })
--- </pre>
---
--- This returns a table of immutable |treesitter-tree| objects representing the current state of
@@ -74,6 +74,7 @@ local TSCallbackNames = {
---@field package _callbacks_rec table<TSCallbackName,function[]> Callback handlers (recursive)
---@field private _children table<string,LanguageTree> Injected languages
---@field private _injection_query Query Queries defining injected languages
---@field private _injections_processed boolean
---@field private _opts table Options
---@field private _parser TSParser Parser for language
---@field private _has_regions boolean
@@ -115,7 +116,9 @@ function LanguageTree.new(source, lang, opts)
end
local injections = opts.injections or {}
local self = setmetatable({
--- @type LanguageTree
local self = {
_source = source,
_lang = lang,
_children = {},
@@ -123,14 +126,19 @@ function LanguageTree.new(source, lang, opts)
_opts = opts,
_injection_query = injections[lang] and query.parse(lang, injections[lang])
or query.get(lang, 'injections'),
_has_regions = false,
_injections_processed = false,
_valid = false,
_parser = vim._create_ts_parser(lang),
_callbacks = {},
_callbacks_rec = {},
}, LanguageTree)
}
setmetatable(self, LanguageTree)
if vim.g.__ts_debug and type(vim.g.__ts_debug) == 'number' then
self:_set_logger()
self:_log('START')
end
for _, name in pairs(TSCallbackNames) do
@@ -141,6 +149,7 @@ function LanguageTree.new(source, lang, opts)
return self
end
--- @private
function LanguageTree:_set_logger()
local source = self:source()
source = type(source) == 'string' and 'text' or tostring(source)
@@ -171,7 +180,7 @@ end
---Measure execution time of a function
---@generic R1, R2, R3
---@param f fun(): R1, R2, R2
---@return integer, R1, R2, R3
---@return number, R1, R2, R3
local function tcall(f, ...)
local start = vim.uv.hrtime()
---@diagnostic disable-next-line
@@ -219,7 +228,7 @@ function LanguageTree:invalidate(reload)
-- buffer was reloaded, reparse all trees
if reload then
for _, t in ipairs(self._trees) do
for _, t in pairs(self._trees) do
self:_do_callback('changedtree', t:included_ranges(true), t)
end
self._trees = {}
@@ -250,14 +259,18 @@ function LanguageTree:is_valid(exclude_children)
local valid = self._valid
if type(valid) == 'table' then
for _, v in ipairs(valid) do
if not v then
for i = 1, #self:included_regions() do
if not valid[i] then
return false
end
end
end
if not exclude_children then
if not self._injections_processed then
return false
end
for _, child in pairs(self._children) do
if not child:is_valid(exclude_children) then
return false
@@ -265,9 +278,12 @@ function LanguageTree:is_valid(exclude_children)
end
end
assert(type(valid) == 'boolean')
if type(valid) == 'boolean' then
return valid
end
return valid
self._valid = true
return true
end
--- Returns a map of language to child tree.
@@ -280,47 +296,72 @@ function LanguageTree:source()
return self._source
end
--- Parses all defined regions using a treesitter parser
--- for the language this tree represents.
--- This will run the injection query for this language to
--- determine if any child languages should be created.
---
---@return TSTree[]
function LanguageTree:parse()
if self:is_valid() then
self:_log('valid')
return self._trees
--- @param region Range6[]
--- @param range? boolean|Range
--- @return boolean
local function intercepts_region(region, range)
if #region == 0 then
return true
end
local changes = {}
if range == nil then
return false
end
-- Collect some stats
local regions_parsed = 0
local total_parse_time = 0
if type(range) == 'boolean' then
return range
end
--- At least 1 region is invalid
if not self:is_valid(true) then
-- If there are no ranges, set to an empty list
-- so the included ranges in the parser are cleared.
for i, ranges in ipairs(self:included_regions()) do
if not self._valid or not self._valid[i] then
self._parser:set_included_ranges(ranges)
local parse_time, tree, tree_changes =
tcall(self._parser.parse, self._parser, self._trees[i], self._source, true)
-- Pass ranges if this is an initial parse
local cb_changes = self._trees[i] and tree_changes or tree:included_ranges(true)
self:_do_callback('changedtree', cb_changes, tree)
self._trees[i] = tree
vim.list_extend(changes, tree_changes)
total_parse_time = total_parse_time + parse_time
regions_parsed = regions_parsed + 1
end
for _, r in ipairs(region) do
if Range.intercepts(r, range) then
return true
end
end
return false
end
--- Parses every included region that is not yet marked valid and that is
--- selected by {range} (the decision is delegated to `intercepts_region`).
---
--- For each region parsed, the 'changedtree' callback is fired, the resulting
--- tree is stored in `self._trees[i]` and the region is marked valid. Regions
--- that are skipped keep their current tree and validity state.
---
--- @private
--- @param range boolean|Range?
--- @return integer[] changes
--- @return integer no_regions_parsed
--- @return number total_parse_time
function LanguageTree:_parse_regions(range)
local changes = {}
local no_regions_parsed = 0
local total_parse_time = 0
-- Validity is recorded per region index below, so `_valid` has to be a table
-- here; any non-table value is replaced with an empty table.
if type(self._valid) ~= 'table' then
self._valid = {}
end
-- If there are no ranges, set to an empty list
-- so the included ranges in the parser are cleared.
for i, ranges in pairs(self:included_regions()) do
if not self._valid[i] and intercepts_region(ranges, range) then
self._parser:set_included_ranges(ranges)
-- `tcall` returns the elapsed time followed by the results of the parse
-- call; the tree currently stored for this region (if any) is passed to
-- the parser.
local parse_time, tree, tree_changes =
tcall(self._parser.parse, self._parser, self._trees[i], self._source, true)
-- Pass ranges if this is an initial parse
local cb_changes = self._trees[i] and tree_changes or tree:included_ranges(true)
self:_do_callback('changedtree', cb_changes, tree)
-- The stored tree is replaced only after the callback has run, because the
-- check above needs to know whether an earlier tree existed.
self._trees[i] = tree
vim.list_extend(changes, tree_changes)
total_parse_time = total_parse_time + parse_time
no_regions_parsed = no_regions_parsed + 1
self._valid[i] = true
end
end
return changes, no_regions_parsed, total_parse_time
end
--- @private
--- @return number
function LanguageTree:_add_injections()
local seen_langs = {} ---@type table<string,boolean>
local query_time, injections_by_lang = tcall(self._get_injections, self)
@@ -348,19 +389,60 @@ function LanguageTree:parse()
end
end
return query_time
end
--- Recursively parse all regions in the language tree using |treesitter-parsers|
--- for the corresponding languages and run injection queries on the parsed trees
--- to determine whether child trees should be created and parsed.
---
--- Any region with empty range (`{}`, typically only the root tree) is always parsed;
--- otherwise (typically injections) only if it intersects {range} (or if {range} is `true`).
---
--- @param range boolean|Range|nil: Parse this range in the parser's source.
--- Set to `true` to run a complete parse of the source (Note: Can be slow!)
--- Set to `false|nil` to only parse regions with empty ranges (typically
--- only the root tree without injections).
--- @return TSTree[]
function LanguageTree:parse(range)
if self:is_valid() then
self:_log('valid')
return self._trees
end
local changes --- @type Range6?
-- Collect some stats
local no_regions_parsed = 0
local query_time = 0
local total_parse_time = 0
--- At least 1 region is invalid
if not self:is_valid(true) then
changes, no_regions_parsed, total_parse_time = self:_parse_regions(range)
-- Need to run injections when we parsed something
if no_regions_parsed > 0 then
self._injections_processed = false
end
end
if not self._injections_processed and range ~= false and range ~= nil then
query_time = self:_add_injections()
self._injections_processed = true
end
self:_log({
changes = changes,
regions_parsed = regions_parsed,
changes = changes and #changes > 0 and changes or nil,
regions_parsed = no_regions_parsed,
parse_time = total_parse_time,
query_time = query_time,
range = range,
})
self:for_each_child(function(child)
child:parse()
child:parse(range)
end)
self._valid = true
return self._trees
end
@@ -384,7 +466,7 @@ end
---
---@param fn fun(tree: TSTree, ltree: LanguageTree)
function LanguageTree:for_each_tree(fn)
for _, tree in ipairs(self._trees) do
for _, tree in pairs(self._trees) do
fn(tree, self)
end
@@ -466,18 +548,17 @@ function LanguageTree:_iter_regions(fn)
return
end
if type(self._valid) ~= 'table' then
local was_valid = type(self._valid) ~= 'table'
if was_valid then
self:_log('was valid', self._valid)
self._valid = {}
end
local all_valid = true
for i, region in ipairs(self:included_regions()) do
if self._valid[i] == nil then
self._valid[i] = true
end
if self._valid[i] then
if was_valid or self._valid[i] then
self._valid[i] = fn(i, region)
if not self._valid[i] then
self:_log(function()
@@ -521,6 +602,8 @@ function LanguageTree:set_included_regions(new_regions)
for i, range in ipairs(region) do
if type(range) == 'table' and #range == 4 then
region[i] = Range.add_bytes(self._source, range)
elseif type(range) == 'userdata' then
region[i] = { range:range(true) }
end
end
end
@@ -542,7 +625,7 @@ function LanguageTree:set_included_regions(new_regions)
end
---Gets the set of included regions
---@return integer[][]
---@return Range6[][]
function LanguageTree:included_regions()
if self._regions then
return self._regions
@@ -581,7 +664,7 @@ local function get_node_ranges(node, source, metadata, include_children)
-- We are excluding children so we need to mask out their ranges
for i = 0, child_count - 1 do
local child = node:named_child(i)
local child = assert(node:named_child(i))
local c_srow, c_scol, c_sbyte, c_erow, c_ecol, c_ebyte = child:range(true)
if c_srow > srow or c_scol > scol then
ranges[#ranges + 1] = { srow, scol, sbyte, c_srow, c_scol, c_sbyte }
@@ -749,8 +832,8 @@ end
---
--- TODO: Allow for an offset predicate to tailor the injection range
--- instead of using the entire nodes range.
---@private
---@return table<string, Range6[][]>
--- @private
--- @return table<string, Range6[][]>
function LanguageTree:_get_injections()
if not self._injection_query then
return {}
@@ -759,7 +842,7 @@ function LanguageTree:_get_injections()
---@type table<integer,TSInjection>
local injections = {}
for tree_index, tree in ipairs(self._trees) do
for index, tree in pairs(self._trees) do
local root_node = tree:root()
local start_line, _, end_line, _ = root_node:range()
@@ -771,7 +854,7 @@ function LanguageTree:_get_injections()
-- TODO(lewis6991): remove after 0.9 (#20434)
lang, combined, ranges = self:_get_injection_deprecated(match, metadata)
end
add_injection(injections, tree_index, pattern, lang, combined, ranges)
add_injection(injections, index, pattern, lang, combined, ranges)
end
end
@@ -794,7 +877,7 @@ function LanguageTree:_get_injections()
end, entry.regions)
table.insert(result[lang], regions)
else
for _, ranges in ipairs(entry.regions) do
for _, ranges in pairs(entry.regions) do
table.insert(result[lang], ranges)
end
end
@@ -828,7 +911,7 @@ function LanguageTree:_edit(
end_row_new,
end_col_new
)
for _, tree in ipairs(self._trees) do
for _, tree in pairs(self._trees) do
tree:edit(
start_byte,
end_byte_old,

View File

@@ -435,6 +435,7 @@ predicate_handlers['vim-match?'] = predicate_handlers['match?']
---@class TSMetadata
---@field range? Range
---@field conceal? string
---@field [integer] TSMetadata
---@field [string] integer|string