perf(treesitter): smarter languagetree invalidation

Problem:
  Treesitter injections are slow because all injected trees are invalidated on every change.

Solution:
    Implement smarter invalidation to avoid reparsing injected regions.

    - In on_bytes, try and update self._regions as best we can. This PR just offsets any regions after the change.
    - Add valid flags for each region in self._regions.
    - Call on_bytes recursively for all children.
       - We still need to run the query every time for the top level tree. I don't know how to avoid this. However, if the new injection ranges don't change, then we re-use the old trees and avoid reparsing children.

This should result in roughly a 2-3x reduction in tree parsing when the comment injections are enabled.
This commit is contained in:
Lewis Russell
2023-02-23 15:19:52 +00:00
committed by GitHub
parent 8680715743
commit 75e53341f3
5 changed files with 326 additions and 88 deletions

View File

@@ -639,6 +639,17 @@ int x = INT_MAX;
{1, 26, 1, 65}, -- READ_STRING(x, y) (char_u *)read_string((x), (size_t)(y))
{2, 29, 2, 68} -- READ_STRING_OK(x, y) (char_u *)read_string((x), (size_t)(y))
}, get_ranges())
helpers.feed('ggo<esc>')
eq(5, exec_lua("return #parser:children().c:trees()"))
eq({
{0, 0, 8, 0}, -- root tree
{4, 14, 4, 17}, -- VALUE 123
{5, 15, 5, 18}, -- VALUE1 123
{6, 15, 6, 18}, -- VALUE2 123
{2, 26, 2, 65}, -- READ_STRING(x, y) (char_u *)read_string((x), (size_t)(y))
{3, 29, 3, 68} -- READ_STRING_OK(x, y) (char_u *)read_string((x), (size_t)(y))
}, get_ranges())
end)
end)
@@ -660,6 +671,18 @@ int x = INT_MAX;
{1, 26, 2, 68} -- READ_STRING(x, y) (char_u *)read_string((x), (size_t)(y))
-- READ_STRING_OK(x, y) (char_u *)read_string((x), (size_t)(y))
}, get_ranges())
helpers.feed('ggo<esc>')
eq("table", exec_lua("return type(parser:children().c)"))
eq(2, exec_lua("return #parser:children().c:trees()"))
eq({
{0, 0, 8, 0}, -- root tree
{4, 14, 6, 18}, -- VALUE 123
-- VALUE1 123
-- VALUE2 123
{2, 26, 3, 68} -- READ_STRING(x, y) (char_u *)read_string((x), (size_t)(y))
-- READ_STRING_OK(x, y) (char_u *)read_string((x), (size_t)(y))
}, get_ranges())
end)
end)
@@ -688,6 +711,18 @@ int x = INT_MAX;
{1, 26, 2, 68} -- READ_STRING(x, y) (char_u *)read_string((x), (size_t)(y))
-- READ_STRING_OK(x, y) (char_u *)read_string((x), (size_t)(y))
}, get_ranges())
helpers.feed('ggo<esc>')
eq("table", exec_lua("return type(parser:children().c)"))
eq(2, exec_lua("return #parser:children().c:trees()"))
eq({
{0, 0, 8, 0}, -- root tree
{4, 14, 6, 18}, -- VALUE 123
-- VALUE1 123
-- VALUE2 123
{2, 26, 3, 68} -- READ_STRING(x, y) (char_u *)read_string((x), (size_t)(y))
-- READ_STRING_OK(x, y) (char_u *)read_string((x), (size_t)(y))
}, get_ranges())
end)
it("should not inject bad languages", function()