From 552983515f2aed9d1017ba67b24b7673fdfcabe8 Mon Sep 17 00:00:00 2001 From: zeertzjq Date: Fri, 6 Jun 2025 10:46:01 +0800 Subject: [PATCH] vim-patch:9.1.1435: completion: various flaws in fuzzy completion (#34335) Problem: completion: various flaws in fuzzy completion Solution: fix the issues (Girish Palya) - Remove the brittle `qsort()` on `compl_match_array`. - Add a stable, non-recursive `mergesort` for the internal doubly linked list of matches. - The sort now happens directly on the internal representation (`compl_T`), preserving sync with external structures and making sorting stable. - Update fuzzy match logic to enforce `max_matches` limits after sorting. - Remove `trim_compl_match_array()`, which is no longer necessary. - Fixe test failures by correctly setting `selected` index and maintaining match consistency. - Introduce `mergesort_list()` in `misc2.c`, which operates generically over doubly linked lists. - Remove `pum_score` and `pum_idx` variables fixes: vim/vim#17387 closes: vim/vim#17430 https://github.com/vim/vim/commit/8cd42a58b49c948ab59ced6ca5f5ccfae5d9ecea Co-authored-by: Girish Palya --- src/nvim/insexpand.c | 301 +++++++++---------------- src/nvim/memory.c | 109 +++++++++ src/nvim/memory.h | 4 + src/nvim/popupmenu.h | 2 - test/old/testdir/test_ins_complete.vim | 19 +- 5 files changed, 240 insertions(+), 195 deletions(-) diff --git a/src/nvim/insexpand.c b/src/nvim/insexpand.c index bc8323e281..1f289ebc82 100644 --- a/src/nvim/insexpand.c +++ b/src/nvim/insexpand.c @@ -1363,73 +1363,33 @@ static void trigger_complete_changed_event(int cur) restore_v_event(v_event, &save_v_event); } -/// Trim compl_match_array to enforce max_matches per completion source. -/// -/// Note: This special-case trimming is a workaround because compl_match_array -/// becomes inconsistent with compl_first_match (list) after former is sorted by -/// fuzzy score. The two structures end up in different orders. -/// Ideally, compl_first_match list should have been sorted instead. -/// -/// Returns recalculated index of shown match. -static int trim_compl_match_array(int shown_match_idx) +// Helper functions for mergesort_list(). + +static void *cp_get_next(void *node) { - int remove_count = 0; - - // Count current matches per source. - int *match_counts = xcalloc((size_t)cpt_sources_count, sizeof(int)); - for (int i = 0; i < compl_match_arraysize; i++) { - int src_idx = compl_match_array[i].pum_cpt_source_idx; - if (src_idx != -1) { - match_counts[src_idx]++; - } - } - - // Calculate size of trimmed array, respecting max_matches per source. - int new_size = 0; - for (int i = 0; i < cpt_sources_count; i++) { - int limit = cpt_sources_array[i].cs_max_matches; - new_size += (limit > 0 && match_counts[i] > limit) ? limit : match_counts[i]; - } - - if (new_size == compl_match_arraysize) { - goto theend; - } - - // Create trimmed array while enforcing per-source limits - pumitem_T *trimmed = xcalloc((size_t)new_size, sizeof(pumitem_T)); - memset(match_counts, 0, sizeof(int) * (size_t)cpt_sources_count); - int trimmed_idx = 0; - for (int i = 0; i < compl_match_arraysize; i++) { - int src_idx = compl_match_array[i].pum_cpt_source_idx; - if (src_idx != -1) { - int limit = cpt_sources_array[src_idx].cs_max_matches; - if (limit <= 0 || match_counts[src_idx] < limit) { - trimmed[trimmed_idx++] = compl_match_array[i]; - match_counts[src_idx]++; - } else if (i < shown_match_idx) { - remove_count++; - } - } else { - trimmed[trimmed_idx++] = compl_match_array[i]; - } - } - xfree(compl_match_array); - compl_match_array = trimmed; - compl_match_arraysize = new_size; - -theend: - xfree(match_counts); - return shown_match_idx - remove_count; + return ((compl_T *)node)->cp_next; } -/// pumitem qsort compare func -static int ins_compl_fuzzy_cmp(const void *a, const void *b) +static void cp_set_next(void *node, void *next) { - const int sa = (*(pumitem_T *)a).pum_score; - const int sb = (*(pumitem_T *)b).pum_score; - const int ia = (*(pumitem_T *)a).pum_idx; - const int ib = (*(pumitem_T *)b).pum_idx; - return sa == sb ? (ia == ib ? 0 : (ia < ib ? -1 : 1)) : (sa < sb ? 1 : -1); + ((compl_T *)node)->cp_next = (compl_T *)next; +} + +static void *cp_get_prev(void *node) +{ + return ((compl_T *)node)->cp_prev; +} + +static void cp_set_prev(void *node, void *prev) +{ + ((compl_T *)node)->cp_prev = (compl_T *)prev; +} + +static int cp_compare_fuzzy(const void *a, const void *b) +{ + int score_a = ((compl_T *)a)->cp_score; + int score_b = ((compl_T *)b)->cp_score; + return (score_b > score_a) ? 1 : (score_b < score_a) ? -1 : 0; } /// Build a popup menu to show the completion matches. @@ -1440,7 +1400,6 @@ static int ins_compl_build_pum(void) { // Need to build the popup menu list. compl_match_arraysize = 0; - compl_T *comp = compl_first_match; // If it's user complete function and refresh_always, // do not use "compl_leader" as prefix filter. @@ -1448,44 +1407,73 @@ static int ins_compl_build_pum(void) XFREE_CLEAR(compl_leader); } - int max_fuzzy_score = 0; unsigned cur_cot_flags = get_cot_flags(); bool compl_no_select = (cur_cot_flags & kOptCotFlagNoselect) != 0; bool fuzzy_filter = (cur_cot_flags & kOptCotFlagFuzzy) != 0; bool fuzzy_sort = fuzzy_filter && !(cur_cot_flags & kOptCotFlagNosort); compl_T *match_head = NULL, *match_tail = NULL; - int match_count = 0; - int cur_source = -1; - bool max_matches_found = false; + int *match_count = NULL; bool is_forward = compl_shows_dir_forward(); + bool is_cpt_completion = (cpt_sources_array != NULL); // If the current match is the original text don't find the first // match after it, don't highlight anything. bool shown_match_ok = match_at_original_text(compl_shown_match); - bool update_shown_match = fuzzy_filter; - if (fuzzy_filter && ctrl_x_mode_normal() - && compl_leader.data == NULL && compl_shown_match->cp_score > 0) { - update_shown_match = false; - } - if (strequal(compl_leader.data, compl_orig_text.data) && !shown_match_ok) { compl_shown_match = compl_no_select ? compl_first_match : compl_first_match->cp_next; } bool did_find_shown_match = false; + compl_T *comp; compl_T *shown_compl = NULL; int i = 0; int cur = -1; + // When 'completeopt' contains "fuzzy" and leader is not NULL or empty, + // set the cp_score for later comparisons. + if (fuzzy_filter && compl_leader.data != NULL && compl_leader.size > 0) { + comp = compl_first_match; + do { + comp->cp_score = fuzzy_match_str(comp->cp_str.data, compl_leader.data); + comp = comp->cp_next; + } while (comp != NULL && !is_first_match(comp)); + } + + // Sort the linked list based on fuzzy score + if (fuzzy_sort && compl_leader.data != NULL && compl_leader.size > 0 + && !is_first_match(compl_first_match->cp_next)) { + comp = compl_first_match->cp_prev; + ins_compl_make_linear(); + if (is_forward) { + compl_first_match->cp_next->cp_prev = NULL; + compl_first_match->cp_next = mergesort_list(compl_first_match->cp_next, + cp_get_next, cp_set_next, + cp_get_prev, cp_set_prev, + cp_compare_fuzzy); + compl_first_match->cp_next->cp_prev = compl_first_match; + } else { + comp->cp_prev->cp_next = NULL; + compl_first_match = mergesort_list(compl_first_match, cp_get_next, cp_set_next, + cp_get_prev, cp_set_prev, cp_compare_fuzzy); + compl_T *tail = compl_first_match; + while (tail->cp_next != NULL) { + tail = tail->cp_next; + } + tail->cp_next = comp; + comp->cp_prev = tail; + } + (void)ins_compl_make_cyclic(); + } + + if (is_cpt_completion) { + match_count = xcalloc((size_t)cpt_sources_count, sizeof(int)); + } + + comp = compl_first_match; do { comp->cp_in_match_array = false; - // When 'completeopt' contains "fuzzy" and leader is not NULL or empty, - // set the cp_score for later comparisons. - if (fuzzy_filter && compl_leader.data != NULL && compl_leader.size > 0) { - comp->cp_score = fuzzy_match_str(comp->cp_str.data, compl_leader.data); - } // Apply 'smartcase' behavior during normal mode if (ctrl_x_mode_normal() && !p_inf && compl_leader.data @@ -1493,67 +1481,56 @@ static int ins_compl_build_pum(void) comp->cp_flags &= ~CP_ICASE; } - if (is_forward && !fuzzy_sort && comp->cp_cpt_source_idx != -1) { - if (cur_source != comp->cp_cpt_source_idx) { - cur_source = comp->cp_cpt_source_idx; - match_count = 1; - max_matches_found = false; - } else if (cpt_sources_array && !max_matches_found) { - int max_matches = cpt_sources_array[cur_source].cs_max_matches; - if (max_matches > 0 && match_count > max_matches) { - max_matches_found = true; - } - } - } - if (!match_at_original_text(comp) - && !max_matches_found && (compl_leader.data == NULL || ins_compl_equal(comp, compl_leader.data, compl_leader.size) || (fuzzy_filter && comp->cp_score > 0))) { - compl_match_arraysize++; - comp->cp_in_match_array = true; - if (match_head == NULL) { - match_head = comp; - } else { - match_tail->cp_match_next = comp; + // Limit number of items from each source if max_items is set. + bool match_limit_exceeded = false; + int cur_source = comp->cp_cpt_source_idx; + if (is_forward && cur_source != -1 && is_cpt_completion) { + match_count[cur_source]++; + int max_matches = cpt_sources_array[cur_source].cs_max_matches; + if (max_matches > 0 && match_count[cur_source] > max_matches) { + match_limit_exceeded = true; + } } - match_tail = comp; - if (!shown_match_ok && !fuzzy_filter) { - if (comp == compl_shown_match || did_find_shown_match) { - // This item is the shown match or this is the - // first displayed item after the shown match. - compl_shown_match = comp; - did_find_shown_match = true; - shown_match_ok = true; + + if (!match_limit_exceeded) { + compl_match_arraysize++; + comp->cp_in_match_array = true; + if (match_head == NULL) { + match_head = comp; } else { - // Remember this displayed match for when the - // shown match is just below it. - shown_compl = comp; + match_tail->cp_match_next = comp; } - cur = i; - } else if (fuzzy_filter) { - if (i == 0) { - shown_compl = comp; - } - // Update the maximum fuzzy score and the shown match - // if the current item's score is higher - if (fuzzy_sort && comp->cp_score > max_fuzzy_score && update_shown_match) { - did_find_shown_match = true; - max_fuzzy_score = comp->cp_score; - if (!compl_no_select) { + match_tail = comp; + + if (!shown_match_ok && !fuzzy_filter) { + if (comp == compl_shown_match || did_find_shown_match) { + // This item is the shown match or this is the + // first displayed item after the shown match. compl_shown_match = comp; + did_find_shown_match = true; + shown_match_ok = true; + } else { + // Remember this displayed match for when the + // shown match is just below it. + shown_compl = comp; + } + cur = i; + } else if (fuzzy_filter) { + if (i == 0) { + shown_compl = comp; + } + + if (!shown_match_ok && comp == compl_shown_match) { + cur = i; + shown_match_ok = true; } } - if (!shown_match_ok && comp == compl_shown_match) { - cur = i; - shown_match_ok = true; - } + i++; } - if (is_forward && !fuzzy_sort && comp->cp_cpt_source_idx != -1) { - match_count++; - } - i++; } if (comp == compl_shown_match && !fuzzy_filter) { @@ -1573,11 +1550,13 @@ static int ins_compl_build_pum(void) comp = comp->cp_next; } while (comp != NULL && !is_first_match(comp)); + xfree(match_count); + if (compl_match_arraysize == 0) { return -1; } - if (fuzzy_filter && !fuzzy_sort && !compl_no_select && !shown_match_ok) { + if (fuzzy_filter && !compl_no_select && !shown_match_ok) { compl_shown_match = shown_compl; shown_match_ok = true; cur = 0; @@ -1593,7 +1572,6 @@ static int ins_compl_build_pum(void) ? comp->cp_text[CPT_ABBR] : comp->cp_str.data; compl_match_array[i].pum_kind = comp->cp_text[CPT_KIND]; compl_match_array[i].pum_info = comp->cp_text[CPT_INFO]; - compl_match_array[i].pum_score = comp->cp_score; compl_match_array[i].pum_cpt_source_idx = comp->cp_cpt_source_idx; compl_match_array[i].pum_user_abbr_hlattr = comp->cp_user_abbr_hlattr; compl_match_array[i].pum_user_kind_hlattr = comp->cp_user_kind_hlattr; @@ -1604,19 +1582,6 @@ static int ins_compl_build_pum(void) comp = match_next; } - if (fuzzy_sort && compl_leader.data != NULL && compl_leader.size > 0) { - for (i = 0; i < compl_match_arraysize; i++) { - compl_match_array[i].pum_idx = i; - } - // sort by the largest score of fuzzy match - qsort(compl_match_array, (size_t)compl_match_arraysize, sizeof(pumitem_T), - ins_compl_fuzzy_cmp); - shown_match_ok = true; - } - - if (fuzzy_sort && cpt_sources_array != NULL) { - cur = trim_compl_match_array(cur); // Truncate by max_matches in 'cpt' - } if (!shown_match_ok) { // no displayed match at all cur = -1; } @@ -3397,7 +3362,6 @@ static void get_complete_info(list_T *what_list, dict_T *retdict) #define CI_WHAT_MATCHES 0x20 #define CI_WHAT_ALL 0xff int what_flag; - const bool compl_fuzzy_match = (get_cot_flags() & kOptCotFlagFuzzy) != 0; if (what_list == NULL) { what_flag = CI_WHAT_ALL & ~(CI_WHAT_MATCHES|CI_WHAT_COMPLETED); @@ -3464,7 +3428,7 @@ static void get_complete_info(list_T *what_list, dict_T *retdict) && compl_curr_match->cp_number == match->cp_number) { selected_idx = list_idx; } - if (compl_fuzzy_match || match->cp_in_match_array) { + if (match->cp_in_match_array) { list_idx += 1; } } @@ -4813,43 +4777,6 @@ static void ins_compl_show_filename(void) redraw_cmdline = false; // don't overwrite! } -/// Find a completion item when 'completeopt' contains "fuzzy". -static compl_T *find_comp_when_fuzzy(void) -{ - int target_idx = -1; - const bool is_forward = compl_shows_dir_forward(); - const bool is_backward = compl_shows_dir_backward(); - compl_T *comp = NULL; - - assert(compl_match_array != NULL); - if ((is_forward && compl_selected_item == compl_match_arraysize - 1) - || (is_backward && compl_selected_item == 0)) { - return match_at_original_text(compl_first_match) ? compl_first_match - : compl_first_match->cp_prev; - } - - if (is_forward) { - target_idx = compl_selected_item + 1; - } else if (is_backward) { - target_idx = compl_selected_item == -1 ? compl_match_arraysize - 1 - : compl_selected_item - 1; - } - - const int score = compl_match_array[target_idx].pum_score; - char *const str = compl_match_array[target_idx].pum_text; - - comp = compl_first_match; - do { - if (comp->cp_score == score - && (str == comp->cp_str.data || str == comp->cp_text[CPT_ABBR])) { - return comp; - } - comp = comp->cp_next; - } while (comp != NULL && !is_first_match(comp)); - - return NULL; -} - /// Find the appropriate completion item when 'complete' ('cpt') includes /// a 'max_matches' postfix. In this case, we search for a match where /// 'cp_in_match_array' is set, indicating that the match is also present @@ -4890,9 +4817,7 @@ static int find_next_completion_match(bool allow_get_expansion, int todo, bool a while (--todo >= 0) { if (compl_shows_dir_forward() && compl_shown_match->cp_next != NULL) { - if (compl_match_array != NULL && compl_fuzzy_match) { - compl_shown_match = find_comp_when_fuzzy(); - } else if (cpt_sources_active) { + if (cpt_sources_active) { compl_shown_match = find_comp_when_cpt_sources(); } else { compl_shown_match = compl_shown_match->cp_next; @@ -4903,9 +4828,7 @@ static int find_next_completion_match(bool allow_get_expansion, int todo, bool a } else if (compl_shows_dir_backward() && compl_shown_match->cp_prev != NULL) { found_end = is_first_match(compl_shown_match); - if (compl_match_array != NULL && compl_fuzzy_match) { - compl_shown_match = find_comp_when_fuzzy(); - } else if (cpt_sources_active) { + if (cpt_sources_active) { compl_shown_match = find_comp_when_cpt_sources(); } else { compl_shown_match = compl_shown_match->cp_prev; diff --git a/src/nvim/memory.c b/src/nvim/memory.c index 5464390d52..130c50b85c 100644 --- a/src/nvim/memory.c +++ b/src/nvim/memory.c @@ -568,6 +568,115 @@ void time_to_bytes(time_t time_, uint8_t buf[8]) } } +/// Iterative merge sort for doubly linked list. +/// O(NlogN) worst case, and stable. +/// - The list is divided into blocks of increasing size (1, 2, 4, 8, ...). +/// - Each pair of blocks is merged in sorted order. +/// - Merged blocks are reconnected to build the sorted list. +void *mergesort_list(void *head, MergeSortGetFunc get_next, MergeSortSetFunc set_next, + MergeSortGetFunc get_prev, MergeSortSetFunc set_prev, + MergeSortCompareFunc compare) +{ + if (!head || !get_next(head)) { + return head; + } + + // Count length + int n = 0; + void *curr = head; + while (curr) { + n++; + curr = get_next(curr); + } + + for (int size = 1; size < n; size *= 2) { + void *new_head = NULL; + void *tail = NULL; + curr = head; + + while (curr) { + // Split two runs + void *left = curr; + void *right = left; + for (int i = 0; i < size && right; i++) { + right = get_next(right); + } + + void *next = right; + for (int i = 0; i < size && next; i++) { + next = get_next(next); + } + + // Break links + void *l_end = right ? get_prev(right) : NULL; + if (l_end) { + set_next(l_end, NULL); + } + if (right) { + set_prev(right, NULL); + } + + void *r_end = next ? get_prev(next) : NULL; + if (r_end) { + set_next(r_end, NULL); + } + if (next) { + set_prev(next, NULL); + } + + // Merge + void *merged = NULL; + void *merged_tail = NULL; + + while (left || right) { + void *chosen = NULL; + if (!left) { + chosen = right; + right = get_next(right); + } else if (!right) { + chosen = left; + left = get_next(left); + } else if (compare(left, right) <= 0) { + chosen = left; + left = get_next(left); + } else { + chosen = right; + right = get_next(right); + } + + if (merged_tail) { + set_next(merged_tail, chosen); + set_prev(chosen, merged_tail); + merged_tail = chosen; + } else { + merged = merged_tail = chosen; + set_prev(chosen, NULL); + } + } + + // Connect to full list + if (!new_head) { + new_head = merged; + } else { + set_next(tail, merged); + set_prev(merged, tail); + } + + // Move tail to end + while (get_next(merged_tail)) { + merged_tail = get_next(merged_tail); + } + tail = merged_tail; + + curr = next; + } + + head = new_head; + } + + return head; +} + #define REUSE_MAX 4 static struct consumed_blk *arena_reuse_blk; diff --git a/src/nvim/memory.h b/src/nvim/memory.h index 0955c5306c..71acb22e7a 100644 --- a/src/nvim/memory.h +++ b/src/nvim/memory.h @@ -21,6 +21,10 @@ typedef void *(*MemCalloc)(size_t, size_t); /// `realloc()` function signature typedef void *(*MemRealloc)(void *, size_t); +typedef void *(*MergeSortGetFunc)(void *); +typedef void (*MergeSortSetFunc)(void *, void *); +typedef int (*MergeSortCompareFunc)(const void *, const void *); + #ifdef UNIT_TESTING /// When unit testing: pointer to the `malloc()` function, may be altered extern MemMalloc mem_malloc; diff --git a/src/nvim/popupmenu.h b/src/nvim/popupmenu.h index 44bf24e9c2..61cc4d2b6d 100644 --- a/src/nvim/popupmenu.h +++ b/src/nvim/popupmenu.h @@ -14,8 +14,6 @@ typedef struct { char *pum_kind; ///< extra kind text (may be truncated) char *pum_extra; ///< extra menu text (may be truncated) char *pum_info; ///< extra info - int pum_score; ///< fuzzy match score - int pum_idx; ///< index of item before sorting by score int pum_cpt_source_idx; ///< index of completion source in 'cpt' int pum_user_abbr_hlattr; ///< highlight attribute for abbr int pum_user_kind_hlattr; ///< highlight attribute for kind diff --git a/test/old/testdir/test_ins_complete.vim b/test/old/testdir/test_ins_complete.vim index f319b51c7f..24e76d1014 100644 --- a/test/old/testdir/test_ins_complete.vim +++ b/test/old/testdir/test_ins_complete.vim @@ -3546,7 +3546,7 @@ func Test_complete_opt_fuzzy() set cot+=noinsert call feedkeys("i\=CompAnother()\f", 'tx') call assert_equal("for", g:abbr) - call assert_equal(2, g:selected) + call assert_equal(0, g:selected) set cot=menu,menuone,noselect,fuzzy call feedkeys("i\=CompAnother()\\\\\", 'tx') @@ -3784,7 +3784,7 @@ func Test_cfc_with_longest() call assert_equal('hello', getline('.')) " continue search for new leader after insert common prefix - exe "normal ohellokate\h\\k\\" + exe "normal ohellokate\h\\k\\\" call assert_equal('hellokate', getline('.')) bw! @@ -4191,6 +4191,12 @@ func Test_complete_match_count() set cpt=.^1,w exe "normal! Gof\\=PrintMenuWords()\" call assert_equal('f{''matches'': [''fo''], ''selected'': -1}', getline(5)) + " With non-matching items + %d + call setline(1, ["free", "freebar", "foo", "fobarbaz"]) + set cpt=.^2,w + exe "normal! Gofo\\=PrintMenuWords()\" + call assert_equal('fo{''matches'': [''foo'', ''fobarbaz''], ''selected'': -1}', getline(5)) set cot& func ComplFunc(findstart, base) @@ -4280,16 +4286,21 @@ func Test_complete_match_count() bw! " Test 'fuzzy' with max_items - " XXX: Cannot use complete_info() since it is broken for 'fuzzy' new set completeopt=menu,noselect,fuzzy set complete=. call setline(1, ["abcd", "abac", "abdc"]) - execute "normal Goa\c\" + exe "normal! Goa\c\=PrintMenuWords()\" + call assert_equal('ac{''matches'': [''abac'', ''abcd'', ''abdc''], ''selected'': -1}', getline(4)) + exe "normal! Sa\c\\\\=PrintMenuWords()\" + call assert_equal('abac{''matches'': [''abac'', ''abcd'', ''abdc''], ''selected'': 0}', getline(4)) + execute "normal Sa\c\" call assert_equal('abac', getline(4)) execute "normal Sa\c\\\\\" call assert_equal('abac', getline(4)) set complete=.^1 + exe "normal! Sa\c\\\\=PrintMenuWords()\" + call assert_equal('abac{''matches'': [''abac''], ''selected'': 0}', getline(4)) execute "normal Sa\c\\\" call assert_equal('abac', getline(4)) set complete=.^2