mirror of
https://github.com/neovim/neovim.git
synced 2026-02-20 09:19:43 +10:00
Allow Include What You Use to remove unnecessary includes and only include what is necessary. This helps with reducing compilation times and makes it easier to visualise which dependencies are actually required. Work on https://github.com/neovim/neovim/issues/549, but doesn't close it since this only works fully for .c files and not headers.
3663 lines
108 KiB
C
3663 lines
108 KiB
C
// This is an open source non-commercial project. Dear PVS-Studio, please check
|
|
// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
|
|
|
|
// spell.c: code for spell checking
|
|
//
|
|
// See spellfile.c for the Vim spell file format.
|
|
//
|
|
// The spell checking mechanism uses a tree (aka trie). Each node in the tree
|
|
// has a list of bytes that can appear (siblings). For each byte there is a
|
|
// pointer to the node with the byte that follows in the word (child).
|
|
//
|
|
// A NUL byte is used where the word may end. The bytes are sorted, so that
|
|
// binary searching can be used and the NUL bytes are at the start. The
|
|
// number of possible bytes is stored before the list of bytes.
|
|
//
|
|
// The tree uses two arrays: "byts" stores the characters, "idxs" stores
|
|
// either the next index or flags. The tree starts at index 0. For example,
|
|
// to look up "vi" this sequence is followed:
|
|
// i = 0
|
|
// len = byts[i]
|
|
// n = where "v" appears in byts[i + 1] to byts[i + len]
|
|
// i = idxs[n]
|
|
// len = byts[i]
|
|
// n = where "i" appears in byts[i + 1] to byts[i + len]
|
|
// i = idxs[n]
|
|
// len = byts[i]
|
|
// find that byts[i + 1] is 0, idxs[i + 1] has flags for "vi".
|
|
//
|
|
// There are two word trees: one with case-folded words and one with words in
|
|
// original case. The second one is only used for keep-case words and is
|
|
// usually small.
|
|
//
|
|
// There is one additional tree for when not all prefixes are applied when
|
|
// generating the .spl file. This tree stores all the possible prefixes, as
|
|
// if they were words. At each word (prefix) end the prefix nr is stored, the
|
|
// following word must support this prefix nr. And the condition nr is
|
|
// stored, used to look up the condition that the word must match with.
|
|
//
|
|
// Thanks to Olaf Seibert for providing an example implementation of this tree
|
|
// and the compression mechanism.
|
|
// LZ trie ideas:
|
|
// http://www.irb.hr/hr/home/ristov/papers/RistovLZtrieRevision1.pdf
|
|
// More papers: http://www-igm.univ-mlv.fr/~laporte/publi_en.html
|
|
//
|
|
// Matching involves checking the caps type: Onecap ALLCAP KeepCap.
|
|
//
|
|
// Why doesn't Vim use aspell/ispell/myspell/etc.?
|
|
// See ":help develop-spell".
|
|
|
|
// Use SPELL_PRINTTREE for debugging: dump the word tree after adding a word.
|
|
// Only use it for small word lists!
|
|
|
|
// Use SPELL_COMPRESS_ALWAYS for debugging: compress the word tree after
|
|
// adding a word. Only use it for small word lists!
|
|
|
|
// Use DEBUG_TRIEWALK to print the changes made in suggest_trie_walk() for a
|
|
// specific word.
|
|
|
|
#include <assert.h>
|
|
#include <inttypes.h>
|
|
#include <limits.h>
|
|
#include <stdbool.h>
|
|
#include <stddef.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
|
|
#include "nvim/ascii.h"
|
|
#include "nvim/autocmd.h"
|
|
#include "nvim/buffer.h"
|
|
#include "nvim/change.h"
|
|
#include "nvim/charset.h"
|
|
#include "nvim/cursor.h"
|
|
#include "nvim/decoration.h"
|
|
#include "nvim/decoration_provider.h"
|
|
#include "nvim/drawscreen.h"
|
|
#include "nvim/ex_cmds.h"
|
|
#include "nvim/ex_cmds_defs.h"
|
|
#include "nvim/ex_docmd.h"
|
|
#include "nvim/garray.h"
|
|
#include "nvim/gettext.h"
|
|
#include "nvim/globals.h"
|
|
#include "nvim/hashtab.h"
|
|
#include "nvim/highlight_defs.h"
|
|
#include "nvim/insexpand.h"
|
|
#include "nvim/log.h"
|
|
#include "nvim/macros.h"
|
|
#include "nvim/mark.h"
|
|
#include "nvim/mbyte.h"
|
|
#include "nvim/memline.h"
|
|
#include "nvim/memory.h"
|
|
#include "nvim/message.h"
|
|
#include "nvim/option.h"
|
|
#include "nvim/os/fs.h"
|
|
#include "nvim/os/input.h"
|
|
#include "nvim/os/os_defs.h"
|
|
#include "nvim/path.h"
|
|
#include "nvim/pos.h"
|
|
#include "nvim/regexp.h"
|
|
#include "nvim/runtime.h"
|
|
#include "nvim/search.h"
|
|
#include "nvim/spell.h"
|
|
#include "nvim/spell_defs.h"
|
|
#include "nvim/spellfile.h"
|
|
#include "nvim/spellsuggest.h"
|
|
#include "nvim/strings.h"
|
|
#include "nvim/syntax.h"
|
|
#include "nvim/types.h"
|
|
#include "nvim/undo.h"
|
|
#include "nvim/vim.h"
|
|
#include "nvim/window.h"
|
|
|
|
// Result values of a word lookup (kept in mi_result and mi_result2).
// Lower number is accepted over higher one.
|
|
enum {
|
|
SP_BANNED = -1,  // word has the WF_BANNED flag
|
|
SP_RARE = 0,  // word has the WF_RARE flag
|
|
SP_OK = 1,  // word is accepted
|
|
SP_LOCAL = 2,  // word has a region flag that does not match the active region
|
|
SP_BAD = 3,  // nothing matched; the value a lookup starts with
|
|
};
|
|
|
|
// First language that is loaded, start of the linked list of loaded
|
|
// languages.  Initialised to NULL.
|
|
slang_T *first_lang = NULL;
|
|
|
|
// File used for "zG" and "zW" (presumably its name, given the char_u * type
// -- TODO confirm).  Initialised to NULL.
|
|
char_u *int_wordlist = NULL;
|
|
|
|
// Structure to store info for word matching.  spell_check() fills one of
// these and passes it to find_word() and find_prefix(), which update it.
|
|
typedef struct matchinf_S {
|
|
langp_T *mi_lp; // info for language and region
|
|
|
|
// pointers to original text to be checked
|
|
char_u *mi_word; // start of word being checked
|
|
char_u *mi_end; // end of matching word so far
|
|
char_u *mi_fend; // next char to be added to mi_fword
|
|
char_u *mi_cend; // char after what was used for
|
|
// mi_capflags; NULL when mi_capflags is not yet computed
|
|
|
|
// case-folded text
|
|
char_u mi_fword[MAXWLEN + 1]; // mi_word case-folded
|
|
int mi_fwordlen; // nr of valid bytes in mi_fword
|
|
|
|
// for when checking word after a prefix
|
|
int mi_prefarridx; // index in sl_pidxs with list of
|
|
// affixID/condition
|
|
int mi_prefcnt; // number of entries at mi_prefarridx
|
|
int mi_prefixlen; // byte length of prefix (in mi_fword)
|
|
int mi_cprefixlen; // byte length of prefix in original
|
|
// case (in mi_word)
|
|
|
|
// for when checking a compound word
|
|
int mi_compoff; // start of following word offset
|
|
char_u mi_compflags[MAXWLEN]; // flags for compound words used
|
|
int mi_complen; // nr of compound words used
|
|
int mi_compextra; // nr of COMPOUNDROOT words
|
|
|
|
// others
|
|
int mi_result; // result so far: SP_BAD, SP_OK, etc.
|
|
int mi_capflags; // WF_ONECAP WF_ALLCAP WF_KEEPCAP
|
|
win_T *mi_win; // window whose buffer is being checked
|
|
|
|
// for NOBREAK
|
|
int mi_result2; // "mi_result" without following word
|
|
char_u *mi_end2; // "mi_end" without following word
|
|
} matchinf_T;
|
|
|
|
// Structure used for the cookie argument of do_in_runtimepath().
// NOTE(review): no use of this type is visible in this part of the file.
|
|
typedef struct spelload_S {
|
|
char_u sl_lang[MAXWLEN + 1]; // language name
|
|
slang_T *sl_slang; // resulting slang_T struct
|
|
int sl_nobreak; // NOBREAK language found
|
|
} spelload_T;
|
|
|
|
// Capacity of syl_item_T.sy_chars.
#define SY_MAXLEN 30
|
|
// One syllable item: a short sequence of chars plus its length.
typedef struct syl_item_S {
|
|
char_u sy_chars[SY_MAXLEN]; // the sequence of chars
|
|
int sy_len;  // presumably the length of sy_chars in use, in bytes -- TODO confirm
|
|
} syl_item_T;
|
|
|
|
// Global spell table.  NOTE(review): presumably the per-character tables used
// by spell_iswordp()/SPELL_ISUPPER() -- confirm against spell_defs.h.
spelltab_T spelltab;
|
|
// NOTE(review): presumably non-zero once "spelltab" has been filled in --
// confirm against the code that sets it (not visible here).
int did_set_spelltab;
|
|
|
|
#ifdef INCLUDE_GENERATED_DECLARATIONS
|
|
# include "spell.c.generated.h"
|
|
#endif
|
|
|
|
// mode values for find_word(): they select the tree to search and the offset
// at which the search starts.
|
|
#define FIND_FOLDWORD 0 // find word case-folded
|
|
#define FIND_KEEPWORD 1 // find keep-case word
|
|
#define FIND_PREFIX 2 // find word after prefix (starts at mi_prefixlen)
|
|
#define FIND_COMPOUND 3 // find case-folded compound word (starts at mi_compoff)
|
|
#define FIND_KEEPCOMPOUND 4 // find keep-case compound word (starts at mi_compoff)
|
|
|
|
// Error message for a malformed spell file; find_word() reports it when the
// tree holds more possible word endings than MAXWLEN.
char *e_format = N_("E759: Format error in spell file");
|
|
|
|
// Remember what "z?" replaced.  Both start out as NULL.
|
|
char *repl_from = NULL;
|
|
char *repl_to = NULL;
|
|
|
|
/// Main spell-checking function.
|
|
/// "ptr" points to a character that could be the start of a word.
|
|
/// "*attrp" is set to the highlight index for a badly spelled word. For a
|
|
/// non-word or when it's OK it remains unchanged.
|
|
/// This must only be called when 'spelllang' is not empty.
|
|
///
|
|
/// "capcol" is used to check for a Capitalised word after the end of a
|
|
/// sentence. If it's zero then perform the check. Return the column where to
|
|
/// check next, or -1 when no sentence end was found. If it's NULL then don't
|
|
/// worry.
|
|
///
|
|
/// @param wp current window
|
|
/// @param capcol column to check for Capital
|
|
/// @param docount count good words
|
|
///
|
|
/// @return the length of the word in bytes, also when it's OK, so that the
|
|
/// caller can skip over the word.
|
|
size_t spell_check(win_T *wp, char_u *ptr, hlf_T *attrp, int *capcol, bool docount)
|
|
{
|
|
matchinf_T mi; // Most things are put in "mi" so that it can
|
|
// be passed to functions quickly.
|
|
size_t nrlen = 0; // found a number first
|
|
size_t wrongcaplen = 0;
|
|
bool count_word = docount;
|
|
bool use_camel_case = (wp->w_s->b_p_spo_flags & SPO_CAMEL) != 0;
|
|
bool camel_case = false;
|
|
|
|
// A word never starts at a space or a control character. Return quickly
|
|
// then, skipping over the character.
|
|
if (*ptr <= ' ') {
|
|
return 1;
|
|
}
|
|
|
|
// Return here when loading language files failed.
|
|
if (GA_EMPTY(&wp->w_s->b_langp)) {
|
|
return 1;
|
|
}
|
|
|
|
CLEAR_FIELD(mi);
|
|
|
|
// A number is always OK. Also skip hexadecimal numbers 0xFF99 and
|
|
// 0X99FF. But always do check spelling to find "3GPP" and "11
|
|
// julifeest".
|
|
if (*ptr >= '0' && *ptr <= '9') {
|
|
if (*ptr == '0' && (ptr[1] == 'b' || ptr[1] == 'B')) {
|
|
mi.mi_end = (char_u *)skipbin((char *)ptr + 2);
|
|
} else if (*ptr == '0' && (ptr[1] == 'x' || ptr[1] == 'X')) {
|
|
mi.mi_end = (char_u *)skiphex((char *)ptr + 2);
|
|
} else {
|
|
mi.mi_end = (char_u *)skipdigits((char *)ptr);
|
|
}
|
|
nrlen = (size_t)(mi.mi_end - ptr);
|
|
}
|
|
|
|
// Find the normal end of the word (until the next non-word character).
|
|
mi.mi_word = ptr;
|
|
mi.mi_fend = ptr;
|
|
if (spell_iswordp(mi.mi_fend, wp)) {
|
|
bool this_upper = false; // init for gcc
|
|
|
|
if (use_camel_case) {
|
|
int c = utf_ptr2char((char *)mi.mi_fend);
|
|
this_upper = SPELL_ISUPPER(c);
|
|
}
|
|
|
|
do {
|
|
MB_PTR_ADV(mi.mi_fend);
|
|
if (use_camel_case) {
|
|
const bool prev_upper = this_upper;
|
|
int c = utf_ptr2char((char *)mi.mi_fend);
|
|
this_upper = SPELL_ISUPPER(c);
|
|
camel_case = !prev_upper && this_upper;
|
|
}
|
|
} while (*mi.mi_fend != NUL && spell_iswordp(mi.mi_fend, wp)
|
|
&& !camel_case);
|
|
|
|
if (capcol != NULL && *capcol == 0 && wp->w_s->b_cap_prog != NULL) {
|
|
// Check word starting with capital letter.
|
|
int c = utf_ptr2char((char *)ptr);
|
|
if (!SPELL_ISUPPER(c)) {
|
|
wrongcaplen = (size_t)(mi.mi_fend - ptr);
|
|
}
|
|
}
|
|
}
|
|
if (capcol != NULL) {
|
|
*capcol = -1;
|
|
}
|
|
|
|
// We always use the characters up to the next non-word character,
|
|
// also for bad words.
|
|
mi.mi_end = mi.mi_fend;
|
|
|
|
// Check caps type later.
|
|
mi.mi_capflags = 0;
|
|
mi.mi_cend = NULL;
|
|
mi.mi_win = wp;
|
|
|
|
// case-fold the word with one non-word character, so that we can check
|
|
// for the word end.
|
|
if (*mi.mi_fend != NUL) {
|
|
MB_PTR_ADV(mi.mi_fend);
|
|
}
|
|
|
|
(void)spell_casefold(wp, ptr, (int)(mi.mi_fend - ptr), mi.mi_fword,
|
|
MAXWLEN + 1);
|
|
mi.mi_fwordlen = (int)STRLEN(mi.mi_fword);
|
|
|
|
if (camel_case && mi.mi_fwordlen > 0) {
|
|
// introduce a fake word end space into the folded word.
|
|
mi.mi_fword[mi.mi_fwordlen - 1] = ' ';
|
|
}
|
|
|
|
// The word is bad unless we recognize it.
|
|
mi.mi_result = SP_BAD;
|
|
mi.mi_result2 = SP_BAD;
|
|
|
|
// Loop over the languages specified in 'spelllang'.
|
|
// We check them all, because a word may be matched longer in another
|
|
// language.
|
|
for (int lpi = 0; lpi < wp->w_s->b_langp.ga_len; lpi++) {
|
|
mi.mi_lp = LANGP_ENTRY(wp->w_s->b_langp, lpi);
|
|
|
|
// If reloading fails the language is still in the list but everything
|
|
// has been cleared.
|
|
if (mi.mi_lp->lp_slang->sl_fidxs == NULL) {
|
|
continue;
|
|
}
|
|
|
|
// Check for a matching word in case-folded words.
|
|
find_word(&mi, FIND_FOLDWORD);
|
|
|
|
// Check for a matching word in keep-case words.
|
|
find_word(&mi, FIND_KEEPWORD);
|
|
|
|
// Check for matching prefixes.
|
|
find_prefix(&mi, FIND_FOLDWORD);
|
|
|
|
// For a NOBREAK language, may want to use a word without a following
|
|
// word as a backup.
|
|
if (mi.mi_lp->lp_slang->sl_nobreak && mi.mi_result == SP_BAD
|
|
&& mi.mi_result2 != SP_BAD) {
|
|
mi.mi_result = mi.mi_result2;
|
|
mi.mi_end = mi.mi_end2;
|
|
}
|
|
|
|
// Count the word in the first language where it's found to be OK.
|
|
if (count_word && mi.mi_result == SP_OK) {
|
|
count_common_word(mi.mi_lp->lp_slang, (char *)ptr,
|
|
(int)(mi.mi_end - ptr), 1);
|
|
count_word = false;
|
|
}
|
|
}
|
|
|
|
if (mi.mi_result != SP_OK) {
|
|
// If we found a number skip over it. Allows for "42nd". Do flag
|
|
// rare and local words, e.g., "3GPP".
|
|
if (nrlen > 0) {
|
|
if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED) {
|
|
return nrlen;
|
|
}
|
|
} else if (!spell_iswordp_nmw(ptr, wp)) {
|
|
// When we are at a non-word character there is no error, just
|
|
// skip over the character (try looking for a word after it).
|
|
if (capcol != NULL && wp->w_s->b_cap_prog != NULL) {
|
|
regmatch_T regmatch;
|
|
|
|
// Check for end of sentence.
|
|
regmatch.regprog = wp->w_s->b_cap_prog;
|
|
regmatch.rm_ic = false;
|
|
int r = vim_regexec(®match, (char *)ptr, 0);
|
|
wp->w_s->b_cap_prog = regmatch.regprog;
|
|
if (r) {
|
|
*capcol = (int)(regmatch.endp[0] - (char *)ptr);
|
|
}
|
|
}
|
|
|
|
return (size_t)(utfc_ptr2len((char *)ptr));
|
|
} else if (mi.mi_end == ptr) {
|
|
// Always include at least one character. Required for when there
|
|
// is a mixup in "midword".
|
|
MB_PTR_ADV(mi.mi_end);
|
|
} else if (mi.mi_result == SP_BAD
|
|
&& LANGP_ENTRY(wp->w_s->b_langp, 0)->lp_slang->sl_nobreak) {
|
|
char_u *p, *fp;
|
|
int save_result = mi.mi_result;
|
|
|
|
// First language in 'spelllang' is NOBREAK. Find first position
|
|
// at which any word would be valid.
|
|
mi.mi_lp = LANGP_ENTRY(wp->w_s->b_langp, 0);
|
|
if (mi.mi_lp->lp_slang->sl_fidxs != NULL) {
|
|
p = mi.mi_word;
|
|
fp = mi.mi_fword;
|
|
for (;;) {
|
|
MB_PTR_ADV(p);
|
|
MB_PTR_ADV(fp);
|
|
if (p >= mi.mi_end) {
|
|
break;
|
|
}
|
|
mi.mi_compoff = (int)(fp - mi.mi_fword);
|
|
find_word(&mi, FIND_COMPOUND);
|
|
if (mi.mi_result != SP_BAD) {
|
|
mi.mi_end = p;
|
|
break;
|
|
}
|
|
}
|
|
mi.mi_result = save_result;
|
|
}
|
|
}
|
|
|
|
if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED) {
|
|
*attrp = HLF_SPB;
|
|
} else if (mi.mi_result == SP_RARE) {
|
|
*attrp = HLF_SPR;
|
|
} else {
|
|
*attrp = HLF_SPL;
|
|
}
|
|
}
|
|
|
|
if (wrongcaplen > 0 && (mi.mi_result == SP_OK || mi.mi_result == SP_RARE)) {
|
|
// Report SpellCap only when the word isn't badly spelled.
|
|
*attrp = HLF_SPC;
|
|
return wrongcaplen;
|
|
}
|
|
|
|
return (size_t)(mi.mi_end - ptr);
|
|
}
|
|
|
|
// Check if the word at "mip->mi_word" is in the tree.
|
|
// When "mode" is FIND_FOLDWORD check in fold-case word tree.
|
|
// When "mode" is FIND_KEEPWORD check in keep-case word tree.
|
|
// When "mode" is FIND_PREFIX check for word after prefix in fold-case word
|
|
// tree.
|
|
//
|
|
// For a match mip->mi_result is updated.
|
|
static void find_word(matchinf_T *mip, int mode)
|
|
{
|
|
int wlen = 0;
|
|
int flen;
|
|
char_u *ptr;
|
|
slang_T *slang = mip->mi_lp->lp_slang;
|
|
char_u *byts;
|
|
idx_T *idxs;
|
|
|
|
if (mode == FIND_KEEPWORD || mode == FIND_KEEPCOMPOUND) {
|
|
// Check for word with matching case in keep-case tree.
|
|
ptr = mip->mi_word;
|
|
flen = 9999; // no case folding, always enough bytes
|
|
byts = slang->sl_kbyts;
|
|
idxs = slang->sl_kidxs;
|
|
|
|
if (mode == FIND_KEEPCOMPOUND) {
|
|
// Skip over the previously found word(s).
|
|
wlen += mip->mi_compoff;
|
|
}
|
|
} else {
|
|
// Check for case-folded in case-folded tree.
|
|
ptr = mip->mi_fword;
|
|
flen = mip->mi_fwordlen; // available case-folded bytes
|
|
byts = slang->sl_fbyts;
|
|
idxs = slang->sl_fidxs;
|
|
|
|
if (mode == FIND_PREFIX) {
|
|
// Skip over the prefix.
|
|
wlen = mip->mi_prefixlen;
|
|
flen -= mip->mi_prefixlen;
|
|
} else if (mode == FIND_COMPOUND) {
|
|
// Skip over the previously found word(s).
|
|
wlen = mip->mi_compoff;
|
|
flen -= mip->mi_compoff;
|
|
}
|
|
}
|
|
|
|
if (byts == NULL) {
|
|
return; // array is empty
|
|
}
|
|
idx_T arridx = 0;
|
|
int endlen[MAXWLEN]; // length at possible word endings
|
|
idx_T endidx[MAXWLEN]; // possible word endings
|
|
int endidxcnt = 0;
|
|
int c;
|
|
|
|
// Repeat advancing in the tree until:
|
|
// - there is a byte that doesn't match,
|
|
// - we reach the end of the tree,
|
|
// - or we reach the end of the line.
|
|
for (;;) {
|
|
if (flen <= 0 && *mip->mi_fend != NUL) {
|
|
flen = fold_more(mip);
|
|
}
|
|
|
|
int len = byts[arridx++];
|
|
|
|
// If the first possible byte is a zero the word could end here.
|
|
// Remember this index, we first check for the longest word.
|
|
if (byts[arridx] == 0) {
|
|
if (endidxcnt == MAXWLEN) {
|
|
// Must be a corrupted spell file.
|
|
emsg(_(e_format));
|
|
return;
|
|
}
|
|
endlen[endidxcnt] = wlen;
|
|
endidx[endidxcnt++] = arridx++;
|
|
len--;
|
|
|
|
// Skip over the zeros, there can be several flag/region
|
|
// combinations.
|
|
while (len > 0 && byts[arridx] == 0) {
|
|
arridx++;
|
|
len--;
|
|
}
|
|
if (len == 0) {
|
|
break; // no children, word must end here
|
|
}
|
|
}
|
|
|
|
// Stop looking at end of the line.
|
|
if (ptr[wlen] == NUL) {
|
|
break;
|
|
}
|
|
|
|
// Perform a binary search in the list of accepted bytes.
|
|
c = ptr[wlen];
|
|
if (c == TAB) { // <Tab> is handled like <Space>
|
|
c = ' ';
|
|
}
|
|
idx_T lo = arridx;
|
|
idx_T hi = arridx + len - 1;
|
|
while (lo < hi) {
|
|
idx_T m = (lo + hi) / 2;
|
|
if (byts[m] > c) {
|
|
hi = m - 1;
|
|
} else if (byts[m] < c) {
|
|
lo = m + 1;
|
|
} else {
|
|
lo = hi = m;
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Stop if there is no matching byte.
|
|
if (hi < lo || byts[lo] != c) {
|
|
break;
|
|
}
|
|
|
|
// Continue at the child (if there is one).
|
|
arridx = idxs[lo];
|
|
wlen++;
|
|
flen--;
|
|
|
|
// One space in the good word may stand for several spaces in the
|
|
// checked word.
|
|
if (c == ' ') {
|
|
for (;;) {
|
|
if (flen <= 0 && *mip->mi_fend != NUL) {
|
|
flen = fold_more(mip);
|
|
}
|
|
if (ptr[wlen] != ' ' && ptr[wlen] != TAB) {
|
|
break;
|
|
}
|
|
wlen++;
|
|
flen--;
|
|
}
|
|
}
|
|
}
|
|
|
|
char_u *p;
|
|
bool word_ends;
|
|
|
|
// Verify that one of the possible endings is valid. Try the longest
|
|
// first.
|
|
while (endidxcnt > 0) {
|
|
endidxcnt--;
|
|
arridx = endidx[endidxcnt];
|
|
wlen = endlen[endidxcnt];
|
|
|
|
if (utf_head_off((char *)ptr, (char *)ptr + wlen) > 0) {
|
|
continue; // not at first byte of character
|
|
}
|
|
if (spell_iswordp(ptr + wlen, mip->mi_win)) {
|
|
if (slang->sl_compprog == NULL && !slang->sl_nobreak) {
|
|
continue; // next char is a word character
|
|
}
|
|
word_ends = false;
|
|
} else {
|
|
word_ends = true;
|
|
}
|
|
// The prefix flag is before compound flags. Once a valid prefix flag
|
|
// has been found we try compound flags.
|
|
bool prefix_found = false;
|
|
|
|
if (mode != FIND_KEEPWORD) {
|
|
// Compute byte length in original word, length may change
|
|
// when folding case. This can be slow, take a shortcut when the
|
|
// case-folded word is equal to the keep-case word.
|
|
p = mip->mi_word;
|
|
if (STRNCMP(ptr, p, wlen) != 0) {
|
|
for (char_u *s = ptr; s < ptr + wlen; MB_PTR_ADV(s)) {
|
|
MB_PTR_ADV(p);
|
|
}
|
|
wlen = (int)(p - mip->mi_word);
|
|
}
|
|
}
|
|
|
|
// Check flags and region. For FIND_PREFIX check the condition and
|
|
// prefix ID.
|
|
// Repeat this if there are more flags/region alternatives until there
|
|
// is a match.
|
|
for (int len = byts[arridx - 1]; len > 0 && byts[arridx] == 0; len--, arridx++) {
|
|
uint32_t flags = (uint32_t)idxs[arridx];
|
|
|
|
// For the fold-case tree check that the case of the checked word
|
|
// matches with what the word in the tree requires.
|
|
// For keep-case tree the case is always right. For prefixes we
|
|
// don't bother to check.
|
|
if (mode == FIND_FOLDWORD) {
|
|
if (mip->mi_cend != mip->mi_word + wlen) {
|
|
// mi_capflags was set for a different word length, need
|
|
// to do it again.
|
|
mip->mi_cend = mip->mi_word + wlen;
|
|
mip->mi_capflags = captype(mip->mi_word, mip->mi_cend);
|
|
}
|
|
|
|
if (mip->mi_capflags == WF_KEEPCAP
|
|
|| !spell_valid_case(mip->mi_capflags, (int)flags)) {
|
|
continue;
|
|
}
|
|
} else if (mode == FIND_PREFIX && !prefix_found) {
|
|
// When mode is FIND_PREFIX the word must support the prefix:
|
|
// check the prefix ID and the condition. Do that for the list at
|
|
// mip->mi_prefarridx that find_prefix() filled.
|
|
c = valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx,
|
|
(int)flags,
|
|
mip->mi_word + mip->mi_cprefixlen, slang,
|
|
false);
|
|
if (c == 0) {
|
|
continue;
|
|
}
|
|
|
|
// Use the WF_RARE flag for a rare prefix.
|
|
if (c & WF_RAREPFX) {
|
|
flags |= WF_RARE;
|
|
}
|
|
prefix_found = true;
|
|
}
|
|
|
|
if (slang->sl_nobreak) {
|
|
if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND)
|
|
&& (flags & WF_BANNED) == 0) {
|
|
// NOBREAK: found a valid following word. That's all we
|
|
// need to know, so return.
|
|
mip->mi_result = SP_OK;
|
|
break;
|
|
}
|
|
} else if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND
|
|
|| !word_ends)) {
|
|
// If there is no compound flag or the word is shorter than
|
|
// COMPOUNDMIN reject it quickly.
|
|
// Makes you wonder why someone puts a compound flag on a word
|
|
// that's too short... Myspell compatibility requires this
|
|
// anyway.
|
|
if (((unsigned)flags >> 24) == 0
|
|
|| wlen - mip->mi_compoff < slang->sl_compminlen) {
|
|
continue;
|
|
}
|
|
// For multi-byte chars check character length against
|
|
// COMPOUNDMIN.
|
|
if (slang->sl_compminlen > 0
|
|
&& mb_charlen_len(mip->mi_word + mip->mi_compoff,
|
|
wlen - mip->mi_compoff) < slang->sl_compminlen) {
|
|
continue;
|
|
}
|
|
|
|
// Limit the number of compound words to COMPOUNDWORDMAX if no
|
|
// maximum for syllables is specified.
|
|
if (!word_ends && mip->mi_complen + mip->mi_compextra + 2
|
|
> slang->sl_compmax
|
|
&& slang->sl_compsylmax == MAXWLEN) {
|
|
continue;
|
|
}
|
|
|
|
// Don't allow compounding on a side where an affix was added,
|
|
// unless COMPOUNDPERMITFLAG was used.
|
|
if (mip->mi_complen > 0 && (flags & WF_NOCOMPBEF)) {
|
|
continue;
|
|
}
|
|
if (!word_ends && (flags & WF_NOCOMPAFT)) {
|
|
continue;
|
|
}
|
|
|
|
// Quickly check if compounding is possible with this flag.
|
|
if (!byte_in_str(mip->mi_complen == 0 ? slang->sl_compstartflags : slang->sl_compallflags,
|
|
(int)((unsigned)flags >> 24))) {
|
|
continue;
|
|
}
|
|
|
|
// If there is a match with a CHECKCOMPOUNDPATTERN rule
|
|
// discard the compound word.
|
|
if (match_checkcompoundpattern(ptr, wlen, &slang->sl_comppat)) {
|
|
continue;
|
|
}
|
|
|
|
if (mode == FIND_COMPOUND) {
|
|
int capflags;
|
|
|
|
// Need to check the caps type of the appended compound
|
|
// word.
|
|
if (STRNCMP(ptr, mip->mi_word, mip->mi_compoff) != 0) {
|
|
// case folding may have changed the length
|
|
p = mip->mi_word;
|
|
for (char_u *s = ptr; s < ptr + mip->mi_compoff; MB_PTR_ADV(s)) {
|
|
MB_PTR_ADV(p);
|
|
}
|
|
} else {
|
|
p = mip->mi_word + mip->mi_compoff;
|
|
}
|
|
capflags = captype(p, mip->mi_word + wlen);
|
|
if (capflags == WF_KEEPCAP || (capflags == WF_ALLCAP
|
|
&& (flags & WF_FIXCAP) != 0)) {
|
|
continue;
|
|
}
|
|
|
|
if (capflags != WF_ALLCAP) {
|
|
// When the character before the word is a word
|
|
// character we do not accept a Onecap word. We do
|
|
// accept a no-caps word, even when the dictionary
|
|
// word specifies ONECAP.
|
|
MB_PTR_BACK(mip->mi_word, p);
|
|
if (spell_iswordp_nmw(p, mip->mi_win)
|
|
? capflags == WF_ONECAP
|
|
: (flags & WF_ONECAP) != 0
|
|
&& capflags != WF_ONECAP) {
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
|
|
// If the word ends the sequence of compound flags of the
|
|
// words must match with one of the COMPOUNDRULE items and
|
|
// the number of syllables must not be too large.
|
|
mip->mi_compflags[mip->mi_complen] = (char_u)((unsigned)flags >> 24);
|
|
mip->mi_compflags[mip->mi_complen + 1] = NUL;
|
|
if (word_ends) {
|
|
char_u fword[MAXWLEN] = { 0 };
|
|
|
|
if (slang->sl_compsylmax < MAXWLEN) {
|
|
// "fword" is only needed for checking syllables.
|
|
if (ptr == mip->mi_word) {
|
|
(void)spell_casefold(mip->mi_win, ptr, wlen, fword, MAXWLEN);
|
|
} else {
|
|
STRLCPY(fword, ptr, endlen[endidxcnt] + 1);
|
|
}
|
|
}
|
|
if (!can_compound(slang, fword, mip->mi_compflags)) {
|
|
continue;
|
|
}
|
|
} else if (slang->sl_comprules != NULL
|
|
&& !match_compoundrule(slang, mip->mi_compflags)) {
|
|
// The compound flags collected so far do not match any
|
|
// COMPOUNDRULE, discard the compounded word.
|
|
continue;
|
|
}
|
|
} else if (flags & WF_NEEDCOMP) {
|
|
// skip if word is only valid in a compound
|
|
continue;
|
|
}
|
|
|
|
int nobreak_result = SP_OK;
|
|
|
|
if (!word_ends) {
|
|
int save_result = mip->mi_result;
|
|
char_u *save_end = mip->mi_end;
|
|
langp_T *save_lp = mip->mi_lp;
|
|
|
|
// Check that a valid word follows. If there is one and we
|
|
// are compounding, it will set "mi_result", thus we are
|
|
// always finished here. For NOBREAK we only check that a
|
|
// valid word follows.
|
|
// Recursive!
|
|
if (slang->sl_nobreak) {
|
|
mip->mi_result = SP_BAD;
|
|
}
|
|
|
|
// Find following word in case-folded tree.
|
|
mip->mi_compoff = endlen[endidxcnt];
|
|
if (mode == FIND_KEEPWORD) {
|
|
// Compute byte length in case-folded word from "wlen":
|
|
// byte length in keep-case word. Length may change when
|
|
// folding case. This can be slow, take a shortcut when
|
|
// the case-folded word is equal to the keep-case word.
|
|
p = mip->mi_fword;
|
|
if (STRNCMP(ptr, p, wlen) != 0) {
|
|
for (char_u *s = ptr; s < ptr + wlen; MB_PTR_ADV(s)) {
|
|
MB_PTR_ADV(p);
|
|
}
|
|
mip->mi_compoff = (int)(p - mip->mi_fword);
|
|
}
|
|
}
|
|
#if 0
|
|
c = mip->mi_compoff;
|
|
#endif
|
|
mip->mi_complen++;
|
|
if (flags & WF_COMPROOT) {
|
|
mip->mi_compextra++;
|
|
}
|
|
|
|
// For NOBREAK we need to try all NOBREAK languages, at least
|
|
// to find the ".add" file(s).
|
|
for (int lpi = 0; lpi < mip->mi_win->w_s->b_langp.ga_len; lpi++) {
|
|
if (slang->sl_nobreak) {
|
|
mip->mi_lp = LANGP_ENTRY(mip->mi_win->w_s->b_langp, lpi);
|
|
if (mip->mi_lp->lp_slang->sl_fidxs == NULL
|
|
|| !mip->mi_lp->lp_slang->sl_nobreak) {
|
|
continue;
|
|
}
|
|
}
|
|
|
|
find_word(mip, FIND_COMPOUND);
|
|
|
|
// When NOBREAK any word that matches is OK. Otherwise we
|
|
// need to find the longest match, thus try with keep-case
|
|
// and prefix too.
|
|
if (!slang->sl_nobreak || mip->mi_result == SP_BAD) {
|
|
// Find following word in keep-case tree.
|
|
mip->mi_compoff = wlen;
|
|
find_word(mip, FIND_KEEPCOMPOUND);
|
|
|
|
#if 0 // Disabled, a prefix must not appear halfway through a compound
|
|
// word, unless the COMPOUNDPERMITFLAG is used, in which case it
|
|
// can't be a postponed prefix.
|
|
if (!slang->sl_nobreak || mip->mi_result == SP_BAD) {
|
|
// Check for following word with prefix.
|
|
mip->mi_compoff = c;
|
|
find_prefix(mip, FIND_COMPOUND);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
if (!slang->sl_nobreak) {
|
|
break;
|
|
}
|
|
}
|
|
mip->mi_complen--;
|
|
if (flags & WF_COMPROOT) {
|
|
mip->mi_compextra--;
|
|
}
|
|
mip->mi_lp = save_lp;
|
|
|
|
if (slang->sl_nobreak) {
|
|
nobreak_result = mip->mi_result;
|
|
mip->mi_result = save_result;
|
|
mip->mi_end = save_end;
|
|
} else {
|
|
if (mip->mi_result == SP_OK) {
|
|
break;
|
|
}
|
|
continue;
|
|
}
|
|
}
|
|
|
|
int res = SP_BAD;
|
|
if (flags & WF_BANNED) {
|
|
res = SP_BANNED;
|
|
} else if (flags & WF_REGION) {
|
|
// Check region.
|
|
if (((unsigned)mip->mi_lp->lp_region & (flags >> 16)) != 0) {
|
|
res = SP_OK;
|
|
} else {
|
|
res = SP_LOCAL;
|
|
}
|
|
} else if (flags & WF_RARE) {
|
|
res = SP_RARE;
|
|
} else {
|
|
res = SP_OK;
|
|
}
|
|
|
|
// Always use the longest match and the best result. For NOBREAK
|
|
// we separately keep the longest match without a following good
|
|
// word as a fall-back.
|
|
if (nobreak_result == SP_BAD) {
|
|
if (mip->mi_result2 > res) {
|
|
mip->mi_result2 = res;
|
|
mip->mi_end2 = mip->mi_word + wlen;
|
|
} else if (mip->mi_result2 == res
|
|
&& mip->mi_end2 < mip->mi_word + wlen) {
|
|
mip->mi_end2 = mip->mi_word + wlen;
|
|
}
|
|
} else if (mip->mi_result > res) {
|
|
mip->mi_result = res;
|
|
mip->mi_end = mip->mi_word + wlen;
|
|
} else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen) {
|
|
mip->mi_end = mip->mi_word + wlen;
|
|
}
|
|
|
|
if (mip->mi_result == SP_OK) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (mip->mi_result == SP_OK) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Returns true if there is a match between the word ptr[wlen] and
|
|
/// CHECKCOMPOUNDPATTERN rules, assuming that we will concatenate with another
|
|
/// word.
|
|
/// A match means that the first part of CHECKCOMPOUNDPATTERN matches at the
|
|
/// end of ptr[wlen] and the second part matches after it.
|
|
///
|
|
/// @param gap &sl_comppat
|
|
bool match_checkcompoundpattern(char_u *ptr, int wlen, garray_T *gap)
|
|
{
|
|
for (int i = 0; i + 1 < gap->ga_len; i += 2) {
|
|
char *p = ((char **)gap->ga_data)[i + 1];
|
|
if (STRNCMP(ptr + wlen, p, strlen(p)) == 0) {
|
|
// Second part matches at start of following compound word, now
|
|
// check if first part matches at end of previous word.
|
|
p = ((char **)gap->ga_data)[i];
|
|
int len = (int)strlen(p);
|
|
if (len <= wlen && STRNCMP(ptr + wlen - len, p, len) == 0) {
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Returns true if "flags" is a valid sequence of compound flags and "word"
|
|
// does not have too many syllables.
|
|
bool can_compound(slang_T *slang, const char_u *word, const char_u *flags)
|
|
FUNC_ATTR_NONNULL_ALL
|
|
{
|
|
char_u uflags[MAXWLEN * 2] = { 0 };
|
|
|
|
if (slang->sl_compprog == NULL) {
|
|
return false;
|
|
}
|
|
// Need to convert the single byte flags to utf8 characters.
|
|
char_u *p = uflags;
|
|
for (int i = 0; flags[i] != NUL; i++) {
|
|
p += utf_char2bytes(flags[i], (char *)p);
|
|
}
|
|
*p = NUL;
|
|
p = uflags;
|
|
if (!vim_regexec_prog(&slang->sl_compprog, false, p, 0)) {
|
|
return false;
|
|
}
|
|
|
|
// Count the number of syllables. This may be slow, do it last. If there
|
|
// are too many syllables AND the number of compound words is above
|
|
// COMPOUNDWORDMAX then compounding is not allowed.
|
|
if (slang->sl_compsylmax < MAXWLEN
|
|
&& count_syllables(slang, word) > slang->sl_compsylmax) {
|
|
return (int)STRLEN(flags) < slang->sl_compmax;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Returns true if the compound flags in compflags[] match the start of any
|
|
// compound rule. This is used to stop trying a compound if the flags
|
|
// collected so far can't possibly match any compound rule.
|
|
// Caller must check that slang->sl_comprules is not NULL.
|
|
bool match_compoundrule(slang_T *slang, const char_u *compflags)
|
|
{
|
|
// loop over all the COMPOUNDRULE entries
|
|
for (char_u *p = slang->sl_comprules; *p != NUL; p++) {
|
|
// loop over the flags in the compound word we have made, match
|
|
// them against the current rule entry
|
|
for (int i = 0;; i++) {
|
|
int c = compflags[i];
|
|
if (c == NUL) {
|
|
// found a rule that matches for the flags we have so far
|
|
return true;
|
|
}
|
|
if (*p == '/' || *p == NUL) {
|
|
break; // end of rule, it's too short
|
|
}
|
|
if (*p == '[') {
|
|
bool match = false;
|
|
|
|
// compare against all the flags in []
|
|
p++;
|
|
while (*p != ']' && *p != NUL) {
|
|
if (*p++ == c) {
|
|
match = true;
|
|
}
|
|
}
|
|
if (!match) {
|
|
break; // none matches
|
|
}
|
|
} else if (*p != c) {
|
|
break; // flag of word doesn't match flag in pattern
|
|
}
|
|
p++;
|
|
}
|
|
|
|
// Skip to the next "/", where the next pattern starts.
|
|
p = (char_u *)vim_strchr((char *)p, '/');
|
|
if (p == NULL) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Checked all the rules and none of them match the flags, so there
|
|
// can't possibly be a compound starting with these flags.
|
|
return false;
|
|
}
|
|
|
|
/// Return non-zero if the prefix indicated by "arridx" matches with the prefix
|
|
/// ID in "flags" for the word "word".
|
|
/// The WF_RAREPFX flag is included in the return value for a rare prefix.
|
|
///
|
|
/// @param totprefcnt nr of prefix IDs
|
|
/// @param arridx idx in sl_pidxs[]
|
|
/// @param cond_req only use prefixes with a condition
|
|
int valid_word_prefix(int totprefcnt, int arridx, int flags, char_u *word, slang_T *slang,
|
|
bool cond_req)
|
|
{
|
|
int prefid = (int)((unsigned)flags >> 24);
|
|
for (int prefcnt = totprefcnt - 1; prefcnt >= 0; prefcnt--) {
|
|
int pidx = slang->sl_pidxs[arridx + prefcnt];
|
|
|
|
// Check the prefix ID.
|
|
if (prefid != (pidx & 0xff)) {
|
|
continue;
|
|
}
|
|
|
|
// Check if the prefix doesn't combine and the word already has a
|
|
// suffix.
|
|
if ((flags & WF_HAS_AFF) && (pidx & WF_PFX_NC)) {
|
|
continue;
|
|
}
|
|
|
|
// Check the condition, if there is one. The condition index is
|
|
// stored in the two bytes above the prefix ID byte.
|
|
regprog_T **rp = &slang->sl_prefprog[((unsigned)pidx >> 8) & 0xffff];
|
|
if (*rp != NULL) {
|
|
if (!vim_regexec_prog(rp, false, word, 0)) {
|
|
continue;
|
|
}
|
|
} else if (cond_req) {
|
|
continue;
|
|
}
|
|
|
|
// It's a match! Return the WF_ flags.
|
|
return pidx;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
// Check if the word at "mip->mi_word" has a matching prefix.
|
|
// If it does, then check the following word.
|
|
//
|
|
// If "mode" is "FIND_COMPOUND" then do the same after another word, find a
|
|
// prefix in a compound word.
|
|
//
|
|
// For a match mip->mi_result is updated.
|
|
static void find_prefix(matchinf_T *mip, int mode)
|
|
{
|
|
idx_T arridx = 0;
|
|
int wlen = 0;
|
|
slang_T *slang = mip->mi_lp->lp_slang;
|
|
|
|
char_u *byts = slang->sl_pbyts;
|
|
if (byts == NULL) {
|
|
return; // array is empty
|
|
}
|
|
// We use the case-folded word here, since prefixes are always
|
|
// case-folded.
|
|
char_u *ptr = mip->mi_fword;
|
|
int flen = mip->mi_fwordlen; // available case-folded bytes
|
|
if (mode == FIND_COMPOUND) {
|
|
// Skip over the previously found word(s).
|
|
ptr += mip->mi_compoff;
|
|
flen -= mip->mi_compoff;
|
|
}
|
|
idx_T *idxs = slang->sl_pidxs;
|
|
|
|
// Repeat advancing in the tree until:
|
|
// - there is a byte that doesn't match,
|
|
// - we reach the end of the tree,
|
|
// - or we reach the end of the line.
|
|
for (;;) {
|
|
if (flen == 0 && *mip->mi_fend != NUL) {
|
|
flen = fold_more(mip);
|
|
}
|
|
|
|
int len = byts[arridx++];
|
|
|
|
// If the first possible byte is a zero the prefix could end here.
|
|
// Check if the following word matches and supports the prefix.
|
|
if (byts[arridx] == 0) {
|
|
// There can be several prefixes with different conditions. We
|
|
// try them all, since we don't know which one will give the
|
|
// longest match. The word is the same each time, pass the list
|
|
// of possible prefixes to find_word().
|
|
mip->mi_prefarridx = arridx;
|
|
mip->mi_prefcnt = len;
|
|
while (len > 0 && byts[arridx] == 0) {
|
|
arridx++;
|
|
len--;
|
|
}
|
|
mip->mi_prefcnt -= len;
|
|
|
|
// Find the word that comes after the prefix.
|
|
mip->mi_prefixlen = wlen;
|
|
if (mode == FIND_COMPOUND) {
|
|
// Skip over the previously found word(s).
|
|
mip->mi_prefixlen += mip->mi_compoff;
|
|
}
|
|
|
|
// Case-folded length may differ from original length.
|
|
mip->mi_cprefixlen = nofold_len(mip->mi_fword, mip->mi_prefixlen,
|
|
mip->mi_word);
|
|
find_word(mip, FIND_PREFIX);
|
|
|
|
if (len == 0) {
|
|
break; // no children, word must end here
|
|
}
|
|
}
|
|
|
|
// Stop looking at end of the line.
|
|
if (ptr[wlen] == NUL) {
|
|
break;
|
|
}
|
|
|
|
// Perform a binary search in the list of accepted bytes.
|
|
int c = ptr[wlen];
|
|
idx_T lo = arridx;
|
|
idx_T hi = arridx + len - 1;
|
|
while (lo < hi) {
|
|
idx_T m = (lo + hi) / 2;
|
|
if (byts[m] > c) {
|
|
hi = m - 1;
|
|
} else if (byts[m] < c) {
|
|
lo = m + 1;
|
|
} else {
|
|
lo = hi = m;
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Stop if there is no matching byte.
|
|
if (hi < lo || byts[lo] != c) {
|
|
break;
|
|
}
|
|
|
|
// Continue at the child (if there is one).
|
|
arridx = idxs[lo];
|
|
wlen++;
|
|
flen--;
|
|
}
|
|
}
|
|
|
|
// Need to fold at least one more character. Do until next non-word character
|
|
// for efficiency. Include the non-word character too.
|
|
// Return the length of the folded chars in bytes.
|
|
static int fold_more(matchinf_T *mip)
|
|
{
|
|
char_u *p = mip->mi_fend;
|
|
do {
|
|
MB_PTR_ADV(mip->mi_fend);
|
|
} while (*mip->mi_fend != NUL && spell_iswordp(mip->mi_fend, mip->mi_win));
|
|
|
|
// Include the non-word character so that we can check for the word end.
|
|
if (*mip->mi_fend != NUL) {
|
|
MB_PTR_ADV(mip->mi_fend);
|
|
}
|
|
|
|
(void)spell_casefold(mip->mi_win, p, (int)(mip->mi_fend - p),
|
|
mip->mi_fword + mip->mi_fwordlen,
|
|
MAXWLEN - mip->mi_fwordlen);
|
|
int flen = (int)STRLEN(mip->mi_fword + mip->mi_fwordlen);
|
|
mip->mi_fwordlen += flen;
|
|
return flen;
|
|
}
|
|
|
|
/// Checks case flags for a word. Returns true, if the word has the requested
|
|
/// case.
|
|
///
|
|
/// @param wordflags Flags for the checked word.
|
|
/// @param treeflags Flags for the word in the spell tree.
|
|
bool spell_valid_case(int wordflags, int treeflags)
|
|
{
|
|
return (wordflags == WF_ALLCAP && (treeflags & WF_FIXCAP) == 0)
|
|
|| ((treeflags & (WF_ALLCAP | WF_KEEPCAP)) == 0
|
|
&& ((treeflags & WF_ONECAP) == 0
|
|
|| (wordflags & WF_ONECAP) != 0));
|
|
}
|
|
|
|
// Returns true if spell checking is not enabled.
|
|
bool no_spell_checking(win_T *wp)
|
|
{
|
|
if (!wp->w_p_spell || *wp->w_s->b_p_spl == NUL
|
|
|| GA_EMPTY(&wp->w_s->b_langp)) {
|
|
emsg(_(e_no_spell));
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/// Start spell navigation in window "wp" with an empty global decor_state:
/// zero it and reset it for the buffer of "wp".
static void decor_spell_nav_start(win_T *wp)
{
  const DecorState cleared = { 0 };

  decor_state = cleared;
  decor_redraw_reset(wp->w_buffer, &decor_state);
}
|
|
|
|
/// Update the global decor_state for column "col" of line "lnum" in "wp".
///
/// The decoration providers are only invoked when "lnum" differs from
/// "*decor_lnum", which is then set to "lnum".
///
/// @param[in,out] decor_lnum   line the providers were last invoked for
/// @param[out]    decor_error  passed on to decor_providers_invoke_spell()
///
/// @return  true when decor_state.spell is kTrue at this position.
static bool decor_spell_nav_col(win_T *wp, linenr_T lnum, linenr_T *decor_lnum, int col,
                                char **decor_error)
{
  if (*decor_lnum != lnum) {
    // The decoration functions are passed "lnum - 1" as the row.
    const linenr_T row = lnum - 1;

    decor_providers_invoke_spell(wp, row, col, row, -1, decor_error);
    decor_redraw_line(wp->w_buffer, row, &decor_state);
    *decor_lnum = lnum;
  }

  decor_redraw_col(wp->w_buffer, col, col, false, &decor_state);
  return decor_state.spell == kTrue;
}
|
|
|
|
/// Ask syn_get_id() whether the text at line "lnum", column "col" in window
/// "wp" is to be spell checked.  The syntax ID itself is not used.
static inline bool can_syn_spell(win_T *wp, linenr_T lnum, int col)
{
  bool spell_here;

  (void)syn_get_id(wp, lnum, col, false, &spell_here, false);
  return spell_here;
}
|
|
|
|
/// Moves to the next spell error.
|
|
/// "curline" is false for "[s", "]s", "[S" and "]S".
|
|
/// "curline" is true to find word under/after cursor in the same line.
|
|
/// For Insert mode completion "dir" is BACKWARD and "curline" is true: move
|
|
/// to after badly spelled word before the cursor.
|
|
///
|
|
/// @param dir FORWARD or BACKWARD
|
|
/// @param allwords true for "[s"/"]s", false for "[S"/"]S"
|
|
/// @param attrp return: attributes of bad word or NULL (only when "dir" is FORWARD)
|
|
///
|
|
/// @return 0 if not found, length of the badly spelled word otherwise.
|
|
size_t spell_move_to(win_T *wp, int dir, bool allwords, bool curline, hlf_T *attrp)
|
|
{
|
|
pos_T found_pos;
|
|
size_t found_len = 0;
|
|
hlf_T attr = HLF_COUNT;
|
|
size_t len;
|
|
int has_syntax = syntax_present(wp);
|
|
colnr_T col;
|
|
char *buf = NULL;
|
|
size_t buflen = 0;
|
|
int skip = 0;
|
|
colnr_T capcol = -1;
|
|
bool found_one = false;
|
|
bool wrapped = false;
|
|
|
|
if (no_spell_checking(wp)) {
|
|
return 0;
|
|
}
|
|
|
|
size_t ret = 0;
|
|
|
|
// Start looking for bad word at the start of the line, because we can't
|
|
// start halfway through a word, we don't know where it starts or ends.
|
|
//
|
|
// When searching backwards, we continue in the line to find the last
|
|
// bad word (in the cursor line: before the cursor).
|
|
//
|
|
// We concatenate the start of the next line, so that wrapped words work
|
|
// (e.g. "et<line-break>cetera"). Doesn't work when searching backwards
|
|
// though...
|
|
linenr_T lnum = wp->w_cursor.lnum;
|
|
clearpos(&found_pos);
|
|
|
|
char *decor_error = NULL;
|
|
// Ephemeral extmarks are currently stored in the global decor_state.
|
|
// When looking for spell errors, we need to:
|
|
// - temporarily reset decor_state
|
|
// - run the _on_spell_nav decor callback for each line we look at
|
|
// - detect if any spell marks are present
|
|
// - restore decor_state to the value saved here.
|
|
// TODO(lewis6991): un-globalize decor_state and allow ephemeral marks to be stored into a
|
|
// temporary DecorState.
|
|
DecorState saved_decor_start = decor_state;
|
|
linenr_T decor_lnum = -1;
|
|
decor_spell_nav_start(wp);
|
|
|
|
while (!got_int) {
|
|
char *line = ml_get_buf(wp->w_buffer, lnum, false);
|
|
|
|
len = strlen(line);
|
|
if (buflen < len + MAXWLEN + 2) {
|
|
xfree(buf);
|
|
buflen = len + MAXWLEN + 2;
|
|
buf = xmalloc(buflen);
|
|
}
|
|
assert(buf && buflen >= len + MAXWLEN + 2);
|
|
|
|
// In first line check first word for Capital.
|
|
if (lnum == 1) {
|
|
capcol = 0;
|
|
}
|
|
|
|
// For checking first word with a capital skip white space.
|
|
if (capcol == 0) {
|
|
capcol = (colnr_T)getwhitecols(line);
|
|
} else if (curline && wp == curwin) {
|
|
// For spellbadword(): check if first word needs a capital.
|
|
col = (colnr_T)getwhitecols(line);
|
|
if (check_need_cap(lnum, col)) {
|
|
capcol = col;
|
|
}
|
|
|
|
// Need to get the line again, may have looked at the previous
|
|
// one.
|
|
line = ml_get_buf(wp->w_buffer, lnum, false);
|
|
}
|
|
|
|
// Copy the line into "buf" and append the start of the next line if
|
|
// possible. Note: this ml_get_buf() may make "line" invalid, check
|
|
// for empty line first.
|
|
bool empty_line = *skipwhite((const char *)line) == NUL;
|
|
STRCPY(buf, line);
|
|
if (lnum < wp->w_buffer->b_ml.ml_line_count) {
|
|
spell_cat_line((char_u *)buf + strlen(buf),
|
|
(char_u *)ml_get_buf(wp->w_buffer, lnum + 1, false),
|
|
MAXWLEN);
|
|
}
|
|
char *p = buf + skip;
|
|
char *endp = buf + len;
|
|
while (p < endp) {
|
|
// When searching backward don't search after the cursor. Unless
|
|
// we wrapped around the end of the buffer.
|
|
if (dir == BACKWARD
|
|
&& lnum == wp->w_cursor.lnum
|
|
&& !wrapped
|
|
&& (colnr_T)(p - buf) >= wp->w_cursor.col) {
|
|
break;
|
|
}
|
|
|
|
// start of word
|
|
attr = HLF_COUNT;
|
|
len = spell_check(wp, (char_u *)p, &attr, &capcol, false);
|
|
|
|
if (attr != HLF_COUNT) {
|
|
// We found a bad word. Check the attribute.
|
|
if (allwords || attr == HLF_SPB) {
|
|
// When searching forward only accept a bad word after
|
|
// the cursor.
|
|
if (dir == BACKWARD
|
|
|| lnum != wp->w_cursor.lnum
|
|
|| wrapped
|
|
|| ((colnr_T)(curline
|
|
? p - buf + (ptrdiff_t)len
|
|
: p - buf) > wp->w_cursor.col)) {
|
|
col = (colnr_T)(p - buf);
|
|
|
|
bool can_spell = (!has_syntax && (wp->w_s->b_p_spo_flags & SPO_NPBUFFER) == 0)
|
|
|| decor_spell_nav_col(wp, lnum, &decor_lnum, col, &decor_error)
|
|
|| (has_syntax && can_syn_spell(wp, lnum, col));
|
|
|
|
if (!can_spell) {
|
|
attr = HLF_COUNT;
|
|
}
|
|
|
|
if (can_spell) {
|
|
found_one = true;
|
|
found_pos = (pos_T) {
|
|
.lnum = lnum,
|
|
.col = col,
|
|
.coladd = 0
|
|
};
|
|
if (dir == FORWARD) {
|
|
// No need to search further.
|
|
wp->w_cursor = found_pos;
|
|
if (attrp != NULL) {
|
|
*attrp = attr;
|
|
}
|
|
ret = len;
|
|
goto theend;
|
|
} else if (curline) {
|
|
// Insert mode completion: put cursor after
|
|
// the bad word.
|
|
assert(len <= INT_MAX);
|
|
found_pos.col += (int)len;
|
|
}
|
|
found_len = len;
|
|
}
|
|
} else {
|
|
found_one = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
// advance to character after the word
|
|
p += len;
|
|
assert(len <= INT_MAX);
|
|
capcol -= (int)len;
|
|
}
|
|
|
|
if (dir == BACKWARD && found_pos.lnum != 0) {
|
|
// Use the last match in the line (before the cursor).
|
|
wp->w_cursor = found_pos;
|
|
ret = found_len;
|
|
goto theend;
|
|
}
|
|
|
|
if (curline) {
|
|
break; // only check cursor line
|
|
}
|
|
|
|
// If we are back at the starting line and searched it again there
|
|
// is no match, give up.
|
|
if (lnum == wp->w_cursor.lnum && wrapped) {
|
|
break;
|
|
}
|
|
|
|
// Advance to next line.
|
|
if (dir == BACKWARD) {
|
|
if (lnum > 1) {
|
|
lnum--;
|
|
} else if (!p_ws) {
|
|
break; // at first line and 'nowrapscan'
|
|
} else {
|
|
// Wrap around to the end of the buffer. May search the
|
|
// starting line again and accept the last match.
|
|
lnum = wp->w_buffer->b_ml.ml_line_count;
|
|
wrapped = true;
|
|
if (!shortmess(SHM_SEARCH)) {
|
|
give_warning(_(top_bot_msg), true);
|
|
}
|
|
}
|
|
capcol = -1;
|
|
} else {
|
|
if (lnum < wp->w_buffer->b_ml.ml_line_count) {
|
|
lnum++;
|
|
} else if (!p_ws) {
|
|
break; // at first line and 'nowrapscan'
|
|
} else {
|
|
// Wrap around to the start of the buffer. May search the
|
|
// starting line again and accept the first match.
|
|
lnum = 1;
|
|
wrapped = true;
|
|
if (!shortmess(SHM_SEARCH)) {
|
|
give_warning(_(bot_top_msg), true);
|
|
}
|
|
}
|
|
|
|
// If we are back at the starting line and there is no match then
|
|
// give up.
|
|
if (lnum == wp->w_cursor.lnum && !found_one) {
|
|
break;
|
|
}
|
|
|
|
// Skip the characters at the start of the next line that were
|
|
// included in a match crossing line boundaries.
|
|
if (attr == HLF_COUNT) {
|
|
skip = (int)(p - endp);
|
|
} else {
|
|
skip = 0;
|
|
}
|
|
|
|
// Capcol skips over the inserted space.
|
|
capcol--;
|
|
|
|
// But after empty line check first word in next line
|
|
if (empty_line) {
|
|
capcol = 0;
|
|
}
|
|
}
|
|
|
|
line_breakcheck();
|
|
}
|
|
|
|
theend:
|
|
decor_state_free(&decor_state);
|
|
xfree(decor_error);
|
|
decor_state = saved_decor_start;
|
|
xfree(buf);
|
|
return ret;
|
|
}
|
|
|
|
// For spell checking: concatenate the start of the following line "line" into
|
|
// "buf", blanking-out special characters. Copy less than "maxlen" bytes.
|
|
// Keep the blanks at the start of the next line, this is used in win_line()
|
|
// to skip those bytes if the word was OK.
|
|
void spell_cat_line(char_u *buf, char_u *line, int maxlen)
|
|
{
|
|
char_u *p = (char_u *)skipwhite((char *)line);
|
|
while (vim_strchr("*#/\"\t", *p) != NULL) {
|
|
p = (char_u *)skipwhite((char *)p + 1);
|
|
}
|
|
|
|
if (*p != NUL) {
|
|
// Only worth concatenating if there is something else than spaces to
|
|
// concatenate.
|
|
int n = (int)(p - line) + 1;
|
|
if (n < maxlen - 1) {
|
|
memset(buf, ' ', (size_t)n);
|
|
STRLCPY(buf + n, p, maxlen - n);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Load word list(s) for "lang" from Vim spell file(s).
|
|
// "lang" must be the language without the region: e.g., "en".
|
|
static void spell_load_lang(char_u *lang)
|
|
{
|
|
char fname_enc[85];
|
|
int r;
|
|
spelload_T sl;
|
|
|
|
// Copy the language name to pass it to spell_load_cb() as a cookie.
|
|
// It's truncated when an error is detected.
|
|
STRCPY(sl.sl_lang, lang);
|
|
sl.sl_slang = NULL;
|
|
sl.sl_nobreak = false;
|
|
|
|
// Disallow deleting the current buffer. Autocommands can do weird things
|
|
// and cause "lang" to be freed.
|
|
curbuf->b_locked++;
|
|
|
|
// We may retry when no spell file is found for the language, an
|
|
// autocommand may load it then.
|
|
for (int round = 1; round <= 2; round++) {
|
|
// Find the first spell file for "lang" in 'runtimepath' and load it.
|
|
vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
|
|
"spell/%s.%s.spl", lang, spell_enc());
|
|
r = do_in_runtimepath((char *)fname_enc, 0, spell_load_cb, &sl);
|
|
|
|
if (r == FAIL && *sl.sl_lang != NUL) {
|
|
// Try loading the ASCII version.
|
|
vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
|
|
"spell/%s.ascii.spl", lang);
|
|
r = do_in_runtimepath((char *)fname_enc, 0, spell_load_cb, &sl);
|
|
|
|
if (r == FAIL && *sl.sl_lang != NUL && round == 1
|
|
&& apply_autocmds(EVENT_SPELLFILEMISSING, (char *)lang,
|
|
curbuf->b_fname, false, curbuf)) {
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
|
|
if (r == FAIL) {
|
|
if (starting) {
|
|
// Prompt the user at VimEnter if spell files are missing. #3027
|
|
// Plugins aren't loaded yet, so spellfile.vim cannot handle this case.
|
|
char autocmd_buf[512] = { 0 };
|
|
snprintf(autocmd_buf, sizeof(autocmd_buf),
|
|
"autocmd VimEnter * call spellfile#LoadFile('%s')|set spell",
|
|
lang);
|
|
do_cmdline_cmd(autocmd_buf);
|
|
} else {
|
|
smsg(_("Warning: Cannot find word list \"%s.%s.spl\" or \"%s.ascii.spl\""),
|
|
lang, spell_enc(), lang);
|
|
}
|
|
} else if (sl.sl_slang != NULL) {
|
|
// At least one file was loaded, now load ALL the additions.
|
|
STRCPY(fname_enc + strlen(fname_enc) - 3, "add.spl");
|
|
do_in_runtimepath((char *)fname_enc, DIP_ALL, spell_load_cb, &sl);
|
|
}
|
|
|
|
curbuf->b_locked--;
|
|
}
|
|
|
|
// Return the encoding used for spell checking: Use 'encoding', except that we
|
|
// use "latin1" for "latin9". And limit to 60 characters (just in case).
|
|
char_u *spell_enc(void)
|
|
{
|
|
if (strlen(p_enc) < 60 && strcmp(p_enc, "iso-8859-15") != 0) {
|
|
return (char_u *)p_enc;
|
|
}
|
|
return (char_u *)"latin1";
|
|
}
|
|
|
|
// Get the name of the .spl file for the internal wordlist into
|
|
// "fname[MAXPATHL]".
|
|
static void int_wordlist_spl(char_u *fname)
|
|
{
|
|
vim_snprintf((char *)fname, MAXPATHL, SPL_FNAME_TMPL,
|
|
int_wordlist, spell_enc());
|
|
}
|
|
|
|
/// Allocate a new slang_T for language "lang". "lang" can be NULL.
|
|
/// Caller must fill "sl_next".
|
|
slang_T *slang_alloc(char *lang)
|
|
FUNC_ATTR_NONNULL_RET
|
|
{
|
|
slang_T *lp = xcalloc(1, sizeof(slang_T));
|
|
|
|
if (lang != NULL) {
|
|
lp->sl_name = xstrdup(lang);
|
|
}
|
|
ga_init(&lp->sl_rep, sizeof(fromto_T), 10);
|
|
ga_init(&lp->sl_repsal, sizeof(fromto_T), 10);
|
|
lp->sl_compmax = MAXWLEN;
|
|
lp->sl_compsylmax = MAXWLEN;
|
|
hash_init(&lp->sl_wordcount);
|
|
|
|
return lp;
|
|
}
|
|
|
|
// Free the contents of an slang_T and the structure itself.
|
|
void slang_free(slang_T *lp)
|
|
{
|
|
xfree(lp->sl_name);
|
|
xfree(lp->sl_fname);
|
|
slang_clear(lp);
|
|
xfree(lp);
|
|
}
|
|
|
|
/// Frees a salitem_T
|
|
static void free_salitem(salitem_T *smp)
|
|
{
|
|
xfree(smp->sm_lead);
|
|
// Don't free sm_oneof and sm_rules, they point into sm_lead.
|
|
xfree(smp->sm_to);
|
|
xfree(smp->sm_lead_w);
|
|
xfree(smp->sm_oneof_w);
|
|
xfree(smp->sm_to_w);
|
|
}
|
|
|
|
/// Frees a fromto_T
|
|
static void free_fromto(fromto_T *ftp)
|
|
{
|
|
xfree(ftp->ft_from);
|
|
xfree(ftp->ft_to);
|
|
}
|
|
|
|
// Clear an slang_T so that the file can be reloaded.
|
|
void slang_clear(slang_T *lp)
|
|
{
|
|
garray_T *gap;
|
|
|
|
XFREE_CLEAR(lp->sl_fbyts);
|
|
XFREE_CLEAR(lp->sl_kbyts);
|
|
XFREE_CLEAR(lp->sl_pbyts);
|
|
|
|
XFREE_CLEAR(lp->sl_fidxs);
|
|
XFREE_CLEAR(lp->sl_kidxs);
|
|
XFREE_CLEAR(lp->sl_pidxs);
|
|
|
|
GA_DEEP_CLEAR(&lp->sl_rep, fromto_T, free_fromto);
|
|
GA_DEEP_CLEAR(&lp->sl_repsal, fromto_T, free_fromto);
|
|
|
|
gap = &lp->sl_sal;
|
|
if (lp->sl_sofo) {
|
|
// "ga_len" is set to 1 without adding an item for latin1
|
|
GA_DEEP_CLEAR_PTR(gap);
|
|
} else {
|
|
// SAL items: free salitem_T items
|
|
GA_DEEP_CLEAR(gap, salitem_T, free_salitem);
|
|
}
|
|
|
|
for (int i = 0; i < lp->sl_prefixcnt; i++) {
|
|
vim_regfree(lp->sl_prefprog[i]);
|
|
}
|
|
lp->sl_prefixcnt = 0;
|
|
XFREE_CLEAR(lp->sl_prefprog);
|
|
XFREE_CLEAR(lp->sl_info);
|
|
XFREE_CLEAR(lp->sl_midword);
|
|
|
|
vim_regfree(lp->sl_compprog);
|
|
lp->sl_compprog = NULL;
|
|
XFREE_CLEAR(lp->sl_comprules);
|
|
XFREE_CLEAR(lp->sl_compstartflags);
|
|
XFREE_CLEAR(lp->sl_compallflags);
|
|
|
|
XFREE_CLEAR(lp->sl_syllable);
|
|
ga_clear(&lp->sl_syl_items);
|
|
|
|
ga_clear_strings(&lp->sl_comppat);
|
|
|
|
hash_clear_all(&lp->sl_wordcount, WC_KEY_OFF);
|
|
hash_init(&lp->sl_wordcount);
|
|
|
|
hash_clear_all(&lp->sl_map_hash, 0);
|
|
|
|
// Clear info from .sug file.
|
|
slang_clear_sug(lp);
|
|
|
|
lp->sl_compmax = MAXWLEN;
|
|
lp->sl_compminlen = 0;
|
|
lp->sl_compsylmax = MAXWLEN;
|
|
lp->sl_regions[0] = NUL;
|
|
}
|
|
|
|
// Clear the info from the .sug file in "lp".
|
|
void slang_clear_sug(slang_T *lp)
|
|
{
|
|
XFREE_CLEAR(lp->sl_sbyts);
|
|
XFREE_CLEAR(lp->sl_sidxs);
|
|
close_spellbuf(lp->sl_sugbuf);
|
|
lp->sl_sugbuf = NULL;
|
|
lp->sl_sugloaded = false;
|
|
lp->sl_sugtime = 0;
|
|
}
|
|
|
|
// Load one spell file and store the info into a slang_T.
|
|
// Invoked through do_in_runtimepath().
|
|
static void spell_load_cb(char *fname, void *cookie)
|
|
{
|
|
spelload_T *slp = (spelload_T *)cookie;
|
|
slang_T *slang = spell_load_file(fname, (char *)slp->sl_lang, NULL, false);
|
|
if (slang != NULL) {
|
|
// When a previously loaded file has NOBREAK also use it for the
|
|
// ".add" files.
|
|
if (slp->sl_nobreak && slang->sl_add) {
|
|
slang->sl_nobreak = true;
|
|
} else if (slang->sl_nobreak) {
|
|
slp->sl_nobreak = true;
|
|
}
|
|
|
|
slp->sl_slang = slang;
|
|
}
|
|
}
|
|
|
|
/// Add a word to the hashtable of common words.
|
|
/// If it's already there then the counter is increased.
|
|
///
|
|
/// @param[in] lp
|
|
/// @param[in] word added to common words hashtable
|
|
/// @param[in] len length of word or -1 for NUL terminated
|
|
/// @param[in] count 1 to count once, 10 to init
|
|
void count_common_word(slang_T *lp, char *word, int len, uint8_t count)
|
|
{
|
|
char buf[MAXWLEN];
|
|
char *p;
|
|
|
|
if (len == -1) {
|
|
p = word;
|
|
} else if (len >= MAXWLEN) {
|
|
return;
|
|
} else {
|
|
STRLCPY(buf, word, len + 1);
|
|
p = buf;
|
|
}
|
|
|
|
wordcount_T *wc;
|
|
hash_T hash = hash_hash((char_u *)p);
|
|
const size_t p_len = strlen(p);
|
|
hashitem_T *hi = hash_lookup(&lp->sl_wordcount, (const char *)p, p_len, hash);
|
|
if (HASHITEM_EMPTY(hi)) {
|
|
wc = xmalloc(sizeof(wordcount_T) + p_len);
|
|
memcpy(wc->wc_word, p, p_len + 1);
|
|
wc->wc_count = count;
|
|
hash_add_item(&lp->sl_wordcount, hi, wc->wc_word, hash);
|
|
} else {
|
|
wc = HI2WC(hi);
|
|
wc->wc_count = (uint16_t)(wc->wc_count + count);
|
|
if (wc->wc_count < count) { // check for overflow
|
|
wc->wc_count = MAXWORDCOUNT;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Returns true if byte "n" appears in "str".
|
|
// Like strchr() but independent of locale.
|
|
bool byte_in_str(char_u *str, int n)
|
|
{
|
|
for (char_u *p = str; *p != NUL; p++) {
|
|
if (*p == n) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Truncate "slang->sl_syllable" at the first slash and put the following items
|
|
// in "slang->sl_syl_items".
|
|
int init_syl_tab(slang_T *slang)
|
|
{
|
|
ga_init(&slang->sl_syl_items, sizeof(syl_item_T), 4);
|
|
char *p = vim_strchr((char *)slang->sl_syllable, '/');
|
|
while (p != NULL) {
|
|
*p++ = NUL;
|
|
if (*p == NUL) { // trailing slash
|
|
break;
|
|
}
|
|
char *s = p;
|
|
p = vim_strchr(p, '/');
|
|
int l;
|
|
if (p == NULL) {
|
|
l = (int)strlen(s);
|
|
} else {
|
|
l = (int)(p - s);
|
|
}
|
|
if (l >= SY_MAXLEN) {
|
|
return SP_FORMERROR;
|
|
}
|
|
|
|
syl_item_T *syl = GA_APPEND_VIA_PTR(syl_item_T, &slang->sl_syl_items);
|
|
STRLCPY(syl->sy_chars, s, l + 1);
|
|
syl->sy_len = l;
|
|
}
|
|
return OK;
|
|
}
|
|
|
|
// Count the number of syllables in "word".
|
|
// When "word" contains spaces the syllables after the last space are counted.
|
|
// Returns zero if syllables are not defines.
|
|
static int count_syllables(slang_T *slang, const char_u *word)
|
|
FUNC_ATTR_NONNULL_ALL
|
|
{
|
|
int cnt = 0;
|
|
bool skip = false;
|
|
int len;
|
|
|
|
if (slang->sl_syllable == NULL) {
|
|
return 0;
|
|
}
|
|
|
|
for (const char_u *p = word; *p != NUL; p += len) {
|
|
// When running into a space reset counter.
|
|
if (*p == ' ') {
|
|
len = 1;
|
|
cnt = 0;
|
|
continue;
|
|
}
|
|
|
|
// Find longest match of syllable items.
|
|
len = 0;
|
|
for (int i = 0; i < slang->sl_syl_items.ga_len; i++) {
|
|
syl_item_T *syl = ((syl_item_T *)slang->sl_syl_items.ga_data) + i;
|
|
if (syl->sy_len > len
|
|
&& STRNCMP(p, syl->sy_chars, syl->sy_len) == 0) {
|
|
len = syl->sy_len;
|
|
}
|
|
}
|
|
if (len != 0) { // found a match, count syllable
|
|
cnt++;
|
|
skip = false;
|
|
} else {
|
|
// No recognized syllable item, at least a syllable char then?
|
|
int c = utf_ptr2char((char *)p);
|
|
len = utfc_ptr2len((char *)p);
|
|
if (vim_strchr((char *)slang->sl_syllable, c) == NULL) {
|
|
skip = false; // No, search for next syllable
|
|
} else if (!skip) {
|
|
cnt++; // Yes, count it
|
|
skip = true; // don't count following syllable chars
|
|
}
|
|
}
|
|
}
|
|
return cnt;
|
|
}
|
|
|
|
/// Parse 'spelllang' and set w_s->b_langp accordingly.
|
|
/// @return NULL if it's OK, an untranslated error message otherwise.
|
|
char *did_set_spelllang(win_T *wp)
|
|
{
|
|
garray_T ga;
|
|
char *splp;
|
|
char *region;
|
|
char region_cp[3];
|
|
bool filename;
|
|
int region_mask;
|
|
slang_T *slang;
|
|
int c;
|
|
char lang[MAXWLEN + 1];
|
|
char spf_name[MAXPATHL];
|
|
int len;
|
|
char *p;
|
|
int round;
|
|
char *spf;
|
|
char *use_region = NULL;
|
|
bool dont_use_region = false;
|
|
bool nobreak = false;
|
|
langp_T *lp, *lp2;
|
|
static bool recursive = false;
|
|
char *ret_msg = NULL;
|
|
char *spl_copy;
|
|
|
|
bufref_T bufref;
|
|
set_bufref(&bufref, wp->w_buffer);
|
|
|
|
// We don't want to do this recursively. May happen when a language is
|
|
// not available and the SpellFileMissing autocommand opens a new buffer
|
|
// in which 'spell' is set.
|
|
if (recursive) {
|
|
return NULL;
|
|
}
|
|
recursive = true;
|
|
|
|
ga_init(&ga, sizeof(langp_T), 2);
|
|
clear_midword(wp);
|
|
|
|
// Make a copy of 'spelllang', the SpellFileMissing autocommands may change
|
|
// it under our fingers.
|
|
spl_copy = xstrdup(wp->w_s->b_p_spl);
|
|
|
|
wp->w_s->b_cjk = 0;
|
|
|
|
// Loop over comma separated language names.
|
|
for (splp = spl_copy; *splp != NUL;) {
|
|
// Get one language name.
|
|
copy_option_part(&splp, (char *)lang, MAXWLEN, ",");
|
|
region = NULL;
|
|
len = (int)strlen(lang);
|
|
|
|
if (!valid_spelllang((char *)lang)) {
|
|
continue;
|
|
}
|
|
|
|
if (strcmp(lang, "cjk") == 0) {
|
|
wp->w_s->b_cjk = 1;
|
|
continue;
|
|
}
|
|
|
|
// If the name ends in ".spl" use it as the name of the spell file.
|
|
// If there is a region name let "region" point to it and remove it
|
|
// from the name.
|
|
if (len > 4 && path_fnamecmp(lang + len - 4, ".spl") == 0) {
|
|
filename = true;
|
|
|
|
// Locate a region and remove it from the file name.
|
|
p = vim_strchr(path_tail((char *)lang), '_');
|
|
if (p != NULL && ASCII_ISALPHA(p[1]) && ASCII_ISALPHA(p[2])
|
|
&& !ASCII_ISALPHA(p[3])) {
|
|
STRLCPY(region_cp, p + 1, 3);
|
|
memmove(p, p + 3, (size_t)(len - (p - lang) - 2));
|
|
region = region_cp;
|
|
} else {
|
|
dont_use_region = true;
|
|
}
|
|
|
|
// Check if we loaded this language before.
|
|
for (slang = first_lang; slang != NULL; slang = slang->sl_next) {
|
|
if (path_full_compare((char *)lang, slang->sl_fname, false, true)
|
|
== kEqualFiles) {
|
|
break;
|
|
}
|
|
}
|
|
} else {
|
|
filename = false;
|
|
if (len > 3 && lang[len - 3] == '_') {
|
|
region = lang + len - 2;
|
|
lang[len - 3] = NUL;
|
|
} else {
|
|
dont_use_region = true;
|
|
}
|
|
|
|
// Check if we loaded this language before.
|
|
for (slang = first_lang; slang != NULL; slang = slang->sl_next) {
|
|
if (STRICMP(lang, slang->sl_name) == 0) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (region != NULL) {
|
|
// If the region differs from what was used before then don't
|
|
// use it for 'spellfile'.
|
|
if (use_region != NULL && strcmp(region, use_region) != 0) {
|
|
dont_use_region = true;
|
|
}
|
|
use_region = region;
|
|
}
|
|
|
|
// If not found try loading the language now.
|
|
if (slang == NULL) {
|
|
if (filename) {
|
|
(void)spell_load_file((char *)lang, (char *)lang, NULL, false);
|
|
} else {
|
|
spell_load_lang((char_u *)lang);
|
|
// SpellFileMissing autocommands may do anything, including
|
|
// destroying the buffer we are using or closing the window.
|
|
if (!bufref_valid(&bufref) || !win_valid_any_tab(wp)) {
|
|
ret_msg = N_("E797: SpellFileMissing autocommand deleted buffer");
|
|
goto theend;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Loop over the languages, there can be several files for "lang".
|
|
for (slang = first_lang; slang != NULL; slang = slang->sl_next) {
|
|
if (filename
|
|
? path_full_compare((char *)lang, slang->sl_fname, false, true) == kEqualFiles
|
|
: STRICMP(lang, slang->sl_name) == 0) {
|
|
region_mask = REGION_ALL;
|
|
if (!filename && region != NULL) {
|
|
// find region in sl_regions
|
|
c = find_region(slang->sl_regions, (char_u *)region);
|
|
if (c == REGION_ALL) {
|
|
if (slang->sl_add) {
|
|
if (*slang->sl_regions != NUL) {
|
|
// This addition file is for other regions.
|
|
region_mask = 0;
|
|
}
|
|
} else {
|
|
// This is probably an error. Give a warning and
|
|
// accept the words anyway.
|
|
smsg(_("Warning: region %s not supported"),
|
|
region);
|
|
}
|
|
} else {
|
|
region_mask = 1 << c;
|
|
}
|
|
}
|
|
|
|
if (region_mask != 0) {
|
|
langp_T *p_ = GA_APPEND_VIA_PTR(langp_T, &ga);
|
|
p_->lp_slang = slang;
|
|
p_->lp_region = region_mask;
|
|
|
|
use_midword(slang, wp);
|
|
if (slang->sl_nobreak) {
|
|
nobreak = true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// round 0: load int_wordlist, if possible.
|
|
// round 1: load first name in 'spellfile'.
|
|
// round 2: load second name in 'spellfile.
|
|
// etc.
|
|
spf = curwin->w_s->b_p_spf;
|
|
for (round = 0; round == 0 || *spf != NUL; round++) {
|
|
if (round == 0) {
|
|
// Internal wordlist, if there is one.
|
|
if (int_wordlist == NULL) {
|
|
continue;
|
|
}
|
|
int_wordlist_spl((char_u *)spf_name);
|
|
} else {
|
|
// One entry in 'spellfile'.
|
|
copy_option_part(&spf, (char *)spf_name, MAXPATHL - 5, ",");
|
|
STRCAT(spf_name, ".spl");
|
|
|
|
// If it was already found above then skip it.
|
|
for (c = 0; c < ga.ga_len; c++) {
|
|
p = LANGP_ENTRY(ga, c)->lp_slang->sl_fname;
|
|
if (p != NULL
|
|
&& path_full_compare((char *)spf_name, p, false, true) == kEqualFiles) {
|
|
break;
|
|
}
|
|
}
|
|
if (c < ga.ga_len) {
|
|
continue;
|
|
}
|
|
}
|
|
|
|
// Check if it was loaded already.
|
|
for (slang = first_lang; slang != NULL; slang = slang->sl_next) {
|
|
if (path_full_compare((char *)spf_name, slang->sl_fname, false, true)
|
|
== kEqualFiles) {
|
|
break;
|
|
}
|
|
}
|
|
if (slang == NULL) {
|
|
// Not loaded, try loading it now. The language name includes the
|
|
// region name, the region is ignored otherwise. for int_wordlist
|
|
// use an arbitrary name.
|
|
if (round == 0) {
|
|
STRCPY(lang, "internal wordlist");
|
|
} else {
|
|
STRLCPY(lang, path_tail((char *)spf_name), MAXWLEN + 1);
|
|
p = vim_strchr((char *)lang, '.');
|
|
if (p != NULL) {
|
|
*p = NUL; // truncate at ".encoding.add"
|
|
}
|
|
}
|
|
slang = spell_load_file((char *)spf_name, (char *)lang, NULL, true);
|
|
|
|
// If one of the languages has NOBREAK we assume the addition
|
|
// files also have this.
|
|
if (slang != NULL && nobreak) {
|
|
slang->sl_nobreak = true;
|
|
}
|
|
}
|
|
if (slang != NULL) {
|
|
region_mask = REGION_ALL;
|
|
if (use_region != NULL && !dont_use_region) {
|
|
// find region in sl_regions
|
|
c = find_region(slang->sl_regions, (char_u *)use_region);
|
|
if (c != REGION_ALL) {
|
|
region_mask = 1 << c;
|
|
} else if (*slang->sl_regions != NUL) {
|
|
// This spell file is for other regions.
|
|
region_mask = 0;
|
|
}
|
|
}
|
|
|
|
if (region_mask != 0) {
|
|
langp_T *p_ = GA_APPEND_VIA_PTR(langp_T, &ga);
|
|
p_->lp_slang = slang;
|
|
p_->lp_sallang = NULL;
|
|
p_->lp_replang = NULL;
|
|
p_->lp_region = region_mask;
|
|
|
|
use_midword(slang, wp);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Everything is fine, store the new b_langp value.
|
|
ga_clear(&wp->w_s->b_langp);
|
|
wp->w_s->b_langp = ga;
|
|
|
|
// For each language figure out what language to use for sound folding and
|
|
// REP items. If the language doesn't support it itself use another one
|
|
// with the same name. E.g. for "en-math" use "en".
|
|
for (int i = 0; i < ga.ga_len; i++) {
|
|
lp = LANGP_ENTRY(ga, i);
|
|
|
|
// sound folding
|
|
if (!GA_EMPTY(&lp->lp_slang->sl_sal)) {
|
|
// language does sound folding itself
|
|
lp->lp_sallang = lp->lp_slang;
|
|
} else {
|
|
// find first similar language that does sound folding
|
|
for (int j = 0; j < ga.ga_len; j++) {
|
|
lp2 = LANGP_ENTRY(ga, j);
|
|
if (!GA_EMPTY(&lp2->lp_slang->sl_sal)
|
|
&& STRNCMP(lp->lp_slang->sl_name,
|
|
lp2->lp_slang->sl_name, 2) == 0) {
|
|
lp->lp_sallang = lp2->lp_slang;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// REP items
|
|
if (!GA_EMPTY(&lp->lp_slang->sl_rep)) {
|
|
// language has REP items itself
|
|
lp->lp_replang = lp->lp_slang;
|
|
} else {
|
|
// find first similar language that has REP items
|
|
for (int j = 0; j < ga.ga_len; j++) {
|
|
lp2 = LANGP_ENTRY(ga, j);
|
|
if (!GA_EMPTY(&lp2->lp_slang->sl_rep)
|
|
&& STRNCMP(lp->lp_slang->sl_name,
|
|
lp2->lp_slang->sl_name, 2) == 0) {
|
|
lp->lp_replang = lp2->lp_slang;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
redraw_later(wp, UPD_NOT_VALID);
|
|
|
|
theend:
|
|
xfree(spl_copy);
|
|
recursive = false;
|
|
return ret_msg;
|
|
}
|
|
|
|
// Clear the midword characters for buffer "buf".
|
|
static void clear_midword(win_T *wp)
|
|
{
|
|
CLEAR_FIELD(wp->w_s->b_spell_ismw);
|
|
XFREE_CLEAR(wp->w_s->b_spell_ismw_mb);
|
|
}
|
|
|
|
/// Use the "sl_midword" field of language "lp" for buffer "buf".
|
|
/// They add up to any currently used midword characters.
|
|
static void use_midword(slang_T *lp, win_T *wp)
|
|
FUNC_ATTR_NONNULL_ALL
|
|
{
|
|
if (lp->sl_midword == NULL) { // there aren't any
|
|
return;
|
|
}
|
|
|
|
for (char *p = (char *)lp->sl_midword; *p != NUL;) {
|
|
const int c = utf_ptr2char(p);
|
|
const int l = utfc_ptr2len(p);
|
|
if (c < 256 && l <= 2) {
|
|
wp->w_s->b_spell_ismw[c] = true;
|
|
} else if (wp->w_s->b_spell_ismw_mb == NULL) {
|
|
// First multi-byte char in "b_spell_ismw_mb".
|
|
wp->w_s->b_spell_ismw_mb = xstrnsave(p, (size_t)l);
|
|
} else {
|
|
// Append multi-byte chars to "b_spell_ismw_mb".
|
|
const int n = (int)strlen(wp->w_s->b_spell_ismw_mb);
|
|
char *bp = xstrnsave(wp->w_s->b_spell_ismw_mb, (size_t)n + (size_t)l);
|
|
xfree(wp->w_s->b_spell_ismw_mb);
|
|
wp->w_s->b_spell_ismw_mb = bp;
|
|
STRLCPY(bp + n, p, l + 1);
|
|
}
|
|
p += l;
|
|
}
|
|
}
|
|
|
|
// Find the region "region[2]" in "rp" (points to "sl_regions").
|
|
// Each region is simply stored as the two characters of its name.
|
|
// Returns the index if found (first is 0), REGION_ALL if not found.
|
|
static int find_region(const char_u *rp, const char_u *region)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0;; i += 2) {
|
|
if (rp[i] == NUL) {
|
|
return REGION_ALL;
|
|
}
|
|
if (rp[i] == region[0] && rp[i + 1] == region[1]) {
|
|
break;
|
|
}
|
|
}
|
|
return i / 2;
|
|
}
|
|
|
|
/// Return case type of word:
|
|
/// w word 0
|
|
/// Word WF_ONECAP
|
|
/// W WORD WF_ALLCAP
|
|
/// WoRd wOrd WF_KEEPCAP
|
|
///
|
|
/// @param[in] word
|
|
/// @param[in] end End of word or NULL for NUL delimited string
|
|
///
|
|
/// @returns Case type of word
|
|
int captype(char_u *word, const char_u *end)
|
|
FUNC_ATTR_NONNULL_ARG(1)
|
|
{
|
|
char_u *p;
|
|
|
|
// find first letter
|
|
for (p = word; !spell_iswordp_nmw(p, curwin); MB_PTR_ADV(p)) {
|
|
if (end == NULL ? *p == NUL : p >= end) {
|
|
return 0; // only non-word characters, illegal word
|
|
}
|
|
}
|
|
int c = mb_ptr2char_adv((const char_u **)&p);
|
|
bool allcap;
|
|
bool firstcap = allcap = SPELL_ISUPPER(c);
|
|
bool past_second = false; // past second word char
|
|
|
|
// Need to check all letters to find a word with mixed upper/lower.
|
|
// But a word with an upper char only at start is a ONECAP.
|
|
for (; end == NULL ? *p != NUL : p < end; MB_PTR_ADV(p)) {
|
|
if (spell_iswordp_nmw(p, curwin)) {
|
|
c = utf_ptr2char((char *)p);
|
|
if (!SPELL_ISUPPER(c)) {
|
|
// UUl -> KEEPCAP
|
|
if (past_second && allcap) {
|
|
return WF_KEEPCAP;
|
|
}
|
|
allcap = false;
|
|
} else if (!allcap) {
|
|
// UlU -> KEEPCAP
|
|
return WF_KEEPCAP;
|
|
}
|
|
past_second = true;
|
|
}
|
|
}
|
|
|
|
if (allcap) {
|
|
return WF_ALLCAP;
|
|
}
|
|
if (firstcap) {
|
|
return WF_ONECAP;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
// Delete the internal wordlist and its .spl file.
|
|
void spell_delete_wordlist(void)
|
|
{
|
|
if (int_wordlist != NULL) {
|
|
char_u fname[MAXPATHL] = { 0 };
|
|
os_remove((char *)int_wordlist);
|
|
int_wordlist_spl(fname);
|
|
os_remove((char *)fname);
|
|
XFREE_CLEAR(int_wordlist);
|
|
}
|
|
}
|
|
|
|
// Free all languages.
|
|
void spell_free_all(void)
|
|
{
|
|
// Go through all buffers and handle 'spelllang'. <VN>
|
|
FOR_ALL_BUFFERS(buf) {
|
|
ga_clear(&buf->b_s.b_langp);
|
|
}
|
|
|
|
while (first_lang != NULL) {
|
|
slang_T *slang = first_lang;
|
|
first_lang = slang->sl_next;
|
|
slang_free(slang);
|
|
}
|
|
|
|
spell_delete_wordlist();
|
|
|
|
XFREE_CLEAR(repl_to);
|
|
XFREE_CLEAR(repl_from);
|
|
}
|
|
|
|
// Clear all spelling tables and reload them.
|
|
// Used after 'encoding' is set and when ":mkspell" was used.
|
|
void spell_reload(void)
|
|
{
|
|
// Initialize the table for spell_iswordp().
|
|
init_spell_chartab();
|
|
|
|
// Unload all allocated memory.
|
|
spell_free_all();
|
|
|
|
// Go through all buffers and handle 'spelllang'.
|
|
FOR_ALL_WINDOWS_IN_TAB(wp, curtab) {
|
|
// Only load the wordlists when 'spelllang' is set and there is a
|
|
// window for this buffer in which 'spell' is set.
|
|
if (*wp->w_s->b_p_spl != NUL) {
|
|
if (wp->w_p_spell) {
|
|
(void)did_set_spelllang(wp);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Open a spell buffer. This is a nameless buffer that is not in the buffer
|
|
// list and only contains text lines. Can use a swapfile to reduce memory
|
|
// use.
|
|
// Most other fields are invalid! Esp. watch out for string options being
|
|
// NULL and there is no undo info.
|
|
buf_T *open_spellbuf(void)
|
|
{
|
|
buf_T *buf = xcalloc(1, sizeof(buf_T));
|
|
|
|
buf->b_spell = true;
|
|
buf->b_p_swf = true; // may create a swap file
|
|
if (ml_open(buf) == FAIL) {
|
|
ELOG("Error opening a new memline");
|
|
}
|
|
ml_open_file(buf); // create swap file now
|
|
|
|
return buf;
|
|
}
|
|
|
|
// Close the buffer used for spell info.
|
|
void close_spellbuf(buf_T *buf)
|
|
{
|
|
if (buf != NULL) {
|
|
ml_close(buf, true);
|
|
xfree(buf);
|
|
}
|
|
}
|
|
|
|
// Init the chartab used for spelling for ASCII.
|
|
void clear_spell_chartab(spelltab_T *sp)
|
|
{
|
|
// Init everything to false (zero).
|
|
CLEAR_FIELD(sp->st_isw);
|
|
CLEAR_FIELD(sp->st_isu);
|
|
|
|
for (int i = 0; i < 256; i++) {
|
|
sp->st_fold[i] = (char_u)i;
|
|
sp->st_upper[i] = (char_u)i;
|
|
}
|
|
|
|
// We include digits. A word shouldn't start with a digit, but handling
|
|
// that is done separately.
|
|
for (int i = '0'; i <= '9'; i++) {
|
|
sp->st_isw[i] = true;
|
|
}
|
|
for (int i = 'A'; i <= 'Z'; i++) {
|
|
sp->st_isw[i] = true;
|
|
sp->st_isu[i] = true;
|
|
sp->st_fold[i] = (char_u)(i + 0x20);
|
|
}
|
|
for (int i = 'a'; i <= 'z'; i++) {
|
|
sp->st_isw[i] = true;
|
|
sp->st_upper[i] = (char_u)(i - 0x20);
|
|
}
|
|
}
|
|
|
|
// Init the chartab used for spelling. Called once while starting up.
|
|
// The default is to use isalpha(), but the spell file should define the word
|
|
// characters to make it possible that 'encoding' differs from the current
|
|
// locale. For utf-8 we don't use isalpha() but our own functions.
|
|
void init_spell_chartab(void)
|
|
{
|
|
did_set_spelltab = false;
|
|
clear_spell_chartab(&spelltab);
|
|
for (int i = 128; i < 256; i++) {
|
|
int f = utf_fold(i);
|
|
int u = mb_toupper(i);
|
|
|
|
spelltab.st_isu[i] = mb_isupper(i);
|
|
spelltab.st_isw[i] = spelltab.st_isu[i] || mb_islower(i);
|
|
// The folded/upper-cased value is different between latin1 and
|
|
// utf8 for 0xb5, causing E763 for no good reason. Use the latin1
|
|
// value for utf-8 to avoid this.
|
|
spelltab.st_fold[i] = (f < 256) ? (char_u)f : (char_u)i;
|
|
spelltab.st_upper[i] = (u < 256) ? (char_u)u : (char_u)i;
|
|
}
|
|
}
|
|
|
|
/// Returns true if "p" points to a word character.
|
|
/// As a special case we see "midword" characters as word character when it is
|
|
/// followed by a word character. This finds they'there but not 'they there'.
|
|
/// Thus this only works properly when past the first character of the word.
|
|
///
|
|
/// @param wp Buffer used.
|
|
bool spell_iswordp(const char_u *p, const win_T *wp)
|
|
FUNC_ATTR_NONNULL_ALL
|
|
{
|
|
const int l = utfc_ptr2len((char *)p);
|
|
const char_u *s = p;
|
|
if (l == 1) {
|
|
// be quick for ASCII
|
|
if (wp->w_s->b_spell_ismw[*p]) {
|
|
s = p + 1; // skip a mid-word character
|
|
}
|
|
} else {
|
|
int c = utf_ptr2char((char *)p);
|
|
if (c < 256
|
|
? wp->w_s->b_spell_ismw[c]
|
|
: (wp->w_s->b_spell_ismw_mb != NULL
|
|
&& vim_strchr(wp->w_s->b_spell_ismw_mb, c) != NULL)) {
|
|
s = p + l;
|
|
}
|
|
}
|
|
|
|
int c = utf_ptr2char((char *)s);
|
|
if (c > 255) {
|
|
return spell_mb_isword_class(mb_get_class(s), wp);
|
|
}
|
|
return spelltab.st_isw[c];
|
|
}
|
|
|
|
// Returns true if "p" points to a word character.
|
|
// Unlike spell_iswordp() this doesn't check for "midword" characters.
|
|
bool spell_iswordp_nmw(const char_u *p, win_T *wp)
|
|
{
|
|
int c = utf_ptr2char((char *)p);
|
|
if (c > 255) {
|
|
return spell_mb_isword_class(mb_get_class(p), wp);
|
|
}
|
|
return spelltab.st_isw[c];
|
|
}
|
|
|
|
// Returns true if word class indicates a word character.
|
|
// Only for characters above 255.
|
|
// Unicode subscript and superscript are not considered word characters.
|
|
// See also utf_class() in mbyte.c.
|
|
static bool spell_mb_isword_class(int cl, const win_T *wp)
|
|
FUNC_ATTR_PURE FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT
|
|
{
|
|
if (wp->w_s->b_cjk) {
|
|
// East Asian characters are not considered word characters.
|
|
return cl == 2 || cl == 0x2800;
|
|
}
|
|
return cl >= 2 && cl != 0x2070 && cl != 0x2080 && cl != 3;
|
|
}
|
|
|
|
// Returns true if "p" points to a word character.
|
|
// Wide version of spell_iswordp().
|
|
static bool spell_iswordp_w(const int *p, const win_T *wp)
|
|
FUNC_ATTR_NONNULL_ALL
|
|
{
|
|
const int *s;
|
|
|
|
if (*p <
|
|
256 ? wp->w_s->b_spell_ismw[*p] : (wp->w_s->b_spell_ismw_mb != NULL
|
|
&& vim_strchr(wp->w_s->b_spell_ismw_mb,
|
|
*p) != NULL)) {
|
|
s = p + 1;
|
|
} else {
|
|
s = p;
|
|
}
|
|
|
|
if (*s > 255) {
|
|
return spell_mb_isword_class(utf_class(*s), wp);
|
|
}
|
|
return spelltab.st_isw[*s];
|
|
}
|
|
|
|
// Case-fold "str[len]" into "buf[buflen]". The result is NUL terminated.
|
|
// Uses the character definitions from the .spl file.
|
|
// When using a multi-byte 'encoding' the length may change!
|
|
// Returns FAIL when something wrong.
|
|
int spell_casefold(const win_T *wp, char_u *str, int len, char_u *buf, int buflen)
|
|
FUNC_ATTR_NONNULL_ALL
|
|
{
|
|
if (len >= buflen) {
|
|
buf[0] = NUL;
|
|
return FAIL; // result will not fit
|
|
}
|
|
|
|
int outi = 0;
|
|
|
|
// Fold one character at a time.
|
|
for (char_u *p = str; p < str + len;) {
|
|
if (outi + MB_MAXBYTES > buflen) {
|
|
buf[outi] = NUL;
|
|
return FAIL;
|
|
}
|
|
int c = mb_cptr2char_adv((const char_u **)&p);
|
|
|
|
// Exception: greek capital sigma 0x03A3 folds to 0x03C3, except
|
|
// when it is the last character in a word, then it folds to
|
|
// 0x03C2.
|
|
if (c == 0x03a3 || c == 0x03c2) {
|
|
if (p == str + len || !spell_iswordp(p, wp)) {
|
|
c = 0x03c2;
|
|
} else {
|
|
c = 0x03c3;
|
|
}
|
|
} else {
|
|
c = SPELL_TOFOLD(c);
|
|
}
|
|
|
|
outi += utf_char2bytes(c, (char *)buf + outi);
|
|
}
|
|
buf[outi] = NUL;
|
|
|
|
return OK;
|
|
}
|
|
|
|
// Check if the word at line "lnum" column "col" is required to start with a
|
|
// capital. This uses 'spellcapcheck' of the current buffer.
|
|
bool check_need_cap(linenr_T lnum, colnr_T col)
|
|
{
|
|
bool need_cap = false;
|
|
|
|
if (curwin->w_s->b_cap_prog == NULL) {
|
|
return false;
|
|
}
|
|
|
|
char *line = get_cursor_line_ptr();
|
|
char *line_copy = NULL;
|
|
colnr_T endcol = 0;
|
|
if (getwhitecols(line) >= (int)col) {
|
|
// At start of line, check if previous line is empty or sentence
|
|
// ends there.
|
|
if (lnum == 1) {
|
|
need_cap = true;
|
|
} else {
|
|
line = ml_get(lnum - 1);
|
|
if (*skipwhite(line) == NUL) {
|
|
need_cap = true;
|
|
} else {
|
|
// Append a space in place of the line break.
|
|
line_copy = concat_str(line, " ");
|
|
line = line_copy;
|
|
endcol = (colnr_T)strlen(line);
|
|
}
|
|
}
|
|
} else {
|
|
endcol = col;
|
|
}
|
|
|
|
if (endcol > 0) {
|
|
// Check if sentence ends before the bad word.
|
|
regmatch_T regmatch = {
|
|
.regprog = curwin->w_s->b_cap_prog,
|
|
.rm_ic = false
|
|
};
|
|
char *p = line + endcol;
|
|
for (;;) {
|
|
MB_PTR_BACK(line, p);
|
|
if (p == line || spell_iswordp_nmw((char_u *)p, curwin)) {
|
|
break;
|
|
}
|
|
if (vim_regexec(®match, p, 0)
|
|
&& regmatch.endp[0] == line + endcol) {
|
|
need_cap = true;
|
|
break;
|
|
}
|
|
}
|
|
curwin->w_s->b_cap_prog = regmatch.regprog;
|
|
}
|
|
|
|
xfree(line_copy);
|
|
|
|
return need_cap;
|
|
}
|
|
|
|
// ":spellrepall"
|
|
/// Repeat the last spell replacement ("repl_from" -> "repl_to") for every
/// match in the current buffer and report the number of substitutions.
///
/// @param eap  Ex command arguments (not used here)
void ex_spellrepall(exarg_T *eap)
{
  const pos_T saved_cursor = curwin->w_cursor;
  const bool saved_ws = p_ws;

  if (repl_from == NULL || repl_to == NULL) {
    emsg(_("E752: No previous spell replacement"));
    return;
  }

  const size_t from_len = strlen(repl_from);
  const size_t to_len = strlen(repl_to);
  const int addlen = (int)(to_len - from_len);

  // Build the search pattern "\V\<{from}\>": 6 pattern bytes plus the NUL.
  const size_t frompatlen = from_len + 7;
  char_u *frompat = xmalloc(frompatlen);
  snprintf((char *)frompat, frompatlen, "\\V\\<%s\\>", repl_from);

  // Search from the top of the buffer without wrapping around.
  p_ws = false;
  sub_nsubs = 0;
  sub_nlines = 0;
  curwin->w_cursor.lnum = 0;

  linenr_T prev_lnum = 0;
  for (;;) {
    if (got_int) {
      break;
    }
    if (do_search(NULL, '/', '/', frompat, 1L, SEARCH_KEEP, NULL) == 0
        || u_save_cursor() == FAIL) {
      break;
    }

    // Only replace when the right word isn't there yet.  This happens
    // when changing "etc" to "etc.".
    char *line = get_cursor_line_ptr();
    const bool already_done = addlen > 0
                              && STRNCMP(line + curwin->w_cursor.col, repl_to, to_len) == 0;
    if (!already_done) {
      // Assemble: text before the match + replacement + text after the match.
      char_u *newline = xmalloc(STRLEN(line) + (size_t)addlen + 1);
      memmove(newline, line, (size_t)curwin->w_cursor.col);
      STRCPY(newline + curwin->w_cursor.col, repl_to);
      STRCAT(newline, line + curwin->w_cursor.col + from_len);
      ml_replace(curwin->w_cursor.lnum, (char *)newline, false);
      changed_bytes(curwin->w_cursor.lnum, curwin->w_cursor.col);

      // Count each changed line only once.
      if (prev_lnum != curwin->w_cursor.lnum) {
        sub_nlines++;
        prev_lnum = curwin->w_cursor.lnum;
      }
      sub_nsubs++;
    }
    // Continue searching after the replacement text.
    curwin->w_cursor.col += (colnr_T)to_len;
  }

  p_ws = saved_ws;
  curwin->w_cursor = saved_cursor;
  xfree(frompat);

  if (sub_nsubs != 0) {
    do_sub_msg(false);
  } else {
    semsg(_("E753: Not found: %s"), repl_from);
  }
}
|
|
|
|
/// Make a copy of "word", with the first letter upper or lower cased, to
|
|
/// "wcopy[MAXWLEN]". "word" must not be empty.
|
|
/// The result is NUL terminated.
|
|
///
|
|
/// @param[in] word source string to copy
|
|
/// @param[in,out] wcopy copied string, with case of first letter changed
|
|
/// @param[in] upper True to upper case, otherwise lower case
|
|
void onecap_copy(char_u *word, char_u *wcopy, bool upper)
|
|
{
|
|
char_u *p = word;
|
|
int c = mb_cptr2char_adv((const char_u **)&p);
|
|
if (upper) {
|
|
c = SPELL_TOUPPER(c);
|
|
} else {
|
|
c = SPELL_TOFOLD(c);
|
|
}
|
|
int l = utf_char2bytes(c, (char *)wcopy);
|
|
STRLCPY(wcopy + l, p, MAXWLEN - l);
|
|
}
|
|
|
|
// Make a copy of "word" with all the letters upper cased into
|
|
// "wcopy[MAXWLEN]". The result is NUL terminated.
|
|
void allcap_copy(char_u *word, char_u *wcopy)
|
|
{
|
|
char_u *d = wcopy;
|
|
for (char_u *s = word; *s != NUL;) {
|
|
int c = mb_cptr2char_adv((const char_u **)&s);
|
|
|
|
if (c == 0xdf) {
|
|
c = 'S';
|
|
if (d - wcopy >= MAXWLEN - 1) {
|
|
break;
|
|
}
|
|
*d++ = (char_u)c;
|
|
} else {
|
|
c = SPELL_TOUPPER(c);
|
|
}
|
|
|
|
if (d - wcopy >= MAXWLEN - MB_MAXBYTES) {
|
|
break;
|
|
}
|
|
d += utf_char2bytes(c, (char *)d);
|
|
}
|
|
*d = NUL;
|
|
}
|
|
|
|
// Case-folding may change the number of bytes: Count nr of chars in
|
|
// fword[flen] and return the byte length of that many chars in "word".
|
|
int nofold_len(char_u *fword, int flen, char_u *word)
|
|
{
|
|
char_u *p;
|
|
int i = 0;
|
|
|
|
for (p = fword; p < fword + flen; MB_PTR_ADV(p)) {
|
|
i++;
|
|
}
|
|
for (p = word; i > 0; MB_PTR_ADV(p)) {
|
|
i--;
|
|
}
|
|
return (int)(p - word);
|
|
}
|
|
|
|
// Copy "fword" to "cword", fixing case according to "flags".
|
|
void make_case_word(char_u *fword, char_u *cword, int flags)
|
|
{
|
|
if (flags & WF_ALLCAP) {
|
|
// Make it all upper-case
|
|
allcap_copy(fword, cword);
|
|
} else if (flags & WF_ONECAP) {
|
|
// Make the first letter upper-case
|
|
onecap_copy(fword, cword, true);
|
|
} else {
|
|
// Use goodword as-is.
|
|
STRCPY(cword, fword);
|
|
}
|
|
}
|
|
|
|
/// Soundfold a string, for soundfold()
|
|
///
|
|
/// @param[in] word Word to soundfold.
|
|
///
|
|
/// @return [allocated] soundfolded string or NULL in case of error. May return
|
|
/// copy of the input string if soundfolding is not
|
|
/// supported by any of the languages in &spellang.
|
|
char *eval_soundfold(const char *const word)
|
|
FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_MALLOC FUNC_ATTR_NONNULL_ALL
|
|
{
|
|
if (curwin->w_p_spell && *curwin->w_s->b_p_spl != NUL) {
|
|
// Use the sound-folding of the first language that supports it.
|
|
for (int lpi = 0; lpi < curwin->w_s->b_langp.ga_len; lpi++) {
|
|
langp_T *const lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
|
|
if (!GA_EMPTY(&lp->lp_slang->sl_sal)) {
|
|
// soundfold the word
|
|
char_u sound[MAXWLEN];
|
|
spell_soundfold(lp->lp_slang, (char_u *)word, false, sound);
|
|
return xstrdup((const char *)sound);
|
|
}
|
|
}
|
|
}
|
|
|
|
// No language with sound folding, return word as-is.
|
|
return xstrdup(word);
|
|
}
|
|
|
|
/// Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
|
|
///
|
|
/// There are many ways to turn a word into a sound-a-like representation. The
|
|
/// oldest is Soundex (1918!). A nice overview can be found in "Approximate
|
|
/// swedish name matching - survey and test of different algorithms" by Klas
|
|
/// Erikson.
|
|
///
|
|
/// We support two methods:
|
|
/// 1. SOFOFROM/SOFOTO do a simple character mapping.
|
|
/// 2. SAL items define a more advanced sound-folding (and much slower).
|
|
///
|
|
/// @param[in] slang
|
|
/// @param[in] inword word to soundfold
|
|
/// @param[in] folded whether inword is already case-folded
|
|
/// @param[in,out] res destination for soundfolded word
|
|
void spell_soundfold(slang_T *slang, char_u *inword, bool folded, char_u *res)
|
|
{
|
|
if (slang->sl_sofo) {
|
|
// SOFOFROM and SOFOTO used
|
|
spell_soundfold_sofo(slang, inword, res);
|
|
} else {
|
|
char_u fword[MAXWLEN];
|
|
char_u *word;
|
|
// SAL items used. Requires the word to be case-folded.
|
|
if (folded) {
|
|
word = inword;
|
|
} else {
|
|
(void)spell_casefold(curwin, inword, (int)STRLEN(inword), fword, MAXWLEN);
|
|
word = fword;
|
|
}
|
|
|
|
spell_soundfold_wsal(slang, word, res);
|
|
}
|
|
}
|
|
|
|
// Perform sound folding of "inword" into "res" according to SOFOFROM and
|
|
// SOFOTO lines.
|
|
static void spell_soundfold_sofo(slang_T *slang, char_u *inword, char_u *res)
|
|
{
|
|
int ri = 0;
|
|
|
|
int prevc = 0;
|
|
|
|
// The sl_sal_first[] table contains the translation for chars up to
|
|
// 255, sl_sal the rest.
|
|
for (char_u *s = inword; *s != NUL;) {
|
|
int c = mb_cptr2char_adv((const char_u **)&s);
|
|
if (utf_class(c) == 0) {
|
|
c = ' ';
|
|
} else if (c < 256) {
|
|
c = slang->sl_sal_first[c];
|
|
} else {
|
|
int *ip = ((int **)slang->sl_sal.ga_data)[c & 0xff];
|
|
if (ip == NULL) { // empty list, can't match
|
|
c = NUL;
|
|
} else {
|
|
for (;;) { // find "c" in the list
|
|
if (*ip == 0) { // not found
|
|
c = NUL;
|
|
break;
|
|
}
|
|
if (*ip == c) { // match!
|
|
c = ip[1];
|
|
break;
|
|
}
|
|
ip += 2;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (c != NUL && c != prevc) {
|
|
ri += utf_char2bytes(c, (char *)res + ri);
|
|
if (ri + MB_MAXBYTES > MAXWLEN) {
|
|
break;
|
|
}
|
|
prevc = c;
|
|
}
|
|
}
|
|
|
|
res[ri] = NUL;
|
|
}
|
|
|
|
// Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
|
|
// Multi-byte version of spell_soundfold().
|
|
static void spell_soundfold_wsal(slang_T *slang, const char_u *inword, char_u *res)
|
|
{
|
|
salitem_T *smp = (salitem_T *)slang->sl_sal.ga_data;
|
|
int word[MAXWLEN] = { 0 };
|
|
int wres[MAXWLEN] = { 0 };
|
|
int *ws;
|
|
int *pf;
|
|
int j, z;
|
|
int reslen;
|
|
int k = 0;
|
|
int z0;
|
|
int k0;
|
|
int n0;
|
|
int pri;
|
|
int p0 = -333;
|
|
int c0;
|
|
bool did_white = false;
|
|
|
|
// Convert the multi-byte string to a wide-character string.
|
|
// Remove accents, if wanted. We actually remove all non-word characters.
|
|
// But keep white space.
|
|
int wordlen = 0;
|
|
for (const char_u *s = inword; *s != NUL;) {
|
|
const char_u *t = s;
|
|
int c = mb_cptr2char_adv(&s);
|
|
if (slang->sl_rem_accents) {
|
|
if (utf_class(c) == 0) {
|
|
if (did_white) {
|
|
continue;
|
|
}
|
|
c = ' ';
|
|
did_white = true;
|
|
} else {
|
|
did_white = false;
|
|
if (!spell_iswordp_nmw(t, curwin)) {
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
word[wordlen++] = c;
|
|
}
|
|
word[wordlen] = NUL;
|
|
|
|
int c;
|
|
// This algorithm comes from Aspell phonet.cpp.
|
|
// Converted from C++ to C. Added support for multi-byte chars.
|
|
// Changed to keep spaces.
|
|
int i = reslen = z = 0;
|
|
while ((c = word[i]) != NUL) {
|
|
// Start with the first rule that has the character in the word.
|
|
int n = slang->sl_sal_first[c & 0xff];
|
|
z0 = 0;
|
|
|
|
if (n >= 0) {
|
|
// Check all rules for the same index byte.
|
|
// If c is 0x300 need extra check for the end of the array, as
|
|
// (c & 0xff) is NUL.
|
|
for (; ((ws = smp[n].sm_lead_w)[0] & 0xff) == (c & 0xff)
|
|
&& ws[0] != NUL; n++) {
|
|
// Quickly skip entries that don't match the word. Most
|
|
// entries are less than three chars, optimize for that.
|
|
if (c != ws[0]) {
|
|
continue;
|
|
}
|
|
k = smp[n].sm_leadlen;
|
|
if (k > 1) {
|
|
if (word[i + 1] != ws[1]) {
|
|
continue;
|
|
}
|
|
if (k > 2) {
|
|
for (j = 2; j < k; j++) {
|
|
if (word[i + j] != ws[j]) {
|
|
break;
|
|
}
|
|
}
|
|
if (j < k) {
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
|
|
if ((pf = smp[n].sm_oneof_w) != NULL) {
|
|
// Check for match with one of the chars in "sm_oneof".
|
|
while (*pf != NUL && *pf != word[i + k]) {
|
|
pf++;
|
|
}
|
|
if (*pf == NUL) {
|
|
continue;
|
|
}
|
|
k++;
|
|
}
|
|
char_u *s = smp[n].sm_rules;
|
|
pri = 5; // default priority
|
|
|
|
p0 = *s;
|
|
k0 = k;
|
|
while (*s == '-' && k > 1) {
|
|
k--;
|
|
s++;
|
|
}
|
|
if (*s == '<') {
|
|
s++;
|
|
}
|
|
if (ascii_isdigit(*s)) {
|
|
// determine priority
|
|
pri = *s - '0';
|
|
s++;
|
|
}
|
|
if (*s == '^' && *(s + 1) == '^') {
|
|
s++;
|
|
}
|
|
|
|
if (*s == NUL
|
|
|| (*s == '^'
|
|
&& (i == 0 || !(word[i - 1] == ' '
|
|
|| spell_iswordp_w(word + i - 1, curwin)))
|
|
&& (*(s + 1) != '$'
|
|
|| (!spell_iswordp_w(word + i + k0, curwin))))
|
|
|| (*s == '$' && i > 0
|
|
&& spell_iswordp_w(word + i - 1, curwin)
|
|
&& (!spell_iswordp_w(word + i + k0, curwin)))) {
|
|
// search for followup rules, if:
|
|
// followup and k > 1 and NO '-' in searchstring
|
|
c0 = word[i + k - 1];
|
|
n0 = slang->sl_sal_first[c0 & 0xff];
|
|
|
|
if (slang->sl_followup && k > 1 && n0 >= 0
|
|
&& p0 != '-' && word[i + k] != NUL) {
|
|
// Test follow-up rule for "word[i + k]"; loop over
|
|
// all entries with the same index byte.
|
|
for (; ((ws = smp[n0].sm_lead_w)[0] & 0xff)
|
|
== (c0 & 0xff); n0++) {
|
|
// Quickly skip entries that don't match the word.
|
|
if (c0 != ws[0]) {
|
|
continue;
|
|
}
|
|
k0 = smp[n0].sm_leadlen;
|
|
if (k0 > 1) {
|
|
if (word[i + k] != ws[1]) {
|
|
continue;
|
|
}
|
|
if (k0 > 2) {
|
|
pf = word + i + k + 1;
|
|
for (j = 2; j < k0; j++) {
|
|
if (*pf++ != ws[j]) {
|
|
break;
|
|
}
|
|
}
|
|
if (j < k0) {
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
k0 += k - 1;
|
|
|
|
if ((pf = smp[n0].sm_oneof_w) != NULL) {
|
|
// Check for match with one of the chars in
|
|
// "sm_oneof".
|
|
while (*pf != NUL && *pf != word[i + k0]) {
|
|
pf++;
|
|
}
|
|
if (*pf == NUL) {
|
|
continue;
|
|
}
|
|
k0++;
|
|
}
|
|
|
|
p0 = 5;
|
|
s = smp[n0].sm_rules;
|
|
while (*s == '-') {
|
|
// "k0" gets NOT reduced because
|
|
// "if (k0 == k)"
|
|
s++;
|
|
}
|
|
if (*s == '<') {
|
|
s++;
|
|
}
|
|
if (ascii_isdigit(*s)) {
|
|
p0 = *s - '0';
|
|
s++;
|
|
}
|
|
|
|
if (*s == NUL
|
|
// *s == '^' cuts
|
|
|| (*s == '$'
|
|
&& !spell_iswordp_w(word + i + k0,
|
|
curwin))) {
|
|
if (k0 == k) {
|
|
// this is just a piece of the string
|
|
continue;
|
|
}
|
|
|
|
if (p0 < pri) {
|
|
// priority too low
|
|
continue;
|
|
}
|
|
// rule fits; stop search
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (p0 >= pri && (smp[n0].sm_lead_w[0] & 0xff)
|
|
== (c0 & 0xff)) {
|
|
continue;
|
|
}
|
|
}
|
|
|
|
// replace string
|
|
ws = smp[n].sm_to_w;
|
|
s = smp[n].sm_rules;
|
|
p0 = (vim_strchr((char *)s, '<') != NULL) ? 1 : 0;
|
|
if (p0 == 1 && z == 0) {
|
|
// rule with '<' is used
|
|
if (reslen > 0 && ws != NULL && *ws != NUL
|
|
&& (wres[reslen - 1] == c
|
|
|| wres[reslen - 1] == *ws)) {
|
|
reslen--;
|
|
}
|
|
z0 = 1;
|
|
z = 1;
|
|
k0 = 0;
|
|
if (ws != NULL) {
|
|
while (*ws != NUL && word[i + k0] != NUL) {
|
|
word[i + k0] = *ws;
|
|
k0++;
|
|
ws++;
|
|
}
|
|
}
|
|
if (k > k0) {
|
|
memmove(word + i + k0, word + i + k, sizeof(int) * (size_t)(wordlen - (i + k) + 1));
|
|
}
|
|
|
|
// new "actual letter"
|
|
c = word[i];
|
|
} else {
|
|
// no '<' rule used
|
|
i += k - 1;
|
|
z = 0;
|
|
if (ws != NULL) {
|
|
while (*ws != NUL && ws[1] != NUL
|
|
&& reslen < MAXWLEN) {
|
|
if (reslen == 0 || wres[reslen - 1] != *ws) {
|
|
wres[reslen++] = *ws;
|
|
}
|
|
ws++;
|
|
}
|
|
}
|
|
// new "actual letter"
|
|
if (ws == NULL) {
|
|
c = NUL;
|
|
} else {
|
|
c = *ws;
|
|
}
|
|
if (strstr((char *)s, "^^") != NULL) {
|
|
if (c != NUL) {
|
|
wres[reslen++] = c;
|
|
}
|
|
memmove(word, word + i + 1, sizeof(int) * (size_t)(wordlen - (i + 1) + 1));
|
|
i = 0;
|
|
z0 = 1;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
} else if (ascii_iswhite(c)) {
|
|
c = ' ';
|
|
k = 1;
|
|
}
|
|
|
|
if (z0 == 0) {
|
|
if (k && !p0 && reslen < MAXWLEN && c != NUL
|
|
&& (!slang->sl_collapse || reslen == 0
|
|
|| wres[reslen - 1] != c)) {
|
|
// condense only double letters
|
|
wres[reslen++] = c;
|
|
}
|
|
|
|
i++;
|
|
z = 0;
|
|
k = 0;
|
|
}
|
|
}
|
|
|
|
// Convert wide characters in "wres" to a multi-byte string in "res".
|
|
int l = 0;
|
|
for (int n = 0; n < reslen; n++) {
|
|
l += utf_char2bytes(wres[n], (char *)res + l);
|
|
if (l + MB_MAXBYTES > MAXWLEN) {
|
|
break;
|
|
}
|
|
}
|
|
res[l] = NUL;
|
|
}
|
|
|
|
// ":spellinfo"
|
|
/// Show, for every language loaded for the current window, the name of its
/// spell file and the info text stored with it (if any).
///
/// @param eap  Ex command arguments (not used here)
void ex_spellinfo(exarg_T *eap)
{
  if (no_spell_checking(curwin)) {
    return;
  }

  msg_start();
  int lpi = 0;
  while (lpi < curwin->w_s->b_langp.ga_len && !got_int) {
    langp_T *const lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);

    msg_puts("file: ");
    msg_puts((const char *)lp->lp_slang->sl_fname);
    msg_putchar('\n');

    const char *const info = (const char *)lp->lp_slang->sl_info;
    if (info != NULL) {
      msg_puts(info);
      msg_putchar('\n');
    }
    lpi++;
  }
  msg_end();
}
|
|
|
|
// Bit flags passed as "dumpflags" when dumping words.
#define DUMPFLAG_KEEPCASE   0x01  // round 2: keep-case tree
#define DUMPFLAG_COUNT      0x02  // include word count
#define DUMPFLAG_ICASE      0x04  // ignore case when finding matches
#define DUMPFLAG_ONECAP     0x08  // pattern starts with capital
#define DUMPFLAG_ALLCAP     0x10  // pattern is all capitals
|
|
|
|
// ":spelldump"
|
|
/// Open a new window with an empty buffer and fill it with all words of the
/// languages used for spell checking in the window the command was issued
/// from.
///
/// @param eap  Ex command arguments; with "forceit" set the dump is requested
///             with DUMPFLAG_COUNT
void ex_spelldump(exarg_T *eap)
{
  if (no_spell_checking(curwin)) {
    return;
  }

  // Remember 'spelllang' of this window before switching to the new one.
  // Both results are initialised: "dummy" is handed on to
  // set_option_value_give_err() below and "spl" is freed, so neither may be
  // left indeterminate when get_option_value() does not write them.
  char *spl = NULL;
  long dummy = 0;
  (void)get_option_value("spl", &dummy, &spl, NULL, OPT_LOCAL);

  // Create a new empty buffer in a new window.
  do_cmdline_cmd("new");

  // enable spelling locally in the new window
  set_option_value_give_err("spell", true, "", OPT_LOCAL);
  set_option_value_give_err("spl", dummy, spl, OPT_LOCAL);
  xfree(spl);

  // Only dump into the buffer when it is still empty.
  if (!buf_is_empty(curbuf)) {
    return;
  }

  spell_dump_compl(NULL, 0, NULL, eap->forceit ? DUMPFLAG_COUNT : 0);

  // Delete the empty line that we started with.
  if (curbuf->b_ml.ml_line_count > 1) {
    ml_delete(curbuf->b_ml.ml_line_count, false);
  }
  redraw_later(curwin, UPD_NOT_VALID);
}
|
|
|
|
/// Go through all possible words and:
|
|
/// 1. When "pat" is NULL: dump a list of all words in the current buffer.
|
|
/// "ic" and "dir" are not used.
|
|
/// 2. When "pat" is not NULL: add matching words to insert mode completion.
|
|
///
|
|
/// @param pat leading part of the word
|
|
/// @param ic ignore case
|
|
/// @param dir direction for adding matches
|
|
/// @param dumpflags_arg DUMPFLAG_*
|
|
void spell_dump_compl(char *pat, int ic, Direction *dir, int dumpflags_arg)
|
|
{
|
|
langp_T *lp;
|
|
slang_T *slang;
|
|
idx_T arridx[MAXWLEN];
|
|
int curi[MAXWLEN];
|
|
char word[MAXWLEN];
|
|
int c;
|
|
char *byts;
|
|
idx_T *idxs;
|
|
linenr_T lnum = 0;
|
|
int depth;
|
|
int n;
|
|
int flags;
|
|
char *region_names = NULL; // region names being used
|
|
bool do_region = true; // dump region names and numbers
|
|
char *p;
|
|
int dumpflags = dumpflags_arg;
|
|
int patlen;
|
|
|
|
// When ignoring case or when the pattern starts with capital pass this on
|
|
// to dump_word().
|
|
if (pat != NULL) {
|
|
if (ic) {
|
|
dumpflags |= DUMPFLAG_ICASE;
|
|
} else {
|
|
n = captype((char_u *)pat, NULL);
|
|
if (n == WF_ONECAP) {
|
|
dumpflags |= DUMPFLAG_ONECAP;
|
|
} else if (n == WF_ALLCAP
|
|
&& (int)STRLEN(pat) > utfc_ptr2len(pat)) {
|
|
dumpflags |= DUMPFLAG_ALLCAP;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Find out if we can support regions: All languages must support the same
|
|
// regions or none at all.
|
|
for (int lpi = 0; lpi < curwin->w_s->b_langp.ga_len; lpi++) {
|
|
lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
|
|
p = (char *)lp->lp_slang->sl_regions;
|
|
if (p[0] != 0) {
|
|
if (region_names == NULL) { // first language with regions
|
|
region_names = p;
|
|
} else if (strcmp(region_names, p) != 0) {
|
|
do_region = false; // region names are different
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (do_region && region_names != NULL) {
|
|
if (pat == NULL) {
|
|
vim_snprintf((char *)IObuff, IOSIZE, "/regions=%s", region_names);
|
|
ml_append(lnum++, (char *)IObuff, (colnr_T)0, false);
|
|
}
|
|
} else {
|
|
do_region = false;
|
|
}
|
|
|
|
// Loop over all files loaded for the entries in 'spelllang'.
|
|
for (int lpi = 0; lpi < curwin->w_s->b_langp.ga_len; lpi++) {
|
|
lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
|
|
slang = lp->lp_slang;
|
|
if (slang->sl_fbyts == NULL) { // reloading failed
|
|
continue;
|
|
}
|
|
|
|
if (pat == NULL) {
|
|
vim_snprintf((char *)IObuff, IOSIZE, "# file: %s", slang->sl_fname);
|
|
ml_append(lnum++, (char *)IObuff, (colnr_T)0, false);
|
|
}
|
|
|
|
// When matching with a pattern and there are no prefixes only use
|
|
// parts of the tree that match "pat".
|
|
if (pat != NULL && slang->sl_pbyts == NULL) {
|
|
patlen = (int)strlen(pat);
|
|
} else {
|
|
patlen = -1;
|
|
}
|
|
|
|
// round 1: case-folded tree
|
|
// round 2: keep-case tree
|
|
for (int round = 1; round <= 2; round++) {
|
|
if (round == 1) {
|
|
dumpflags &= ~DUMPFLAG_KEEPCASE;
|
|
byts = (char *)slang->sl_fbyts;
|
|
idxs = slang->sl_fidxs;
|
|
} else {
|
|
dumpflags |= DUMPFLAG_KEEPCASE;
|
|
byts = (char *)slang->sl_kbyts;
|
|
idxs = slang->sl_kidxs;
|
|
}
|
|
if (byts == NULL) {
|
|
continue; // array is empty
|
|
}
|
|
depth = 0;
|
|
arridx[0] = 0;
|
|
curi[0] = 1;
|
|
while (depth >= 0 && !got_int
|
|
&& (pat == NULL || !ins_compl_interrupted())) {
|
|
if (curi[depth] > byts[arridx[depth]]) {
|
|
// Done all bytes at this node, go up one level.
|
|
depth--;
|
|
line_breakcheck();
|
|
ins_compl_check_keys(50, false);
|
|
} else {
|
|
// Do one more byte at this node.
|
|
n = arridx[depth] + curi[depth];
|
|
curi[depth]++;
|
|
c = (uint8_t)byts[n];
|
|
if (c == 0 || depth >= MAXWLEN - 1) {
|
|
// End of word or reached maximum length, deal with the
|
|
// word.
|
|
// Don't use keep-case words in the fold-case tree,
|
|
// they will appear in the keep-case tree.
|
|
// Only use the word when the region matches.
|
|
flags = (int)idxs[n];
|
|
if ((round == 2 || (flags & WF_KEEPCAP) == 0)
|
|
&& (flags & WF_NEEDCOMP) == 0
|
|
&& (do_region
|
|
|| (flags & WF_REGION) == 0
|
|
|| (((unsigned)flags >> 16)
|
|
& (unsigned)lp->lp_region) != 0)) {
|
|
word[depth] = NUL;
|
|
if (!do_region) {
|
|
flags &= ~WF_REGION;
|
|
}
|
|
|
|
// Dump the basic word if there is no prefix or
|
|
// when it's the first one.
|
|
c = (int)((unsigned)flags >> 24);
|
|
if (c == 0 || curi[depth] == 2) {
|
|
dump_word(slang, (char_u *)word, (char_u *)pat, dir,
|
|
dumpflags, flags, lnum);
|
|
if (pat == NULL) {
|
|
lnum++;
|
|
}
|
|
}
|
|
|
|
// Apply the prefix, if there is one.
|
|
if (c != 0) {
|
|
lnum = dump_prefixes(slang, (char_u *)word, (char_u *)pat, dir,
|
|
dumpflags, flags, lnum);
|
|
}
|
|
}
|
|
} else {
|
|
// Normal char, go one level deeper.
|
|
word[depth++] = (char)c;
|
|
arridx[depth] = idxs[n];
|
|
curi[depth] = 1;
|
|
|
|
// Check if this character matches with the pattern.
|
|
// If not skip the whole tree below it.
|
|
// Always ignore case here, dump_word() will check
|
|
// proper case later. This isn't exactly right when
|
|
// length changes for multi-byte characters with
|
|
// ignore case...
|
|
assert(depth >= 0);
|
|
if (depth <= patlen
|
|
&& mb_strnicmp((char *)word, pat, (size_t)depth) != 0) {
|
|
depth--;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Dumps one word: apply case modifications and append a line to the buffer.
|
|
// When "lnum" is zero add insert mode completion.
|
|
static void dump_word(slang_T *slang, char_u *word, char_u *pat, Direction *dir, int dumpflags,
|
|
int wordflags, linenr_T lnum)
|
|
{
|
|
bool keepcap = false;
|
|
char_u *p;
|
|
char_u cword[MAXWLEN];
|
|
char_u badword[MAXWLEN + 10];
|
|
int flags = wordflags;
|
|
|
|
if (dumpflags & DUMPFLAG_ONECAP) {
|
|
flags |= WF_ONECAP;
|
|
}
|
|
if (dumpflags & DUMPFLAG_ALLCAP) {
|
|
flags |= WF_ALLCAP;
|
|
}
|
|
|
|
if ((dumpflags & DUMPFLAG_KEEPCASE) == 0 && (flags & WF_CAPMASK) != 0) {
|
|
// Need to fix case according to "flags".
|
|
make_case_word(word, cword, flags);
|
|
p = cword;
|
|
} else {
|
|
p = word;
|
|
if ((dumpflags & DUMPFLAG_KEEPCASE)
|
|
&& ((captype(word, NULL) & WF_KEEPCAP) == 0
|
|
|| (flags & WF_FIXCAP) != 0)) {
|
|
keepcap = true;
|
|
}
|
|
}
|
|
char_u *tw = p;
|
|
|
|
if (pat == NULL) {
|
|
// Add flags and regions after a slash.
|
|
if ((flags & (WF_BANNED | WF_RARE | WF_REGION)) || keepcap) {
|
|
STRCPY(badword, p);
|
|
STRCAT(badword, "/");
|
|
if (keepcap) {
|
|
STRCAT(badword, "=");
|
|
}
|
|
if (flags & WF_BANNED) {
|
|
STRCAT(badword, "!");
|
|
} else if (flags & WF_RARE) {
|
|
STRCAT(badword, "?");
|
|
}
|
|
if (flags & WF_REGION) {
|
|
for (int i = 0; i < 7; i++) {
|
|
if (flags & (0x10000 << i)) {
|
|
const size_t badword_len = STRLEN(badword);
|
|
snprintf((char *)badword + badword_len,
|
|
sizeof(badword) - badword_len,
|
|
"%d", i + 1);
|
|
}
|
|
}
|
|
}
|
|
p = badword;
|
|
}
|
|
|
|
if (dumpflags & DUMPFLAG_COUNT) {
|
|
hashitem_T *hi;
|
|
|
|
// Include the word count for ":spelldump!".
|
|
hi = hash_find(&slang->sl_wordcount, (char *)tw);
|
|
if (!HASHITEM_EMPTY(hi)) {
|
|
vim_snprintf((char *)IObuff, IOSIZE, "%s\t%d",
|
|
tw, HI2WC(hi)->wc_count);
|
|
p = (char_u *)IObuff;
|
|
}
|
|
}
|
|
|
|
ml_append(lnum, (char *)p, (colnr_T)0, false);
|
|
} else if (((dumpflags & DUMPFLAG_ICASE)
|
|
? mb_strnicmp((char *)p, (char *)pat, STRLEN(pat)) == 0
|
|
: STRNCMP(p, pat, STRLEN(pat)) == 0)
|
|
&& ins_compl_add_infercase(p, (int)STRLEN(p),
|
|
p_ic, NULL, *dir, false) == OK) {
|
|
// if dir was BACKWARD then honor it just once
|
|
*dir = FORWARD;
|
|
}
|
|
}
|
|
|
|
/// For ":spelldump": Find matching prefixes for "word". Prepend each to
|
|
/// "word" and append a line to the buffer.
|
|
/// When "lnum" is zero add insert mode completion.
|
|
///
|
|
/// @param word case-folded word
|
|
/// @param flags flags with prefix ID
|
|
///
|
|
/// @return the updated line number.
|
|
static linenr_T dump_prefixes(slang_T *slang, char_u *word, char_u *pat, Direction *dir,
|
|
int dumpflags, int flags, linenr_T startlnum)
|
|
{
|
|
idx_T arridx[MAXWLEN];
|
|
int curi[MAXWLEN];
|
|
char_u prefix[MAXWLEN];
|
|
char_u word_up[MAXWLEN];
|
|
bool has_word_up = false;
|
|
linenr_T lnum = startlnum;
|
|
|
|
// If the word starts with a lower-case letter make the word with an
|
|
// upper-case letter in word_up[].
|
|
int c = utf_ptr2char((char *)word);
|
|
if (SPELL_TOUPPER(c) != c) {
|
|
onecap_copy(word, word_up, true);
|
|
has_word_up = true;
|
|
}
|
|
|
|
char_u *byts = slang->sl_pbyts;
|
|
idx_T *idxs = slang->sl_pidxs;
|
|
if (byts != NULL) { // array not is empty
|
|
// Loop over all prefixes, building them byte-by-byte in prefix[].
|
|
// When at the end of a prefix check that it supports "flags".
|
|
int depth = 0;
|
|
arridx[0] = 0;
|
|
curi[0] = 1;
|
|
while (depth >= 0 && !got_int) {
|
|
int n = arridx[depth];
|
|
int len = byts[n];
|
|
if (curi[depth] > len) {
|
|
// Done all bytes at this node, go up one level.
|
|
depth--;
|
|
line_breakcheck();
|
|
} else {
|
|
// Do one more byte at this node.
|
|
n += curi[depth];
|
|
curi[depth]++;
|
|
c = byts[n];
|
|
if (c == 0) {
|
|
// End of prefix, find out how many IDs there are.
|
|
int i;
|
|
for (i = 1; i < len; i++) {
|
|
if (byts[n + i] != 0) {
|
|
break;
|
|
}
|
|
}
|
|
curi[depth] += i - 1;
|
|
|
|
c = valid_word_prefix(i, n, flags, word, slang, false);
|
|
if (c != 0) {
|
|
STRLCPY(prefix + depth, word, MAXWLEN - depth);
|
|
dump_word(slang, prefix, pat, dir, dumpflags,
|
|
(c & WF_RAREPFX) ? (flags | WF_RARE) : flags, lnum);
|
|
if (lnum != 0) {
|
|
lnum++;
|
|
}
|
|
}
|
|
|
|
// Check for prefix that matches the word when the
|
|
// first letter is upper-case, but only if the prefix has
|
|
// a condition.
|
|
if (has_word_up) {
|
|
c = valid_word_prefix(i, n, flags, word_up, slang,
|
|
true);
|
|
if (c != 0) {
|
|
STRLCPY(prefix + depth, word_up, MAXWLEN - depth);
|
|
dump_word(slang, prefix, pat, dir, dumpflags,
|
|
(c & WF_RAREPFX) ? (flags | WF_RARE) : flags, lnum);
|
|
if (lnum != 0) {
|
|
lnum++;
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
// Normal char, go one level deeper.
|
|
prefix[depth++] = (char_u)c;
|
|
arridx[depth] = idxs[n];
|
|
curi[depth] = 1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return lnum;
|
|
}
|
|
|
|
// Move "p" to the end of word "start".
|
|
// Uses the spell-checking word characters.
|
|
char_u *spell_to_word_end(char_u *start, win_T *win)
|
|
{
|
|
char_u *p = start;
|
|
|
|
while (*p != NUL && spell_iswordp(p, win)) {
|
|
MB_PTR_ADV(p);
|
|
}
|
|
return p;
|
|
}
|
|
|
|
// For Insert mode completion CTRL-X s:
|
|
// Find start of the word in front of column "startcol".
|
|
// We don't check if it is badly spelled, with completion we can only change
|
|
// the word in front of the cursor.
|
|
// Returns the column number of the word.
|
|
int spell_word_start(int startcol)
|
|
{
|
|
if (no_spell_checking(curwin)) {
|
|
return startcol;
|
|
}
|
|
|
|
char_u *line = (char_u *)get_cursor_line_ptr();
|
|
char_u *p;
|
|
|
|
// Find a word character before "startcol".
|
|
for (p = line + startcol; p > line;) {
|
|
MB_PTR_BACK(line, p);
|
|
if (spell_iswordp_nmw(p, curwin)) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
int col = 0;
|
|
|
|
// Go back to start of the word.
|
|
while (p > line) {
|
|
col = (int)(p - line);
|
|
MB_PTR_BACK(line, p);
|
|
if (!spell_iswordp(p, curwin)) {
|
|
break;
|
|
}
|
|
col = 0;
|
|
}
|
|
|
|
return col;
|
|
}
|
|
|
|
// Need to check for 'spellcapcheck' now, the word is removed before
|
|
// expand_spelling() is called. Therefore the ugly global variable.
|
|
static bool spell_expand_need_cap;
|
|
|
|
void spell_expand_check_cap(colnr_T col)
|
|
{
|
|
spell_expand_need_cap = check_need_cap(curwin->w_cursor.lnum, col);
|
|
}
|
|
|
|
// Get list of spelling suggestions.
|
|
// Used for Insert mode completion CTRL-X ?.
|
|
// Returns the number of matches. The matches are in "matchp[]", array of
|
|
// allocated strings.
|
|
int expand_spelling(linenr_T lnum, char_u *pat, char ***matchp)
|
|
{
|
|
garray_T ga;
|
|
|
|
spell_suggest_list(&ga, pat, 100, spell_expand_need_cap, true);
|
|
*matchp = ga.ga_data;
|
|
return ga.ga_len;
|
|
}
|
|
|
|
/// @return true if "val" is a valid 'spelllang' value.
|
|
bool valid_spelllang(const char *val)
|
|
FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT
|
|
{
|
|
return valid_name(val, ".-_,@");
|
|
}
|
|
|
|
/// @return true if "val" is a valid 'spellfile' value.
|
|
bool valid_spellfile(const char *val)
|
|
FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT
|
|
{
|
|
for (const char_u *s = (char_u *)val; *s != NUL; s++) {
|
|
if (!vim_isfilec(*s) && *s != ',' && *s != ' ') {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
char *did_set_spell_option(bool is_spellfile)
|
|
{
|
|
char *errmsg = NULL;
|
|
|
|
if (is_spellfile) {
|
|
int l = (int)strlen(curwin->w_s->b_p_spf);
|
|
if (l > 0
|
|
&& (l < 4 || strcmp(curwin->w_s->b_p_spf + l - 4, ".add") != 0)) {
|
|
errmsg = e_invarg;
|
|
}
|
|
}
|
|
|
|
if (errmsg == NULL) {
|
|
FOR_ALL_WINDOWS_IN_TAB(wp, curtab) {
|
|
if (wp->w_buffer == curbuf && wp->w_p_spell) {
|
|
errmsg = did_set_spelllang(wp);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
return errmsg;
|
|
}
|
|
|
|
/// Set curbuf->b_cap_prog to the regexp program for 'spellcapcheck'.
|
|
/// Return error message when failed, NULL when OK.
|
|
char *compile_cap_prog(synblock_T *synblock)
|
|
FUNC_ATTR_NONNULL_ALL
|
|
{
|
|
regprog_T *rp = synblock->b_cap_prog;
|
|
|
|
if (synblock->b_p_spc == NULL || *synblock->b_p_spc == NUL) {
|
|
synblock->b_cap_prog = NULL;
|
|
} else {
|
|
// Prepend a ^ so that we only match at one column
|
|
char *re = concat_str("^", synblock->b_p_spc);
|
|
synblock->b_cap_prog = vim_regcomp(re, RE_MAGIC);
|
|
xfree(re);
|
|
if (synblock->b_cap_prog == NULL) {
|
|
synblock->b_cap_prog = rp; // restore the previous program
|
|
return e_invarg;
|
|
}
|
|
}
|
|
|
|
vim_regfree(rp);
|
|
return NULL;
|
|
}
|