092wb initial commit
This commit is contained in:
153
lua/092wb_new_spelling.lua
Normal file
153
lua/092wb_new_spelling.lua
Normal file
@@ -0,0 +1,153 @@
|
||||
--- 拆分提示
|
||||
--- 来源:https://github.com/yanhuacuo/98wubi-tables
|
||||
local basic = require('092wb_basic')
|
||||
local map = basic.map
|
||||
local index = basic.index
|
||||
local utf8chars = basic.utf8chars
|
||||
local matchstr = basic.matchstr
|
||||
|
||||
--- Rewrite a raw spelling entry into its display form.
-- Square brackets become tortoise-shell brackets, '※' and '_' become
-- spaces, and ',' becomes a middle dot. An empty string is returned as-is.
-- @param input string raw text
-- @return string display text
local function xform(input)
  if input == "" then return "" end

  -- Ordered { pattern, replacement } pairs, applied one after another.
  local rules = {
    { '%[', '〔' },
    { '%]', '〕' },
    { '※', ' ' },
    { '_', ' ' },
    { ',', '·' },
  }

  local out = input
  for i = 1, #rules do
    -- Assignment keeps only gsub's first result (the rewritten string).
    out = out:gsub(rules[i][1], rules[i][2])
  end
  return out
end
|
||||
|
||||
--- Extract a range of radicals from a spelling string.
-- A brace group such as `{xx}` counts as one radical; any other text is split
-- into individual UTF-8 characters. Whitespace separates segments and is
-- dropped.
-- @param str string spelling text
-- @param ... optional `first, last` 1-based radical positions; when `first`
--   is absent the string is returned unchanged, and when `last` is absent
--   the range runs to the final radical
-- @return string the radicals in the requested range, concatenated
local function subspelling(str, ...)
  local first, last = ...
  if not first then return str end

  -- Pad braces with spaces so that every brace group becomes its own token.
  local spaced = str:gsub('{', ' {'):gsub('}', '} ')

  local radicals = {}
  for token in spaced:gmatch('%S+') do
    if token:find('^{.+}$') then
      radicals[#radicals + 1] = token
    else
      for _, codepoint in utf8.codes(token) do
        radicals[#radicals + 1] = utf8.char(codepoint)
      end
    end
  end

  -- Positions past the end unpack as nil and so contribute nothing.
  local picked = { table.unpack(radicals, first, last) }
  return table.concat(picked)
end
|
||||
|
||||
--- Build a one-argument lookup function bound to a database object.
-- @param db object exposing a `lookup(self, key)` method
-- @return function that forwards its argument to `db:lookup`
local function lookup(db)
  local function query(key)
    return db:lookup(key)
  end
  return query
end
|
||||
|
||||
--- Extract the spelling field from a raw reverse-lookup entry.
-- Everything from the first comma onward is dropped, then a single leading
-- '[' is stripped.
-- @param str string raw entry (for instance a string shaped like "[ab,cd]")
-- @return string the spelling field only
local function parse_spll(str)
  local head = string.gsub(str, ',.*', '')
  -- The parentheses truncate gsub's (string, count) pair to the string
  -- alone, so the substitution count never leaks out as a second result.
  return (string.gsub(head, '^%[', ''))
end
|
||||
|
||||
--- Build the spelling hint for a multi-character phrase.
-- Looks up the spelling of the relevant characters and joins selected
-- radicals from each:
--   * 2 characters: radicals 2 and 4 of each character;
--   * 3 characters: radical 2 of the first two, radicals 2 and 4 of the third;
--   * 4+ characters: radical 2 of the first three and of the last character.
-- @param s string the phrase text
-- @param spll_rvdb object with a `lookup` method returning a spelling entry
--   ('' when there is none)
-- @return string the combined spelling, or '' when the phrase is shorter than
--   two characters or any needed character has no spelling entry
local function spell_phrase(s, spll_rvdb)
  -- NOTE(review): assumes utf8chars returns a sequence holding the
  -- characters of `s` — confirm against 092wb_basic.
  local chars = utf8chars(s)
  local n = #chars

  -- Fewer than two characters is not a phrase. Without this guard the code
  -- below would index a missing spelling and raise an error.
  if n < 2 then return '' end

  local picked
  if n <= 3 then
    picked = chars
  else
    picked = { chars[1], chars[2], chars[3], chars[n] }
  end

  local rvlk_results = map(picked, lookup(spll_rvdb))
  if index(rvlk_results, '') then return '' end

  local spellings = map(rvlk_results, parse_spll)
  -- Placeholder appended before taking two radicals from one character;
  -- presumably a filler for characters with too few radicals.
  local sup = '◇'

  if n == 2 then
    local head, tail = spellings[1] .. sup, spellings[2] .. sup
    return subspelling(head, 2, 2) ..
           subspelling(head, 4, 4) ..
           subspelling(tail, 2, 2) ..
           subspelling(tail, 4, 4)
  elseif n == 3 then
    local tail = spellings[3] .. sup
    return subspelling(spellings[1], 2, 2) ..
           subspelling(spellings[2], 2, 2) ..
           subspelling(tail, 2, 2) ..
           subspelling(tail, 4, 4)
  end

  return subspelling(spellings[1], 2, 2) ..
         subspelling(spellings[2], 2, 2) ..
         subspelling(spellings[3], 2, 2) ..
         subspelling(spellings[4], 2, 2)
end
|
||||
|
||||
--- Compute the spelling comment for a candidate.
-- Single-character candidates use their raw spelling entry (cut down to the
-- text before its first comma when the "new_hide_pinyin" option is on).
-- Longer candidates get a phrase spelling, followed by their codes sorted by
-- length when the code database has an entry.
-- @param cand candidate object; only `cand.text` is read
-- @param env table providing `spll_rvdb`, `code_rvdb` and `engine.context`
-- @return string the comment, or '' when nothing could be looked up
local function get_tricomment(cand, env)
  local text = cand.text

  if utf8.len(text) == 1 then
    local raw = env.spll_rvdb:lookup(text)
    if raw == '' then return '' end

    if not env.engine.context:get_option("new_hide_pinyin") then
      return xform(raw)
    end
    -- Keep only the first comma-separated field inside the brackets.
    -- (An alternative pattern keeping two fields: '%[(.-,.-),.+%]'.)
    local trimmed = raw:gsub('%[(.-),.+%]', '[%1]')
    return xform(trimmed)
  end

  local spelling = spell_phrase(text, env.spll_rvdb)
  if spelling == '' then return '' end

  -- Show brace groups with angle brackets instead.
  spelling = spelling:gsub('{(.-)}', '<%1>')

  local code = env.code_rvdb:lookup(text)
  if code == '' then
    return '〔 ' .. spelling .. ' 〕'
  end

  -- NOTE(review): matchstr presumably returns the list of matches — confirm
  -- against 092wb_basic.
  local codes = matchstr(code, '%S+')
  table.sort(codes, function(a, b) return a:len() < b:len() end)
  return '〔 ' .. spelling .. ' · ' .. table.concat(codes, ' ') .. ' 〕'
end
|
||||
|
||||
--- Candidate filter that adds spelling / code comments.
-- With the "new_spelling" option off, every candidate is passed through
-- untouched. Otherwise:
--   * 'simplified' candidates in the 'new_for_rvlk' name space are re-emitted
--     as new "simp_rvlk" candidates with the spelling comment prepended;
--   * 'punct' candidates get their code-database entry as the added comment;
--   * all other candidates except 'sentence' get the spelling comment.
-- A non-empty added comment either replaces or is prepended to the existing
-- one, depending on the candidate type and name space.
-- @param input translation object exposing `iter()`
-- @param env table with `engine`, `name_space`, `code_rvdb`, `is_mixtyping`
local function filter(input, env)
  if not env.engine.context:get_option("new_spelling") then
    for cand in input:iter() do yield(cand) end
    return
  end

  for cand in input:iter() do
    local kind = cand.type
    local for_rvlk = env.name_space == 'new_for_rvlk'

    if kind == 'simplified' and for_rvlk then
      local comment = get_tricomment(cand, env) .. cand.comment
      yield(Candidate("simp_rvlk", cand.start, cand._end, cand.text, comment))
    else
      local add_comment = ''
      if kind == 'punct' then
        add_comment = env.code_rvdb:lookup(cand.text)
      elseif kind ~= 'sentence' then
        add_comment = get_tricomment(cand, env)
      end

      if add_comment ~= '' then
        -- Replace the comment outright for non-completion candidates in the
        -- reverse-lookup name space, or in 'new' when mix-typing is enabled.
        local replace = kind ~= 'completion' and
          (for_rvlk or (env.name_space == 'new' and env.is_mixtyping))
        if replace then
          cand.comment = add_comment
        else
          cand.comment = add_comment .. cand.comment
        end
      end

      yield(cand)
    end
  end
end
|
||||
|
||||
--- Initialise the filter environment.
-- Reads the two reverse-database names from the schema config, opens both
-- databases under `build/`, and records whether `abc_segmentor/extra_tags`
-- has any entries.
-- @param env table; receives `spll_rvdb`, `code_rvdb` and `is_mixtyping`
local function init(env)
  local config = env.engine.schema.config
  local spelling_name = config:get_string('lua_reverse_db/spelling')
  local code_name = config:get_string('lua_reverse_db/code')
  local extra_tag_count = config:get_list_size('abc_segmentor/extra_tags')

  -- Open the compiled reverse dictionary for the given dictionary name.
  local function open_reverse_db(name)
    return ReverseDb('build/' .. name .. '.reverse.bin')
  end

  env.spll_rvdb = open_reverse_db(spelling_name)
  env.code_rvdb = open_reverse_db(code_name)
  env.is_mixtyping = extra_tag_count > 0
end
|
||||
|
||||
-- Module value: a table pairing the `init` setup function with the `filter`
-- function under the key `func` (presumably the shape the host expects for a
-- filter component — confirm against the host's documentation).
return { init = init, func = filter }
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user