--- Character decomposition (spelling) hints
--- Source: https://github.com/yanhuacuo/98wubi-tables
local basic = require('092wb_basic')
|
||
local map = basic.map
|
||
local index = basic.index
|
||
local utf8chars = basic.utf8chars
|
||
local matchstr = basic.matchstr
|
||
|
||
-- Ordered (pattern, replacement) pairs applied by xform, first to last.
local xform_rules = {
  { '%[', '〔' },
  { '%]', '〕' },
  { '※', ' ' },
  { '_', ' ' },
  { ',', '·' },
}

--- Rewrite a raw spelling entry into its display form.
-- Square brackets become 〔 〕, '※' and '_' become spaces, and commas
-- become '·'.
-- @tparam string input raw text to convert
-- @treturn string the converted text ("" for empty input)
local function xform(input)
  if input == "" then return "" end
  local text = input
  for _, rule in ipairs(xform_rules) do
    -- Assigning to a single variable drops gsub's substitution count.
    text = text:gsub(rule[1], rule[2])
  end
  return text
end
|
||
|
||
--- Extract a range of "radicals" from a spelling string.
-- The string is tokenised so that each `{...}` group counts as one radical
-- and every other UTF-8 code point counts as one radical; whitespace only
-- separates tokens and is dropped.
-- @tparam string str spelling string to slice
-- @param ... optional `first, last` 1-based radical positions (inclusive);
--   `last` defaults to the final radical
-- @treturn string `str` unchanged when no range is given, otherwise the
--   concatenation of the radicals inside the range ("" when the range
--   selects nothing)
local function subspelling(str, ...)
  local first, last = ...
  if not first then return str end

  -- Pad braces with spaces so that every {...} group becomes its own token.
  local spaced = str:gsub('{', ' {'):gsub('}', '} ')

  local radicals = {}
  for token in spaced:gmatch('%S+') do
    if token:find('^{.+}$') then
      radicals[#radicals + 1] = token
    else
      for _, code in utf8.codes(token) do
        radicals[#radicals + 1] = utf8.char(code)
      end
    end
  end

  -- Clamp the requested range to the radicals that actually exist. Unpacking
  -- an out-of-range slice into a fresh table would create nil holes, on which
  -- table.concat raises an error.
  local count = #radicals
  local lo = math.max(first, 1)
  local hi = math.min(last or count, count)
  return table.concat(radicals, '', lo, hi)
end
|
||
|
||
--- Wrap a database object in a plain single-argument function.
-- @param db object exposing a `lookup(self, key)` method
-- @treturn function a function that forwards its argument to `db:lookup`
--   and returns whatever that call returns
local function lookup(db)
  local function query(key)
    return db:lookup(key)
  end
  return query
end
|
||
|
||
--- Reduce a raw spelling entry to its first comma-separated field.
-- Everything from the first comma onward is dropped, then one leading '['
-- is removed.
-- @tparam string str raw spelling entry
-- @treturn string the first field without its opening bracket
local function parse_spll(str)
  local head = string.gsub(str, ',.*', '')
  -- The parentheses truncate gsub's (string, count) result to the string
  -- alone, so callers that forward every return value do not receive a stray
  -- substitution count.
  return (string.gsub(head, '^%[', ''))
end
|
||
|
||
--- Build the combined spelling hint for a multi-character phrase.
-- Each selected character is looked up in `spll_rvdb`, its entry is reduced
-- with `parse_spll`, and single radicals are taken from the results:
--   * 2 characters: radicals 2 and 4 of each character;
--   * 3 characters: radical 2 of the first two, radicals 2 and 4 of the third;
--   * 4 or more:    radical 2 of the first, second, third and last character.
-- @tparam string s the phrase text
-- @param spll_rvdb reverse database object exposing `lookup`
-- @treturn string the combined spelling, or '' when the phrase has fewer
--   than two characters or `index` finds an empty lookup result
local function spell_phrase(s, spll_rvdb)
  local chars = utf8chars(s)
  local count = #chars
  -- Fewer than two characters leaves no phrase to compose; without this guard
  -- the code below would index missing spellings and raise an error.
  if count < 2 then return '' end

  -- Long phrases only use their first three characters and the last one.
  local picked = chars
  if count > 3 then
    picked = { chars[1], chars[2], chars[3], chars[count] }
  end

  local rvlk_results = map(picked, lookup(spll_rvdb))
  if index(rvlk_results, '') then return '' end
  local spellings = map(rvlk_results, parse_spll)

  -- '◇' is appended before slicing where radical 4 is requested; presumably
  -- a placeholder shown when the spelling is too short (confirm against the
  -- spelling table format).
  local sup = '◇'

  -- Take the single radical at position `pos` from a spelling.
  local function radical(spelling, pos)
    return subspelling(spelling, pos, pos)
  end

  if count == 2 then
    local one, two = spellings[1] .. sup, spellings[2] .. sup
    return radical(one, 2) .. radical(one, 4) ..
           radical(two, 2) .. radical(two, 4)
  elseif count == 3 then
    local three = spellings[3] .. sup
    return radical(spellings[1], 2) .. radical(spellings[2], 2) ..
           radical(three, 2) .. radical(three, 4)
  end
  return radical(spellings[1], 2) .. radical(spellings[2], 2) ..
         radical(spellings[3], 2) .. radical(spellings[4], 2)
end
|
||
|
||
--- Compute the spelling comment for a candidate.
-- Single-character candidates show their entry from `env.spll_rvdb`,
-- shortened when the "new_hide_pinyin" option is on. Longer candidates show
-- the phrase spelling from `spell_phrase`, followed by the codes from
-- `env.code_rvdb` (shortest first) when there are any.
-- @param cand candidate object; only `cand.text` is read
-- @param env filter environment providing `spll_rvdb`, `code_rvdb` and
--   `engine.context`
-- @treturn string the comment text, or '' when nothing could be looked up
local function get_tricomment(cand, env)
  local text = cand.text

  if utf8.len(text) == 1 then
    local raw = env.spll_rvdb:lookup(text)
    if raw == '' then return '' end
    if env.engine.context:get_option("new_hide_pinyin") then
      -- Keep only the first comma-separated field inside the brackets.
      -- Alternative pattern keeping two fields: '%[(.-,.-),.+%]'
      return xform(raw:gsub('%[(.-),.+%]', '[%1]'))
    end
    return xform(raw)
  end

  local spelling = spell_phrase(text, env.spll_rvdb)
  if spelling == '' then return '' end
  -- Show brace groups in angle brackets.
  spelling = spelling:gsub('{(.-)}', '<%1>')

  local raw_codes = env.code_rvdb:lookup(text)
  if raw_codes == '' then
    return '〔 ' .. spelling .. ' 〕'
  end

  local codes = matchstr(raw_codes, '%S+')
  table.sort(codes, function(a, b) return a:len() < b:len() end)
  local joined = table.concat(codes, ' ')
  return '〔 ' .. spelling .. ' · ' .. joined .. ' 〕'
end
|
||
|
||
--- Candidate filter that attaches spelling hints to candidate comments.
-- With the "new_spelling" option off, every candidate is passed through
-- untouched. Otherwise:
--   * 'simplified' candidates in the 'new_for_rvlk' name space are re-emitted
--     as new "simp_rvlk" candidates with the hint prefixed to their comment;
--   * 'punct' candidates get their `env.code_rvdb` entry as the hint;
--   * 'sentence' candidates get no hint;
--   * all other candidates get the hint from `get_tricomment`.
-- A non-empty hint replaces the comment for non-'completion' candidates when
-- the name space is 'new_for_rvlk', or is 'new' with `env.is_mixtyping` set;
-- in every other case it is prefixed to the existing comment.
-- @param input candidate stream exposing `iter()`
-- @param env filter environment prepared by `init`
local function filter(input, env)
  if not env.engine.context:get_option("new_spelling") then
    for cand in input:iter() do yield(cand) end
    return
  end

  for cand in input:iter() do
    local kind = cand.type
    if kind == 'simplified' and env.name_space == 'new_for_rvlk' then
      local comment = get_tricomment(cand, env) .. cand.comment
      yield(Candidate("simp_rvlk", cand.start, cand._end, cand.text, comment))
    else
      local hint = ''
      if kind == 'punct' then
        hint = env.code_rvdb:lookup(cand.text)
      elseif kind ~= 'sentence' then
        hint = get_tricomment(cand, env)
      end

      if hint ~= '' then
        local overwrite = kind ~= 'completion' and (
          (env.name_space == 'new' and env.is_mixtyping) or
          (env.name_space == 'new_for_rvlk')
        )
        if overwrite then
          cand.comment = hint
        else
          cand.comment = hint .. cand.comment
        end
      end
      yield(cand)
    end
  end
end
|
||
|
||
--- Prepare the filter environment from the schema configuration.
-- Reads the reverse-database names from 'lua_reverse_db/spelling' and
-- 'lua_reverse_db/code', opens 'build/<name>.reverse.bin' for each, and
-- records whether 'abc_segmentor/extra_tags' has any entries.
-- @param env filter environment; receives `spll_rvdb`, `code_rvdb` and
--   `is_mixtyping`
local function init(env)
  local cfg = env.engine.schema.config
  local spelling_name = cfg:get_string('lua_reverse_db/spelling')
  local code_name = cfg:get_string('lua_reverse_db/code')
  local extra_tag_count = cfg:get_list_size('abc_segmentor/extra_tags')

  -- Path of the compiled reverse database for a dictionary name.
  local function reverse_db_path(name)
    return 'build/' .. name .. '.reverse.bin'
  end

  env.spll_rvdb = ReverseDb(reverse_db_path(spelling_name))
  env.code_rvdb = ReverseDb(reverse_db_path(code_name))
  env.is_mixtyping = extra_tag_count > 0
end
|
||
|
||
-- Module export: a table exposing the setup function as `init` and the
-- candidate filter as `func`.
return { init = init, func = filter }
|
||
|
||
|
||
|