092wb initial commit

This commit is contained in:
Zelong Kuang
2026-04-09 02:40:08 +10:00
commit 09b65144c8
62 changed files with 685966 additions and 0 deletions

49
lua/092wb_basic.lua Normal file
View File

@@ -0,0 +1,49 @@
-- basic.lua
-- Small helper library: table search/map, pattern collection and UTF-8 helpers.
local basic = {}
-- Register the table in `package.loaded` under the chunk's first vararg
-- (the module name when the file is loaded through `require`), so the module
-- is reachable even though this chunk has no visible `return basic`.
package.loaded[...] = basic
--- Find the key under which a value is stored.
-- Walks `table` with `pairs` and returns the first key whose value equals
-- `item`. Returns nothing when no value matches.
function basic.index(table, item)
  for key, value in pairs(table) do
    if value == item then
      return key
    end
  end
end
--- Apply `func` to every value of `table`.
-- Returns a new table that stores the first result of `func(value)` under the
-- same key; the input table is not modified.
function basic.map(table, func)
  local mapped = {}
  for key, value in pairs(table) do
    local result = func(value)
    mapped[key] = result
  end
  return mapped
end
--- Collect every match of the Lua pattern `pat` in `str`.
-- Only the first value produced by `gmatch` for each match is kept.
-- Returns the matches as an array, in order of appearance.
function basic.matchstr(str, pat)
  local matches = {}
  local count = 0
  for hit in str:gmatch(pat) do
    count = count + 1
    matches[count] = hit
  end
  return matches
end
--- Split a UTF-8 string into an array of single-character strings.
-- Extra arguments are accepted and ignored.
function basic.utf8chars(str, ...)
  local chars = {}
  local count = 0
  for _, codepoint in utf8.codes(str) do
    count = count + 1
    chars[count] = utf8.char(codepoint)
  end
  return chars
end
--- Character-indexed substring of a UTF-8 string.
-- @param str    UTF-8 string
-- @param first  index of the first character (passed to `utf8.offset`)
-- @param ...    optional index of the last character; missing or too large
--               means "up to the end", negative counts back from the end
-- @return the bytes of `str` covering characters `first`..`last`
function basic.utf8sub(str, first, ...)
  local last = (...)
  local length = utf8.len(str)
  if last == nil or last > length then
    last = length
  elseif last < 0 then
    last = length + 1 + last
  end
  -- Byte offset where the slice starts; fall back to the first byte.
  local start_byte = utf8.offset(str, first) or 1
  -- Byte offset of the character after the slice; nil means "to the end".
  local next_byte = utf8.offset(str, last + 1)
  local end_byte = next_byte and next_byte - 1 or nil
  return string.sub(str, start_byte, end_byte)
end

66
lua/092wb_core_filter.lua Executable file
View File

@@ -0,0 +1,66 @@
--- Taken from the file "original Tongwen Lua custom character-set filter script"
--- shared in the Chinese input-method group 938020953.
-- Inclusive code-point ranges that `is_cjk` treats as CJK.
local charsets = {
{ first = 0x4E00, last = 0x9FFF }, -- CJK Unified Ideographs
{ first = 0x3400, last = 0x4DBF }, -- ExtA
{ first = 0x20000, last = 0x2A6DF }, -- ExtB
{ first = 0x2A700, last = 0x2B73F }, -- ExtC
{ first = 0x2B740, last = 0x2B81F }, -- ExtD
{ first = 0x2B820, last = 0x2CEAF }, -- ExtE
{ first = 0x2CEB0, last = 0x2EBEF }, -- ExtF
{ first = 0x30000, last = 0x3134F }, -- ExtG
{ first = 0x31350, last = 0x323AF }, -- ExtH
{ first = 0x2EBF0, last = 0x2EE4A }, -- ExtI
{ first = 0x323B0, last = 0x3347F }, -- ExtJ
{ first = 0xF900, last = 0xFAFF }, -- CJK Compatibility Ideographs
{ first = 0x2F800, last = 0x2FA1F }, -- CJK Compatibility Ideographs Supplement
{ first = 0x2F00, last = 0x2FD5 }, -- Kangxi Radicals
{ first = 0x2E80, last = 0x2EF3 }, -- CJK Radicals Supplement
{ first = 0x31C0, last = 0x31E3 }, -- CJK Strokes
-- { first = 0x3005, last = 0x30FF }, -- CJK Symbols and Punctuation (disabled)
{ first = 0xE000, last = 0xF8FF }, -- PUA
{ first = 0xF0000, last = 0xFFFFD }, -- PUA
{ first = 0x100000, last = 0x10FFFD }, -- PUA
}
--- Tell whether a code point lies inside one of the `charsets` ranges.
-- @param code  Unicode code point (number)
-- @return true when some range contains `code`, false otherwise
local function is_cjk(code)
  for position = 1, #charsets do
    local range = charsets[position]
    if not (code < range.first or code > range.last) then
      return true
    end
  end
  return false
end
--- Decide whether a candidate text passes the core character-set filter.
-- @param text    candidate text (UTF-8)
-- @param option  filter switch; when falsy every text is accepted
-- @param coredb  object with `coredb:lookup(char)`; an empty-string result
--                marks the character as absent from the core set
-- @return false when filtering is on and a character accepted by `is_cjk`
--         has no entry in `coredb`, true otherwise
local function should_yield(text, option, coredb)
  if not option then
    return true
  end
  -- `utf8.codes` already yields the code point, so no second decode is needed.
  for _, code in utf8.codes(text) do
    -- Compare the lookup result directly so it never lands in a global.
    if is_cjk(code) and coredb:lookup(utf8.char(code)) == "" then
      return false
    end
  end
  return true
end
--- Filter entry point: forward only the candidates accepted by `should_yield`.
-- The `extended_char` context option is read once per call and handed to
-- `should_yield` as its switch, together with the reverse DB in `env.coredb`.
-- @param input  candidate stream exposing `input:iter()`
-- @param env    component environment (`env.engine.context`, `env.coredb`)
local function filter(input, env)
  -- Kept local: the original assignment created a global named `on`.
  local enabled = env.engine.context:get_option("extended_char")
  for cand in input:iter() do
    if should_yield(cand.text, enabled, env.coredb) then
      yield(cand)
    end
  end
end
--- Component initialiser.
-- Runs when the component is loaded: opens the reverse-lookup database and
-- stores it in `env.coredb`.
local function init(env)
  local core_db_path = "build/092wb_core.reverse.bin"
  env.coredb = ReverseDb(core_db_path)
end
-- Export the component: `init` is the setup hook, `func` is the candidate filter.
return { init = init, func = filter }

View File

@@ -0,0 +1,72 @@
-- Date and time translator.
-- Why the quality is raised: the schema sets an initial_quality greater than 1,
-- which would otherwise place the candidates produced by rq / sj / xq / dt / ts
-- after every word.

--- Yield one candidate covering segment `seg` with text `text` and quality 100.
local function yield_cand(seg, text)
  local candidate = Candidate('', seg.start, seg._end, text, '')
  candidate.quality = 100
  yield(candidate)
end
local M = {}

--- Read the trigger keywords from the schema configuration.
-- A leading `*` is removed from `env.name_space`; each keyword is then looked
-- up at `<name_space>/<key>` and falls back to its built-in default
-- (rq, sj, xq, dt, ts). The keywords are stored on the module table `M`.
function M.init(env)
  env.name_space = env.name_space:gsub('^*', '')
  local config = env.engine.schema.config
  local prefix = env.name_space .. '/'
  M.date = config:get_string(prefix .. 'date') or 'rq'
  M.time = config:get_string(prefix .. 'time') or 'sj'
  M.week = config:get_string(prefix .. 'week') or 'xq'
  M.datetime = config:get_string(prefix .. 'datetime') or 'dt'
  M.timestamp = config:get_string(prefix .. 'timestamp') or 'ts'
end
--- Translator entry point: yield date/time candidates for the trigger keywords.
-- @param input  raw input code; compared against the keywords stored on `M`
-- @param seg    current segment (its `start` / `_end` are used for candidates)
-- @param env    component environment (unused here)
function M.func(input, seg, env)
  local now = os.time()
  if input == M.date then
    -- Date
    yield_cand(seg, os.date('%Y-%m-%d', now))
    yield_cand(seg, os.date('%Y/%m/%d', now))
    yield_cand(seg, os.date('%Y.%m.%d', now))
    yield_cand(seg, os.date('%Y%m%d', now))
    -- Drop the leading zero of month and day but keep the 年 / 月 markers;
    -- replacing with '' would also delete the markers themselves.
    local chinese_date = os.date('%Y年%m月%d日', now):gsub('年0', '年'):gsub('月0', '月')
    yield_cand(seg, chinese_date)
  elseif input == M.time then
    -- Time
    yield_cand(seg, os.date('%H:%M', now))
    yield_cand(seg, os.date('%H:%M:%S', now))
  elseif input == M.week then
    -- Weekday: '%w' is 0 (Sunday) .. 6 (Saturday), hence the +1 for the table.
    local week_tab = { '日', '一', '二', '三', '四', '五', '六' }
    local text = week_tab[tonumber(os.date('%w', now)) + 1]
    yield_cand(seg, '星期' .. text)
    yield_cand(seg, '礼拜' .. text)
    yield_cand(seg, '周' .. text)
  elseif input == M.datetime then
    -- ISO 8601 / RFC 3339 style; the +08:00 offset is a fixed literal
    -- (example: 2022-01-07T20:42:51+08:00).
    yield_cand(seg, os.date('%Y-%m-%dT%H:%M:%S+08:00', now))
    yield_cand(seg, os.date('%Y-%m-%d %H:%M:%S', now))
    yield_cand(seg, os.date('%Y%m%d%H%M%S', now))
  elseif input == M.timestamp then
    -- Unix timestamp in seconds (ten digits, example: 1650861664).
    yield_cand(seg, string.format('%d', now))
  end
  -- -- Show memory usage
  -- local cand = Candidate("date", seg.start, seg._end, ("%.f"):format(collectgarbage('count')), "")
  -- cand.quality = 100
  -- yield(cand)
  -- if input == "xxx" then
  -- collectgarbage()
  -- local cand = Candidate("date", seg.start, seg._end, "collectgarbage()", "")
  -- cand.quality = 100
  -- yield(cand)
  -- end
end
-- Export the translator table (`M.init` and `M.func`).
return M

36
lua/092wb_helper.lua Normal file
View File

@@ -0,0 +1,36 @@
--- Source: https://github.com/yanhuacuo/98wubi-tables
--- helper.lua
--- List features and usage of the schema.
--- When the input is exactly `aid`, one candidate is yielded per help entry:
--- the shortcut is the candidate text and the feature name is the comment.
local function translator(input, seg)
  if not input:find('^aid$') then
    return
  end
  -- { feature name, how to trigger it }
  local entries = {
    { '拆分显隐', 'Ctrl + Shift + H' },
    { '拼音显隐', 'Ctrl + Shift + J' },
    { '字集切换', 'Ctrl + Shift + U' },
    { '繁简切换', 'Ctrl + Shift + F' },
    { 'Emoji表情', 'Ctrl + Shift + M' },
    { '临时拼音', 'z键引导临时拼音' },
    { '重复历史', 'z键兼有重复历史' },
    { '方案选单', 'Ctrl+Shift+`' },
    { '大写数字', 'R(大写) + 数字' },
    { '公历转农历', 'N(大写) + 20240422' },
    { '农历', 'nl' },
    { '时间', 'time' },
    { '日期', 'date' },
    { '星期', 'week' },
    { '全角', 'Shift + Space' },
    { '撤销上屏', 'Alt+Backspace' },
    { '英文标点', 'Ctrl+ + .' },
    { '帮助', 'aid' },
    { '注释', 'Ctrl + Shift + Return' },
  }
  local preedit = input .. '\t说明'
  for position = 1, #entries do
    local entry = entries[position]
    local cand = Candidate('aid', seg.start, seg._end, entry[2], ' ' .. entry[1])
    cand.preedit = preedit
    yield(cand)
  end
end
-- Export the help translator function.
return translator

671
lua/092wb_lunar.lua Executable file
View File

@@ -0,0 +1,671 @@
--[[
Lua 阿拉伯数字转中文实现 https://blog.csdn.net/lp12345678910/article/details/121396243
农历功能复制自 https://github.com/boomker/rime-fast-xhup
--]]
-- Number to Chinese numerals.

-- Unit written after the digit at each decimal position, counted from the
-- right (index 1 = ones). Entry 9 is 亿 and entry 13 is 兆.
local numerical_units = {
  "",
  "十",
  "百",
  "千",
  "万",
  "十",
  "百",
  "千",
  "亿",
  "十",
  "百",
  "千",
  "兆",
  "十",
  "百",
  "千",
}
-- Chinese name of digit d is stored at index d + 1.
local numerical_names = {
  "零",
  "一",
  "二",
  "三",
  "四",
  "五",
  "六",
  "七",
  "八",
  "九",
}

--- Convert a non-negative integer of at most nine digits to Chinese numerals.
-- @param number  number or numeric string
-- @return the Chinese numeral text, e.g. 101 -> "一百零一"
-- Raises (via `assert`) when the argument is not a non-negative integer or
-- has more than nine digits.
local function convert_arab_to_chinese(number)
  local n_number = tonumber(number)
  assert(n_number, "传入参数非正确number类型!")
  -- Use the canonical digits of the parsed value, so leading zeros in the
  -- argument ("012") or a float with integral value (12.0) cannot make the
  -- digit count disagree with the digits themselves.
  local digits = tostring(math.tointeger(n_number) or n_number)
  assert(digits:match("^%d+$"), "传入参数非正确number类型!")
  -- 0 ~ 9
  if n_number < 10 then
    return numerical_names[n_number + 1]
  end
  -- 10 ~ 19 are written 十X rather than 一十X
  if n_number < 20 then
    local digit = tonumber(digits:sub(2, 2))
    if digit == 0 then
      return "十"
    end
    return "十" .. numerical_names[digit + 1]
  end
  -- At most nine digits: 0 ~ 999999999 (零 ~ 九亿九千九百九十九万九千九百九十九).
  local len_max = 9
  local len_number = #digits
  assert(
    len_number > 0 and len_number <= len_max,
    "传入参数位数" .. len_number .. "必须在(0, " .. len_max .. "]之间!"
  )
  local result = ""
  local pre_zero = false          -- the previous digit was a zero
  local section_has_digit = false -- current 4-digit section has a non-zero digit
  for index = 1, len_number do
    local digit = tonumber(digits:sub(index, index))
    local position = len_number - index + 1
    local unit = numerical_units[position]
    -- Positions 5 and 9 close a four-digit section and carry 万 / 亿.
    local closes_section = position > 1 and (position - 1) % 4 == 0
    if digit ~= 0 then
      result = result .. numerical_names[digit + 1] .. unit
      pre_zero = false
      section_has_digit = true
    elseif closes_section and section_has_digit then
      -- A zero in the 万 / 亿 position must still emit that unit
      -- (100000 is 一十万, not 一十).
      result = string.gsub(result, "零+$", "") .. unit
      pre_zero = false
    else
      -- A run of zeros is written as a single 零.
      if not pre_zero then
        result = result .. numerical_names[1]
      end
      pre_zero = true
    end
    if closes_section then
      section_has_digit = false
    end
  end
  -- Trailing zeros are not pronounced.
  result = string.gsub(result, "零+$", "")
  return result
end
-- Lunar calendar.
-- Names of the ten Heavenly Stems; the entries were empty strings.
-- Indexed with `math.fmod(year - 4, 10) + 1`, so index 1 is 甲.
local cTianGan = {
  "甲",
  "乙",
  "丙",
  "丁",
  "戊",
  "己",
  "庚",
  "辛",
  "壬",
  "癸",
}
-- Names of the twelve Earthly Branches; the entries were empty strings.
-- Indexed with `math.fmod(year - 4, 12) + 1`, so index 1 is 子.
local cDiZhi = {
  "子",
  "丑",
  "寅",
  "卯",
  "辰",
  "巳",
  "午",
  "未",
  "申",
  "酉",
  "戌",
  "亥",
}
-- Names of the twelve zodiac animals; the entries were empty strings.
-- Uses the same index as `cDiZhi`, so index 1 is 鼠.
local cShuXiang = {
  "鼠",
  "牛",
  "虎",
  "兔",
  "龙",
  "蛇",
  "马",
  "羊",
  "猴",
  "鸡",
  "狗",
  "猪",
}
-- Names of the days of a lunar month; index d is the name of day d (1..30).
local cDayName = {
"初一",
"初二",
"初三",
"初四",
"初五",
"初六",
"初七",
"初八",
"初九",
"初十",
"十一",
"十二",
"十三",
"十四",
"十五",
"十六",
"十七",
"十八",
"十九",
"二十",
"廿一",
"廿二",
"廿三",
"廿四",
"廿五",
"廿六",
"廿七",
"廿八",
"廿九",
"三十",
}
-- Names of the lunar months; index m is the name of month m (1..12).
local cMonName = {
"正月",
"二月",
"三月",
"四月",
"五月",
"六月",
"七月",
"八月",
"九月",
"十月",
"冬月",
"腊月",
}
-- Lunar calendar data, one 7-character hex record per year.
-- `Date2LunarDate` reads the record for year Y at index Y - 1900 + 2, so
-- index 1 is used for 1899.
-- As decoded by `Analyze`:
--   chars 1-3  hex, expanded to binary month flags; each flag is added to 29
--              to obtain the length of a month
--   char  4    flag spliced in for the leap month (same 29 + flag rule)
--   char  5    hex leap-month number, 0 when the year has no leap month
--   chars 6-7  hex; the decimal value reads as MMDD of the lunar new year
--              (e.g. D2 = 210 -> 0210)
local wNongliData = {
"AB500D2",
"4BD0883",
"4AE00DB",
"A5700D0",
"54D0581",
"D2600D8",
"D9500CC",
"655147D",
"56A00D5",
"9AD00CA",
"55D027A",
"4AE00D2",
"A5B0682",
"A4D00DA",
"D2500CE",
"D25157E",
"B5500D6",
"56A00CC",
"ADA027B",
"95B00D3",
"49717C9",
"49B00DC",
"A4B00D0",
"B4B0580",
"6A500D8",
"6D400CD",
"AB5147C",
"2B600D5",
"95700CA",
"52F027B",
"49700D2",
"6560682",
"D4A00D9",
"EA500CE",
"6A9157E",
"5AD00D6",
"2B600CC",
"86E137C",
"92E00D3",
"C8D1783",
"C9500DB",
"D4A00D0",
"D8A167F",
"B5500D7",
"56A00CD",
"A5B147D",
"25D00D5",
"92D00CA",
"D2B027A",
"A9500D2",
"B550781",
"6CA00D9",
"B5500CE",
"535157F",
"4DA00D6",
"A5B00CB",
"457037C",
"52B00D4",
"A9A0883",
"E9500DA",
"6AA00D0",
"AEA0680",
"AB500D7",
"4B600CD",
"AAE047D",
"A5700D5",
"52600CA",
"F260379",
"D9500D1",
"5B50782",
"56A00D9",
"96D00CE",
"4DD057F",
"4AD00D7",
"A4D00CB",
"D4D047B",
"D2500D3",
"D550883",
"B5400DA",
"B6A00CF",
"95A1680",
"95B00D8",
"49B00CD",
"A97047D",
"A4B00D5",
"B270ACA",
"6A500DC",
"6D400D1",
"AF40681",
"AB600D9",
"93700CE",
"4AF057F",
"49700D7",
"64B00CC",
"74A037B",
"EA500D2",
"6B50883",
"5AC00DB",
"AB600CF",
"96D0580",
"92E00D8",
"C9600CD",
"D95047C",
"D4A00D4",
"DA500C9",
"755027A",
"56A00D1",
"ABB0781",
"25D00DA",
"92D00CF",
"CAB057E",
"A9500D6",
"B4A00CB",
"BAA047B",
"AD500D2",
"55D0983",
"4BA00DB",
"A5B00D0",
"5171680",
"52B00D8",
"A9300CD",
"795047D",
"6AA00D4",
"AD500C9",
"5B5027A",
"4B600D2",
"96E0681",
"A4E00D9",
"D2600CE",
"EA6057E",
"D5300D5",
"5AA00CB",
"76A037B",
"96D00D3",
"4AB0B83",
"4AD00DB",
"A4D00D0",
"D0B1680",
"D2500D7",
"D5200CC",
"DD4057C",
"B5A00D4",
"56D00C9",
"55B027A",
"49B00D2",
"A570782",
"A4B00D9",
"AA500CE",
"B25157E",
"6D200D6",
"ADA00CA",
"4B6137B",
"93700D3",
"49F08C9",
"49700DB",
"64B00D0",
"68A1680",
"EA500D7",
"6AA00CC",
"A6C147C",
"AAE00D4",
"92E00CA",
"D2E0379",
"C9600D1",
"D550781",
"D4A00D9",
"DA400CD",
"5D5057E",
"56A00D6",
"A6C00CB",
"55D047B",
"52D00D3",
"A9B0883",
"A9500DB",
"B4A00CF",
"B6A067F",
"AD500D7",
"55A00CD",
"ABA047C",
"A5A00D4",
"52B00CA",
"B27037A",
"69300D1",
"7330781",
"6AA00D9",
"AD500CE",
"4B5157E",
"4B600D6",
"A5700CB",
"54E047C",
"D1600D2",
"E960882",
"D5200DA",
"DAA00CF",
"6AA167F",
"56D00D7",
"4AE00CD",
"A9D047D",
"A2D00D4",
"D1500C9",
"F250279",
"D5200D1",
}
--- Decimal to binary.
-- @param n  number or numeric string; the fractional part is discarded
-- @return the binary digits as a string without leading zeros:
--         "0" for zero, "" for negative values
-- Exactly one value is returned. The previous implementation returned ""
-- for 1 and also leaked the substitution count of `string.gsub`.
local function Dec2bin(n)
  local value = math.floor(tonumber(n))
  if value <= 0 then
    return value == 0 and "0" or ""
  end
  local bits = {}
  while value > 0 do
    -- Collected least-significant bit first; reversed once at the end.
    bits[#bits + 1] = (value % 2 == 0) and "0" or "1"
    value = math.floor(value / 2)
  end
  return string.reverse(table.concat(bits))
end
--- Convert between base 2, 10 and 16.
-- @param x           value to convert (number or string)
-- @param inPuttype   base of `x`: 2, 10 or 16
-- @param outputtype  requested base: 2, 10 or 16
-- @return the converted value (a number for base 10, otherwise whatever the
--         underlying conversion returns); nil for unsupported combinations
local function Atoi(x, inPuttype, outputtype)
  local from = tonumber(inPuttype)
  local to = tonumber(outputtype)
  local converted
  if from == 2 and to == 10 then
    -- binary -> decimal (binary -> hexadecimal is not implemented)
    converted = tonumber(tostring(x), 2)
  elseif from == 10 and to == 2 then
    -- decimal -> binary
    converted = Dec2bin(tonumber(x))
  elseif from == 10 and to == 16 then
    -- decimal -> hexadecimal
    converted = string.format("%x", x)
  elseif from == 16 and to == 2 then
    -- hexadecimal -> binary
    converted = Dec2bin(tonumber(tostring(x), 16))
  elseif from == 16 and to == 10 then
    -- hexadecimal -> decimal
    converted = tonumber(tostring(x), 16)
  end
  return converted
end
--- Split one hexadecimal lunar-data record into its fields.
-- @param Data  7-character record from the lunar data table
-- @return array { month_bits, leap_flag, leap_month, new_year }:
--   month_bits  12-character binary string from hex chars 1-3
--   leap_flag   char 4, unchanged
--   leap_month  hex char 5 as a number
--   new_year    last two hex chars as a decimal; a three-digit value is
--               returned as a zero-prefixed four-character string (MMDD)
local function Analyze(Data)
  local month_bits = Atoi(string.sub(Data, 1, 3), 16, 2)
  -- Always pad to the full 12 bits. Prefixing a single "0" left records
  -- whose first hex digit is 2 or 3 (10 significant bits) one digit short,
  -- which shifted every month flag.
  if string.len(month_bits) < 12 then
    month_bits = string.rep("0", 12 - string.len(month_bits)) .. month_bits
  end
  local leap_flag = string.sub(Data, 4, 4)
  local leap_month = Atoi(string.sub(Data, 5, 5), 16, 10)
  local new_year = Atoi(string.sub(Data, -2, -1), 16, 10)
  if string.len(new_year) == 3 then
    new_year = "0" .. new_year
  end
  return { month_bits, leap_flag, leap_month, new_year }
end
--- Number of days in a Gregorian year.
-- @param y  year (number or numeric string)
-- @return 366 for a leap year, 365 otherwise; nil when `y` is not numeric
-- Despite the name this returns a day count, not a boolean.
local function IsLeap(y)
  local year = tonumber(y)
  if not year then
    return nil
  end
  -- Divisible by 4 but not by 100, or divisible by 400. Without the 100 rule
  -- 1900 and 2100, both inside the supported range, counted as leap years.
  local leap = (year % 4 == 0 and year % 100 ~= 0) or year % 400 == 0
  return leap and 366 or 365
end
--- Day of the year for a YYYYMMDD date.
-- @param y  date as an 8-digit number or string
-- @return number of days elapsed in that year up to and including the date
local function leaveDate(y)
  local year = tonumber(string.sub(y, 1, 4))
  local month = tonumber(string.sub(y, 5, 6))
  local day_of_month = tonumber(string.sub(y, 7, 8))
  local february = 28
  if IsLeap(year) > 365 then
    february = 29
  end
  -- In January the day of the month already is the day of the year.
  if not (month > 1) then
    return day_of_month
  end
  local month_days = { 31, february, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }
  local elapsed = 0
  for m = 1, month - 1 do
    elapsed = elapsed + month_days[m]
  end
  return tonumber(elapsed + day_of_month)
end
--- Days between two YYYYMMDD dates.
-- @param date1  earlier date (number or string)
-- @param date2  later date (number or string)
-- @return the number of days from date1 to date2, 0 when they are equal,
--         -1 when date2 is earlier than date1
local function diffDate(date1, date2)
  date1 = tostring(date1)
  date2 = tostring(date2)
  local from, to = tonumber(date1), tonumber(date2)
  if not (to > from) then
    if to == from then
      return 0
    end
    return -1
  end
  local first_year = tonumber(string.sub(date1, 1, 4))
  local year_gap = tonumber(string.sub(date2, 1, 4)) - first_year
  -- Difference of the two day-of-year values ...
  local total = leaveDate(tonumber(string.sub(date2, 1, 8)))
    - leaveDate(tonumber(string.sub(date1, 1, 8)))
  if year_gap >= 1 then
    -- ... plus the whole first year and every full year in between.
    total = total + IsLeap(first_year)
    for offset = 1, year_gap - 1 do
      total = total + IsLeap(first_year + offset)
    end
  end
  return total
end
--- Convert a Gregorian date to the Chinese lunar calendar.
-- The accepted years are 1899..2100 (limited by the lunar data table).
-- @param Gregorian  date as YYYYMMDD (number or string); only the first
--                   eight characters are used
-- @return two strings:
--   1. stem-branch form, e.g. 甲辰年(龙)三月十四
--   2. numeric-year form, e.g. 二〇二四年三月十四
--   Both are "无效日期" when the input is not eight digits, is out of range,
--   or falls before the first lunar new year held in the data table.
local function Date2LunarDate(Gregorian)
  local INVALID = "无效日期"
  Gregorian = tostring(Gregorian)
  -- Validate the shape first: partial input such as "2024" used to reach the
  -- numeric comparisons with nil fields and raise an error.
  if not Gregorian:match("^%d%d%d%d%d%d%d%d") then
    return INVALID, INVALID
  end
  local date = Gregorian:sub(1, 8)
  local Year = tonumber(date:sub(1, 4))
  local Month = tonumber(date:sub(5, 6))
  local Day = tonumber(date:sub(7, 8))
  if Year > 2100 or Year < 1899 or Month > 12 or Month < 1 or Day < 1 or Day > 31 then
    return INVALID, INVALID
  end

  -- Record of the same Gregorian year; the lunar new year it holds decides
  -- whether the date still belongs to the previous lunar year.
  local Pos = Year - 1900 + 2
  local info = Analyze(wNongliData[Pos])
  local Date3 = diffDate(Year .. info[4], date) -- days since that lunar new year
  if Date3 < 0 then
    local previous = wNongliData[Pos - 1]
    if not previous then
      -- Early 1899: there is no record for the preceding lunar year.
      return INVALID, INVALID
    end
    info = Analyze(previous)
    Year = Year - 1
    Date3 = diffDate(Year .. info[4], date)
  end
  Date3 = Date3 + 1 -- 1-based day within the lunar year
  local LYear = Year
  local MonthInfo, LeapInfo, Leap = info[1], info[2], info[3]
  -- Guard against a month string shorter than 12 flags.
  if #MonthInfo < 12 then
    MonthInfo = string.rep("0", 12 - #MonthInfo) .. MonthInfo
  end

  -- Splice the leap month's flag in right after the month it repeats.
  local thisMonthInfo = MonthInfo
  if Leap > 0 then
    thisMonthInfo = string.sub(MonthInfo, 1, Leap) .. LeapInfo .. string.sub(MonthInfo, Leap + 1)
  end

  -- Walk the months, consuming 29 or 30 days each, until the day fits.
  local LMonth, LDay
  local is_leap_month = false
  for i = 1, #thisMonthInfo do
    local thisDays = 29 + tonumber(string.sub(thisMonthInfo, i, i))
    if Date3 > thisDays then
      Date3 = Date3 - thisDays
    else
      if Leap > 0 and i > Leap then
        -- Slots after the inserted leap month are shifted by one.
        LMonth = i - 1
        is_leap_month = (i - Leap == 1)
      else
        LMonth = i
      end
      LDay = math.floor(Date3)
      break
    end
  end
  if not LMonth then
    return INVALID, INVALID
  end

  local LunarMonth = cMonName[LMonth]
  if is_leap_month then
    LunarMonth = "闰" .. LunarMonth
  end

  -- Spell the year digit by digit, writing zero as 〇.
  local year_digits = {}
  for digit in tostring(LYear):gmatch("%d") do
    year_digits[#year_digits + 1] = convert_arab_to_chinese(tonumber(digit))
  end
  local LunarYear = string.gsub(table.concat(year_digits), "零", "〇")

  local LunarDate = cTianGan[math.fmod(LYear - 4, 10) + 1]
    .. cDiZhi[math.fmod(LYear - 4, 12) + 1]
    .. "年("
    .. cShuXiang[math.fmod(LYear - 4, 12) + 1]
    .. ")"
    .. LunarMonth
    .. cDayName[LDay]
  local LunarDate2 = LunarYear .. "年" .. LunarMonth .. cDayName[LDay]
  return LunarDate, LunarDate2
end
--- Lunar calendar translator.
-- The trigger keyword is read from the schema config at the key named by
-- `env.name_space` (leading `*` removed) and defaults to `nl`.
-- The Gregorian-to-lunar prefix is the 2nd character of
-- `recognizer/patterns/gregorian_to_lunar` and defaults to `N` when the
-- pattern is not configured; an empty prefix disables that mode.
-- Both values are cached on `env` after the first call.
-- @param input  raw input code
-- @param seg    current segment
-- @param env    component environment
local function translator(input, seg, env)
  local config = env.engine.schema.config
  if not env.lunar_key_word then
    -- Keep only the string: `gsub` also returns a substitution count.
    local name_space = env.name_space:gsub('^*', '')
    env.lunar_key_word = config:get_string(name_space) or 'nl'
  end
  if not env.gregorian_to_lunar then
    -- Test the pattern before calling `:sub`, otherwise a missing pattern
    -- raises and the default can never apply.
    local pattern = config:get_string('recognizer/patterns/gregorian_to_lunar')
    env.gregorian_to_lunar = pattern and pattern:sub(2, 2) or 'N'
  end

  local gregorian
  if input == env.lunar_key_word then
    gregorian = os.date("%Y%m%d") -- today
  elseif env.gregorian_to_lunar ~= '' and input:sub(1, 1) == env.gregorian_to_lunar then
    gregorian = input:sub(2) -- digits typed after the prefix
  else
    return
  end

  local date1, date2 = Date2LunarDate(gregorian)
  -- Numeric-year form first, stem-branch form second.
  for _, text in ipairs({ date2, date1 }) do
    local cand = Candidate("", seg.start, seg._end, text, "")
    cand.quality = 999
    yield(cand)
  end
end
-- Export the lunar translator function.
return translator

153
lua/092wb_new_spelling.lua Normal file
View File

@@ -0,0 +1,153 @@
--- 拆分提示
--- 来源https://github.com/yanhuacuo/98wubi-tables
local basic = require('092wb_basic')
local map = basic.map
local index = basic.index
local utf8chars = basic.utf8chars
local matchstr = basic.matchstr
--- Turn a raw spelling entry into the text shown as a candidate comment.
-- Square brackets become 〔 〕, `※` and `_` become spaces, and `,` becomes `·`.
-- NOTE(review): the bracket replacements and the `※` pattern were empty
-- strings; an empty pattern made `gsub` insert a space between every byte.
-- The restored characters follow the upstream 98wubi spelling script —
-- confirm against the dictionary actually in use.
-- @param input  raw spelling string
-- @return the display string ("" for empty input)
local function xform(input)
  if input == "" then return "" end
  input = input:gsub('%[', '〔')
  input = input:gsub('%]', '〕')
  input = input:gsub('※', ' ')
  input = input:gsub('_', ' ')
  input = input:gsub(',', '·')
  return input
end
--- Pick radicals `first`..`last` out of a spelling string.
-- A `{...}` group counts as one radical; everything else is split into single
-- UTF-8 characters, and whitespace separates segments.
-- @param str  spelling string
-- @param ...  optional `first, last` radical indices; without `first` the
--             string is returned unchanged
-- @return the selected radicals concatenated
local function subspelling(str, ...)
  local first, last = ...
  if not first then return str end
  -- Put spaces around brace groups so each one becomes its own token.
  local spaced = str:gsub('{', ' {')
  spaced = spaced:gsub('}', '} ')
  local radicals = {}
  for token in spaced:gmatch('%S+') do
    if token:find('^{.+}$') then
      radicals[#radicals + 1] = token
    else
      for _, code in utf8.codes(token) do
        radicals[#radicals + 1] = utf8.char(code)
      end
    end
  end
  return table.concat({ table.unpack(radicals, first, last) })
end
--- Bind a reverse DB: returns a function that forwards to `db:lookup(str)`.
local function lookup(db)
  local function query(str)
    return db:lookup(str)
  end
  return query
end
--- Extract the spelling part of a raw entry: everything from the first comma
--- on is dropped, then a leading `[` is removed.
local function parse_spll(str)
  local before_comma = string.gsub(str, ',.*', '')
  return string.gsub(before_comma, '^%[', '')
end
--- Build the spelling hint of a multi-character phrase.
-- For 2 or 3 characters every character is looked up; otherwise the 1st, 2nd,
-- 3rd and last are used. If any lookup returns '' the result is ''.
-- @param s          phrase text
-- @param spll_rvdb  spelling reverse DB
-- @return radicals picked from each character's spelling, concatenated
local function spell_phrase(s, spll_rvdb)
  local chars = utf8chars(s)
  local count = #chars
  local picked = chars
  if count ~= 2 and count ~= 3 then
    picked = { chars[1], chars[2], chars[3], chars[count] }
  end
  local rvlk_results = map(picked, lookup(spll_rvdb))
  if index(rvlk_results, '') then return '' end
  local spellings = map(rvlk_results, parse_spll)
  -- NOTE(review): `sup` is appended before some picks and is the empty string
  -- here; it may have held a padding character upstream — confirm.
  local sup = ''
  local pieces
  if count == 2 then
    local head, tail = spellings[1] .. sup, spellings[2] .. sup
    pieces = {
      subspelling(head, 2, 2),
      subspelling(head, 4, 4),
      subspelling(tail, 2, 2),
      (subspelling(tail, 4, 4)),
    }
  elseif count == 3 then
    local tail = spellings[3] .. sup
    pieces = {
      subspelling(spellings[1], 2, 2),
      subspelling(spellings[2], 2, 2),
      subspelling(tail, 2, 2),
      (subspelling(tail, 4, 4)),
    }
  else
    pieces = {
      subspelling(spellings[1], 2, 2),
      subspelling(spellings[2], 2, 2),
      subspelling(spellings[3], 2, 2),
      (subspelling(spellings[4], 2, 2)),
    }
  end
  return table.concat(pieces)
end
--- Build the comment text for one candidate.
-- Single character: its spelling entry passed through `xform`; with the
-- `new_hide_pinyin` option only the part before the first comma is kept.
-- Phrase: the spelling from `spell_phrase`, followed by its codes sorted by
-- length when the code DB has an entry.
-- @return the comment text, or '' when nothing is found
local function get_tricomment(cand, env)
  local text = cand.text
  if utf8.len(text) == 1 then
    local raw = env.spll_rvdb:lookup(text)
    if raw == '' then
      return ''
    end
    if not env.engine.context:get_option("new_hide_pinyin") then
      return xform(raw)
    end
    -- return xform(spll_raw:gsub('%[(.-,.-),.+%]', '[%1]'))
    return xform(raw:gsub('%[(.-),.+%]', '[%1]'))
  end

  local spelling = spell_phrase(text, env.spll_rvdb)
  if spelling == '' then
    return ''
  end
  -- Brace groups are shown in angle brackets.
  spelling = spelling:gsub('{(.-)}', '<%1>')
  local codes = env.code_rvdb:lookup(text)
  if codes == '' then
    return ' ' .. spelling .. ' '
  end
  local code_list = matchstr(codes, '%S+')
  table.sort(code_list, function(a, b) return #a < #b end)
  return ' ' .. spelling .. ' · ' .. table.concat(code_list, ' ') .. ' '
end
--- Filter entry point: attach spelling hints to candidate comments.
-- With the `new_spelling` option off, candidates pass through untouched.
-- Otherwise `simplified` candidates in the `new_for_rvlk` name space are
-- re-created as `simp_rvlk` candidates with the hint prefixed to the comment.
-- Other candidates get the code (for `punct`) or the hint (anything but
-- `sentence`) either replacing or prefixed to their comment.
local function filter(input, env)
  if not env.engine.context:get_option("new_spelling") then
    for cand in input:iter() do yield(cand) end
    return
  end
  for cand in input:iter() do
    local name_space = env.name_space
    if cand.type == 'simplified' and name_space == 'new_for_rvlk' then
      local comment = get_tricomment(cand, env) .. cand.comment
      yield(Candidate("simp_rvlk", cand.start, cand._end, cand.text, comment))
    else
      local add_comment = ''
      if cand.type == 'punct' then
        add_comment = env.code_rvdb:lookup(cand.text)
      elseif cand.type ~= 'sentence' then
        add_comment = get_tricomment(cand, env)
      end
      if add_comment ~= '' then
        -- Replace the existing comment only for non-completion candidates in
        -- mixed-typing `new` or in `new_for_rvlk`; otherwise prefix it.
        local replaces = cand.type ~= 'completion'
          and ((name_space == 'new' and env.is_mixtyping) or name_space == 'new_for_rvlk')
        if replaces then
          cand.comment = add_comment
        else
          cand.comment = add_comment .. cand.comment
        end
      end
      yield(cand)
    end
  end
end
--- Component initialiser.
-- Opens the spelling and code reverse DBs named by `lua_reverse_db/spelling`
-- and `lua_reverse_db/code`, and sets `env.is_mixtyping` when
-- `abc_segmentor/extra_tags` has at least one entry.
local function init(env)
  local config = env.engine.schema.config
  local spelling_name = config:get_string('lua_reverse_db/spelling')
  local code_name = config:get_string('lua_reverse_db/code')
  local extra_tag_count = config:get_list_size('abc_segmentor/extra_tags')
  local function db_path(name)
    return 'build/' .. name .. '.reverse.bin'
  end
  env.spll_rvdb = ReverseDb(db_path(spelling_name))
  env.code_rvdb = ReverseDb(db_path(code_name))
  env.is_mixtyping = extra_tag_count > 0
end
-- Export the component: `init` is the setup hook, `func` is the candidate filter.
return { init = init, func = filter }

View File

@@ -0,0 +1,157 @@
-- 来源 https://github.com/yanhuacuo/98wubi-tables > http://98wb.ysepan.com/
-- 数字、金额大写
-- 触发前缀默认为 recognizer/patterns/number 的第 2 个字符,即 R
--- Split a numeric string into integer part, dot and decimal part.
-- @return table with fields `int`, `dot` ("." or "") and `dec`
local function splitNumPart(str)
  local int_part, dot, dec_part = string.match(str, "^(%d*)(%.?)(%d*)")
  return { int = int_part, dot = dot, dec = dec_part }
end
--- Truncate a number to `n` decimal places, rounding towards negative infinity.
-- @param nNum  number or numeric string
-- @param n     number of decimal places; defaults to 0, negatives count as 0
local function GetPreciseDecimal(nNum, n)
  if type(nNum) ~= "number" then
    nNum = tonumber(nNum)
  end
  local places = math.floor(n or 0)
  if places < 0 then
    places = 0
  end
  local scale = 10 ^ places
  return math.floor(nNum * scale) / scale
end
--- Convert the decimal digits of an amount to Chinese (角 / 分 / 厘 / 毫).
-- Only the first four digits are used and trailing zeros are ignored.
-- @param str     decimal digits, e.g. "05"
-- @param posMap  unit per position; defaults to 角, 分, 厘, 毫
-- @param valMap  digit names indexed 0..9; defaults to the capital forms
-- @return the converted text, or "整" when no non-zero decimal digit remains
-- The default maps and the "整" result were empty strings and are restored.
local function decimal_func(str, posMap, valMap)
  posMap = posMap or { [1] = "角", [2] = "分", [3] = "厘", [4] = "毫" }
  valMap = valMap or { [0] = "零", "壹", "贰", "叁", "肆", "伍", "陆", "柒", "捌", "玖" }
  local dec = string.sub(tostring(str), 1, 4)
  dec = string.gsub(dec, "0+$", "")
  if dec == "" then return "整" end
  local result = ""
  for pos = 1, #dec do
    local val = tonumber(string.sub(dec, pos, pos))
    if val ~= 0 then
      result = result .. valMap[val] .. posMap[pos]
    else
      -- A zero digit carries no unit.
      result = result .. valMap[val]
    end
  end
  -- Two passes collapse a run of up to three zeros into one.
  local zero = valMap[0]
  result = result:gsub(zero .. zero, zero)
  result = result:gsub(zero .. zero, zero)
  return result
end
--- Convert one group of up to four digits to Chinese.
-- @param num  digit string (or number) of at most four digits
-- @param t    style: below 1 selects lower-case numerals, otherwise capital
-- @return the Chinese text of the group without trailing zeros; the zero
--         character itself when the value is 0 or has more than four digits
-- The numeral and unit tables were empty strings and are restored.
local function formatNum(num, t)
  num = tostring(num)
  local digitUnit, wordFigure
  if tonumber(t) < 1 then
    digitUnit = { "", "十", "百", "千" }
    wordFigure = { "〇", "一", "二", "三", "四", "五", "六", "七", "八", "九" }
  else
    digitUnit = { "", "拾", "佰", "仟" }
    wordFigure = { "零", "壹", "贰", "叁", "肆", "伍", "陆", "柒", "捌", "玖" }
  end
  local zero = wordFigure[1]
  if string.len(num) > 4 or tonumber(num) == 0 then return zero end
  local result = ""
  -- Walk from the ones digit upwards so `i` indexes the unit directly.
  for i = 1, string.len(num) do
    local n = wordFigure[tonumber(string.sub(num, -i, -i)) + 1]
    if n ~= zero then
      result = n .. digitUnit[i] .. result
    else
      result = n .. result
    end
  end
  result = result:gsub(zero .. zero, zero)
  -- After the collapse at most two zeros can still trail (e.g. "1000").
  result = result:gsub(zero .. "$", "")
  result = result:gsub(zero .. "$", "")
  return result
end
--- Convert an integer digit string (up to 12 digits) to Chinese.
-- @param num         digit string
-- @param flag        below 1 selects lower-case numerals, otherwise capital
-- @param digitUnit   optional { unit for 10^4, unit for 10^8 }
-- @param wordFigure  optional { zero, one, ten, suffix }; the suffix is
--                    appended to the result (元 by default)
-- @return the Chinese text, or "数值超限!" when nothing could be produced
-- The default tables were empty strings, which made every call end in
-- "数值超限!"; they are restored here.
local function number2cnChar(num, flag, digitUnit, wordFigure) -- flag = 0: lower case, otherwise capital
  if tonumber(flag) < 1 then
    digitUnit = digitUnit or { [1] = "万", [2] = "亿" }
    wordFigure = wordFigure or { [1] = "〇", [2] = "一", [3] = "十", [4] = "元" }
  else
    digitUnit = digitUnit or { [1] = "万", [2] = "亿" }
    wordFigure = wordFigure or { [1] = "零", [2] = "壹", [3] = "拾", [4] = "元" }
  end
  local zero, one, ten, suffix = wordFigure[1], wordFigure[2], wordFigure[3], wordFigure[4]
  local result = ""
  local lens = string.len(num)
  -- Convert four digits at a time and join the groups with 万 / 亿.
  if lens < 5 then
    result = formatNum(num, flag)
  elseif lens < 9 then
    result = formatNum(string.sub(num, 1, -5), flag) .. digitUnit[1]
      .. formatNum(string.sub(num, -4, -1), flag)
  elseif lens < 13 then
    result = formatNum(string.sub(num, 1, -9), flag) .. digitUnit[2]
      .. formatNum(string.sub(num, -8, -5), flag) .. digitUnit[1]
      .. formatNum(string.sub(num, -4, -1), flag)
  end
  -- Tidy up the zeros introduced by empty groups.
  result = result:gsub("^" .. zero, "")
  result = result:gsub(zero .. digitUnit[1], "")
  result = result:gsub(zero .. digitUnit[2], "")
  result = result:gsub(zero .. zero, zero)
  result = result:gsub(zero .. "$", "")
  -- A leading "one ten" is shortened to "ten" for values above four digits.
  if lens > 4 then
    result = result:gsub("^" .. one .. ten, ten)
  end
  if result ~= "" then
    result = result .. suffix
  else
    result = "数值超限!"
  end
  return result
end
--- Spell a digit string one digit at a time (used for decimal digits).
-- @param num  digit string; nil yields ""
-- @param t    below 1 selects lower-case numerals, otherwise capital
-- @return one Chinese numeral per input digit
-- Changes: the numeral tables were empty strings and are restored; the nil
-- check tested `tostring(num)`, which is never nil; repeated zeros are no
-- longer collapsed, because that altered the value (005 read as 05).
local function number2zh(num, t)
  if num == nil then return "" end
  local wordFigure
  if tonumber(t) < 1 then
    wordFigure = { "〇", "一", "二", "三", "四", "五", "六", "七", "八", "九" }
  else
    wordFigure = { "零", "壹", "贰", "叁", "肆", "伍", "陆", "柒", "捌", "玖" }
  end
  num = tostring(num)
  local spelled = {}
  for pos = 1, string.len(num) do
    spelled[pos] = wordFigure[tonumber(string.sub(num, pos, pos)) + 1]
  end
  return table.concat(spelled)
end
--- Build the four readings of a numeric string.
-- @param num  numeric string such as "12.5"
-- @return array of { text, label } pairs: number lower-case, number capital,
--         amount lower-case, amount capital
-- The unit and numeral literals passed to the helpers were empty strings and
-- are restored. NOTE(review): the capital number reading is given capital
-- figures and traditional 萬 / 億 — confirm against the upstream script.
local function number_translatorFunc(num)
  local numberPart = splitNumPart(num)
  local int_part, dec_part = numberPart.int, numberPart.dec
  local result = {}
  if numberPart.dot ~= "" then
    -- With a decimal point: integer part, 点, then the digits one by one.
    table.insert(result, {
      number2cnChar(int_part, 0, { "万", "亿" }, { "〇", "一", "十", "点" }) .. number2zh(dec_part, 0),
      "〔数字小写〕",
    })
    table.insert(result, {
      number2cnChar(int_part, 1, { "萬", "億" }, { "零", "壹", "拾", "点" }) .. number2zh(dec_part, 1),
      "〔数字大写〕",
    })
  else
    -- Plain integers get no suffix.
    table.insert(result, {
      number2cnChar(int_part, 0, { "万", "亿" }, { "〇", "一", "十", "" }),
      "〔数字小写〕",
    })
    table.insert(result, {
      number2cnChar(int_part, 1, { "萬", "億" }, { "零", "壹", "拾", "" }),
      "〔数字大写〕",
    })
  end
  -- Amounts: integer part with the default suffix, decimals as 角 / 分 / 厘 / 毫.
  table.insert(result, {
    number2cnChar(int_part, 0) .. decimal_func(
      dec_part,
      { [1] = "角", [2] = "分", [3] = "厘", [4] = "毫" },
      { [0] = "〇", "一", "二", "三", "四", "五", "六", "七", "八", "九" }
    ),
    "〔金额小写〕",
  })
  table.insert(result, {
    number2cnChar(int_part, 1) .. decimal_func(
      dec_part,
      { [1] = "角", [2] = "分", [3] = "厘", [4] = "毫" },
      { [0] = "零", "壹", "贰", "叁", "肆", "伍", "陆", "柒", "捌", "玖" }
    ),
    "〔金额大写〕",
  })
  return result
end
--- Translator entry point for numbers and amounts in Chinese.
-- The trigger prefix is the 2nd character of `recognizer/patterns/number`,
-- cached in `env.number_keyword`. When the pattern is not configured the
-- prefix is '' and the translator stays silent instead of raising.
-- @param input  raw input code, e.g. "R123.45"
-- @param seg    current segment
-- @param env    component environment
local function number_translator(input, seg, env)
  if env.number_keyword == nil then
    local pattern = env.engine.schema.config:get_string('recognizer/patterns/number')
    env.number_keyword = pattern and pattern:sub(2, 2) or ''
  end
  if env.number_keyword == '' or input:sub(1, 1) ~= env.number_keyword then
    return
  end
  -- Drop the leading letters (the prefix) and keep the numeric part.
  local str = string.gsub(input, "^(%a+)", "")
  if #str == 0 then
    return
  end
  local numberPart = number_translatorFunc(str)
  for i = 1, #numberPart do
    yield(Candidate(input, seg.start, seg._end, numberPart[i][1], numberPart[i][2]))
  end
end
-- print(#number_translatorFunc(3355.433))
-- Export the number translator function.
return number_translator

30
lua/092wb_unicode.lua Normal file
View File

@@ -0,0 +1,30 @@
-- Unicode
-- 复制自: https://github.com/shewer/librime-lua-script/blob/main/lua/component/unicode.lua
-- 示例:输入 U62fc 得到「拼」
-- 触发前缀默认为 recognizer/patterns/unicode 的第 2 个字符,即 U
-- 2024.02.26: 限定编码最大值
--- Unicode translator: turn `<prefix><hex>` into the character it encodes.
-- The prefix is the 2nd character of `recognizer/patterns/unicode`, cached in
-- `env.unicode_keyword`; when the pattern is not configured the prefix is ''
-- and nothing is yielded. For code points below 0x10000 the sixteen code
-- points `code * 16 + 0..15` are yielded as well, as a preview of the next
-- hex digit.
-- @param input  raw input code, e.g. "U62fc"
-- @param seg    current segment; must carry the `unicode` tag
-- @param env    component environment
local function unicode(input, seg, env)
  if env.unicode_keyword == nil then
    local pattern = env.engine.schema.config:get_string('recognizer/patterns/unicode')
    env.unicode_keyword = pattern and pattern:sub(2, 2) or ''
  end
  local keyword = env.unicode_keyword
  if keyword == '' or not seg:has_tag("unicode") or input:sub(1, 1) ~= keyword then
    return
  end
  -- Read the hex digits right after the prefix; the prefix itself is never
  -- used as a pattern, so a magic character cannot change the match.
  local ucodestr = input:match("^%x+", 2)
  if not ucodestr or #ucodestr <= 1 then
    return
  end
  local code = tonumber(ucodestr, 16)
  -- More than six significant hex digits cannot be a code point; checking the
  -- length also covers very long input that no longer converts reliably.
  local significant = ucodestr:gsub("^0+", "")
  if #significant > 6 or code > 0x10FFFF then
    yield(Candidate("unicode", seg.start, seg._end, "数值超限!", ""))
    return
  end
  yield(Candidate("unicode", seg.start, seg._end, utf8.char(code), string.format("U%x", code)))
  if code < 0x10000 then
    for i = 0, 15 do
      local text = utf8.char(code * 16 + i)
      yield(Candidate("unicode", seg.start, seg._end, text, string.format("U%x~%x", code, i)))
    end
  end
end
-- Export the Unicode translator function.
return unicode