မဝ်ဂျူ:string/char
Reimplementation of mw.ustring.char
in pure Lua, which is faster up to around 15 arguments, and much faster if given only one argument.
local char = string.char
local concat = table.concat
local tonumber = tonumber
local function err(cp)
error("Codepoint " .. cp .. " is out of range: codepoints must be between 0x0 and 0x10FFFF.", 2)
end
local function utf8_char(cp)
cp = tonumber(cp)
if cp < 0 then
err("-0x" .. ("%X"):format(-cp + 1))
elseif cp < 0x80 then
return char(cp)
elseif cp < 0x800 then
return char(
0xC0 + cp / 0x40,
0x80 + cp % 0x40
)
elseif cp < 0x10000 then
if cp >= 0xD800 and cp < 0xE000 then
return "?" -- mw.ustring.char returns "?" for surrogates.
end
return char(
0xE0 + cp / 0x1000,
0x80 + cp / 0x40 % 0x40,
0x80 + cp % 0x40
)
elseif cp < 0x110000 then
return char(
0xF0 + cp / 0x40000,
0x80 + cp / 0x1000 % 0x40,
0x80 + cp / 0x40 % 0x40,
0x80 + cp % 0x40
)
end
err("0x" .. ("%X"):format(cp))
end
return function(cp, ...)
if ... == nil then
return utf8_char(cp)
end
local ret = {cp, ...}
for i = 1, #ret do
ret[i] = utf8_char(ret[i])
end
return concat(ret)
end