မဝ်ဂျူ:Unicode data: အရာမတၞဟ်ခြာ အကြာ မူတၞဟ်ဂမၠိုင်

Content deleted Content added
မကလေၚ်ပလီုထောံ95868နူကဵု咽頭べさ (ဓရီုကျာ)မပလေဝ်ဒါန်လဝ်
ဒစင်ဂမၠိုင်: ဟွံပ မကလေၚ်ပလေဝ်ဒါန်လဝ်
No edit summary
ဒစင်ဂမၠိုင်: လက်ဖြင့် နောက်ပြန်ပြင်ခြင်း မကလေၚ်ပလေဝ်ဒါန်လဝ်
လာင် ၁-
local m_str_utils = require("Module:string utilities")
 
local cp = m_str_utils.codepoint
local gcodepoint = m_str_utils.gcodepoint
local gsub = m_str_utils.gsub
local u = m_str_utils.char
 
local export = {}
local udata = mw.loadData("Module:Unicode data/data")
 
local floor = math.floor
Line ၈၈ ⟶ ၉၆:
return ("TANGUT COMPONENT-%03d"):format(codepoint - 0x187FF)
end },
-- Each entry is { first code point, last code point, name format string or function }.
-- Entries must stay in strictly ascending, non-overlapping code point order;
-- the commented-out ordering check after this table reports violations.
{ 0x18B00, 0x18CD5, "KHITAN SMALL SCRIPT CHARACTER-%04X" },
{ 0x18D00, 0x18D08, "TANGUT IDEOGRAPH-%04X" }, -- Tangut Supplement
{ 0x1B170, 0x1B2FB, "NUSHU CHARACTER-%04X" }, -- Nüshu
{ 0x20000, 0x2A6DF, "CJK UNIFIED IDEOGRAPH-%04X" }, -- CJK Ideograph Extension B
{ 0x2A700, 0x2B739, "CJK UNIFIED IDEOGRAPH-%04X" }, -- CJK Ideograph Extension C
{ 0x2B740, 0x2B81D, "CJK UNIFIED IDEOGRAPH-%04X" }, -- CJK Ideograph Extension D
{ 0x2B820, 0x2CEA1, "CJK UNIFIED IDEOGRAPH-%04X" }, -- CJK Ideograph Extension E
{ 0x2CEB0, 0x2EBE0, "CJK UNIFIED IDEOGRAPH-%04X" }, -- CJK Ideograph Extension F
{ 0x2EBF0, 0x2EE5D, "CJK UNIFIED IDEOGRAPH-%04X" }, -- CJK Ideograph Extension I
-- CJK Compatibility Ideographs Supplement (Supplementary Ideographic Plane)
{ 0x2F800, 0x2FA1D, "CJK COMPATIBILITY IDEOGRAPH-%04X" },
{ 0x30000, 0x3134A, "CJK UNIFIED IDEOGRAPH-%04X" }, -- CJK Ideograph Extension G
{ 0x31350, 0x323AF, "CJK UNIFIED IDEOGRAPH-%04X" }, -- CJK Ideograph Extension H
{ 0xE0100, 0xE01EF, function (codepoint) -- Variation Selectors Supplement
return ("VARIATION SELECTOR-%d"):format(codepoint - 0xE0100 + 17)
Line ၁၀၆ ⟶ ၁၁၆:
}
-- Record the number of array entries in a `length` field on the table itself.
name_hooks.length = #name_hooks
--[[ Add another - in this line to test the code point ordering in name_hooks.
local i = 1
local function print_it(a, b, c)
if type(c) == "string" then
mw.log(c:format(a), c:format(b))
else
mw.log(c(a), c(b))
end
end
while true do
local first, second = name_hooks[i], name_hooks[i + 1]
if not (first and second) then break end
local message
if not (first[1] < first[2] and first[2] < second[1] and second[1] < second[2]) then
message = "Bad name label ordering at index " .. i .. ":"
elseif second[1] == first[2] + 1 and second[3] == first[3] then
message = "Name hooks can be merged at index " .. i .. ":"
end
if message then
mw.log(message)
print_it(unpack(first))
print_it(unpack(second))
end
i = i + 1
end
--]]
 
local name_range_cache
Line ၁၇၂ ⟶ ၂၀၈:
end
end
 
-- Plane names keyed by plane number. Numbers without an entry here have no
-- special name.
-- NOTE(review): export.lookup_plane reads udata.planes rather than this local;
-- confirm whether this table is still referenced anywhere.
local planes = {
	[0] = "Basic Multilingual Plane",
	[1] = "Supplementary Multilingual Plane",
	[2] = "Supplementary Ideographic Plane",
	[3] = "Tertiary Ideographic Plane",
	[14] = "Supplementary Special-purpose Plane",
	[15] = "Supplementary Private Use Area-A",
	[16] = "Supplementary Private Use Area-B",
}
 
-- Load [[Module:Unicode data/blocks]] if needed and assign it to this variable.
Line ၂၁၇ ⟶ ၂၄၃:
-- Returns the name of the plane that `codepoint` falls in.
-- The plane number is the code point divided by 0x10000, rounded down. If
-- udata.planes has a name for that number it is returned; otherwise the result
-- is the generic label "Plane N".
function export.lookup_plane(codepoint)
	local plane_number = floor(codepoint / 0x10000)
	local plane_name = udata.planes[plane_number]
	if plane_name then
		return plane_name
	end
	return ("Plane %u"):format(plane_number)
end
 
Line ၂၄၁ ⟶ ၂၆၇:
local has_nonws = false
 
for cpcodepoint in mw.ustring.gcodepoint(pagename) do
if (cpcodepoint == 0x0023) -- #
or (cpcodepoint == 0x005B) -- [
or (cpcodepoint == 0x005D) -- ]
or (cpcodepoint == 0x007B) -- {
or (cpcodepoint == 0x007C) -- |
or (cpcodepoint == 0x007D) -- }
or (cpcodepoint == 0x180E) -- MONGOLIAN VOWEL SEPARATOR
or ((cpcodepoint >= 0x2000) and (cpcodepoint <= 0x200A)) -- spaces in General Punctuation block
or (cpcodepoint == 0xFFFD) -- REPLACEMENT CHARACTER
then
return false
end
 
local printable, result = export.is_printable(cpcodepoint)
if not printable then
return false
Line ၃၆၂ ⟶ ၃၈၈:
 
function export.add_dotted_circle(str)
return (mw.ustring.gsub(str, ".",
function(char)
if export.is_combining(mw.ustring.codepointcp(char)) then
return '◌' .. char
end
Line ၄၀၅ ⟶ ၄၃၁:
end,
"Zzzz")
 
-- Maps a code point to the "Unsupported titles/..." page name used for it.
-- NOTE(review): export.get_entry_title reads udata.unsupported_title rather
-- than this local; confirm whether this table is still referenced anywhere.
local unsupported_title = {
	[0x0020] = "Unsupported titles/Space",
	[0x0023] = "Unsupported titles/Number sign",
	[0x002E] = "Unsupported titles/Full stop",
	[0x003A] = "Unsupported titles/Colon",
	[0x003C] = "Unsupported titles/Less than",
	[0x003E] = "Unsupported titles/Greater than",
	[0x005B] = "Unsupported titles/Left square bracket",
	[0x005D] = "Unsupported titles/Right square bracket",
	[0x005F] = "Unsupported titles/Low line",
	[0x007B] = "Unsupported titles/Left curly bracket",
	[0x007C] = "Unsupported titles/Vertical line",
	[0x007D] = "Unsupported titles/Right curly bracket",
	[0x1680] = "Unsupported titles/Ogham space",
	[0xFFFD] = "Unsupported titles/Replacement character",
}
 
-- Returns the page title to use for the entry on `codepoint`.
--   * If udata.unsupported_title has an entry for the code point, that entry
--     is returned.
--   * Otherwise, if lookup_control(codepoint) is anything other than
--     "assigned", nil is returned.
--   * Otherwise the character for the code point is returned.
function export.get_entry_title(codepoint)
	-- Look the code point up once and reuse the result.
	local special_title = udata.unsupported_title[codepoint]
	if special_title then
		return special_title
	end
	if lookup_control(codepoint) ~= "assigned" then
		return nil
	end
	-- `u` is the file-level alias for m_str_utils.char.
	return u(codepoint)
end
 
ကလေၚ်သီကေတ်လဝ် နူ "https://mnw.wiktionary.org/wiki/မဝ်ဂျူ:Unicode_data"