မဝ်ဂျူ:zh-new
Documentation for this module may be created at မဝ်ဂျူ:zh-new/doc
local M = require('Module:zh')
local len = mw.ustring.len
local sub = mw.ustring.sub
local gsub = mw.ustring.gsub
local match = mw.ustring.match
local find = mw.ustring.find
-- Short part-of-speech codes accepted by the template, mapped to the title
-- used for the corresponding section heading. Entry order is kept as-is
-- because the table is also scanned with pairs() for reverse lookups.
local pos_aliases_title = {
	n = "Noun",
	pn = "Proper noun",
	propn = "Proper noun",
	pron = "Pronoun",
	v = "Verb",
	a = "Adjective",
	adj = "Adjective",
	adv = "Adverb",
	prep = "Preposition",
	postp = "Postposition",
	conj = "Conjunction",
	part = "Particle",
	suf = "Suffix",
	prov = "Proverb",
	id = "Idiom",
	ch = "Idiom",
	cy = "Idiom",
	ph = "Phrase",
	intj = "Interjection",
	cl = "Classifier",
	num = "Numeral",
	abb = "Abbreviation",
	deter = "Determiner",
}
-- Short part-of-speech codes mapped to lower-case headword-style names.
-- Within this file the table is only consulted in reverse (name -> code)
-- when normalising user-supplied part-of-speech values.
local pos_aliases_head = {
	n = "noun",
	pn = "proper noun",
	propn = "proper noun",
	v = "verb",
	a = "adj",
	postp = "post",
	conj = "con",
	part = "particle",
	pron = "pronoun",
	prov = "proverb",
	id = "idiom",
	ch = "idiom",
	cy = "idiom",
	ph = "phrase",
	intj = "interj",
	abb = "abbr",
	cl = "cls",
	deter = "det",
}
--- Build a Mandarin transcription string for a Chinese term, one syllable
-- per character, for use as the `m=` value of {{zh-pron}}.
--
-- @param text      the term, or a frame object (then args 1-3 supply
--                  text, comp and pos, with pos defaulting to 'n')
-- @param comp      string of digits giving the character count of each
--                  component (e.g. "22"); a space is inserted after each
--                  component. '', '12' and '21' produce no spaces.
--                  Non-digit characters are ignored.
-- @param pos       part-of-speech code; 'pn'/'propn' capitalise every
--                  space-separated word, 'cy' suppresses the spaces
-- @param p         optional table of per-character readings; a non-empty
--                  p[i] overrides the lookup for character i
-- @param is_erhua  when exactly `true`, the last character of the term is
--                  dropped before transcription
-- @return the transcription string
function M.pytemp(text,comp,pos,p,is_erhua)
	local m_cmn_pron = mw.loadData("Module:zh/data/cmn-pron")
	local wordlist_1, wordlist_2, wordlist_3 = mw.loadData("Module:zh/data/wordlist/1"), mw.loadData("Module:zh/data/wordlist/2"), mw.loadData("Module:zh/data/wordlist/3")
	if not is_erhua then is_erhua = false end
	if type(text) == 'table' then text,comp,pos = text.args[1],text.args[2],text.args[3] or 'n' end
	comp = comp or ''
	local q = {}
	local sum = 0
	-- A whole-word entry in the word lists takes priority over the
	-- per-character table; its value is a space-separated reading.
	local wordlist_result = wordlist_1[text] or wordlist_2[text] or wordlist_3[text]
	local moe_pron = wordlist_result and mw.text.split(wordlist_result, " ") or {}
	-- NOTE(review): M.ts comes from Module:zh; presumably it converts
	-- traditional to simplified characters -- confirm.
	local textconv = M.ts(text)
	local length = len(text)
	if is_erhua == true then
		length = length - 1
		textconv = sub(textconv, 1, length)
	end
	text = ''
	if comp ~= '' and comp ~= '12' and comp ~= '21' and not is_erhua then
		for i = 1, len(comp) do
			-- tonumber() yields nil for a non-digit; skip it rather than
			-- failing on arithmetic with nil when `type=` is mistyped.
			local digit = tonumber(sub(comp,i,i))
			if digit then
				sum = sum + digit
				q[sum] = 'y' -- a component ends after character number `sum`
			end
		end
	end
	if not p then p={} end
	-- Characters in this list are copied to the output unchanged instead of
	-- being transcribed (presumably so that an editor resolves the reading
	-- by hand -- confirm).
	local untranscribed = '一不期績绩蹟跡迹嵌框微突帆藩擊击夾夹鞠拈夕汐矽昔惜息危椰濤涛叔寂馴驯築筑質质播究菌矻識识穴膜餾馏企辱署偽伪蹈諷讽斂敛坊樸朴儲储剖檔档髮轍辙賜赐堤壑酵括懾慑蝸蜗淆攜携崖癌暫暂蟄蛰驟骤液血酪嘌覲幀蕁曳室癬癣亞亚穹褐貯贮淑場场踮鱒跌擁綏胺翕煦伐髮眶湮櫛栉萎閩闽銨铵鑿凿鈸钹謅诌雌綜综摑掴癖'
	for i = 1, length do
		if p[i] and p[i] ~= '' then --pronunciation supplied
			text = text .. p[i]
		else
			local char = sub(textconv,i,i)
			if untranscribed:find(char, 1, true) then
				text = text .. char
			else
				char = moe_pron[i] or m_cmn_pron.py[char] or char
				-- A non-initial syllable starting with a, o or e gets an
				-- apostrophe in front of it.
				if i ~= 1 and find(char,'^[aoeāōēáóéǎǒěàòè]') then
					char = "'" .. char
				end
				text = text .. char
			end
		end
		if q[i] == 'y' and i ~= length and not is_erhua and pos ~= 'cy' then text = text .. ' ' end
	end
	-- No apostrophe is needed directly after a space.
	text = gsub(text," '"," ")
	if pos == 'pn' or pos == 'propn' then
		local characters = mw.text.split(text,' ')
		for i = 1, #characters do
			characters[i] = mw.language.getContentLanguage():ucfirst(characters[i])
		end
		text = table.concat(characters,' ')
	end
	return text
end
--- Erhua variant of M.pytemp: forwards all arguments with the erhua flag
-- set to true and returns whatever M.pytemp returns.
function M.pytemp_er(text, comp, pos, p)
	local erhua_flag = true
	return M.pytemp(text, comp, pos, p, erhua_flag)
end
--- Assemble a {{zh-forms}} template call as wikitext.
--
-- @param title   page title, or a frame object (then args 1 and 2 supply
--                title and comp)
-- @param comp    value for |type=
-- @param e       unnamed parameter text appended right after the template name
-- @param alt, gloss, lit, t2, t3, delink  values for the parameters of the
--                same names
-- @return the template call; nil or empty values are left out
function M.hzbox(title,comp,e,alt,gloss,lit,t2,t3,delink)
	if type(title) == 'table' then
		local frame_args = title.args
		title, comp = frame_args[1], frame_args[2]
	end

	local script = M.ts_determ(title)
	local pieces = { '{{zh-forms' }

	-- Append prefix..value only when value is present and non-empty.
	local function add(prefix, value)
		if value and value ~= "" then
			pieces[#pieces + 1] = prefix .. value
		end
	end

	add('|', e)
	if script == 'trad' then
		-- NOTE(review): M.ts is defined in Module:zh; presumably it yields
		-- the simplified form -- confirm.
		pieces[#pieces + 1] = '|s=' .. M.ts(title)
	end
	add('|t2=', t2)
	add('|t3=', t3)
	add('|type=', comp)
	add('|alt=', alt)
	add('|gloss=', gloss)
	add('|lit=', lit)
	add('|delink=', delink)

	pieces[#pieces + 1] = '}}'
	return table.concat(pieces)
end
--- Assemble a {{zh-hanzi-box}} call for an erhua term.
-- The last character of the title is split off; a title ending in '兒' is
-- treated as traditional, anything else as simplified.
--
-- @param title  page title, or a frame object (then args[1] is the title)
-- @return the template call as wikitext
function M.hzbox_er(title)
	if type(title) == 'table' then title = title.args[1] end

	local n = len(title)
	local final_char = sub(title, n, n)
	local stem = sub(title, 1, n - 1)

	-- NOTE(review): M.ts / M.st come from Module:zh; presumably they convert
	-- traditional->simplified and simplified->traditional -- confirm.
	if final_char == '兒' then
		return '{{zh-hanzi-box|' .. '[[' .. M.ts(stem) .. '儿]]|[[' .. stem .. ']][[兒]]}}'
	end
	return '{{zh-hanzi-box|' .. '[[' .. stem .. ']][[儿]]|[[' .. M.st(stem) .. '兒]]}}'
end
--- Entry point for erhua terms: calls M.create with the erhua flag set to
-- true and returns its result.
function M.create_er(f)
	local erhua_flag = true
	return M.create(f, erhua_flag)
end
--- Append a level-4 semantic-relation section to the entry text.
--
-- @param text  wikitext built so far
-- @param name  section title, e.g. 'Synonyms' or 'Derived terms'
-- @param sem   list of terms; nothing is added when sem[1] is missing or ''
-- @return text with the section appended, or text unchanged
function M.semantics(text,name,sem)
	local first = sem[1]
	if not first or first == '' then
		return text
	end

	local with_heading = text .. '\n\n====' .. name .. '===='

	-- Every section other than derived terms / compounds is a bullet list.
	if name ~= 'Derived terms' and name ~= 'Compounds' then
		for i = 1, #sem do
			with_heading = with_heading .. '\n* {{zh-l|' .. sem[i] .. '}}'
		end
		return with_heading
	end

	-- 'a' or 'a,<options>' asks for an automatically generated list; the
	-- comma-separated options become template parameters.
	if first == 'a' or find(first, '^a,') then
		local options = gsub(gsub(first, '^a', ''), ',', "|")
		local zh_der = mw.getCurrentFrame():preprocess('{{subst:zh-new/der' .. options .. '}}')
		-- An empty generated list means the section is dropped altogether.
		if zh_der == '{{zh-der|}}' then
			return text
		end
		return with_heading .. '\n' .. zh_der
	end

	-- Otherwise list the supplied terms in a single {{zh-der}} call.
	with_heading = with_heading .. '\n{{zh-der'
	for i = 1, #sem do
		with_heading = with_heading .. '|' .. sem[i]
	end
	return with_heading .. '}}'
end
-- Normalise a part-of-speech value to its short code. A value equal to a
-- name in pos_aliases_head, and failing that in pos_aliases_title, is
-- replaced by the code it is listed under; anything else is returned as
-- given. When several codes share a name, which one is returned depends
-- on pairs() iteration order.
local function checkpos(pos)
	for _, alias_table in ipairs({ pos_aliases_head, pos_aliases_title }) do
		for code, name in pairs(alias_table) do
			if name == pos then
				return code
			end
		end
	end
	return pos
end
--- Section-heading title for a part-of-speech code.
-- A missing or empty code counts as 'n'; a code that is not in
-- pos_aliases_title is returned unchanged.
function M.postitle(pos)
	local code = pos
	if not code or code == '' then
		code = 'n'
	end
	local heading = pos_aliases_title[code]
	if heading then
		return heading
	end
	return code
end
--- Lower-cased form of M.postitle(pos), used as the part-of-speech
-- argument of {{head}}.
function M.poshead(pos)
	local heading = M.postitle(pos)
	return mw.ustring.lower(heading)
end
--- Build a {{zh-der}} call listing words that contain the current page's
-- title, drawn from the word-list data modules.
--
-- Parameters are read from the parent frame:
--   positional  terms that are always listed, ahead of the generated ones
--   limit       maximum word length kept for a one-character title (default 4)
--   p           keep only words in which the title has this reading
--               (not applied to the "big" lists)
--   big         use the "big1".."big2" lists instead of lists 1..3
--   fold        adds fold=1 to the output
--   hide_pron   adds hide_pron=1 to the output
--
-- @param frame  frame object of the invocation
-- @return the {{zh-der|...}} call as wikitext
function M.newDer(frame)
	local title = mw.title.getCurrentTitle().subpageText
	local prefix = "Module:zh/data/wordlist/"
	local args = frame:getParent().args
	local limit = args["limit"] and tonumber(args["limit"]) or false
	local char_pronunciation = args["p"] or false
	local fold = args["fold"] or false
	local hide_pron = args["hide_pron"] or false
	local big = args["big"] or false
	local max_length = limit or 4
	local single_char_title = len(title) == 1

	local result = {}
	for _, arg in ipairs(args) do
		table.insert(result, arg)
	end

	-- Character position of the title inside `word`, or nil when the word
	-- does not qualify. The search is a plain-text one: the title is page
	-- text, not a Lua pattern, so characters such as '-', '.' or '%' in it
	-- must be matched literally.
	local function title_position(word)
		if word == title then
			return nil
		end
		local position = find(word, title, 1, true)
		if not position then
			return nil
		end
		if single_char_title and len(word) > max_length then
			return nil
		end
		return position
	end

	if big then
		for i = 1, 2 do
			local wordlist = require(prefix .. 'big' .. tostring(i)).list
			for _, word in ipairs(wordlist) do
				if title_position(word) then
					table.insert(result, word)
				end
			end
		end
	else
		for i = 1, 3 do
			local wordlist = require(prefix .. tostring(i))
			for word, pronunciation in pairs(wordlist) do
				local position = title_position(word)
				if position then
					if char_pronunciation then
						-- The reading is space-separated, one syllable per
						-- character, so the syllable at `position` is the
						-- reading of the title's first character.
						if mw.text.split(pronunciation, " ")[position] == char_pronunciation then
							table.insert(result, word)
						end
					else
						table.insert(result, word)
					end
				end
			end
		end
	end

	-- Drop duplicates, comparing only the part of each item before ':'.
	local hash, res = {}, {}
	for _, element in ipairs(result) do
		local section = mw.text.split(element, ":")[1]
		if not hash[section] then
			res[#res + 1] = element
			hash[section] = true
		end
	end
	return "{{zh-der|" .. (hide_pron and "hide_pron=1|" or "") .. (fold and "fold=1|" or "") .. table.concat(res, "|") .. "}}"
end
--- Check a Cantonese reading against the single-character entries.
-- For each comma-separated reading in `c`, every character of `title` that
-- has an existing page is looked up, and the syllable at the same position
-- is compared with the `|c=` values found in that page's text.
--
-- @param title  page title; titles of one character are not checked
-- @param c      Cantonese reading(s): syllables separated by spaces,
--               alternative readings separated by commas
-- @return a string of {{attention|yue|...}} calls followed by a blank
--         line, or '' when nothing was flagged
function M.check_yue(title, c)
	local ret = {}
	if mw.ustring.len(title) > 1 then -- do not do anything on hanzi pages. cf. 宑&diff=49855439
		c = gsub(c, ", ", " ")
		c = gsub(c, " *%.%.%. *", " ")
		-- Ellipses and commas in the title have no syllable of their own.
		local title_chars = gsub(title, "[…,]", "")
		for phrase in mw.text.gsplit(c, ",") do
			local c_set = mw.text.split(phrase, " ")
			local i = 0 -- local: a bare assignment here would create a global
			for ch in mw.text.gsplit(title_chars, "") do
				i = i + 1
				local syllable = c_set[i]
				-- Nothing to compare when the reading has fewer syllables
				-- than the title has characters. mw.title.new returns nil
				-- for an invalid title, and is called only once per character.
				local page = syllable and mw.title.new(ch)
				if page and page.exists then
					local content = page:getContent()
					if content then
						local prons = {}
						for template in mw.ustring.gmatch(content, "|c=([^};|\n]+)") do
							for indiv_pron in mw.text.gsplit(template, ",") do
								prons[indiv_pron] = true
							end
						end
						if not prons[syllable] then
							table.insert(ret, "{{attention|yue|Cantonese pronunciation '" .. syllable .. "' not found in the entry " .. ch .. ".}}")
						end
					end
				end
			end
		end
	end
	return #ret > 0 and (table.concat(ret, '\n') .. '\n\n') or ''
end
--- Generate the wikitext of a new Chinese entry for the current page.
-- Template parameters are read from the parent frame and validated with
-- Module:parameters. Positional parameters come in pairs (part of speech,
-- definition); `pos`/`def` lists may be used instead.
--
-- @param f         frame object of the invocation
-- @param is_erhua  truthy to generate an erhua-form entry
-- @return the wikitext of the entry
function M.create(f,is_erhua)
	if not is_erhua then is_erhua = false end
	local title = mw.title.getCurrentTitle().text
	local params = {
		["type"] = {}, ["comp"] = {alias_of = "type"},
		[1] = {list = true, allow_holes = true},
		["pos"] = {list = true, allow_holes = true},
		["def"] = {list = true, allow_holes = true},
		["e1"] = {list = "e", allow_holes=true}, ["etym1"] = {list = "etym", allow_holes=true}, ["etymology1"] = {list = "etymology", allow_holes=true}, ["meaning"] = {list = true, allow_holes=true},
		["k"] = {}, ["ko"] = {alias_of = "k"}, ["korean"] = {alias_of = "k"},
		["kt"] = {}, ["tr"] = {alias_of = "kt"}, ["transcription"] = {alias_of = "kt"}, ["ktr"] = {alias_of = "kt"}, ["kotr"] = {alias_of = "kt"}, ["koreantr"] = {alias_of = "kt"}, ["ktrans"] = {alias_of = "kt"}, ["kotrans"] = {alias_of = "kt"},
		["ke"] = {}, ["kodef"] = {alias_of = "ke"}, ["kodefinition"] = {alias_of = "ke"}, ["koreandef"] = {alias_of = "ke"},
		["v"] = {}, ["vi"] = {alias_of = "v"}, ["vietnam"] = {alias_of = "v"},
		["ve"] = {}, ["videf"] = {alias_of = "ve"}, ["videfinition"] = {alias_of = "ve"}, ["vietnamdef"] = {alias_of = "ve"}, ["vietnamdefinition"] = {alias_of = "ve"},
		["p"] = {list = true, allow_holes=true}, ["pron"] = {list = true, allow_holes=true}, ["pronunciation"] = {list = true, allow_holes=true},
		["go"] = {},
		["e"] = {}, ["etym"] = {alias_of = "e"}, ["etymology"] = {alias_of = "e"}, ["origin"] = {alias_of = "e"}, ["ori"] = {alias_of = "e"}, ["o"] = {alias_of = "e"},
		["syn"] = {list = true}, ["synonym"] = {list = true},
		["ant"] = {list = true}, ["antonym"] = {list = true},
		["hyper"] = {list = true}, ["hypernym"] = {list = true},
		["hypo"] = {list = true}, ["hyponym"] = {list = true},
		["coo"] = {list = true}, ["coord"] = {list = true}, ["coordinate"] = {list = true},
		["der"] = {list = true}, ["deriv"] = {list = true}, ["derived"] = {list = true}, ["derivedterm"] = {list = true},
		["rel"] = {list = true}, ["related"] = {list = true},
		["also"] = {list = true}, ["see"] = {list = true}, ["seealso"] = {list = true}, ["alsosee"] = {list = true},
		["wp"] = {}, ["wiki"] = {alias_of = "wp"}, ["wikipedia"] = {alias_of = "wp"},
		["cat"] = {list = true}, ["category"] = {list = true}, ["categories"] = {list = true}, ["categorize"] = {list = true}, ["categorise"] = {list = true}, ["categorization"] = {list = true}, ["categorisation"] = {list = true},
		["pic"] = {}, ["file"] = {alias_of = "pic"}, ["image"] = {alias_of = "pic"}, ["picture"] = {alias_of = "pic"},
		["piccap"] = {}, ["caption"] = {alias_of = "piccap"}, ["description"] = {alias_of = "piccap"}, ["desc"] = {alias_of = "piccap"},
		["er"] = {}, ["erhua"] = {alias_of = "er"},
		["tl"] = {}, ["toneless"] = {alias_of = "tl"}, ["tonelessvariant"] = {alias_of = "tl"}, ["variant"] = {alias_of = "tl"}, ["variation"] = {alias_of = "tl"}, ["tonelessvariation"] = {alias_of = "tl"},
		["a"] = {}, ["audio"] = {alias_of = "a"}, ["listen"] = {alias_of = "a"}, ["sound"] = {alias_of = "a"}, ["pronounced"] = {alias_of = "a"},
		["alt"] = {}, ["alter"] = {alias_of = "alt"}, ["altern"] = {alias_of = "alt"}, ["alternate"] = {alias_of = "alt"}, ["alternative"] = {alias_of = "alt"},
		["c"] = {}, ["cant"] = {alias_of = "c"}, ["cantonese"] = {alias_of = "c"},
		["mn"] = {}, ["nan"] = {alias_of = "mn"}, ["minnan"] = {alias_of = "mn"},
		["w"] = {}, ["wu"] = {alias_of = "w"}, ["shanghai"] = {alias_of = "w"},
		["m"] = {},
		["m-s"] = {},
		["dg"] = {},
		["c-t"] = {},
		["g"] = {},
		["h"] = {},
		["j"] = {},
		["mb"] = {},
		["md"] = {},
		["mn-t"] = {},
		["x"] = {},
		["mc"] = {},
		["oc"] = {},
		["ts"] = {},
		["gloss"] = {},
		["lit"] = {},
		["t2"] = {},
		["t3"] = {},
		["delink"] = {},
	}
	local args = require("Module:parameters").process(f:getParent().args, params)
	local comp = args["type"] or ""

	-- Positional parameters alternate part of speech / definition. The pair
	-- count is rounded up so that a trailing part of speech with no
	-- definition after it is not dropped.
	local pos = {}
	local def = {}
	for i=1,math.max(math.ceil(args[1].maxindex/2), args["pos"].maxindex, args["def"].maxindex) do
		table.insert(pos, args[1][2*i-1] or args["pos"][i] or "")
		table.insert(def, args[1][2*i] or args["def"][i] or "")
	end

	-- Length of a list parameter; lists that allow holes carry `maxindex`.
	local function list_length(array)
		return array.maxindex or #array
	end

	-- Merge several alias lists into one: position i takes the value from
	-- the first list that has one, or "" when none does.
	local function expand(arg)
		local result = {}
		local maximum = 0
		for i=1,#arg do if list_length(arg[i]) > maximum then maximum = list_length(arg[i]) end end
		local current = nil
		for i=1,maximum do
			current = nil
			for j=1,#arg do
				if current then
					break
				else
					current = arg[j][i]
				end
			end
			current = current or ""
			table.insert(result, current)
		end
		return result
	end

	-- Concatenate several lists into one.
	local function fetch_all(arg)
		local result = {}
		for i=1,#arg do
			for j=1,#arg[i] do
				table.insert(result,arg[i][j])
			end
		end
		return result
	end

	local etyms = expand({args["e1"], args["etym1"], args["etymology1"], args["meaning"]})
	local ko = args["k"] or ""
	-- args["kt"] (Korean transcription) is accepted but not used.
	local kodef = args["ke"] or def[1] or ""
	local vi = args["v"] or ""
	local videf = args["ve"] or def[1] or ""
	local p = expand({args["p"], args["pron"], args["pronunciation"]})
	local glyph_origin = args["go"] or ""
	local etym = args["e"] or ""
	local syn = expand({args["syn"], args["synonym"]})
	local ant = expand({args["ant"], args["antonym"]})
	-- The declared aliases are "hypernym" and "hyponym" (singular).
	local hyper = expand({args["hyper"], args["hypernym"]})
	local hypo = expand({args["hypo"], args["hyponym"]})
	local coo = expand({args["coord"], args["coo"], args["coordinate"]})
	local der = expand({args["der"], args["deriv"], args["derived"], args["derivedterm"]})
	local rel = expand({args["rel"], args["related"]})
	local also = expand({args["also"], args["see"], args["alsosee"], args["seealso"]})
	local wp = args["wp"] or ""
	local cat = fetch_all({args["cat"], args["category"], args["categories"], args["categorize"], args["categorise"], args["categorization"], args["categorisation"]})
	local pic = args["pic"] or ""
	local piccap = args["piccap"] or ""
	local er = args["er"] or ""
	local tl = args["tl"] or ""
	local audio = args["a"] or ""
	local alt = args["alt"] or ""
	local m = args["m"] or ""
	local m_s = args["m-s"] or ""
	local dg = args["dg"] or ""
	local c = args["c"] or ""
	local c_t = args["c-t"] or ""
	local g = args["g"] or ""
	local h = args["h"] or ""
	local j = args["j"] or ""
	local mb = args["mb"] or ""
	local md = args["md"] or ""
	local mn = args["mn"] or ""
	local mn_t = args["mn-t"] or ""
	local w = args["w"] or ""
	local x = args["x"] or ""
	local mc = args["mc"] or ""
	local oc = args["oc"] or ""
	local ts = args["ts"] or ""
	local gloss = args["gloss"] or ""
	local lit = args["lit"] or ""
	local t2 = args["t2"] or ""
	local t3 = args["t3"] or ""
	local delink = args["delink"] or ""

	local text = ''
	if not pos[1] or pos[1] == '' then pos[1] = 'n' end
	for i=1,#pos do pos[i] = checkpos(pos[i]) end
	text = (text .. '==Chinese==\n')

	-- A title classified as 'simp' only gets a pointer to the other form,
	-- unless ts=trad overrides the classification.
	if M.ts_determ(title) == 'simp' and ts ~= "trad" then
		return text .. '{{zh-see|' .. M.st(title) .. '}}'
	end

	local title_length = len(title)
	local single_char = title_length == 1

	text = text .. (is_erhua and M.hzbox_er(title) or M.hzbox(title,comp,table.concat(etyms,'|'),alt,gloss,lit,t2,t3,delink))
	if wp ~= '' then text = (text .. '\n{{zh-wp' .. (wp ~= 'y' and '|' .. wp or '') .. '}}') end
	if pic ~= '' then
		text = (text .. '\n[[File:' .. pic .. '|thumb')
		if piccap ~= '' then text = (text .. '|' .. piccap) end
		text = (text .. ']]')
	end

	-- Both branches start with a blank line so that the first heading
	-- begins on a line of its own.
	text = (text .. '\n\n')
	if is_erhua then
		text = (text .. '===Pronunciation===\n{{zh-pron\n|m=' .. M.pytemp_er(title,comp,pos[1],p) .. 'r\n|cat=' .. table.concat(pos,',') .. '\n}}\n\n')
	else
		if glyph_origin ~= '' then text = (text .. '===Glyph origin===\n' .. glyph_origin .. '\n\n') end
		if etym ~= '' then text = (text .. '===Etymology===\n' .. etym .. '\n\n') end
		text = (text .. '===Pronunciation===\n{{zh-pron')
		-- m=- suppresses the Mandarin line; an explicit m= is used as given;
		-- otherwise the reading is generated.
		if m ~= '-' then
			if m ~= '' then
				text = (text .. '\n|m=' .. m)
			else
				local m_pron = gsub(M.pytemp(title,comp,pos[1],p), ',', ', ')
				text = (text .. '\n|m=' .. m_pron)
				if er ~= '' then text = (text .. ',er=' .. er) end
				if tl ~= '' then text = (text .. ',tl=y') end
			end
		end
		-- Single-character pages get every parameter line, filled or not.
		if single_char or m_s ~= '' then text = (text .. '\n|m-s=' .. m_s) end
		if single_char or dg ~= '' then text = (text .. '\n|dg=' .. dg) end
		if c == '' then c = M.check_pron(title, 'yue', title_length) or '' end
		if single_char or (c ~= '' and c ~= '-') then text = (text .. '\n|c=' .. c) end
		if single_char or c_t ~= '' then text = (text .. '\n|c-t=' .. c_t) end
		if single_char or g ~= '' then text = (text .. '\n|g=' .. g) end
		if h == '' then h = M.check_pron(title, 'hak') or '' end
		-- The "pfs=" prefix is added only when there is a value to prefix.
		if single_char or (h ~= '' and h ~= '-') then text = (text .. '\n|h=' .. (h ~= '' and ("pfs=" .. h) or "")) end
		if single_char or j ~= '' then text = (text .. '\n|j=' .. j) end
		if single_char or mb ~= '' then text = (text .. '\n|mb=' .. mb) end
		if single_char or md ~= '' then text = (text .. '\n|md=' .. md) end
		if mn == '' then mn = M.check_pron(title, 'nan', nil, true) or '' end
		if single_char or (mn ~= '' and mn ~= '-') then text = (text .. '\n|mn=' .. mn) end
		if single_char or mn_t ~= '' then text = (text .. '\n|mn-t=' .. mn_t) end
		if single_char or w ~= '' then text = (text .. '\n|w=' .. w) end
		if single_char or x ~= '' then text = (text .. '\n|x=' .. x) end
		if audio ~= '' then text = (text .. '\n|ma=' .. audio) end
		if single_char or mc ~= '' then text = (text .. '\n|mc=' .. (mc ~= "" and mc or 'y')) end
		if single_char or oc ~= '' then text = (text .. '\n|oc=' .. (oc ~= "" and oc or 'y')) end
		text = (text .. '\n|cat=' .. table.concat(require('Module:table').removeDuplicates(pos),',') .. '\n}}\n\n')
		-- The Cantonese check reads other pages, so it runs only when there
		-- is a reading to check. It is best-effort: an error is ignored.
		if c ~= '' and c ~= '-' then
			local pcall_success, yue_check = pcall(M.check_yue, title, c)
			if pcall_success then text = text .. yue_check end
		end
	end

	text = (text .. '===' .. (single_char and "Definitions" or M.postitle(pos[1])) .. '===\n')
	text = (text .. '{{head|zh|' .. (single_char and "Han character" or M.poshead(pos[1])) .. '}}\n\n')
	if is_erhua then
		text = text .. '# {{lb|zh|Mandarin}} {{erhua form'
		if def[1] and def[1] ~= '' then text = text .. '|' .. def[1] end
		text = text .. '}}'
	else
		text = (text .. '# ' .. ((def[1] and def[1] ~= "") and def[1] or "{{rfdef|zh}}"))
	end

	if syn[1] then
		-- A value starting with "dial" names a dialectal-synonyms template.
		if match(syn[1], "^dial") then
			text = text .. "\n\n====Synonyms====\n{{zh-" .. syn[1] .. "}}"
		else
			text = M.semantics(text,'Synonyms',syn)
		end
	end
	text = M.semantics(text,'Antonyms',ant)
	text = M.semantics(text,'Hypernyms',hyper)
	text = M.semantics(text,'Hyponyms',hypo)
	text = M.semantics(text,'Coordinate terms',coo)
	text = M.semantics(text,single_char and 'Compounds' or 'Derived terms',der)
	text = M.semantics(text,'Related terms',rel)

	-- Further parts of speech each get their own section.
	for i=2,#pos do
		text = text .. '\n\n===' .. M.postitle(pos[i]) .. '===\n'
		text = text .. '{{head|zh|' .. M.poshead(pos[i]) .. '}}\n\n'
		if is_erhua then
			text = text .. '# {{lb|zh|Mandarin}} {{erhua form'
			if def[i] ~= '' then text = text .. '|' .. def[i] end
			text = text .. '}}'
		else
			text = text .. '# ' .. def[i]
		end
	end

	if #also > 0 then
		text = (text .. '\n\n====See also====')
		for i=1,#also do
			text = (text .. '\n* {{zh-l|' .. also[i] .. '}}')
		end
	end
	if #cat > 0 then text = (text .. '\n\n{{zh-cat|' .. table.concat(cat,'|') .. '}}') end
	if ko ~= '' then
		text = text .. '\n\n----\n\n==Korean==\n{{ko-hanjatab}}\n\n===Noun===\n{{ko-noun|hj|hangeul=' .. ko .. '}}\n\n# {{hanja form of|' .. ko .. '|' .. kodef .. '}}'
	end
	if vi ~= '' then
		text = text .. '\n\n----\n\n==Vietnamese==\n{{vi-hantutab}}\n\n===' .. M.postitle(pos[1]) .. '===\n{{vi-hantu}}\n\n# {{han tu form of|' .. vi .. '|' .. videf .. '}}'
	end
	return text
end
-- Export the table obtained from Module:zh, now carrying the functions
-- defined in this file as well.
return M