-- မဝ်ဂျူ:zh-usex
-- Documentation for this module may be created at မဝ်ဂျူ:zh-usex/doc
-- Chinese helper module; this file calls its ts_determ, ts, py and check_pron
-- functions.
local m_zh = require("Module:zh")
-- Local aliases for the Unicode-aware string functions and the text splitter.
local find = mw.ustring.find
local gsub = mw.ustring.gsub
local match = mw.ustring.match
local sub = mw.ustring.sub
local split = mw.text.split
-- Use this when the actual title needs to be known.
local actual_title = mw.title.getCurrentTitle()
-- Use this when testcases need to be able to override the title (for bolding,
-- for instance).
local title = actual_title
-- Page name compared against example words for automatic bolding; export.show
-- overwrites it when a `pagename` argument is supplied.
local PAGENAME = PAGENAME or title.text
-- Table returned to callers of this module.
local export = {}
-- Read-only lookup tables loaded from the data subpage.
local data = mw.loadData("Module:zh-usex/data")
-- Keyed by variety name; export.show reads entry [1] as the display label,
-- [2] as the variety code and [3] as the romanisation label.
local variety_list = data.variety_list
-- Keyed by a punctuation token; the value is used as that token's
-- transliteration, and presence in the table suppresses linking/bolding.
local punctuation = data.punctuation
-- Keyed by reference name; export.show reads entry [1] as the default variety
-- and [2] as the text shown after "From:".
local ref_list = data.ref_list
-- Per variety code: replacement table applied character by character to a word
-- before it is transliterated.
local pron_correction = data.pron_correction
-- Per variety code: pattern -> replacement pairs applied to a whole word before
-- the per-character corrections.
local polysyllable_pron_correction = data.polysyllable_pron_correction
-- Opening/closing HTML wrappers for simplified and traditional text.
local zh_format_start_simp = "<span lang=\"zh-Hans\" class=\"Hans\">"
local zh_format_start_trad = "<span lang=\"zh-Hant\" class=\"Hant\">"
local zh_format_end = "</span>"
-- Character set used to recognise a Han character next to a hyphen.
-- NOTE(review): the last range ("𠀀-") has no visible upper bound, so as
-- written the trailing "-" is a literal hyphen in the set; confirm against the
-- upstream module whether a supplementary-plane character was lost here.
local Han_pattern = '[一-鿌㐀-䶵𠀀-]'
-- Byte-level pattern for one UTF-8 encoded character: a lead byte followed by
-- any number of continuation bytes (used with the plain string library).
local UTF8_char = '[%z\1-\127\194-\244][\128-\191]*'
-- Matches a balanced <...> span, i.e. one HTML tag.
local tag = "%b<>"
--- Turn a word (or hyphen-delimited segment) into a wikilink that targets the
-- "Chinese" section of its entry.
-- @param word  string; may contain <b>/<br> tags or a ready-made "target|text" pair
-- @return the wikilinked string
local function make_link(word)
	local original = word
	local linked = "[[" .. word .. "]]"

	-- A word that is bolded as a whole gets its <b> tags hoisted outside the
	-- square brackets.
	linked = linked:gsub("(%[%[)(</?b>)(.-)(</?b>)(%]%])", "%2%1%3%5%4")

	-- A leading <br> tag is likewise moved in front of the link.
	linked = linked:gsub("(%[%[)(<br ?/?>)", "%2%1")

	-- Links without an explicit pipe are pointed at the Chinese section. Any
	-- remaining bold tags are dropped from the target but kept in the label.
	linked = linked:gsub("%[%[([^|]-)%]%]", function (inner)
		local target = inner:gsub("</?b>", "")
		return "[[" .. target .. "#Chinese|" .. inner .. "]]"
	end)

	-- Trace the conversion when running from the Module namespace.
	if actual_title.nsText == "Module" then
		mw.log(original, "->", linked)
	end

	return linked
end
--- Template entry point: render one Chinese usage example.
--
-- Arguments are read from the parent frame:
--   1              the example text (required)
--   2              English translation (a request notice is shown if absent)
--   3              variety key into data.variety_list; defaults to the variety
--                  attached to `ref`, else "MSC"
--   lit            literal translation (only shown in the short inline layout)
--   tr             manual transliteration (disables automatic transliteration)
--   ref / r        reference key or free text shown after "From:"
--   display_type / type   "ruby" selects the ruby layout
--   inline         any non-empty value forces the block (<dl>) layout
--   audio / a      file name of an audio recording
--   collapsed      wrap the secondary lines in a vsSwitcher toggle
--   link / l       link each word (default true)
--   pagename       title override, accepted in the Module namespace only
--   nocat          suppress categorisation
--   tr_nocap       suppress automatic capitalisation of the transliteration
--
-- @param frame  Scribunto frame of the #invoke call
-- @return wikitext/HTML string for the formatted example
function export.show(frame)
	local params = {
		[1] = { required = true }, -- example
		[2] = {}, -- translation
		[3] = {}, -- variety
		lit = {},
		tr = {},
		ref = {}, r = { alias_of = "ref" },
		display_type = {}, type = { alias_of = "display_type" },
		inline = {},
		audio = {}, a = { alias_of = "audio" },
		collapsed = { type = "boolean" },
		link = { type = "boolean", default = true }, l = { alias_of = "link" },
		-- Allow specifying pagename in testcases on documentation page.
		pagename = actual_title.nsText == "Module" and {} or nil,
		nocat = { type = "boolean" },
		tr_nocap = { type = "boolean" },
	}
	local args, unrecognized_args = require("Module:parameters").process(frame:getParent().args, params, true)
	if args.pagename then
		-- Override title in Module namespace.
		title = mw.title.new(args.pagename)
		PAGENAME = title.text
	end
	local example = args[1] or error("Example unspecified.")
	local translation = args[2]
	local literal = args["lit"]
	local reference = args["ref"]
	local manual_tr = args["tr"]
	local display = args["display_type"]
	local inline = args["inline"]
	local audio_file = args["audio"]
	local collapsed = args["collapsed"]
	local phonetic = ""
	-- Number of Han characters in the raw example; decides the layout below.
	local original_length = mw.ustring.len(gsub(example, "[^一-龯㐀-䶵]", ""))
	local variety = args[3] or (ref_list[reference] and ref_list[reference][1] or false) or "MSC"
	local variety_data = variety_list[variety] or error("variety " .. variety .. " not recognized.")
	-- variety_data is guaranteed non-nil here (the line above raises otherwise).
	local variety_code = variety_data[2]
	local link = args["link"]
	-- link = match(link, "n") == nil -- and not (not match(example, " ") and match(example, "[,。?!﹑]"))
	if next(unrecognized_args) then
		--[[Special:WhatLinksHere/Template:tracking/zh-usex/unrecognized arg]]
		require("Module:debug").track_unrecognized_args(unrecognized_args, "zh-usex")
	end
	if not translation or translation == '' then -- per standard [[Module:usex]]
		translation = '<small>(please add an English translation of this example)</small> [[Category:Requests for translations of Chinese usage examples]]'
	end
	-- Local (not global) so that the flag cannot leak from one call to the next.
	local boldify = not match(example, "'''")
	-- automatically boldify pagetitle if nothing is in bold
	if boldify and not punctuation[PAGENAME] then
		-- The page name is matched literally: escape pattern magic characters and
		-- use a function replacement so "%" in a title is not read as a capture.
		local pagename_pattern = gsub(PAGENAME, "([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%%1")
		example = gsub(example, pagename_pattern, function(hit) return "'''" .. hit .. "'''" end)
		example = gsub(example, "''''''", "")
	end
	-- tidying up the example, making it ready for transcription
	-- NOTE(review): as written this set contains ASCII ".", "|" and space, which
	-- conflicts with the later handling of "." inside a word; it looks like
	-- full-width punctuation was folded to ASCII -- confirm against upstream.
	example = gsub(example, "([?!,。、“”…;:‘’|()「」『』—《》〈〉· .~])", " %1 ")
	example = gsub(example, " — — ", " —— ") -- double em-dash (to be converted to single em-dash later)
	example = gsub(example, "^ *", "")
	example = gsub(example, " *$", "")
	example = gsub(example, " +", " ")
	example = gsub(example, "%'%'%'([^%']+)%'%'%'", "<b>%1</b>")
	-- Pull a trailing "[simp]" annotation inside the preceding bold span.
	example = gsub(example, "(.)</b>%[([^%[%]]+)%]", function(first, second)
		return "<b>"..first.."</b>" ~= second and first.."["..second.."]</b>" or first.."["..first.."]</b>" end)
	-- Likewise pull a trailing "{reading}" annotation inside the bold span.
	example = gsub(example, "</b>({[^{}]+})", "%1</b>")
	local ruby_start, ruby_mid, ruby_end = "<big><ruby><span class=\"Hani\" style=\"display: inline-flex; flex-direction: column;\">", "</span><rp> (</rp><rt><big>", "</big></rt><rp>)</rp></ruby></big>"
	local ruby_words = {}
	local trad_words, simp_words, tr_words = {}, {}, {}
	-- A separate simplified line is produced when the text is detected as
	-- traditional, or carries explicit "[x]" overrides that are not all identity
	-- overrides; never for variety code "vi".
	local simp_exist = (m_zh.ts_determ(gsub(example, "(.)%[%1%]", "")) == "trad" or (match(example, "%[[^%[%]]+%]") and not match(example, "(.)%[%1%]"))) and variety_code ~= "vi"
	-- Set once the first automatically transliterated word is seen and, as in
	-- the original code, never reset for later words within the same call.
	-- NOTE(review): the name suggests a per-word flag; confirm intended scope.
	local real_word = false
	for word in mw.text.gsplit(example, " ", true) do
		if gsub(gsub(word, "%{[^%}]+%}", ""), "%.", "") == PAGENAME and boldify then
			word = "<b>" .. word .. "</b>"
		end
		local trad_word, simp_word, tr_word, ruby_word = word, false, false, ""
		-- various tricks for linking and display in trad. and simp.
		trad_word = gsub(trad_word, "(.)%[(.)%]", "%1")
		trad_word = gsub(trad_word, "{[^{}]*}", "")
		trad_word = gsub(trad_word, "[%^%.]", "")
		trad_word = gsub(trad_word, "\\", "|")
		-- Full-width full stop (U+FF0E) stands for a literal period to display.
		-- (A bare "." pattern here would replace every character of the word.)
		trad_word = gsub(trad_word, ".", ".")
		if simp_exist then
			simp_word = match(word, "%[") and gsub(gsub(word .. "終[终]", "([^%[%]]*).%[(.)%]", function(a, b) return m_zh.ts(a) .. b end), "终$", "") or m_zh.ts(word)
			simp_word = gsub(simp_word, "{[^{}]*}", "")
			simp_word = gsub(simp_word, "[%^%.]", "")
			simp_word = gsub(simp_word, "\\", "|")
			simp_word = gsub(simp_word, ".", ".")
		end
		-- produce links
		local trad_segments, simp_segments
		-- Replace "-" with "🈹" between Han characters (optionally with a HTML
		-- tag between the Han character and the hyphen). This would be simpler with
		-- regex or LPeg.
		local function replace_hyphen(word)
			local function helper(pos1, before, pos2, after)
				if (before == ">"
					and find(word:sub(1, pos1), Han_pattern .. tag .. "$")
					or find(before, Han_pattern))
					and (after == "<"
					and find(word:sub(pos2), "^" .. tag .. Han_pattern)
					or after == "@"
					or find(after, Han_pattern)) then
					return before .. "🈹" .. after
				end
			end
			if variety_code == "cmn" then word = word:gsub("%-%-(%-?)", "%1") end
			for i = 1, 2 do
				word = word:gsub("()(" .. UTF8_char .. ")%-()(" .. UTF8_char .. ")", helper) -- odd and even positions
			end
			return word
		end
		trad_segments = split(replace_hyphen(trad_word), '🈹')
		if simp_exist then
			simp_segments = split(replace_hyphen(simp_word), '🈹')
			if #trad_segments ~= #simp_segments then error('trad-to-simp conversion changed the number of hyphens') end
		end
		for i, trad_segment in ipairs(trad_segments) do
			local contain_pagename = (gsub(gsub(gsub(trad_segment, "</?b>", ""), "%^", ""), "-", "") == PAGENAME) and not punctuation[PAGENAME]
			-- An explicit pipe always links; otherwise link unless linking is
			-- off, the segment is marked "@", or it is punctuation / the page
			-- name itself.
			if match(trad_segment, "|") or (link and not match(trad_segment, "@") and not punctuation[word] and not contain_pagename) then
				trad_segments[i] = make_link(trad_segment)
				if simp_exist then
					simp_segments[i] = make_link(simp_segments[i])
				end
			end
		end
		trad_word = table.concat(trad_segments)
		simp_word = simp_exist and table.concat(simp_segments)
		trad_word = gsub(trad_word, "@", "")
		simp_word = simp_exist and gsub(simp_word, "@", "")
		-- same tricks applied to transcription
		if not manual_tr and (variety_code == "cmn" or variety_code == "yue" or variety_code == "nan" or variety == "H") then
			if punctuation[word] then
				tr_word = punctuation[word]
			else
				real_word = true
				local hyphen = variety_code == "nan" or variety_code == "hak"
				tr_word = gsub(word, "@", "")
				tr_word = gsub(tr_word, "%.", " ")
				tr_word = gsub(tr_word, ".+\\", "")
				tr_word = gsub(tr_word, "%[[^%[%]]+%]", "")
				tr_word = gsub(tr_word, ".</b>(%{[^%}]+%})", "%1</b>")
				-- "x{reading}" supplies the reading for character x.
				tr_word = gsub(tr_word, "(.){([^{}]*)}",function(a, b)
					if hyphen and not mw.ustring.find(a, "[a-zA-Z]") then
						return "-" .. b .. "-"
					else
						return b
					end
				end)
				for key,val in pairs(polysyllable_pron_correction[variety_code]) do
					tr_word = gsub(tr_word, key, val)
				end
				tr_word = gsub(tr_word, ".", pron_correction[variety_code])
				if variety_code == "cmn" then
					tr_word = gsub(tr_word, "%-+", function(s) return mw.ustring.len(s)==1 and '' or '-' end)
					tr_word = gsub(tr_word, "[^%-]+", m_zh.py)
				elseif variety_code == "yue" then
					local m_yue_pron = mw.loadData("Module:zh/data/yue-pron")
					tr_word = gsub(tr_word, ".", m_yue_pron.jyutping)
					tr_word = gsub(tr_word, "([a-z])([1-9])(-?)([1-9]?)", "%1%2%3%4 ")
				elseif hyphen then
					-- NOTE(review): as written the range " -〿" spans U+0020
					-- to U+303F and so also covers Latin letters; confirm that
					-- the lower bound is meant to be an ASCII space.
					tr_word = gsub(tr_word, "[一-鿌㐀-䶵 -〿𠀀-]+", function(text)
						if m_zh.check_pron(text, variety_code, 1) then
							return gsub(m_zh.check_pron(text, variety_code, 1), "/.+$", "")
						else
							-- No reading for the whole run: fall back to
							-- character-by-character lookup joined by hyphens.
							text = gsub(text, ".", function(ch)
								if m_zh.check_pron(ch, variety_code, 1) then
									return gsub(m_zh.check_pron(ch, variety_code, 1), "/.+$", "") .. "-"
								else
									return ch
								end
							end)
							return gsub(text, "-$", "")
						end
					end)
					tr_word = gsub(tr_word, "%-+", "-")
					tr_word = gsub(tr_word, "%-([^ⁿa-záíúéóḿńàìùèòǹâîûêôāīūēōṳA-ZÁÍÚÉÓḾŃÀÌÙÈÒǸÂÎÛÊÔĀĪŪĒŌṲ])", "%1")
					tr_word = gsub(tr_word, "([^ⁿa-záíúéóḿńàìùèòǹâîûêôāīūēōoóòôōṳA-ZÁÍÚÉÓḾŃÀÌÙÈÒǸÂÎÛÊÔĀĪŪĒŌOÓÒÔŌṲ̄̀́̂̍͘])%-", "%1")
					tr_word = gsub(tr_word, "<b>", "-<b>")
					tr_word = gsub(tr_word, "</b>", "</b>-")
					tr_word = gsub(tr_word, "%^%-<b>", "<b>^")
					tr_word = gsub(tr_word, "^%-+", "")
					tr_word = gsub(tr_word, "%-+$", "")
					tr_word = gsub(tr_word, "%%%-?", "--")
				end
				if match(tr_word, "[一-鿌㐀-䶵𠀀-]") then
					require("Module:debug").track("zh-usex/character without transliteration")
				end
			end
		end
		if variety_code == "nan" then
			trad_word = gsub(trad_word, "%%", "")
			simp_word = simp_exist and gsub(simp_word, "%%", "")
		end
		if display == "ruby" then
			ruby_word = ruby_start .. trad_word .. (simp_exist and "<br>" .. simp_word or "") .. ruby_mid .. (real_word and tr_word or "") .. ruby_end
			table.insert(ruby_words, ruby_word)
		else
			table.insert(trad_words, trad_word)
			table.insert(simp_words, simp_word or nil)
			table.insert(tr_words, tr_word or nil)
		end
	end
	-- The opening bracket is written as an HTML entity because a literal "["
	-- directly followed by a wikilink ("[[[:en:w:MSC|MSC]]") is interpreted poorly.
	local tag_start = " <span style=\"color:darkgreen; font-size:x-small;\">&#91;"
	local tag_end = "]</span>"
	if display == "ruby" then
		local ruby_tag = " <ruby><rb><big>" ..
			tag_start .. variety_data[1] ..
			(simp_exist
				and ", ''[[Traditional Chinese|trad.]]''↑ + ''[[Simplified Chinese|simp.]]''↓"
				or ", ''[[Traditional Chinese|trad.]]'' and ''[[Simplified Chinese|simp.]]''") .. tag_end ..
			tag_start .. "''rom.'': " .. variety_data[3] .. tag_end ..
			"</big></rb></ruby>"
		return table.concat(ruby_words, "") .. ruby_tag .. "<dl><dd><i>" .. translation .. "</i></dd></dl>"
	end

	-- Non-ruby layouts from here on (the ruby branch has already returned).
	-- Insert a space between adjacent links that end/start with Latin letters.
	local trad_text = gsub(table.concat(trad_words), "([a-zA-Z]%]%])(%[%[[a-zA-Z])", "%1 %2")
	local simp_text = simp_exist and gsub(table.concat(simp_words), "([a-zA-Z]%]%])(%[%[[a-zA-Z])", "%1 %2") or false
	phonetic = manual_tr or (#tr_words > 0 and table.concat(tr_words, " ") or false)
	-- overall transcription formatting
	if phonetic then
		phonetic = gsub(phonetic, " </b>", "</b> ")
		-- NOTE(review): as written this replaces a space with a space (no-op);
		-- presumably it once collapsed a double space -- confirm upstream.
		phonetic = gsub(phonetic, " ", " ")
		if variety_code == "yue" or variety_code == "zhx-tai" or variety_code == "zhx-teo" or variety_code == "nan-hai" or variety_code == "cmn-sze" or variety_code == "cjy" or variety_code == "hsn" then
			phonetic = gsub(phonetic, "([a-zê]+)([1-9%-]+)", "%1<sup>%2</sup>") -- superscript tones
		end
		phonetic = gsub(phonetic, " ([,%.?!;:’”)])", "%1") -- remove excess spaces from punctuation
		phonetic = gsub(phonetic, "([‘“(]) ", "%1")
		if not manual_tr then
			phonetic = gsub(phonetic, "%'([^%'])", "%1") -- allow bolding for manual translit
			if variety_code == "nan" then
				phonetic = gsub(phonetic, " +%-%-", "--")
			end
		end
		-- capitalisation
		if variety_code == "yue" or variety_code == "cjy" or variety_code == "hsn" or variety_code == "cmn-wuh" or variety_code == "zhx-teo" or variety_code == "wxa" or variety_code == "wuu" then
			args.tr_nocap = '1'
		end
		-- "^" marks the next character for upper-casing; the markers are
		-- consumed a few lines below.
		if not args.tr_nocap and match(example, "[。?!]") then
			phonetic = "^" .. gsub(phonetic, "([%.?!]) ", "%1 ^")
		end
		phonetic = gsub(phonetic, "([%.%?%!][”’]) (.)", "%1 ^%2")
		phonetic = gsub(phonetic, "<br>(.)", "<br>^%1")
		phonetic = gsub(phonetic, ": ([“‘])(.)", ": %1^%2")
		phonetic = gsub(phonetic, "%^<b>", "<b>^")
		phonetic = gsub(phonetic, "%^+.", mw.ustring.upper)
		phonetic = gsub(phonetic, "%^", "")
		if variety_code == "wuu" then
			local wuu_pron = require("Module:wuu-pron")
			phonetic = "<span class=\"IPA\">[" .. wuu_pron.ipa_conv(phonetic) .. "]</span>"
		elseif variety_code == "cmn-wuh" or variety_code == "wxa" then
			phonetic = "<span class=\"IPA\">[" .. phonetic .. "]</span>"
		elseif variety_code == "cdo" then
			local cdo_pron = require("Module:cdo-pron")
			phonetic = "<i>" .. phonetic .. "</i>" ..
				(not match(phonetic, "-[^ ]+-[^ ]+-[^ ]+-")
					and " / <span class=\"IPA\"><small>[" .. cdo_pron.sentence(phonetic) .. "]</small></span>"
					or "")
		else
			phonetic = "<i>" .. phonetic .. "</i>"
		end
		phonetic = "<span style=\"color:#404D52\">" .. phonetic .. "</span>"
	end

	local collapse_start, collapse_end, collapse_tag, collapse_border_div, collapse_border_div_end = '', '', '', '', ''
	local simplified_start = '<br>'
	if collapsed then
		collapse_start = '<span class="vsHide">'
		collapse_end = '</span>'
		collapse_tag = '<span class="vsToggleElement" style="color:darkgreen; font-size:x-small;padding-left:10px"></span>'
		collapse_border_div = '<div class="vsSwitcher" data-toggle-category="usage examples" style="border-left: 1px solid #930; border-left-width: 2px; padding-left: 0.8em;">'
		collapse_border_div_end = '</div>'
		simplified_start = '<hr>'
	end
	-- Categorise only in the main namespace and unless nocat is set.
	local cat
	if actual_title.nsText == '' and (not args.nocat) then -- fixme: probably categorize only if text contains the actual word
		if reference then
			cat = "[[Category:Chinese terms with quotations]]"
		else
			cat = "[[Category:သုင်စောဲဝေါဟာကြုက်ထ္ၜးဥပမာဂမၠိုင်]]"
		end
	end
	-- indentation, font and identity tags
	-- These are locals so that values from an earlier call cannot leak into a
	-- later one through the `or ""` fallbacks in the return expressions.
	local tr_tag, audio
	if
		((variety_code == "cmn" and original_length > 7)
		or (variety_code ~= "cmn" and original_length > 5)
		or reference
		or (match(example, "[,。?!、:; ]") and variety_code == "wuu")
		or (variety_code == "cdo" and original_length > 3)
		or (inline or "") ~= "") then
		-- Block layout: one <dl> with a line each for traditional, simplified,
		-- reference, transliteration, audio and translation.
		trad_text = zh_format_start_trad .. trad_text .. zh_format_end
		if not phonetic then
			translation = "<i>" .. translation .. "</i>"
		end
		if phonetic then
			phonetic = "<dd>" .. collapse_start .. phonetic
			translation = "<dd>" .. translation .. "</dd>"
			tr_tag = tag_start .. variety_data[3] .. tag_end .. collapse_end .. "</dd>"
		else
			translation = "<dd>" .. translation .. "</dd>"
		end
		if audio_file then
			audio = "<dd>[[File:" .. audio_file .. "]]</dd>"
		end
		local trad_tag = collapse_start .. tag_start .. variety_data[1] .. ", <i>[[:en:w:Traditional Chinese|trad.]]" ..
			((simp_exist or variety_code == "vi") and "" or " and [[:en:w:Simplified Chinese|simp.]]") .. "</i>" .. tag_end .. collapse_end .. collapse_tag
		local simp_tag
		if simp_exist then
			simp_text = simplified_start .. collapse_start .. zh_format_start_simp .. simp_text .. zh_format_end
			simp_tag = tag_start .. variety_data[1] .. ", <i>[[w:Simplified Chinese|simp.]]</i>" .. tag_end .. collapse_end
		end
		if reference then
			reference = "<dd>" .. collapse_start .. "<small><i>From:</i> " ..
				(ref_list[reference] and ref_list[reference][2] or reference) .. "</small>" .. collapse_end .. "</dd>"
		end
		return collapse_border_div .. "<dl class=\"zhusex\">" .. trad_text .. trad_tag .. (simp_text or "") .. (simp_tag or "") .. (reference or "") ..
			(phonetic and phonetic .. tr_tag or "") .. (audio or "") .. translation .. "</dl>" .. (cat or "") .. collapse_border_div_end
	else
		-- Short inline layout: text ― transliteration ― translation.
		trad_text = zh_format_start_trad .. trad_text .. zh_format_end
		local divider = " ― "
		local ts_tag
		if variety ~= "MSC" then
			ts_tag = tag_start .. variety_data[1] .. tag_end
			tr_tag = tag_start .. variety_data[3] .. tag_end
		end
		if not phonetic then
			translation = "<i>" .. translation .. "</i>"
		end
		if simp_exist then
			simp_text = " / " .. zh_format_start_simp .. simp_text .. zh_format_end
		end
		if audio_file then
			audio = " [[File:" .. audio_file .. "]]"
		end
		return trad_text .. (simp_text or "") .. (ts_tag or "") .. divider ..
			(phonetic and phonetic .. (tr_tag or "") .. (audio or "") .. divider or "") .. translation .. (literal and " (literally, “" .. literal .. "”)" or "") ..
			(cat or "")
	end
end
--- Convert a legacy wikitext usage example into {{zh-x}} syntax.
--
-- Can be called from Lua with plain strings, or through #invoke with a frame.
-- With a frame, arguments are taken from the frame itself if it has a first
-- argument, otherwise from its parent. If a non-empty second argument exists
-- the arguments are read as (ref, text, translation); otherwise only the
-- first argument is read, as the text.
--
-- @param text         example string, or a frame object
-- @param translation  optional translation; when absent only the cleaned
--                     text is returned
-- @param ref          optional reference, emitted as |ref=
-- @return the cleaned text, or a complete {{zh-x|...}} call when a
--         translation is available
function export.migrate(text, translation, ref)
	if type(text) == "table" then
		if not text.args or not text.args[1] then
			text = text:getParent()
		end
		if text.args[2] and text.args[2] ~= '' then
			ref = text.args[1]
			translation = text.args[3]
			text = text.args[2]
		else
			text = text.args[1]
		end
	end
	if not text then
		-- Fail with a clear message instead of "attempt to index a nil value".
		error("Text unspecified.")
	end
	text = text
		:gsub('^[%*#: \n]+', '')       -- strip leading list markup / whitespace
		:gsub('[ \n]+$', '')           -- strip trailing whitespace
		:gsub(' +', ' ')
		:gsub('\n+', '<br>')
		:gsub('|', '\\')               -- pipes are written as "\" inside zh-x
		:gsub('\'\'\'%[%[', ' ')       -- bold/link markup becomes word boundaries
		:gsub('%]%]\'\'\'', ' ')
		:gsub('%]%]%[%[', ' ')
		:gsub('%]%]', '')
		:gsub('%[%[', '')
		:gsub('\'\'\'', '')
		-- Normalise ASCII punctuation to its full-width form. (Replacing each
		-- character with itself, as before, had no effect.)
		:gsub(',', ',')
		:gsub('!', '!')
		:gsub('%?', '?')
	if translation then
		if ref and ref ~= '' then
			return '{{zh-x|' .. text .. '|' .. translation .. '|ref=' .. ref .. '}}'
		else
			return '{{zh-x|' .. text .. '|' .. translation .. '}}'
		end
	else
		return text
	end
end
return export