-- မဝ်ဂျူ:yue-pron
-- Documentation for this module may be created at မဝ်ဂျူ:yue-pron/doc
local export = {}
local m_string_utils = require("Module:string utilities")
local gsplit = m_string_utils.gsplit
local gsub = m_string_utils.gsub
local len = m_string_utils.len
local lower = m_string_utils.lower
local split = m_string_utils.split
-- Set of stop codas (used only for membership tests): when a syllable's coda is
-- one of these, jyutping_to_ipa remaps its tone digits through `entering_tones`.
local entering = {
p = 1, t = 1, k = 1
}
-- Tone digits 1/3/6 -> 7/8/9, the digits used for syllables ending in p/t/k.
-- Digits not listed here are left unchanged by callers (`entering_tones[x] or x`).
local entering_tones = {
["1"] = "7", ["3"] = "8", ["6"] = "9"
}
-- Whole-final rewrites applied by jyutping_to_ipa before the final is split into
-- nucleus + coda. The right-hand sides are an internal spelling, not Jyutping:
-- an inserted "r" selects the "er"/"ir"/"or"/"ur" entries of `ipa_nucleus`, and
-- the coda "y" (from "eoi") selects the "y" entry of `ipa_coda`.
-- Finals not listed here are used as written.
local ipa_allophones = {
ei = "eri",
eoi = "eoy",
ing = "irng",
ik = "irk",
ou = "oru",
ung = "urng",
uk = "urk",
}
-- Jyutping initial -> IPA onset. The empty-string key covers syllables that
-- have no initial (the initial capture in jyutping_to_ipa is fully optional).
-- The commented-out zh/ch/sh row is disabled; `validate` rejects those spellings.
local ipa_initial = {
["b"] = "p", ["p"] = "pʰ", ["m"] = "m", ["f"] = "f",
["d"] = "t", ["t"] = "tʰ", ["n"] = "n", ["l"] = "l",
["g"] = "k", ["k"] = "kʰ", ["ng"] = "ŋ", ["gw"] = "kʷ", ["kw"] = "kʷʰ",
-- ["zh"] = "t͡ʃ", ["ch"] = "t͡ʃʰ", ["sh"] = "ʃ",
["z"] = "t͡s", ["c"] = "t͡sʰ", ["s"] = "s",
["h"] = "h", ["w"] = "w", ["j"] = "j",
[""] = ""
}
-- Nucleus spelling -> IPA vowel.
-- Vowels with "r" only appear as allophones and should not appear in the input:
-- the "er"/"ir"/"or"/"ur" keys are reached only through the `ipa_allophones`
-- rewrite, and `validate` rejects any literal "r" in user-supplied Jyutping.
local ipa_nucleus = {
["aa"] = "aː", ["a"] = "ɐ",
["e"] = "ɛː", ["er"] = "e",
["i"] = "iː", ["ir"] = "ɪ",
["o"] = "ɔː", ["or"] = "o",
["oe"] = "œː", ["eo"] = "ɵ",
["u"] = "uː", ["ur"] = "ʊ",
["yu"] = "yː"
}
-- Coda spelling -> IPA. The empty-string key covers open syllables.
-- The "y" key is not a Jyutping coda; it is produced by the "eoi" -> "eoy"
-- rewrite in `ipa_allophones`.
local ipa_coda = {
["i"] = "i̯", ["u"] = "u̯", ["y"] = "y̯",
["m"] = "m", ["n"] = "n", ["ng"] = "ŋ",
["p"] = "p̚", ["t"] = "t̚", ["k"] = "k̚",
[""] = ""
}
-- Tone digit (as a string) -> superscript tone letters.
-- Digits 7-9 are reached via `entering_tones` for syllables ending in p/t/k.
-- Tone 1 is wrapped in a hover span whose tooltip reads "or 53".
-- The empty-string key is used when a syllable has no changed tone (the second
-- tone capture in jyutping_to_ipa is optional and may be "").
local ipa_tone = {
["1"] = "<span style=\"cursor:help\" title=\"or 53\">⁵⁵</span>",
["2"] = "³⁵",
["3"] = "³³",
["4"] = "²¹",
["5"] = "¹³",
["6"] = "²²",
["7"] = "⁵",
["8"] = "³",
["9"] = "²",
[""] = ""
}
-- Separator emitted between the original and the changed tone; keyed by the
-- optional hyphen captured between the two tone digits ("" when absent).
local ipa_tone_sandhi = {
["-"] = "⁻", [""] = ""
}
-- Syllabic nasals: syllables with no vowel, consisting of "m" or "ng"
-- (optionally preceded by "h", which is handled as the initial).
local ipa_syllabic = {
["m"] = "m̩", ["ng"] = "ŋ̍"
}
-- Wrap `main` in a help-cursor span so that hovering it shows "or <option>".
-- Both arguments are inserted into the markup unescaped.
local function alt(main, option)
    local opening_tag = '<span style="cursor:help" title="or ' .. option .. '">'
    return opening_tag .. main .. '</span>'
end
-- Accented forms used by yale_tone. Each table is keyed by the unaccented
-- letter (or syllabic "m"/"ng") that carries the tone mark.
-- acute: applied for tones 2 and 5.
local acute = {
a="á", e="é", i="í", o="ó", u="ú", m="ḿ", ng="ńg"
}
-- grave: applied for tone 4, and shown as the hover alternative for tone 1.
local grave = {
a="à", e="è", i="ì", o="ò", u="ù", m="m̀", ng="ǹg"
}
-- macron: the displayed form for tone 1.
local macron = {
a="ā", e="ē", i="ī", o="ō", u="ū", m="m̄", ng="n̄g"
}
-- "?" indicates finals that are not supported by Yale
-- Jyutping final -> Yale final, looked up by jyutping_to_yale on the part of
-- the syllable from its first vowel letter onward. Finals missing from this
-- table are used unchanged; a "?" value makes the whole conversion return false.
local yale_final = {
["a"] = "?", ["aa"] = "a",
["eu"] = "?", ["em"] = "?", ["en"] = "?", ["ep"] = "?", ["et"] = "?",
["om"] = "?", ["op"] = "?",
["um"] = "?", ["up"] = "?",
["oe"] = "eu", ["oem"] = "?", ["oen"] = "?", ["oeng"] = "eung", ["oep"] = "?", ["oet"] = "?", ["oek"] = "euk",
["eoi"] = "eui", ["eon"] = "eun", ["eot"] = "eut",
}
-- Apply Yale tone marking to one vowel cluster (or syllabic nasal).
--   a: the letter that carries the accent -- the first vowel of the cluster,
--      or the whole syllabic ("m"/"ng")
--   b: the remaining vowels of the cluster ("" for syllabics)
--      (e.g. "keui" -> a="e", b="ui"; "keung" -> a="e", b="u")
--   t: the tone digit, as a string
-- Rules:
--   tone 1      -> macron on `a`, with the grave form offered on hover
--   tone 4      -> grave on `a`
--   tones 2, 5  -> acute on `a`
--   tones 4-6   -> an "h" is appended after `b`
-- Any other tone leaves `a` unaccented.
local function yale_tone(a, b, t)
    local marked = a
    if t == "1" then
        marked = alt(macron[a], grave[a])
    elseif t == "4" then
        marked = grave[a]
    elseif t == "2" or t == "5" then
        marked = acute[a]
    end

    local is_low_register = (t == "4" or t == "5" or t == "6")
    local low_marker = is_low_register and "h" or ""

    return marked .. b .. low_marker
end
-- Convert a Jyutping string to IPA.
--
-- `text` is either the Jyutping string itself or a frame-like table whose
-- `args[1]` holds it. Before conversion ", " becomes " ", "..." becomes " ",
-- one trailing space is dropped and " / " becomes "/, /". Every run of
-- lowercase letters followed by a tone digit (optionally "-" and/or a second
-- digit) is then replaced by its IPA transcription.
--
-- Raises an error for a syllable, initial, nucleus, coda, syllabic or tone
-- that is not recognised. Returns the converted string.
function export.jyutping_to_ipa(text)
    if type(text) == "table" then text = text.args[1] end

    -- Tone lookup that reports the offending digit. Without the guard an
    -- unknown digit (e.g. "0") would surface as an opaque
    -- "attempt to concatenate a nil value" error.
    local function tone_to_ipa(digit)
        return ipa_tone[digit] or error(("Unrecognised tone: \"%s\""):format(digit))
    end

    text = text:gsub(", "," "):gsub("%.%.%.", " "):gsub(" $",""):gsub(" / ","/, /")
        -- :gsub("([zcs])yu", "%1hyu")
        -- :gsub("([zcs])oe", "%1hoe")
        -- :gsub("([zcs])eo", "%1heo")
        :gsub("(%l+)(%d)(%-?)(%d?)", function(main,tone,symbol,tone2)
            -- try initial+final
            local initial, final = main:match("^([bpmfdtnlgknzcshwj]?[gw]?)([aeiouy]%l*)$")
            if not initial then
                -- otherwise try initial+syllabic
                local syllabic
                initial, syllabic = main:match("^(h?)([mn]g?)$")
                if not initial then
                    error("Invalid Jyutping syllable: " .. main)
                end
                -- the pattern also admits "n" and "mg", which the table rejects
                main = ipa_initial[initial]
                    .. (ipa_syllabic[syllabic] or error("Unrecognised syllabic: " .. syllabic))
            else
                -- e.g. convert <ei> (which would be */ɛːi̯/) to <eri> (/ei̯/)
                final = ipa_allophones[final] or final
                local nucleus, coda = final:match("^(y?[aeiou][aeor]?)([iuymnptk]?g?)$")
                if not nucleus then
                    error("Invalid Jyutping final: " .. final)
                end
                -- syllables ending in p/t/k use tone digits 7/8/9 instead of 1/3/6
                if entering[coda] then
                    tone = entering_tones[tone] or tone
                    tone2 = entering_tones[tone2] or tone2
                end
                main = (ipa_initial[initial] or error(("Unrecognised initial: \"%s\""):format(initial)))
                    .. (ipa_nucleus[nucleus] or error(("Unrecognised nucleus: \"%s\""):format(nucleus)))
                    .. (ipa_coda[coda] or error(("Unrecognised coda: \"%s\""):format(coda)))
            end
            -- `symbol` is "-" or "" and `tone2` is a digit or "", as captured above
            return main .. tone_to_ipa(tone) .. ipa_tone_sandhi[symbol] .. tone_to_ipa(tone2)
        end)
    return text
end
-- Convert a Jyutping string to Yale romanization.
--
-- `text` is either the Jyutping string itself or a frame-like table whose
-- `args[1]` holds it. "j"/"jy" are respelled "y", "z" -> "j" and "c" -> "ch";
-- each syllable is then given its Yale final and tone marking. When a changed
-- tone is present (e.g. "1-2") only the changed tone is shown.
--
-- Returns the Yale string, or false when any syllable uses a final that Yale
-- cannot express (or the text contains a literal "?").
-- Raises an error for a syllable that cannot be parsed at all.
function export.jyutping_to_yale(text)
    if type(text) == "table" then text = text.args[1] end
    text = text:gsub("jy?","y")
        :gsub("[cz]",{z="j",c="ch"})
        --:gsub("[1-6]%-","")
        :gsub("(%l+)(%d)(%-?)(%d?)", function(main,tone,symbol,tone2)
            if tone2 ~= "" then
                tone = tone2
            end
            -- find the first vowel letter
            local initial, final = main:match("^([^aeiou]*)([aeiou].*)$")
            local a, b, c
            if initial then
                final = yale_final[final] or final
                if final == "?" then
                    return "?"
                end
                a, b, c = final:match("^([aeiou])([aiu]*)([mnptk]?g?)$")
                if not a then
                    -- fail with a message rather than a nil concatenation in yale_tone
                    error("Invalid Jyutping final: " .. final)
                end
            else
                -- Otherwise it must be a syllabic nasal. The match is anchored
                -- (as in jyutping_to_ipa) so stray letters are not silently
                -- dropped, and only "m"/"ng" are accepted because those are
                -- the only syllabics that have accented forms.
                initial, a = main:match("^(h?)([mn]g?)$")
                if not initial or (a ~= "m" and a ~= "ng") then
                    error("Cannot convert syllable to Yale: " .. main)
                end
                b, c = "", ""
            end
            return initial .. yale_tone(a, b, tone) .. c
        end)
    -- plain search: "?" is a magic character in Lua patterns
    if text:find("?", 1, true) then
        return false
    end
    return text
end
-- Convert a Jyutping string to Cantonese Pinyin.
--
-- `text` is either the Jyutping string itself or a frame-like table whose
-- `args[1]` holds it. Returns false when the text contains one of the finals
-- oem/oen/oep/oet (detected as "oe" + m/n/p/t not followed by "g");
-- otherwise returns the converted string.
function export.jyutping_to_cantonese_pinyin(text)
    if type(text) == "table" then
        text = text.args[1]
    end

    -- unsupported finals
    if text:find("oe[mnpt][^g]") then
        return false
    end

    local eo_finals = { eoi = "oey", eon = "oen", eot = "oet" }
    local affricates = { z = "dz", c = "ts" }

    -- Renumber tones after a stop coda (1/3/6 -> 7/8/9), for both the base
    -- tone and the optional changed tone.
    local function renumber(stop, base, hyphen, changed)
        return stop .. (entering_tones[base] or base)
            .. hyphen .. (entering_tones[changed] or changed)
    end

    local converted = text:gsub("yu", "y")
    converted = converted:gsub("eo[int]", eo_finals)
    converted = converted:gsub("[zc]", affricates)
    converted = converted:gsub("([ptk])([1-6])(%-?)([1-6]?)", renumber)
    return converted
end
-- Convert a Jyutping string to Guangdong Romanization.
--
-- `text` is either the Jyutping string itself or a frame-like table whose
-- `args[1]` holds it. Returns false when the text matches any of the
-- unsupported-final patterns below; otherwise returns the converted string.
function export.jyutping_to_guangdong(text)
    if type(text) == "table" then
        text = text.args[1]
    end

    -- unsupported finals
    local unsupported = {
        "%f[a]a%d",         -- a lone short "a" directly before a tone digit
        "oe[mnpt][^g]",
        "[ou][mp]",
        "e[un][^g]",
    }
    for _, pattern in ipairs(unsupported) do
        if text:find(pattern) then
            return false
        end
    end

    -- Ordered rewrite steps, each { pattern, replacement }. The order matters:
    -- e.g. "j" -> "y" must happen before "z" -> "j".
    local steps = {
        { "yu", "ü" },
        { "j", "y" },
        { "[zcs]%f[iü]", { z = "j", c = "q", s = "x" } }, -- ü=\xC3\xBC
        { "([jqxy])ü", "%1u" },
        { "eoi", "êu" },
        { "[aeo][aeo]?", { aa = "a", a = "e", e = "é", oe = "ê", eo = "ê" } },
        { "([ae])u", "%1o" },
        { "([gk])w", "%1u" },
        { "[ptk]%f[%d]", { p = "b", t = "d", k = "g" } },
    }
    for _, step in ipairs(steps) do
        text = text:gsub(step[1], step[2])
    end
    return text
end
-- Prepare Jyutping for homophone lookup: every "X-Y" changed-tone notation is
-- reduced to the changed tone "Y", and the result is split on " / " into the
-- individual readings.
function export.jyutping_format(text)
    local surface_tones = text:gsub("[1-6]%-([1-6])", "%1")
    return split(surface_tones, " / ")
end
-- Wrap `text` in a span that requests the Consolas (else monospace) font.
local function Consolas(text)
    local opening_tag = '<span style="font-family: Consolas, monospace;">'
    local closing_tag = "</span>"
    return opening_tag .. text .. closing_tag
end
-- Wrap `text` in a span carrying the "IPA" CSS class.
local function format_IPA(text)
    local opening_tag = '<span class="IPA">'
    local closing_tag = "</span>"
    return opening_tag .. text .. closing_tag
end
-- Wrap each tone run in <sup>...</sup>. A run starts with a digit, may continue
-- with digits, "*" or "-", and ends where the next character is neither a
-- digit nor "*". Returns only the rewritten string (not gsub's match count).
local function make_superscript(text)
    local superscripted = text:gsub("%d[%d%*%-]*%f[^%d%*]", "<sup>%0</sup>")
    return superscripted
end
-- Validate a raw Jyutping parameter, raising an error on the first problem
-- found. Returns nothing; success is signalled by not raising.
-- The only allowed punctuations are:
--- ", ": represents a comma (or a break of any sort)
--- "...": represents a slot where a text can go to (e.g. [[一……就……]])
--- ",": separates alternate readings
local function validate(c_rom)
    -- Checks on the raw input, tried in this order; each is { pattern, message }.
    local raw_checks = {
        { "[7-9]", "Invalid tone in Jyutping." },
        { "[A-Z]", "Please do not capitalize the Jyutping." },
        { "%-[a-z]", "Please do not hyphenate the Jyutping." },
        { "[0-9][a-z]", "Error in Jyutping: please use spaces to separate syllables." },
        { "[zcs]h", "'zh'/'ch'/'sh' are non-valid Jyutping, use 'z'/'c'/'s' instead." },
        { "y[^u]", "Wrong usage of 'y' in Jyutping." },
        { "oei", "Invalid rime oei in Jyutping. Did you mean eoi?" },
        { "eong", "Invalid rime eong in Jyutping. Did you mean oeng?" },
        { "eok", "Invalid rime eok in Jyutping. Did you mean oek?" },
        { "r", "Invalid letter \"r\" in Jyutping." },
        { "%d%d", "Invalid Jyutping: please use a hyphen to indicate a changed tone." },
    }
    for _, check in ipairs(raw_checks) do
        if c_rom:find(check[1]) then
            error(check[2])
        end
    end

    -- Strip the permitted punctuation: a leading "..." and any "..." directly
    -- before a comma or the end of the string are removed, every other "..."
    -- and every comma (with an optional following space) becomes one space.
    c_rom = c_rom:gsub("^%.%.%.", ""):gsub("%.%.%.%f[%z,]", ""):gsub("%.%.%.", " "):gsub(", ?", " ")

    -- An empty syllable shows up as a leading space, a trailing space, or TWO
    -- consecutive spaces. A single space is the normal syllable separator and
    -- must not trip this check. The doubled space is built with rep() so it
    -- cannot be collapsed by whitespace-normalising tools.
    local double_space = (" "):rep(2)
    if c_rom:find("^ ") or c_rom:find(double_space, 1, true) or c_rom:find(" $") then
        error("Empty syllable detected.")
    end
    if c_rom:find("[^a-z1-6%- ]") then
        error("Invalid character found.")
    end
    -- ensure that each syllable matches `^%l+%d%-?%d?$`
    for syllable in c_rom:gmatch("%S+") do
        if not syllable:match("^%l+%d%-?%d?$") then
            error("Invalid Jyutping syllable: " .. syllable)
        end
    end
end
-- Build the always-visible Standard Cantonese text.
-- The Jyutping is validated first (errors propagate to the caller). A comma
-- directly followed by anything other than a space or comma separates
-- alternate readings and is shown as " / ".
-- When `is_single_hanzi` is truthy each reading is tracked and turned into a
-- link to its own page with the tone superscripted; otherwise the tones are
-- simply superscripted.
function export.show_standard(c_rom, is_single_hanzi)
    validate(c_rom)

    local shown = c_rom:gsub(",%f[^ ,]", " / ")

    if not is_single_hanzi then
        return make_superscript(shown)
    end

    for reading in shown:gmatch("[^ ,./]+") do
        require('Module:debug').track('yue-pron/' .. reading)
    end
    local linked = shown:gsub("(%l+)(%d%-?%d?)", "[[%1%2|%1<sup>%2</sup>]]")
    return linked
end
-- Build the collapsed Standard Cantonese section: the Jyutping itself, every
-- romanisation that could be derived from it, the IPA, and either a table of
-- homophones or (for multi-character terms without any) an "add homophones"
-- edit link. Returns the section as one wikitext string.
function export.hide_standard(c_rom, is_single_hanzi)
    c_rom = c_rom:gsub(",%f[^ ,]"," / ")

    -- generate IPA first because the error-catching is located there
    local c_ipa = export.jyutping_to_ipa(c_rom)
    local c_yale = export.jyutping_to_yale(c_rom)
    local c_cp = export.jyutping_to_cantonese_pinyin(c_rom)
    local c_gd = export.jyutping_to_guangdong(c_rom)

    -- the first reading that has an entry in the homophone data, else false
    local c_hom = mw.loadData("Module:yue-pron/hom")
    local hom_key = false
    for _, reading in ipairs(export.jyutping_format(c_rom)) do
        if c_hom[reading] then
            hom_key = reading
            break
        end
    end

    -- pieces of the output, joined once at the end
    local out = {}
    local function emit(piece)
        out[#out + 1] = piece
    end

    emit("\n** <small>(<i>[[w:Standard Cantonese|Standard Cantonese]], [[w:Guangzhou Cantonese|Guangzhou]]–[[w:Hong Kong Cantonese|Hong Kong]]</i>)</small>")
    if not hom_key and not is_single_hanzi then
        emit('<sup><small><abbr title="Add Cantonese homophones"><span class="plainlinks">[')
        emit(tostring(mw.uri.fullUrl("Module:yue-pron/hom",{["action"]="edit"})))
        emit(" +]</span></abbr></small></sup>")
    end

    emit("\n*** <small><i>[[w:Jyutping|Jyutping]]</i></small>: ")
    emit(Consolas(make_superscript(c_rom)))

    -- the converters return false when a romanisation cannot express the reading
    if c_yale then
        emit("\n*** <small><i>[[w:Yale romanization of Cantonese|Yale]]</i></small>: ")
        emit(Consolas(c_yale))
    end
    if c_cp then
        emit("\n*** <small><i>[[w:Cantonese Pinyin|Cantonese Pinyin]]</i></small>: ")
        emit(Consolas(make_superscript(c_cp)))
    end
    if c_gd then
        emit("\n*** <small><i>[[w:Guangdong Romanization|Guangdong Romanization]]</i></small>: ")
        emit(Consolas(make_superscript(c_gd)))
    end

    emit("\n*** <small>Sinological [[Wiktionary:International Phonetic Alphabet|IPA]] <sup>([[w:Cantonese phonology|key]])</sup></small>: ")
    emit(format_IPA("/" .. c_ipa .. "/"))

    if hom_key then
        emit('\n*** <small>Homophones</small>: <table class="wikitable mw-collapsible mw-collapsed" style="width:15em;margin:0;')
        emit('position:left; text-align:center"><tr><th></th></tr><tr><td><div style="float: right; clear: right;"><sup>')
        emit('<span class="plainlinks">[')
        emit(tostring(mw.uri.fullUrl("Module:yue-pron/hom",{["action"]="edit"})))
        emit(' edit]</span></sup></div><div style="visibility:hidden; float:left"><sup><span style="color:#FFF">edit</span></sup></div>')

        local links = {}
        local yue = require("Module:languages").getByCode("yue")
        for _, hom in ipairs(c_hom[hom_key]) do
            links[#links + 1] = require("Module:links").full_link( { term = hom, lang = yue, tr = "-" } )
        end
        emit(table.concat(links, "<br>"))
        emit("</td></tr></table>")
        emit("[[Category:Cantonese terms with homophones]]")
    end

    return table.concat(out)
end
-- Template entry point: build the headword line for a Jyutping entry.
-- Reads the optional `head` parameter from the parent frame; when it is absent
-- the page name from Module:headword/data is used. Tones in the head are
-- superscripted. Returns whatever Module:headword's full_headword returns.
function export.jyutping_headword(frame)
    local param_spec = {
        ["head"] = {},
    }
    local args = require("Module:parameters").process(frame:getParent().args, param_spec)

    local full_headword = require("Module:headword").full_headword
    local yue = require("Module:languages").getByCode("yue")
    local latin = require("Module:scripts").getByCode("Latn")
    local head = args.head or mw.loadData("Module:headword/data").pagename

    return full_headword{
        lang = yue,
        sc = latin,
        heads = { make_superscript(head) },
        pos_category = "jyutping"
    }
end
return export