မဝ်ဂျူ:hak-pron
Documentation for this module may be created at မဝ်ဂျူ:hak-pron/doc
local export = {}
local m_string_utils = require("Module:string utilities")
local gsub = m_string_utils.gsub
local sub = mw.ustring.sub
local match = m_string_utils.match
local find = m_string_utils.find
local len = m_string_utils.len
local lower = m_string_utils.lower
local toNFD = mw.ustring.toNFD
-- Render the Hakka pronunciation listing as wikitext.
--
-- `text` is a ';'-separated list of sections. A section is recognised by
-- containing the substring 'pfs', 'hrs', 'gd' or 'ct'; its readings are
-- separated by '/'. When called with a frame-like table, the input is taken
-- from `text.args[1]` and the conversion type from `text.args[2]`
-- (defaulting to '').
--
-- With `convtype == ''` every section is expanded into nested bullet lines
-- with romanisations and IPA. With any other value only one compact
-- romanisation line per section is produced.
--
-- Parameters:
--   text     - string, or a table with an `args` field
--   convtype - string; '' selects the full display
-- Returns: the generated wikitext string.
-- Raises: an error when a PFS reading carries a label other than
--   'n', 's' or 'ns', or when an HRS reading has a missing or unrecognised
--   dialect label.
function export.rom_display(text,convtype)
	if type(text) == 'table' then text,convtype = text.args[1],(text.args[2] or '') end
	local display = ''
	local show = { ['pfs'] = '', ['gd'] = '', ['hrs'] = '', ['ct'] = '' }
	-- Pad every '/' with spaces so the readings can be split on ' / ' below.
	local decomp = mw.text.split(gsub(text,'/',' / '),';',true)
	local TableTools = require('Module:table')
	for i = 1,#decomp,1 do
		if match(decomp[i],'pfs') then
			decomp[i] = gsub(decomp[i],'pfs=','')
			-- Readings are collected per variety: 'n' (Northern) and 's' (Southern).
			local pfs_readings = { ['n'] = {}, ['s'] = {} }
			local hrs_readings = { ['n'] = {}, ['s'] = {} }
			local hpy_readings = { ['n'] = {}, ['s'] = {} }
			local ipa_readings = { ['n'] = {}, ['s'] = {} }
			-- Build the bullet lines for one style: 'n', 's' or the merged 'ns'.
			local function display_format(style)
				local label = { ['n'] = 'Northern ', ['s'] = 'Southern ', ['ns'] = '' }
				local city = {
					['n'] = '[[w:Miaoli City|Miaoli]]',
					['s'] = '[[w:Neipu|Neipu]]',
					['ns'] = '[[w:Miaoli City|Miaoli]] and [[w:Neipu|Neipu]]'
				}
				local text = string.format("\n** <small>(''[[w:Sixian dialect|%sSixian]], incl. %s'')</small>", label[style], city[style])
				-- style:sub(1,1) maps the merged style 'ns' onto the 'n' lists.
				text = text .. "\n*** <small>''[[w:Pha̍k-fa-sṳ|Pha̍k-fa-sṳ]]''</small>: <span style=\"font-family: Consolas, monospace;\">" .. table.concat(pfs_readings[style:sub(1,1)], ' / ') .. '</span>'
				text = text .. "\n*** <small>''[[w:Taiwanese Hakka Romanization System|Hakka Romanization System]]''</small>: <span style=\"font-family: Consolas, monospace;\">" .. table.concat(hrs_readings[style:sub(1,1)], ' / ') .. '</span>'
				text = text .. "\n*** <small>''[[w:Hagfa Pinyim|Hagfa Pinyim]]''</small>: <span style=\"font-family: Consolas, monospace;\">" .. table.concat(hpy_readings[style:sub(1,1)], ' / ') .. '</span>'
				local ipa = '\n*** <small>Sinological [[Wiktionary:International Phonetic Alphabet|IPA]]'
				local span = '</small>: <span class="IPA">/'
				text = text .. ipa
				if style == 'ns' then
					local north = table.concat(ipa_readings['n'], '/, /')
					local south = table.concat(ipa_readings['s'], '/, /')
					if north == south then
						text = text .. span .. north .. '/</span>'
					else
						-- Same romanisation but different IPA: show one IPA line per variety.
						text = text .. " (''Northern, incl. " .. city['n'] .. "'')" .. span .. north .. "/</span>"
						text = text .. ipa .. " (''Southern, incl. " .. city['s'] .. "'')" .. span .. south .. "/</span>"
					end
				else
					text = text .. span .. table.concat(ipa_readings[style], '/, /') .. '/</span>'
				end
				return text
			end
			-- Derive the Southern spelling: e -> a before n/t after y, ngi, k(h)i and hi.
			local function southern(text)
				local function convert(a, b, c)
					local e_a = { ['e'] = 'a', ['ê'] = 'â', ['é'] = 'á', ['è'] = 'à' }
					return a .. e_a[b] .. c
				end
				text = gsub(text, '([yY])([eéèê])(̍?[nt])', convert)
				text = gsub(text, '([nN]gi)([eéèê])(̍?[nt])', convert)
				text = gsub(text, '([kK]h?i)([eéèê])(̍?[nt])', convert)
				text = gsub(text, '^([hH]i)([eéèê])(̍?[nt])', convert)
				text = gsub(text, '([%-%s][hH]i)([eéèê])(̍?[nt])', convert)
				return text
			end
			-- Record one reading for a variety in all four parallel lists.
			local function add(style, reading)
				pfs_readings[style][#pfs_readings[style] + 1] = reading
				hrs_readings[style][#hrs_readings[style] + 1] = export.hrs(reading, style)
				hpy_readings[style][#hpy_readings[style] + 1] = export.pfs_to_hpy(reading)
				ipa_readings[style][#ipa_readings[style] + 1] = export.ipa(reading, style)
			end
			-- Stays true only while every reading is spelled identically in both varieties.
			local ns = true
			for _, reading in ipairs(mw.text.split(decomp[i], ' / ')) do
				if match(reading,':') then
					local pair = mw.text.split(reading, ':')
					if pair[1] == 'ns' then
						add('n', pair[2])
						add('s', pair[2])
					elseif pfs_readings[pair[1]] then
						ns = false
						add(pair[1], pair[2])
					else
						-- Previously an unknown label failed with a bare nil-index error.
						error('Unrecognised Sixian dialect label "' .. pair[1] .. '": use "n", "s" or "ns".')
					end
				elseif match(reading,'[yY]') or reading ~= southern(reading) then
					ns = false
					add('n', reading)
					add('s', southern(reading))
				else
					add('n', reading)
					add('s', reading)
				end
			end
			if convtype == '' then
				if ns then
					display = display .. display_format('ns')
				else
					if #pfs_readings['n'] ~= 0 then display = display .. display_format('n') end
					if #pfs_readings['s'] ~= 0 then display = display .. display_format('s') end
				end
			else
				-- Compact mode: merge both varieties and drop repeated spellings.
				for _, reading in ipairs(pfs_readings['s']) do
					pfs_readings['n'][#pfs_readings['n'] + 1] = reading
				end
				show['pfs'] = table.concat(TableTools.removeDuplicates(pfs_readings['n']), ' / ')
			end
		end
		if match(decomp[i],'hrs') then
			decomp[i] = gsub(decomp[i], 'hrs=', '')
			-- Only dialects listed here are rendered in the full display.
			local supported = { "h" }
			local hrs_readings = { ['h'] = {}, ['d'] = {}, ['r'] = {}, ['z'] = {} }
			for _, reading in ipairs(mw.text.split(decomp[i], ' / ')) do
				if find(reading, ':') then
					local dialects, rom = match(reading, '^([hdrz,]+):(.+)$')
					if not dialects then
						-- Previously this fell through to mw.text.split(nil, ...).
						error('Unrecognised dialect label for Hakka Romanization in "' .. reading .. '".')
					end
					for _, dialect in ipairs(mw.text.split(dialects, ',')) do
						if not hrs_readings[dialect] then
							error('Unrecognised dialect label "' .. dialect .. '" for Hakka Romanization.')
						end
						table.insert(hrs_readings[dialect], rom)
					end
				else
					error("Missing dialect label for Hakka Romanization.")
				end
			end
			local dialect_link = {
				['h'] = "[[w:Hailu dialect|Hailu]], incl. [[w:Zhudong|Zhudong]]",
				['d'] = "[[w:zh:大埔話|Dabu]]",
				['r'] = "[[w:Raoping Hakka|Raoping]]",
				['z'] = "[[w:zh:詔安客語|Zhao'an]]",
			}
			if convtype == '' then
				for _, dialect in ipairs(supported) do
					display = display .. string.format("\n** <small>(''%s'')</small>", dialect_link[dialect])
					display = display .. "\n*** <small>''[[w:Taiwanese Hakka Romanization System|Hakka Romanization System]]''</small>: <span style=\"font-family: Consolas, monospace;\">" .. export.hrs_process(hrs_readings[dialect], dialect, "rom") .. '</span>'
					display = display .. '\n*** <small>Sinological [[Wiktionary:International Phonetic Alphabet|IPA]]</small>: <span class="IPA">/' .. export.hrs_process(hrs_readings[dialect], dialect, "ipa") .. "/</span>"
				end
			else
				show['hrs'] = export.hrs_process(hrs_readings['h'], 'h', "rom") --TO-DO: multiple dialects
			end
		end
		if match(decomp[i],'gd') then
			-- Tone digits 1-6 are displayed as superscripts.
			local gd_formatted = gsub(gsub(decomp[i], 'gd=', ''), '([1-6])', '<sup>%1</sup>')
			if convtype == '' then
				display = display .. "\n** <small>(''[[w:Meixian dialect|Meixian]]'')</small>"
				display = display .. "\n*** <small>''[[w:Guangdong_Romanization#Hakka|Guangdong]]''</small>: <span style=\"font-family: Consolas, monospace;\">" .. gd_formatted .. '</span>'
				display = display .. '\n*** <small>Sinological [[Wiktionary:International Phonetic Alphabet|IPA]]</small>: <span class="IPA">/' .. export.gd_to_ipa(decomp[i]) .. "/</span>"
			else
				show['gd'] = gd_formatted
			end
		end
		if match(decomp[i],'ct') then
			-- Tone digits 1-5 are displayed as superscripts.
			local ct_formatted = gsub(gsub(decomp[i], 'ct=', ''), '([1-5])', '<sup>%1</sup>')
			if convtype == '' then
				display = display .. "\n** <small>(''[[w:Changting dialect|Changting]]'')</small>"
				display = display .. "\n*** <small>''Changting Pinyin''</small>: <span style=\"font-family: Consolas, monospace;\">" .. ct_formatted .. '</span>'
				display = display .. '\n*** <small>Sinological [[Wiktionary:International Phonetic Alphabet|IPA]]</small>: <span class="IPA">/' .. export.ct_to_ipa(decomp[i]) .. "/</span>"
			else
				show['ct'] = ct_formatted
			end
		end
	end
	if convtype ~= '' then
		local pfs = " <small>(''[[w:Sixian dialect|Sixian]], [[w:Pha̍k-fa-sṳ|PFS]]'')</small>: <span style=\"font-family: Consolas, monospace;\">" .. show['pfs'] .. '</span>'
		local gd = " <small>(''[[w:Meixian dialect|Meixian]], [[w:Guangdong_Romanization#Hakka|Guangdong]]'')</small>: <span style=\"font-family: Consolas, monospace;\">" .. show['gd'] .. '</span>'
		local hrs = " <small>(''[[w:Hailu dialect|Hailu]], [[w:Taiwanese Hakka Romanization System|HRS]]'')</small>: <span style=\"font-family: Consolas, monospace;\">" .. show['hrs'] .. '</span>'
		local ct = " <small>(''[[w:Changting dialect|Changting]], Changting Pinyin'')</small>: <span style=\"font-family: Consolas, monospace;\">" .. show['ct'] .. '</span>'
		-- A single romanisation is appended inline; several become '*:' sub-lines.
		if show['pfs'] ~= '' and show['hrs'] == '' and show['gd'] == '' and show['ct'] == '' then
			display = display .. pfs
		elseif show['gd'] ~= '' and show['pfs'] == '' and show['hrs'] == '' and show['ct'] == '' then
			display = display .. gd
		elseif show['hrs'] ~= '' and show['pfs'] == '' and show['gd'] == '' and show['ct'] == '' then
			display = display .. hrs
		elseif show['ct'] ~= '' and show['pfs'] == '' and show['hrs'] == '' and show['gd'] == '' then
			display = display .. ct
		else
			display = display .. (show['pfs'] ~= '' and '\n*:' .. pfs or '')
			display = display .. (show['hrs'] ~= '' and '\n*:' .. hrs or '')
			display = display .. (show['gd'] ~= '' and '\n*:' .. gd or '')
			display = display .. (show['ct'] ~= '' and '\n*:' .. ct or '')
		end
	end
	return display
end
-- Classify a syllable into a tone number (1-6).
-- The text is decomposed (NFD) first so that tone diacritics can be found as
-- separate combining marks. The marks are tested in a fixed order; without
-- any mark, a final p/t/k/b/d/g not preceded by 'n' gives tone 5, and
-- everything else gives tone 4.
local function find_tone(text)
	local decomposed = toNFD(text)
	-- Ordered list: the first diacritic found decides the tone.
	local mark_tones = {
		{ '̂', 1 },
		{ '̀', 2 },
		{ '́', 3 },
		{ '̍', 6 },
	}
	for _, entry in ipairs(mark_tones) do
		if find(decomposed, entry[1]) then
			return entry[2]
		end
	end
	if find(decomposed, '[^n][ptkbdg]$') then
		return 5
	end
	return 4
end
-- Convert a hyphen/space separated romanised reading into IPA with tone
-- superscripts.
--
-- Parameters:
--   text    - string, or a table with an `args` field (uses `args[1]`)
--   dialect - 's' renders the initial 'y' as '(j)'; 'n' additionally enables
--             the special sandhi for a lone 'é' syllable on page titles
--             containing '仔'
-- Returns: the IPA syllables joined by single spaces.
function export.ipa(text, dialect)
	if type(text) == 'table' then text = text.args[1] end

	-- Normalise: lower-case, drop full stops and trailing whitespace, then
	-- turn the remaining whitespace runs into syllable hyphens.
	text = lower(text)
	text = gsub(text, '%.', '')
	text = gsub(text, '%s+$', '')
	text = gsub(text, '%s+', '-')
	local syllables = mw.text.split(text, "-")

	local initial, final, tone, tone_conv, ipa = {}, {}, {}, {}, {}

	-- 'oa'/'oe' are respelled with 'u', keeping any tone mark on the vowel.
	local medial_marked = {['o̍a']='ua̍',['o̍e']='ue̍'}
	local medial_plain = {['oa']='ua',['óa']='uá',['òa']='uà',['ôa']='uâ',['ōa']='uā',['oe']='ue',['óe']='ué',['òe']='uè',['ôe']='uê',['ōe']='uē'}
	local initial_ipa = {
		['ng'] = 'ŋ',
		['ph'] = 'pʰ',
		['th'] = 'tʰ',
		['kh'] = 'kʰ',
		['ch'] = 't͡s',
		['chh'] = 't͡sʰ',
		['y'] = 'i'
	}
	-- Strips tone diacritics from the final and maps 'ṳ' to its IPA vowel.
	local final_conv = {
		['á'] = 'a', ['é'] = 'e', ['í'] = 'i', ['ó'] = 'o', ['ú'] = 'u', ['́'] = '',
		['à'] = 'a', ['è'] = 'e', ['ì'] = 'i', ['ò'] = 'o', ['ù'] = 'u', ['̀'] = '',
		['â'] = 'a', ['ê'] = 'e', ['î'] = 'i', ['ô'] = 'o', ['û'] = 'u', ['̂'] = '',
		['ń'] = 'n', ['ǹ'] = 'n',
		['̍'] = '',
		['ṳ'] = 'ɨ',
	}
	local tone_ipa = {
		[1] = '²⁴',
		[2] = '¹¹',
		[3] = '³¹',
		[4] = '⁵⁵',
		[5] = '²',
		[6] = '⁵',
	}

	-- Mark the non-syllabic members of a vowel cluster.
	local function mark_glides(first, second, third)
		if third ~= '' then
			first = first .. '̯'
			third = third .. '̯'
		elseif first == 'i' or first == 'u' then
			first = first .. '̯'
		elseif second == 'i' or second == 'u' then
			second = second .. '̯'
		end
		return first .. second .. third
	end

	-- Pass 1: split every syllable into initial/final and record its tone.
	for i = 1, #syllables do
		local syl = gsub(syllables[i], ",", "")
		syl = gsub(syl,'o̍[ae]', medial_marked)
		syl = gsub(syl,'[oóòôō][ae]', medial_plain)
		local onset = match(syl, '^[mnptkcfvshyl]?[gh]?h?')
		local rime = sub(syl, len(onset) + 1, -1)
		onset = initial_ipa[onset] or onset
		tone[i] = find_tone(rime)
		rime = gsub(rime, '[âêîôû̂àèìòù̀áéíóú́ńǹ̍ṳ]', final_conv)
		if onset == 'i' then
			-- 'y' becomes part of the final unless the final is already i(+coda).
			rime = (find(rime, '^i[mnpt]?$') and '' or 'i') .. rime
			onset = dialect == 's' and '(j)' or ''
		end
		rime = gsub(rime, '([ptk])$', '%1̚')
		rime = gsub(rime, 'ng$', 'ŋ')
		if rime == 'ŋ' then
			rime = 'ŋ̍'
		end
		rime = gsub(rime, 'er$', 'ə')
		rime = gsub(rime, '([aeiouɨ])([aeiouɨ])([aeiouɨ]?)', mark_glides)
		initial[i], final[i] = onset, rime
	end

	-- Pass 2: needs the tones of neighbouring syllables, hence a second loop.
	-- The unmodified syllable (commas intact) is consulted here.
	for i = 1, #syllables do
		local syllable = syllables[i]
		tone_conv[i] = tone_ipa[tone[i]]
		-- Tone 1 before tone 1, 4 or 6, unless a comma follows the syllable.
		local sandhi_before_next = tone[i] == 1 and find(tostring(tone[i+1]), '[146]') and not find(syllable, ",")
		if sandhi_before_next or (syllable == 'é' and dialect == 'n' and find(mw.title.getCurrentTitle().text, '仔') and find(text, '-é') and find(tostring(tone[i-1]), '[35]')) then
			tone_conv[i] = tone_conv[i] .. '⁻¹¹'
		end
		ipa[i] = initial[i] .. final[i] .. tone_conv[i]
	end

	return gsub(table.concat(ipa, " "), ",", "")
end
-- Respell a romanised reading syllable by syllable and append a superscript
-- tone mark (possibly empty) to each syllable.
--
-- Parameters:
--   text    - string, or a table with an `args` field (uses `args[1]`)
--   dialect - 's' renders 'y' as '(r)i'; any other value renders it as 'i'
-- Returns: the converted syllables joined by single spaces.
function export.hrs(text, dialect)
	if type(text) == 'table' then text = text.args[1] end

	local consonant_map = {
		['p'] = 'b', ['t'] = 'd', ['k'] = 'g',
		['y'] = dialect == 's' and '(r)i' or 'i',
	}
	local aspirate_map = {['bh']='p',['dh']='t',['gh']='k',['ch']='z'}
	local palatal = {['z']='j',['c']='q',['s']='x',['i']=''}
	-- Tones without an entry here get an empty mark.
	local marks = { [1] = 'ˊ', [2] = 'ˇ', [3] = 'ˋ', [5] = 'ˋ' }

	local function palatalise(consonant, vowel)
		return palatal[consonant] .. vowel
	end

	local parts = mw.text.split(gsub(lower(text), ' ', '-'), "-")
	for idx = 1, #parts do
		local syl = parts[idx]

		-- A comma is assumed to be the last character and is re-attached at the end.
		local comma = find(syl, ',') and ',' or ''
		if comma ~= '' then
			syl = sub(syl, 1, -2)
		end

		-- Consonants: plain stops first, then the aspirated digraphs they produce.
		syl = gsub(syl,'[ptky]', consonant_map)
		syl = gsub(syl,'[bdgc]h', aspirate_map)
		syl = gsub(syl,'zh','c')
		syl = gsub(syl,'([zcsi])([iíìî])', palatalise)

		-- The tone must be read before the diacritics are stripped below.
		local tone = marks[find_tone(syl)] or ''

		syl = gsub(syl, 'ṳ', 'ii')
		syl = gsub(toNFD(syl), '[́̀̂̍]', '')
		syl = gsub(syl, 'o([ae])', 'u%1')

		parts[idx] = syl .. "<sup>" .. tone .. "</sup>" .. comma
	end
	return table.concat(parts, " ")
end
-- Respell a romanised reading syllable by syllable, appending the tone
-- number returned by find_tone as a superscript. A syllable that is exactly
-- '...' is passed through unchanged.
--
-- Parameters:
--   text - string, or a table with an `args` field (uses `args[1]`)
-- Returns: the converted syllables joined by single spaces.
function export.pfs_to_hpy(text)
	if type(text) == 'table' then text = text.args[1] end

	local stop_map = {['p']='b',['t']='d',['k']='g'}
	local aspirate_map = {['bh']='p',['dh']='t',['gh']='k',['ch']='z'}
	local palatal = {['z']='j',['c']='q',['s']='x'}

	local function palatalise(consonant, vowel)
		return palatal[consonant] .. vowel
	end

	local parts = mw.text.split(gsub(lower(text), ' ', '-'), "-")
	for idx = 1, #parts do
		local syl = parts[idx]
		if syl ~= '...' then
			-- A comma is assumed to be the last character and is re-attached at the end.
			local comma = find(syl, ',') and ',' or ''
			if comma ~= '' then
				syl = sub(syl, 1, -2)
			end

			-- Consonants: plain stops first, then the aspirated digraphs they produce.
			syl = gsub(syl,'[ptk]', stop_map)
			syl = gsub(syl,'[bdgc]h', aspirate_map)
			syl = gsub(syl,'zh','c')
			syl = gsub(syl,'([zcs])([iíìî])', palatalise)

			-- The tone must be read before the diacritics are stripped below.
			local tone = find_tone(syl)

			syl = gsub(syl, 'ṳ', 'i')
			syl = gsub(toNFD(syl), '[́̀̂̍]', '')
			syl = gsub(syl, 'o([ae])', 'u%1')
			if syl == 'yu' then
				syl = 'yiu'
			end
			syl = gsub(syl, '([iy])e([nd])', '%1a%2')

			parts[idx] = syl .. '<sup>' .. tone .. '</sup>' .. comma
		end
	end
	return table.concat(parts, " ")
end
-- Convert Guangdong-romanised readings (tone digits 1-6) into IPA.
--
-- Parameters:
--   text - string, or a table with an `args` field (uses `args[1]`);
--          alternative readings are separated by " / ", syllables by
--          whitespace; a leading 'gd=' and any '.' or ',' are removed
-- Returns: the IPA of each reading, joined by "/, /".
-- Raises: an error for a syllable starting with "i" without "y", for an
--   initial that disagrees with the palatalisation spelling rule, for an
--   unrecognised initial or final, and for a syllable without a tone digit.
function export.gd_to_ipa(text)
	local initial_conv = {
		["b"] = "p", ["p"] = "pʰ", ["m"] = "m", ["f"] = "f", ["v"] = "ʋ",
		["d"] = "t", ["t"] = "tʰ", ["n"] = "n", ["l"] = "l",
		["g"] = "k", ["k"] = "kʰ", ["ng"] = "ŋ", ["h"] = "h",
		["z"] = "t͡s", ["c"] = "t͡sʰ", ["s"] = "s",
		["j"] = "t͡s", ["q"] = "t͡sʰ", ["x"] = "s",
		[""] = "",
	}
	local final_conv = {
		["ii"] = "z̩", ["i"] = "i", ["u"] = "u",
		["a"] = "a", ["ia"] = "ia", ["ua"] = "ua",
		["ê"] = "e", ["iê"] = "ie", ["uê"] = "ue",
		["o"] = "o", ["io"] = "io", ["uo"] = "uo",
		["m"] = "m̩", ["n"] = "n̩",
		["ai"] = "aɪ", ["iai"] = "iaɪ", ["uai"] = "uaɪ",
		["oi"] = "oɪ",
		["ui"] = "uɪ", ["iui"] = "iuɪ",
		["au"] = "au", ["iau"] = "iau",
		["êu"] = "eu",
		["iu"] = "iu",
		["em"] = "əm", ["im"] = "im",
		["am"] = "am", ["iam"] = "iam",
		["êm"] = "ɛm",
		["en"] = "ən", ["in"] = "in",
		["an"] = "an", ["ian"] = "ian", ["uan"] = "uan",
		["ên"] = "ɛn", ["iên"] = "iɛn", ["uên"] = "uɛn",
		["on"] = "ɔn", ["ion"] = "iɔn", ["uon"] = "uɔn",
		["un"] = "un", ["iun"] = "iun",
		["ang"] = "aŋ", ["iang"] = "iaŋ", ["uang"] = "uaŋ",
		["ong"] = "ɔŋ", ["iong"] = "iɔŋ", ["uong"] = "uɔŋ",
		["ung"] = "ʊŋ", ["iung"] = "iʊŋ",
		["eb"] = "əp̚", ["ib"] = "ip̚",
		["ab"] = "ap̚", ["iab"] = "iap̚",
		["êb"] = "ɛp̚",
		["ed"] = "ət̚", ["id"] = "it̚",
		["ad"] = "at̚", ["iad"] = "iat̚", ["uad"] = "uat̚",
		["êd"] = "ɛt̚", ["iêd"] = "iɛt̚", ["uêd"] = "uɛt̚",
		["od"] = "ɔt̚",
		["ud"] = "ut̚", ["iud"] = "iut̚",
		["ag"] = "ak̚", ["iag"] = "iak̚", ["uag"] = "uak̚",
		["og"] = "ɔk̚", ["iog"] = "iɔk̚", ["uog"] = "uɔk̚",
		["ug"] = "ʊk̚", ["iug"] = "iʊk̚",
	}
	-- Keys ending in '*' are the sandhi forms selected in the loop below.
	local tone_conv = {
		["1"] = "⁴⁴", ["2"] = "¹¹",
		["3"] = "³¹",
		["4"] = "⁵³",
		["5"] = "¹", ["6"] = "⁵",
		["1*"] = "⁴⁴⁻³⁵",
		["4*"] = "⁵³⁻⁵⁵",
	}
	local ortho_pal = { --orthographic palatalization
		["z"] = "j",
		["c"] = "q",
		["s"] = "x",
	}
	local ortho_alv = { --orthographic alveolars
		["j"] = "z",
		["q"] = "c",
		["x"] = "s",
	}
	if type(text) == 'table' then text = text.args[1] end
	local words = mw.text.split(text, " / ")
	local result = {}
	for _, word in ipairs(words) do
		word = gsub(gsub(gsub(word, 'gd=', ''), '[%.,]', ''), '%s+$', '')
		local syllables = mw.text.split(word, '%s+')
		local initial, final, tone, ipa = {}, {}, {}, {}
		-- Pass 1: split every syllable into initial, final and tone digit.
		for i, syllable in ipairs(syllables) do
			initial[i] = match(syllable, "^[bpmfvdtnlgkhzcsjqx]?g?")
			final[i] = match(sub(syllable, len(initial[i]) + 1, -1), "^[^1-6]*")
			if initial[i] == "" and find(final[i], "^i") then
				error('Syllables starting with "i" need a "y" in front.')
			end
			final[i] = gsub(gsub(final[i], "^yi", "i"), "^y", "i")
			if find(initial[i], "[zcs]") and final[i] == "i" then
				final[i] = "ii"
			end
			-- A bare consonant (e.g. "m", "n") is a syllabic final, not an initial.
			if final[i] == "" then
				final[i] = initial[i]
				initial[i] = ""
			end
			tone[i] = match(syllable, "[1-6]$")
		end
		-- Pass 2: validate and convert; sandhi needs the raw tone of the next syllable.
		for i, syllable in ipairs(syllables) do
			if find(initial[i], "^[zcs]$") and find(final[i], "^i[^i]") then
				error("Initial should be " .. ortho_pal[initial[i]] .. "?")
			end
			if find(initial[i], "^[jqx]$") and find(final[i], "^[^i]") then
				error("Initial should be " .. ortho_alv[initial[i]] .. "?")
			end
			initial[i] = initial_conv[initial[i]] or error(("Unrecognised initial: \"%s\""):format(initial[i]))
			final[i] = final_conv[final[i]] or error(("Unrecognised final: \"%s\""):format(final[i]))
			-- Without this check a missing digit only surfaced as an obscure nil error.
			if not tone[i] then
				error(("Missing tone number (1-6) in syllable: \"%s\""):format(syllable))
			end
			if match(tone[i], "[14]") and match(tone[i+1] or "", "[2345]") then
				tone[i] = tone[i] .. "*"
			end
			-- Suffix "e3" on titles containing '仔': the initial is derived from
			-- the end of the previous (already converted) final.
			if initial[i] == "" and final[i] == "e" and tone[i] == "3" and find(mw.title.getCurrentTitle().text, '仔') then
				initial[i] = match(final[i-1] or '', '([mnŋpti])̚?$') or initial[i]
				initial[i] = find(final[i-1] or '', 'u$') and 'ʋ' or initial[i]
				initial[i] = find(final[i-1] or '', '[ao]$') and '(ʋ)' or initial[i]
				initial[i] = find(final[i-1] or '', 'e$') and '(i)' or initial[i]
			end
			tone[i] = tone_conv[tone[i]]
			ipa[i] = initial[i] .. final[i] .. tone[i]
		end
		table.insert(result, table.concat(ipa, " "))
	end
	return table.concat(result, "/, /")
end
-- Format a list of romanised readings for one dialect.
--
-- Parameters:
--   rom_list - sequence of reading strings
--   dialect  - one-letter dialect code: 'h', 'd', 'r' or 'z'
--   process  - "rom" joins the readings with " / " and wraps the tone
--              marks in <sup> tags; "ipa" converts each reading with
--              export.hrs_to_ipa and joins the results with "/, /"
-- Returns: the formatted string; nothing for any other `process` value.
function export.hrs_process(rom_list, dialect, process)
	if process == "rom" then
		local joined = table.concat(rom_list, " / ")
		return gsub(joined, "([ˊˇˋ˖])", "<sup>%1</sup>")
	end
	if process ~= "ipa" then
		return
	end

	-- hrs_to_ipa expects the full dialect name rather than the one-letter code.
	local dialect_names = {
		["h"] = "hailu",
		["d"] = "dabu",
		["r"] = "raoping",
		["z"] = "zhaoan",
	}
	local full_name = dialect_names[dialect]
	local converted = {}
	for idx = 1, #rom_list do
		converted[idx] = export.hrs_to_ipa(rom_list[idx], full_name)
	end
	return table.concat(converted, "/, /")
end
-- Convert a space-separated romanised reading with tone marks into IPA.
--
-- Parameters:
--   text    - reading; each syllable may end in one of the tone marks
--             ˊ ˇ ˋ ˖ (no mark is also a tone)
--   dialect - full dialect name; only "hailu" has tone values defined here
-- Returns: the IPA syllables joined by single spaces. Initials and finals
--   that are not in the conversion tables are rendered as empty strings.
-- Raises: an error when the input contains a look-alike character
--   (´ ` + ⁺ ^) in place of a proper tone mark.
function export.hrs_to_ipa(text, dialect)
	local initial_conv = {
		["b"] = "p", ["p"] = "pʰ", ["m"] = "m", ["f"] = "f", ["v"] = "v", ["bb"] = "b",
		["d"] = "t", ["t"] = "tʰ", ["n"] = "n", ["l"] = "l",
		["g"] = "k", ["k"] = "kʰ", ["ng"] = "ŋ", ["h"] = "h",
		["z"] = "t͡s", ["c"] = "t͡sʰ", ["s"] = "s",
		["j"] = "t͡s", ["q"] = "t͡sʰ", ["x"] = "s",
		["zh"] = "t͡ʃ", ["ch"] = "t͡ʃʰ", ["sh"] = "ʃ", ["rh"] = "ʒ",
		[""] = "",
	}
	local final_conv = {
		["ii"] = "ɨ",
		["i"] = "i", ["e"] = "e", ["a"] = "a", ["o"] = "o", ["u"] = "u",
		["ie"] = "ie", ["eu"] = "eu", ["ieu"] = "ieu",
		["ia"] = "ia", ["ua"] = "ua",
		["ai"] = "ai", ["iai"] = "iai", ["uai"] = "uai",
		["au"] = "au", ["iau"] = "iau",
		["io"] = "io", ["oi"] = "oi", ["ioi"] = "ioi",
		["iu"] = "iu", ["ui"] = "ui", ["iui"] = "iui",
		["ue"] = "ue",
		["iim"] = "ɨm", ["im"] = "im",
		["em"] = "em", ["iem"] = "iem",
		["am"] = "am", ["iam"] = "iam",
		["iin"] = "ɨn", ["in"] = "in",
		["en"] = "en", ["ien"] = "ien", ["uen"] = "uen",
		["an"] = "an", ["ian"] = "ian", ["uan"] = "uan",
		["on"] = "on", ["ion"] = "ion",
		["un"] = "un", ["iun"] = "iun",
		["ang"] = "aŋ", ["iang"] = "iaŋ", ["uang"] = "uaŋ",
		["ong"] = "oŋ", ["iong"] = "ioŋ",
		["ung"] = "uŋ", ["iung"] = "iuŋ",
		["er"] = "ə",
		["iib"] = "ɨp", ["ib"] = "ip",
		["eb"] = "ep", ["ieb"] = "iep",
		["ab"] = "ap", ["iab"] = "iap",
		["iid"] = "ɨt", ["id"] = "it",
		["ed"] = "et", ["ied"] = "iet", ["ued"] = "uet",
		["ad"] = "at", ["iad"] = "iat", ["uad"] = "uat",
		["od"] = "ot", ["iod"] = "iot",
		["ud"] = "ut", ["iud"] = "iut",
		["ag"] = "ak", ["iag"] = "iak", ["uag"] = "uak",
		["og"] = "ok", ["iog"] = "iok",
		["ug"] = "uk", ["iug"] = "iuk",
		["m"] = "m̩", ["n"] = "n̩", ["ng"] = "ŋ̍",
	}
	-- Look up the tone value (digits) from the tone mark; finals ending in
	-- p/t/k use the separate entries prefixed with "d".
	local function get_tone(final, tone_mark, dialect)
		local mark_to_value = {
			["hailu"] = {
				["ˋ"] = "53",
				[""] = "55",
				["ˊ"] = "24",
				["ˇ"] = "11",
				["˖"] = "33",
				["d"] = "5",
				["dˋ"] = "2",
			}
		}
		local mark = (find(final, "[ptk]$") and "d" or "") .. tone_mark
		return mark_to_value[dialect][mark] or ""
	end
	-- Sandhi value for a non-final syllable, or "" when the tone is unchanged.
	local function get_sandhi(syl_count, i, tone, dialect)
		if dialect == "hailu" then
			if i < syl_count then
				if tone == "24" then
					return "33"
				elseif tone == "5" then
					return "2"
				end
			end
		end
		return ""
	end
	local sup = {
		["1"] = "¹", ["2"] = "²", ["3"] = "³", ["4"] = "⁴", ["5"] = "⁵", ["-"] = "⁻",
	}
	-- Reject look-alike characters and tell the editor what to type instead.
	local function hrs_check_invalid(text)
		if not text then
			return nil
		end
		local common_errors = "[´`+⁺^]"
		local error_correction = {
			["´"] = "ˊ",
			["`"] = "ˋ",
			["+"] = "˖",
			["⁺"] = "˖",
			["^"] = "ˆ",
		}
		local correct = gsub(text, common_errors, error_correction)
		if text ~= correct then
			error("Invalid Hakka Romanization \"" .. text .. "\": please change it to \"" .. correct .. "\"")
		end
	end
	--check for common errors in input
	hrs_check_invalid(text)
	local syllables, initial, final, tone, sandhi, ipa = {}, {}, {}, {}, {}, {}
	syllables = mw.text.split(text, " ")
	for i, syllable in ipairs(syllables) do
		syllable = gsub(syllable, ",", "")
		--find initial, final, tone
		initial[i] = match(syllable, "^([bpmfvdtnlgkhzcsjqxr][ghb]?)") or ""
		tone[i] = match(syllable, "([ˊˇˋ˖])$") or ""
		final[i] = sub(syllable, len(initial[i]) + 1, -1 - len(tone[i]))
		-- A bare syllabic nasal ("m", "n", "ng") is consumed entirely by the
		-- initial pattern; move it to the final so the syllabic entries of
		-- final_conv are actually used. Restricted to initials that are valid
		-- finals so other input keeps its previous rendering.
		if final[i] == "" and final_conv[initial[i]] then
			final[i] = initial[i]
			initial[i] = ""
		end
		--convert initial, final, tone
		initial[i] = initial_conv[initial[i]] or ""
		final[i] = final_conv[final[i]] or ""
		tone[i] = get_tone(final[i], tone[i], dialect)
		sandhi[i] = get_sandhi(#syllables, i, tone[i], dialect)
		-- Tone digits (and the sandhi hyphen) are rendered as superscripts.
		ipa[i] = initial[i] .. final[i] ..
			gsub(tone[i] .. (sandhi[i] ~= "" and "-" or "") .. sandhi[i], "[12345%-]", sup)
	end
	return gsub(table.concat(ipa, " "), ",", "")
end
-- Convert Changting Pinyin readings (tone digits 1-5) into IPA.
--
-- Parameters:
--   text - string, or a table with an `args` field (uses `args[1]`);
--          alternative readings are separated by " / ", syllables by
--          whitespace; a leading 'ct=' and any '.' or ',' are removed
-- Returns: the IPA of each reading, joined by "/, /".
-- Raises: an error for "wung", for a syllable starting with "i"/"u"
--   without "y"/"w", for an unrecognised initial or final, and for a
--   syllable without a tone digit.
function export.ct_to_ipa(text)
	local initial_conv = {
		["b"] = "p", ["p"] = "pʰ", ["m"] = "m", ["f"] = "f", ["v"] = "v",
		["d"] = "t", ["t"] = "tʰ", ["n"] = "n", ["l"] = "l",
		["g"] = "k", ["k"] = "kʰ", ["ng"] = "ŋ", ["h"] = "h",
		["z"] = "t͡s", ["c"] = "t͡sʰ", ["s"] = "s",
		["j"] = "t͡ɕ", ["q"] = "t͡ɕʰ", ["x"] = "ɕ",
		["zh"] = "t͡ʃ", ["ch"] = "t͡ʃʰ", ["sh"] = "ʃ",
		[""] = "",
	}
	local final_conv = {
		["ï"] = "ʐ̩", ["i"] = "i", ["u"] = "u",
		["a"] = "a", ["ia"] = "ia", ["ua"] = "ua",
		["o"] = "o", ["io"] = "io",
		["e"] = "e", ["ie"] = "ie", ["ue"] = "ue",
		["ai"] = "ai", ["ui"] = "ui",
		["ao"] = "ɔ", ["iao"] = "iɔ",
		["eu"] = "əɯ", ["iu"] = "iəɯ", ["ieu"] = "iəɯ",
		["ang"] = "aŋ", ["iang"] = "iaŋ", ["uang"] = "uaŋ",
		["eng"] = "eŋ", ["ieng"] = "ieŋ", ["ueng"] = "ueŋ",
		["ing"] = "iŋ", ["ung"] = "uŋ",
		["ông"] = "ɔŋ", ["iông"] = "iɔŋ",
		["ong"] = "oŋ", ["iong"] = "ioŋ",
		["ng"] = "ŋ̍",
	}
	-- The key ending in '*' is the sandhi form selected in the loop below.
	local tone_conv = {
		["1"] = "³³", ["2"] = "²⁴",
		["3"] = "⁴²",
		["4"] = "⁵⁴", ["5"] = "²¹",
		["3*"] = "⁴²⁻³³",
	}
	if type(text) == 'table' then text = text.args[1] end
	local words = mw.text.split(text, " / ")
	local result = {}
	for _, word in ipairs(words) do
		word = gsub(gsub(gsub(word, 'ct=', ''), '[%.,]', ''), '%s+$', '')
		local syllables = mw.text.split(word, '%s+')
		local initial, final, tone, ipa = {}, {}, {}, {}
		-- Pass 1: split every syllable into initial, final and tone digit.
		for i, syllable in ipairs(syllables) do
			initial[i] = match(syllable, "^[bpmfvdtnlgkhzcsjqx]?[gh]?")
			final[i] = match(sub(syllable, len(initial[i]) + 1, -1), "^[^1-5]*")
			if initial[i] == "" and final[i] == "wung" then
				error('Please change "wung" to "ng".')
			end
			if initial[i] == "" and find(final[i], "^i") then
				error('Syllables starting with "i" need a "y" in front.')
			end
			final[i] = gsub(gsub(final[i], "^yi", "i"), "^y", "i")
			if initial[i] == "" and find(final[i], "^u") then
				error('Syllables starting with "u" need a "w" in front.')
			end
			final[i] = gsub(gsub(final[i], "^wu", "u"), "^w", "u")
			-- A bare consonant (e.g. "ng") is a syllabic final, not an initial.
			if final[i] == "" then
				final[i] = initial[i]
				initial[i] = ""
			end
			tone[i] = match(syllable, "[1-5]$")
		end
		-- Pass 2: convert; sandhi needs the raw tone of the next syllable.
		for i, syllable in ipairs(syllables) do
			initial[i] = initial_conv[initial[i]] or error(("Unrecognised initial: \"%s\""):format(initial[i]))
			final[i] = final_conv[final[i]] or error(("Unrecognised final: \"%s\""):format(final[i]))
			-- initial[i] is already IPA here; of the converted initials only
			-- those from z/c/s contain an 's'.
			if initial[i]:find("[zcs]") then
				final[i] = final[i]:gsub("ʐ", "z")
			end
			-- Without this check a missing digit only surfaced as a nil
			-- concatenation error.
			if not tone[i] then
				error(("Missing tone number (1-5) in syllable: \"%s\""):format(syllable))
			end
			if (tone[i] == "3") and (tone[i+1] == "3") then
				tone[i] = tone[i] .. "*"
			end
			-- TODO: tone sandhi?
			tone[i] = tone_conv[tone[i]]
			ipa[i] = initial[i] .. final[i] .. tone[i]
		end
		table.insert(result, table.concat(ipa, " "))
	end
	return table.concat(result, "/, /")
end
-- Hand the table of public functions back to whoever loads this module.
return export