-- Documentation for this module may be created at မဝ်ဂျူ:cdo-pron/doc

local export = {}

local gsub = mw.ustring.gsub
local sub = mw.ustring.sub
local len = mw.ustring.len
local match = mw.ustring.match
local lower = mw.ustring.lower

-- Decomposition table: each precomposed toned vowel maps to its base vowel
-- followed by the bare combining tone mark (one row per mark: breve, macron,
-- acute, grave, circumflex). It is used as the replacement table in
-- gsub(text, ".", split_tone) so that tone marks can afterwards be matched and
-- stripped as separate characters.
local split_tone = {
	["ă"] = "a".."̆", ["ĕ"] = "e".."̆", ["ĭ"] = "i".."̆", ["ŏ"] = "o".."̆", ["ŭ"] = "u".."̆",
	["ā"] = "a".."̄", ["ē"] = "e".."̄", ["ī"] = "i".."̄", ["ō"] = "o".."̄", ["ū"] = "u".."̄",
	["á"] = "a".."́", ["é"] = "e".."́", ["í"] = "i".."́", ["ó"] = "o".."́", ["ú"] = "u".."́",
	["à"] = "a".."̀", ["è"] = "e".."̀", ["ì"] = "i".."̀", ["ò"] = "o".."̀", ["ù"] = "u".."̀",
	["â"] = "a".."̂", ["ê"] = "e".."̂", ["î"] = "i".."̂", ["ô"] = "o".."̂", ["û"] = "u".."̂",
}

-- Tone label lookup used by determ_tone. The key is the combining tone mark
-- (possibly empty) immediately followed by the stop coda "k" or "h" (possibly
-- empty); the value is the tone label. Labels are numbers except for the two
-- string labels "4A" (acute + k) and "4B" (acute + h). Combinations that are
-- not listed here have no tone label.
local tone_from_mark = {
	[""] = 1, ["̆"] = 1, ["̆k"] = 7, ["̆h"] = 7,
	["̄"] = 2,
	["́"] = 3,
	["́k"] = "4A", ["́h"] = "4B",
	["̀"] = 5,
	["̂"] = 6
}

-- IPA for each romanized initial (the empty string is the zero initial), in
-- three states selected by export.ipa:
--   "unchanged" - first syllable of a word, syllable following one that ends
--                 in "k", or syllable marked with "*";
--   "nasal"     - the previous syllable's IPA final ends in ŋ or a syllabic mark;
--   "lenited"   - every other non-initial syllable.
-- In the changed states the original initial is kept in a superscript
-- "<sup>(x-)</sup>" prefix in front of the resulting sound.
local initial_ipa = {
	["b"] = { ["unchanged"] = "p", ["lenited"] = "<sup>(p-)</sup>β", ["nasal"] = "<sup>(p-)</sup>m" },
	["p"] = { ["unchanged"] = "pʰ", ["lenited"] = "<sup>(pʰ-)</sup>β", ["nasal"] = "<sup>(pʰ-)</sup>m" },
	["m"] = { ["unchanged"] = "m", ["lenited"] = "m", ["nasal"] = "m" },
	["d"] = { ["unchanged"] = "t", ["lenited"] = "<sup>(t-)</sup>l", ["nasal"] = "<sup>(t-)</sup>n" },
	["t"] = { ["unchanged"] = "tʰ", ["lenited"] = "<sup>(tʰ-)</sup>l", ["nasal"] = "<sup>(tʰ-)</sup>n" },
	["n"] = { ["unchanged"] = "nˡ", ["lenited"] = "nˡ", ["nasal"] = "nˡ" },
	["l"] = { ["unchanged"] = "l̃", ["lenited"] = "l̃", ["nasal"] = "<sup>(l-)</sup>nˡ" },
	["g"] = { ["unchanged"] = "k", ["lenited"] = "<sup>(k-)</sup>", ["nasal"] = "<sup>(k-)</sup>ŋ" },
	["k"] = { ["unchanged"] = "kʰ", ["lenited"] = "<sup>(kʰ-)</sup>", ["nasal"] = "<sup>(kʰ-)</sup>ŋ" },
	["ng"] = { ["unchanged"] = "ŋ", ["lenited"] = "ŋ", ["nasal"] = "ŋ" },
	["h"] = { ["unchanged"] = "h", ["lenited"] = "<sup>(h-)</sup>", ["nasal"] = "<sup>(h-)</sup>ŋ" },
	["c"] = { ["unchanged"] = "t͡s", ["lenited"] = "<sup>(t͡s-)</sup>ʒ", ["nasal"] = "<sup>(t͡s-)</sup>ʒ" },
	["ch"] = { ["unchanged"] = "t͡sʰ", ["lenited"] = "<sup>(t͡sʰ-)</sup>ʒ", ["nasal"] = "<sup>(t͡sʰ-)</sup>ʒ" },
	["s"] = { ["unchanged"] = "s", ["lenited"] = "<sup>(s-)</sup>l", ["nasal"] = "<sup>(s-)</sup>n" },
	[""] = { ["unchanged"] = "", ["lenited"] = "", ["nasal"] = "<sup>(Ø-)</sup>ŋ" },
}

-- IPA for each romanized rime (tone marks already removed), in two variants.
-- export.ipa picks "open" when the syllable is the last one of its word and its
-- tone label ends in 3, 4A, 4B or 6, or when the syllable carries the "!"
-- marker; otherwise it picks "closed".
-- Keys ending in "2" are not romanization: export.ipa appends "2" to a rime
-- listed in dual_rimes when the syllable's tone label contains 3, 4 or 6, so
-- those rimes get a different pair of values in that case.
local final_ipa = {
	["a"]	= { ["closed"] = "a",		["open"] = "ɑ"	}, 
	["a̤"]	= { ["closed"] = "ɛ",		["open"] = "ɑ"	}, 
	["ae̤"]	= { ["closed"] = "œ",		["open"] = "ɔ"	}, 
	["ae̤h"]	= { ["closed"] = "øyʔ",	["open"] = "ɔyʔ"	},--see [[茉莉]]
	["ae̤k"]	= { ["closed"] = "øyʔ",	["open"] = "ɔyʔ"	},
	["ae̤ng"]	= { ["closed"] = "øyŋ",	["open"] = "ɔyŋ"	}, 
	["ah"]	= { ["closed"] = "aʔ",	["open"] = "ɑʔ"	}, 
	["a̤h"]	= { ["closed"] = "ɛʔ",	["open"] = "ɑʔ"	}, 
	["ai"]	= { ["closed"] = "ai",	["open"] = "ɑi"	}, 
	["aik"]	= { ["closed"] = "ɛiʔ",	["open"] = "aiʔ"	}, 
	["aing"]	= { ["closed"] = "ɛiŋ",	["open"] = "aiŋ"	}, 
	["aiu"]	= { ["closed"] = "ɛu",	["open"] = "ɑu"	}, 
	["ak"]	= { ["closed"] = "aʔ",	["open"] = "ɑʔ"	}, 
	["ang"]	= { ["closed"] = "aŋ",	["open"] = "ɑŋ"	}, 
	["au"]	= { ["closed"] = "au",	["open"] = "ɑu"	}, 
	["auk"]	= { ["closed"] = "ouʔ",	["open"] = "ɑuʔ"	}, 
	["aung"]	= { ["closed"] = "ouŋ",	["open"] = "ɑuŋ"	}, 
	["e"]	= { ["closed"] = "i",		["open"] = "ɛi"	}, 
	["e̤"]	= { ["closed"] = "œ",		["open"] = "ɔ"	}, 
	["eh"]	= { ["closed"] = "ɛiʔ",	["open"] = "ɛiʔ"	},--see [[茉莉]]
	["e̤h"]	= { ["closed"] = "œʔ",	["open"] = "œʔ"	}, 
	["ek"]	= { ["closed"] = "ɛiʔ",	["open"] = "aiʔ"	}, 
	["ek2"]	= { ["closed"] = "iʔ",	["open"] = "ɛiʔ"	}, 
	["e̤k"]	= { ["closed"] = "øyʔ",	["open"] = "ɔyʔ"	}, 
	["eng"]	= { ["closed"] = "ɛiŋ",	["open"] = "aiŋ"	}, 
	["eng2"]	= { ["closed"] = "iŋ",	["open"] = "ɛiŋ"	}, 
	["e̤ng"]	= { ["closed"] = "øyŋ",	["open"] = "ɔyŋ"	}, 
	["eu"]	= { ["closed"] = "ɛu",	["open"] = "ɑu"	}, 
	["eu2"]	= { ["closed"] = "ieu",	["open"] = "iɛu"	}, 
	["e̤ṳ"]	= { ["closed"] = "y",		["open"] = "øy"	}, 
	["e̤ṳk"]	= { ["closed"] = "yʔ",	["open"] = "øyʔ"	}, 
	["e̤ṳng"]	= { ["closed"] = "yŋ",	["open"] = "øyŋ"	}, 
	["i"]	= { ["closed"] = "i",		["open"] = "ɛi"	}, 
	["ia"]	= { ["closed"] = "ia",	["open"] = "iɑ"	}, 
	["iah"]	= { ["closed"] = "iaʔ",	["open"] = "iɑʔ"	}, 
	["iak"]	= { ["closed"] = "iaʔ",	["open"] = "iɑʔ"	}, 
	["iang"]	= { ["closed"] = "iaŋ",	["open"] = "iɑŋ"	}, 
	["iau"]	= { ["closed"] = "iau",	["open"] = "iau"	}, 
	["ie"]	= { ["closed"] = "ie",	["open"] = "iɛ"	}, 
	["ieh"]	= { ["closed"] = "ieʔ",	["open"] = "iɛʔ"	}, 
	["iek"]	= { ["closed"] = "ieʔ",	["open"] = "iɛʔ"	}, 
	["ieng"]	= { ["closed"] = "ieŋ",	["open"] = "iɛŋ"	}, 
	["ieu"]	= { ["closed"] = "ieu",	["open"] = "iɛu"	}, 
	["ih"]	= { ["closed"] = "iʔ",	["open"] = "ɛiʔ"	}, 
	["ik"]	= { ["closed"] = "iʔ",	["open"] = "ɛiʔ"	}, 
	["ing"]	= { ["closed"] = "iŋ",	["open"] = "ɛiŋ"	}, 
	["io"]	= { ["closed"] = "yo",	["open"] = "yɔ"	}, 
	["ioh"]	= { ["closed"] = "yoʔ",	["open"] = "yɔʔ"	}, 
	["iok"]	= { ["closed"] = "yoʔ",	["open"] = "yɔʔ"	}, 
	["iong"]	= { ["closed"] = "yoŋ",	["open"] = "yɔŋ"	}, 
	["iu"]	= { ["closed"] = "ieu",	["open"] = "iɛu"	}, 
	["ng"]	= { ["closed"] = "ŋ̍",		["open"] = "ŋ̍"	},
	["o"]	= { ["closed"] = "u",		["open"] = "ou"	}, 
	["o̤"]	= { ["closed"] = "o",		["open"] = "ɔ"	}, 
	["oh"]	= { ["closed"] = "uʔ",	["open"] = "ouʔ"	}, 
	["o̤h"]	= { ["closed"] = "oʔ",	["open"] = "ɔʔ"	}, 
	["oi"]	= { ["closed"] = "øy",	["open"] = "ɔy"	}, 
	["oi2"]	= { ["closed"] = "ui",	["open"] = "ui"	}, 
	["o̤i"]	= { ["closed"] = "øy",	["open"] = "ɔy"	}, 
	["ok"]	= { ["closed"] = "ouʔ",	["open"] = "ɔuʔ"	}, 
	["ok2"]	= { ["closed"] = "uʔ",	["open"] = "ouʔ"	}, 
	["o̤k"]	= { ["closed"] = "oʔ",	["open"] = "ɔʔ"	}, --see [[汝各儂]]
	["ong"]	= { ["closed"] = "ouŋ",	["open"] = "ɔuŋ"	}, 
	["ong2"]	= { ["closed"] = "uŋ",	["open"] = "ouŋ"	}, 
	["u"]	= { ["closed"] = "u",		["open"] = "ou"	}, 
	["ṳ"]	= { ["closed"] = "y",		["open"] = "øy"	}, 
	["ua"]	= { ["closed"] = "ua",	["open"] = "uɑ"	}, 
	["uah"]	= { ["closed"] = "uaʔ",	["open"] = "uɑʔ"	}, 
	["uai"]	= { ["closed"] = "uai",	["open"] = "uɑi"	}, 
	["uak"]	= { ["closed"] = "uaʔ",	["open"] = "uɑʔ"	}, 
	["uang"]	= { ["closed"] = "uaŋ",	["open"] = "uɑŋ"	}, 
	["ui"]	= { ["closed"] = "ui",	["open"] = "ui"	}, 
	["uk"]	= { ["closed"] = "uʔ",	["open"] = "ouʔ"	}, 
	["ṳk"]	= { ["closed"] = "yʔ",	["open"] = "øyʔ"	}, 
	["ung"]	= { ["closed"] = "uŋ",	["open"] = "ouŋ"	}, 
	["ṳng"]	= { ["closed"] = "yŋ",	["open"] = "øyŋ"	}, 
	["uo"]	= { ["closed"] = "uo",	["open"] = "uɔ"	}, 
	["uoh"]	= { ["closed"] = "uoʔ",	["open"] = "uɔʔ"	}, 
	["uoi"]	= { ["closed"] = "ui",	["open"] = "ui"	}, 
	["uok"]	= { ["closed"] = "uoʔ",	["open"] = "uɔʔ"	}, 
	["uong"]	= { ["closed"] = "uoŋ",	["open"] = "uɔŋ"	}, 
}

-- Superscript rendering of tone labels. export.ipa substitutes every tone
-- label (a digit 1-9 with an optional A/B suffix) and the punctuation "-",
-- "(" and ")" found in a tone string with the value stored here.
-- Labels 8 and 9 only occur as sandhi results (see tone_sandhi and
-- diminutive_sandhi); tone_from_mark never produces them.
local tone_ipa = {
	["1"] = "⁵⁵", -- dark level (yin ping)
	["2"] = "³³", -- rising (shang)
	["3"] = "²¹³", -- dark departing (yin qu)
	["4A"] = "²⁴", -- dark entering, type A (-k)
	["4B"] = "²⁴", -- dark entering, type B (-h)
	["5"] = "⁵³", -- light level (yang ping)
	["6"] = "²⁴²", -- light departing (yang qu)
	["7"] = "⁵", -- light entering (yang ru)
	["8"] = "²¹", -- half dark departing
	["9"] = "³⁵", -- half light departing
	["-"] = "⁻",
	["("] = "⁽",
	[")"] = "⁾",
}

-- Sandhi tone lookup. Keys are built by export.ipa by joining syllable
-- categories with "-": a letter A/B/C (from sylcat[1]) for each non-final
-- syllable followed by a numeral I-IV (from sylcat[2]) for the last syllable.
--   * Two-part keys ("A-I") give the sandhi tone of the first syllable of a
--     pair.
--   * Three-part keys ("A-B-I") give the sandhi tones of the first and second
--     syllables of a three-syllable word, joined by "-".
local tone_sandhi = {
	["A-I"] = "1", ["A-II"] = "1", ["A-III"] = "5", ["A-IV"] = "5",
	["B-I"] = "8", ["B-II"] = "8", ["B-III"] = "9", ["B-IV"] = "1",
	["C-I"] = "1", ["C-II"] = "2", ["C-III"] = "2", ["C-IV"] = "8",

	["A-A-I"] = "8-1", ["A-A-II"] = "8-1", ["A-A-III"] = "8-5", ["A-A-IV"] = "8-5",
	["A-B-I"] = "8-8", ["A-B-II"] = "8-8", ["A-B-III"] = "8-9", ["A-B-IV"] = "8-1",
	
	["B-A-I"] = "8-1", ["B-A-II"] = "8-1", ["B-A-III"] = "8-5", ["B-A-IV"] = "8-5",
	["B-B-I"] = "8-8", ["B-B-II"] = "8-8", ["B-B-III"] = "8-9", ["B-B-IV"] = "8-1",
	
	["C-A-I"] = "8-1", ["C-A-II"] = "8-1", ["C-A-III"] = "8-5", ["C-A-IV"] = "8-5",
	["C-B-I"] = "8-8", ["C-B-II"] = "8-8", ["C-B-III"] = "8-9", ["C-B-IV"] = "8-1",
	
	["A-C-I"] = "1-1", ["A-C-II"] = "1-1", ["A-C-III"] = "5-2", ["A-C-IV"] = "5-8",
	["B-C-I"] = "9-2", ["B-C-II"] = "9-2", ["B-C-III"] = "9-2", ["B-C-IV"] = "1-8",
	["C-C-I"] = "2-2", ["C-C-II"] = "2-2", ["C-C-III"] = "2-2", ["C-C-IV"] = "8-8",
}

-- Tone-label -> sandhi category, used to build tone_sandhi keys.
--   sylcat[1] classifies the tone of a syllable that undergoes sandhi (A/B/C).
--   sylcat[2] classifies the tone of the syllable that triggers it (I-IV).
-- Keys are tone labels as strings ("1".."7", "4A", "4B").
local sylcat = {
	[1] = {
		["1"] = "A", ["3"] = "A", ["4B"] = "A", ["6"] = "A",
		["2"] = "B", ["4A"] = "B",
		["5"] = "C", ["7"] = "C"
	},
	[2] = {
		["1"] = "I", 
		["5"] = "II", ["7"] = "II",
		["2"] = "III",
		["3"] = "IV", ["6"] = "IV", ["4A"] = "IV", ["4B"] = "IV"
	}
}

-- Tone label -> sandhi tone for syllables flagged as diminutive (trailing
-- backslash in the input). export.ipa uses the value to replace the "-<sandhi>"
-- part of such a syllable's tone string.
local diminutive_sandhi = {
	["1"] = "2", ["2"] = "2", ["5"] = "2",
	["3"] = "8", ["4A"] = "8", ["4B"] = "8", ["6"] = "8",
	["7"] = "7"
}

-- Set of romanized rimes that have a second reading: when the syllable's tone
-- label contains 3, 4 or 6, export.ipa appends "2" to the rime so that the
-- alternative final_ipa entry ("ong2", "ek2", ...) is used.
local dual_rimes = {
	["ong"] = true, ["ok"] = true,
	["eng"] = true, ["ek"] = true,
	["eu"] = true,
	["oi"] = true,
}

-- Replacement finals for a syllable whose IPA final is the syllabic nasal
-- "ŋ̍": the nasal is rendered according to the class of the following
-- syllable's initial (see neg_type). "alone" is used when no syllable follows
-- and lists all three variants.
local neg_assim = {
	["labial"] = "<sup>(ŋ̍-)</sup>m̩",
	["dental"] = "<sup>(ŋ̍-)</sup>n̩",
	["velar"] = "<sup>(ŋ̍-)</sup>ŋ̍",
	["alone"] = "<sup>(ŋ̍-)</sup>ŋ̍/m̩/n̩",
}

-- First character of the following syllable -> key into neg_assim.
-- "✘" is the placeholder export.ipa substitutes when there is no following
-- syllable; characters not listed here fall back to "velar" in export.ipa.
local neg_type = {
	["b"] = "labial", ["p"] = "labial", ["m"] = "labial",
	["d"] = "dental", ["t"] = "dental", ["n"] = "dental", ["l"] = "dental", ["s"] = "dental", ["c"] = "dental",
	["✘"] = "alone",
}

-- Pattern capturing the romanized initial at the start of a syllable: at most
-- one consonant letter optionally followed by "g" or "h" (which covers the
-- digraphs "ng" and "ch"). Both parts are optional, so the pattern always
-- matches and captures "" for a syllable that starts with a vowel.
local initial_string = "^([bpmdtnlgkhcs]?[gh]?)"

--- Format a romanization string for display.
-- Slashes are padded with spaces, the "!" and "\" markers are removed, and
-- any run of characters following ">" (up to the next ">", space or hyphen)
-- is wrapped as "<sup>→…</sup>".
-- @param text romanization string
-- @return the formatted string (only the string; substitution counts are dropped)
function export.rom(text)
	-- Each assignment keeps only the first result of gsub (the new string).
	local spaced = text:gsub("/", " / ")
	local unmarked = spaced:gsub("[!\\]", "")
	local annotated = unmarked:gsub(">([^> %-]+)", "<sup>→%1</sup>")
	return annotated
end

--- Convert a whole sentence to IPA, word by word.
-- Punctuation (, . ? ! ; : ') is removed, the text is lower-cased and split
-- on single spaces; every word is converted with export.ipa and the results
-- are joined with a single space.
-- @param text sentence in romanization
-- @return IPA string for the sentence
function export.sentence(text)
	local sentence = {}
	text = gsub(text, "[,%.%?!;:']", "")
	for word in mw.text.gsplit(lower(text), " ", true) do
		-- Leading, trailing or repeated spaces (and words that consisted only
		-- of punctuation) produce empty pieces. export.ipa has no final_ipa
		-- entry for an empty rime and would raise an error, so skip them.
		if word ~= "" then
			table.insert(sentence, export.ipa(word))
		end
	end
	return table.concat(sentence, " ")
end

--- Determine the tone label of a rime.
-- The text is first decomposed with split_tone. The decomposed text must then
-- consist of at most one combining tone mark and at most one trailing "h"/"k"
-- coda, surrounded by other characters; the mark and coda are concatenated and
-- looked up in tone_from_mark.
-- @param text rime (syllable without its initial), toned vowels precomposed
--   or already decomposed
-- @return the tone label as a string; if the text does not fit the pattern or
--   the mark/coda combination is not in tone_from_mark, the decomposed text is
--   returned unchanged
local function determ_tone(text)
	local decomposed = gsub(text, ".", split_tone)

	-- A nil result (unknown combination) makes gsub keep the original match.
	local function label_for(mark, stop)
		return tone_from_mark[mark .. stop]
	end

	local label = gsub(decomposed, "^[^̆̄́̀̂hk]*([̆̄́̀̂]?)[^̆̄́̀̂hk]*([hk]?)$", label_for)
	return label
end

--- Convert romanized text to an IPA string (containing <sup> HTML markup).
--
-- Input structure: readings are separated by "/", words by " ", syllables by
-- "-". A syllable may carry these markers:
--   trailing "\"  diminutive: the sandhi tone is taken from diminutive_sandhi;
--   trailing "!"  forces the "open" variant of the final (before any "\");
--   "*"           keeps the initial in its "unchanged" state;
--   "x>y"         the tone shown as the original tone is read from x, while
--                 the segments and the tone used to pick the sandhi category
--                 are read from y.
--
-- @param text romanization string, or a frame-like table whose args[1] holds it
-- @param feature optional; "no_sandhi" disables tone sandhi
-- @return IPA string; syllables and words are joined by " ", readings by "/, /"
-- Raises an error for words of more than four syllables (unless feature is
-- "no_sandhi") and for syllables whose initial, rime or tone is not covered by
-- the lookup tables.
function export.ipa(text, feature)
	if type(text) == "table" then
		text = text.args[1]
	end
	text = lower(text)
	local phrase_result = {}
	local words = mw.text.split(text, "/")
	for _, word in ipairs(words) do
		local word_result = {}
		local parts = mw.text.split(word, " ")
		for _, part in ipairs(parts) do
			local initial, final, tone, tone_conv, ipa, exc = {}, {}, {}, {}, {}, {}
			local lenition_blocked, ablaut_blocked, diminutive = {}, {}, {}
			local syllables = mw.text.split(part, "-")

			-- Pass 1: strip markers and split every syllable into romanized
			-- initial, rime (without tone mark) and tone label.
			for index, syllable in ipairs(syllables) do
				syllable = gsub(syllable, "\\$", function() diminutive[index] = true return "" end)
				syllable = gsub(syllable, "!$", function() ablaut_blocked[index] = true return "" end)
				syllable = gsub(syllable, "%*", function() lenition_blocked[index] = true return "" end)
				if match(syllable, ">") then
					-- "x>y": original tone from x; segments and sandhi tone from y.
					tone[index] = determ_tone(gsub(gsub(gsub(syllable, ">[^>]+$", ""), initial_string, ""), ".", split_tone))
					syllable = gsub(syllable, "[^>]+>", "")
					exc[index] = determ_tone(gsub(syllable, initial_string, ""))
				end
				initial[index] = match(syllable, initial_string)
				final[index] = sub(syllable, len(initial[index]) + 1, -1)
				final[index] = gsub(final[index], ".", split_tone)
				tone[index] = exc[index] and tone[index] or determ_tone(final[index])
				final[index] = gsub(final[index], "[̆̄́̀̂]", "")
				-- Rimes with two readings use the "...2" entry of final_ipa
				-- when the tone label contains 3, 4 or 6.
				if dual_rimes[final[index]] and match(tostring(tone[index]), "[346]") then
					final[index] = final[index] .. "2"
				end
				-- After d/t/n/l/c/ch/s the rime "io" is treated as "uo".
				final[index] = match(initial[index] .. final[index], "[dtnlcs]h?io") and gsub(final[index], "io", "uo") or final[index]
				-- A bare "ng" is a syllabic nasal: it is the rime, not the initial.
				if (initial[index] .. final[index]) == "ng" then
					initial[index], final[index] = "", "ng"
				end
			end

			-- Tone sandhi: build "<original>-<sandhi>" strings for every
			-- syllable but the last one.
			if #syllables == 1 or feature == "no_sandhi" then
				tone_conv = tone

			elseif #syllables == 2 then
				tone_conv = {
					tone[1].."-"..(tone_sandhi[sylcat[1][exc[1] or tone[1]].."-"..sylcat[2][tone[2]]]),
					tone[2]
				}

			elseif #syllables == 3 then
				-- Declared local: the original assignment leaked a global.
				local sandhi = mw.text.split(tone_sandhi[sylcat[1][exc[1] or tone[1]].."-"..
					sylcat[1][exc[2] or tone[2]].."-"..sylcat[2][tone[3]]], "-")
				tone_conv = {
					tone[1].."-"..sandhi[1],
					tone[2].."-"..sandhi[2],
					tone[3]
				}

			elseif #syllables == 4 then
				-- Handled as two pairs (1-2 and 3-4); the second syllable is
				-- shown with an optional change to tone 8.
				tone_conv = {
					tone[1].."-"..tone_sandhi[sylcat[1][exc[1] or tone[1]].."-"..sylcat[2][tone[2]]],
					tone[2].."(-8)",
					tone[3].."-"..tone_sandhi[sylcat[1][exc[3] or tone[3]].."-"..sylcat[2][tone[4]]],
					tone[4]
				}

			else
				-- Previously tone_conv stayed empty here and the loop below
				-- failed with an unrelated "string expected, got nil" error.
				error("tone sandhi is only defined for words of up to four syllables, got "
					.. #syllables .. " in \"" .. part .. "\"")
			end

			-- Pass 2: turn every syllable into IPA.
			for index = 1, #syllables do
				if diminutive[index] then tone_conv[index] = gsub(tone_conv[index], "%-.+$", "-" .. diminutive_sandhi[tone[index]]) end
				-- "open" variant: last syllable with a tone ending in 3/4A/4B/6,
				-- or any syllable marked with "!".
				if (match(tostring(tone_conv[index]), "[346][AB]?$") and (#syllables == 1 or index == #syllables)) or ablaut_blocked[index] then
					final[index] = final_ipa[final[index]]["open"]
				else
					final[index] = final_ipa[final[index]]["closed"]
				end
				-- final[index-1] already holds the previous syllable's IPA final.
				local initial_state = (index == 1 or match(syllables[index-1], "k!?\\?$") or lenition_blocked[index])
					and "unchanged" or (match(final[index-1], "[ŋ̩̍]$") and "nasal" or "lenited")

				initial[index] = initial_ipa[initial[index]][initial_state]
				-- Syllabic nasal: assimilate to the next syllable's initial.
				if final[index] == "ŋ̍" then
					final[index] = neg_assim[neg_type[sub(syllables[index + 1] or "✘", 1, 1)] or "velar"]
				end
				-- Collapse "x-x" (sandhi tone equal to the original) to "x";
				-- returning nil keeps the matched text unchanged.
				tone_conv[index] = gsub(tone_conv[index], "([1-9AB]+)%-([1-9AB]+)", function(original, sandhi)
					if original == sandhi then
						return original
					end end)
				tone_conv[index] = gsub(tone_conv[index], "([1-9%-%(%)][AB]?)", tone_ipa)
				ipa[index] = initial[index] .. final[index] .. tone_conv[index]
			end
			table.insert(word_result, table.concat(ipa, " "))
		end
		table.insert(phrase_result, table.concat(word_result, " "))
	end
	return table.concat(phrase_result, "/, /")
end

return export