-- Documentation for this module may be created at မဝ်ဂျူ:yo-pron/doc

local export = {}

local m_IPA = require("Module:IPA")
local lang = require("Module:languages").getByCode("yo")

local u = mw.ustring.char

local rsubn = mw.ustring.gsub

-- Single-result wrapper around rsubn(): performs the substitution and
-- returns only the resulting string, dropping the replacement count.
local function rsub(term, foo, bar)
	-- The extra parentheses truncate the call to its first return value.
	return (rsubn(term, foo, bar))
end

-- Re-applies the substitution `foo` -> `bar` to `term` until a pass leaves
-- the string unchanged, then returns that stable string.
local function rsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = rsub(previous, foo, bar)
	until term == previous
	return term
end


-- Combining diacritic code points, each built as a one-character string
-- with `u` (mw.ustring.char).
local ACUTE = u(0x0301) -- U+0301 COMBINING ACUTE ACCENT
local GRAVE = u(0x0300) -- U+0300 COMBINING GRAVE ACCENT
local MACRON = u(0x0304) -- U+0304 COMBINING MACRON
local DOT = u(0x0323) -- U+0323 COMBINING DOT BELOW
local NASAL = u(0x0303) -- U+0303 COMBINING TILDE (used as the nasalisation mark)

-- Lookup table from orthographic consonants (keys) to IPA symbols (values).
-- NOTE(review): not referenced by any function visible in this module;
-- export.IPA performs its consonant substitutions inline. Confirm whether
-- this table is still needed.
local consonants = {
	["b"] = "b",
	["d"] = "d",
	["f"] = "f",
	["g"] = "ɡ",
	["gb"] = "ɡ͡b",
	["h"] = "h",
	["j"] = "d͡ʒ",
	["k"] = "k",
	["l"] = "l",
	["m"] = "m",
	["n"] = "n",
	["p"] = "k͡p",
	["r"] = "ɾ",
	["s"] = "s",
	["ṣ"] = "ʃ",
	["t"] = "t",
	["w"] = "w",
	["y"] = "j",
}

-- Lookup table from orthographic oral vowels (keys) to IPA symbols (values).
-- NOTE(review): not referenced by any function visible in this module;
-- confirm whether this table is still needed.
local vowels = {
	["a"] = "a",
	["e"] = "e",
	["ẹ"] = "ɛ",
	["i"] = "i",
	["o"] = "o",
	["ọ"] = "ɔ",
	["u"] = "u",
}

-- Lookup table from orthographic vowel+"n" sequences and the bare letters
-- "m"/"n" (keys) to IPA symbols (values).
-- NOTE(review): not referenced by any function visible in this module;
-- confirm whether this table is still needed.
local nasals = {
	["an"] = "ã",
	["ẹn"] = "ɛ̃",
	["in"] = "ĩ",
	["ọn"] = "ɔ̃",
	["un"] = "ũ",
	["m"] = "m",
	["n"] = "ŋ",
}

-- Converts a tone-marked term (language code "yo") into an IPA transcription.
--
-- Parameters:
--   text     (string) the term; hyphens are treated as explicit syllable
--            breaks and apostrophes are dropped.
--   loanword (any)    when truthy, vowels after "n"/"m" are NOT automatically
--            nasalised.
--   downstep (number|string|nil) when given, the 1-based index of the
--            "."-delimited chunk that gets a "ꜜ" prefix.
--
-- Returns the transcription as an NFC-normalised string, with "." between
-- syllables and without surrounding slashes.
function export.IPA(text, loanword, downstep)
	local gsub = mw.ustring.gsub

	-- Pattern fragments are assembled from the named code points rather than
	-- typed as literal combining characters, which are invisible in most
	-- editors and easy to corrupt.
	local TONE = "([" .. ACUTE .. GRAVE .. MACRON .. "])"
	local NASALIZED = "([" .. NASAL .. "])"
	local NASAL_VOWEL = "([aɛiɔu])"
	local ONSETS = "bdfɡhjklmnŋɾsʃtw"

	text = mw.ustring.lower(text)
	-- A hyphen becomes the temporary marker "z"; it is turned into "." below.
	text = gsub(text, "%-", "z")
	-- Work on decomposed text so that tone marks are separate code points.
	text = mw.ustring.toNFD(text)
	-- Letters carrying a dot below map straight to their IPA symbols; any
	-- other character followed by a dot below is left untouched.
	text = gsub(text, ".[" .. DOT .. "]", {
		["e" .. DOT] = "ɛ",
		["o" .. DOT] = "ɔ",
		["s" .. DOT] = "ʃ",
	})

	-- Normalise whitespace and strip apostrophes BEFORE adding the sentinel,
	-- so that the trailing-space trim can actually match.
	text = gsub(text, "%s+", " ")
	text = gsub(text, "^ ", "")
	text = gsub(text, " $", "")
	text = gsub(text, "'", "")

	-- Sentinel: gives the final vowel a "following character" so the rules
	-- below also apply at the end of the term. It is removed near the end.
	text = text .. "x"

	-- Remaining orthography -> IPA. Order matters: "gb" before "g", and
	-- "j" before "y" (because "y" becomes IPA "j").
	text = gsub(text, "gb", "ɡ͡b")
	text = gsub(text, "g", "ɡ")
	text = gsub(text, "j", "d͡ʒ")
	text = gsub(text, "p", "k͡p")
	text = gsub(text, "r", "ɾ")
	text = gsub(text, "y", "j")

	-- "m"/"n" directly carrying a tone mark are rewritten as "ŋ".
	text = gsub(text, "[mn]" .. TONE, "ŋ%1")

	-- A vowel with no tone mark gets a macron. Repeated because gsub matches
	-- do not overlap, so adjacent unmarked vowels need more than one pass.
	text = rsub_repeatedly(text, "([aeɛioɔu])([aeɛioɔunbdfɡhjklmnŋɾsʃtwxz ])", "%1" .. MACRON .. "%2")

	-- Vowel + tone + "n" before a consonant, space or marker: drop the "n"
	-- and put the nasal mark on the vowel (before the tone mark).
	text = gsub(text, NASAL_VOWEL .. TONE .. "n([bdfɡhjklmnŋɾsʃtwxz ])", "%1" .. NASAL .. "%2%3")

	--TODO: FIX LOANWORDS
	if not loanword then
		-- Certain vowels directly after "n" / "m" are nasalised.
		text = gsub(text, "n([aiu])" .. TONE, "n%1" .. NASAL .. "%2")
		text = gsub(text, "m([iɔu])" .. TONE, "m%1" .. NASAL .. "%2")
	end

	-- Spread nasalisation from a nasal vowel onto an immediately following vowel.
	text = rsub_repeatedly(text, NASAL_VOWEL .. NASALIZED .. TONE .. NASAL_VOWEL .. TONE, "%1%2%3%4" .. NASAL .. "%5")

	-- Remove a hyphen marker that separates a nasal vowel from an identical
	-- following vowel letter.
	for _, vowel in ipairs({ "a", "ɛ", "i", "ɔ", "u" }) do
		text = gsub(text, vowel .. NASALIZED .. TONE .. "z" .. vowel, vowel .. "%1%2" .. vowel)
	end

	-- Remaining hyphen markers become syllable breaks.
	text = gsub(text, "z", ".")

	-- Split into syllables: insert "." after a toned vowel when it is followed
	-- by a consonant or by a DIFFERENT vowel (identical adjacent vowel letters
	-- are not separated).
	local all_vowels = { "a", "e", "ɛ", "i", "o", "ɔ", "u" }
	local has_nasal_form = { ["a"] = true, ["ɛ"] = true, ["i"] = true, ["ɔ"] = true, ["u"] = true }
	for _, vowel in ipairs(all_vowels) do
		local others = {}
		for _, other in ipairs(all_vowels) do
			if other ~= vowel then
				others[#others + 1] = other
			end
		end
		local following = "([" .. table.concat(others) .. ONSETS .. "])"
		text = gsub(text, vowel .. TONE .. following, vowel .. "%1.%2")
		if has_nasal_form[vowel] then
			text = gsub(text, vowel .. NASALIZED .. TONE .. following, vowel .. "%1%2.%3")
		end
	end
	-- Tone-bearing nasals also end a syllable.
	text = rsub_repeatedly(text, "m" .. TONE .. "([aeɛioɔubdfɡhjklnŋɾsʃtw])", "m%1.%2")
	text = rsub_repeatedly(text, "ŋ" .. TONE .. "([aeɛioɔubdfɡhjklmnɾsʃtw])", "ŋ%1.%2")

	-- Drop the sentinel.
	text = gsub(text, "x", "")

	-- Adds downstep ꜜ before the requested syllable.
	-- NOTE(review): chunks are delimited by "." only, and no "." is inserted
	-- at spaces, so in multi-word input a chunk can span a word boundary --
	-- confirm whether that is intended.
	if downstep then
		local target = tonumber(downstep)
		local index = 0
		-- "." is ASCII, so the byte-oriented string gsub is safe on UTF-8 here.
		-- Returning nil from the callback leaves the chunk unchanged.
		text = (text:gsub("[^.]+", function(syllable)
			index = index + 1
			if index == target then
				return "ꜜ" .. syllable
			end
		end))
	end

	return mw.ustring.toNFC(text)
end

-- Template entry point. Reads the positional arguments of the parent frame
-- as terms, converts each non-empty one with export.IPA (honouring the
-- `loan` and `downstep` named arguments) and hands the slash-wrapped
-- results to m_IPA.format_IPA_full.
-- Raises an error when no first positional argument is supplied.
function export.show(frame)
	local args = frame:getParent().args

	if not args[1] then
		error("Please provide a tone marked term.")
	end

	-- Only the exact string "1" switches loanword handling on.
	local is_loan = args["loan"] == "1"

	local results = {}
	for _, item in ipairs(args) do
		-- Empty positional arguments are skipped.
		if item ~= "" then
			results[#results + 1] = {
				pron = "/" .. export.IPA(item, is_loan, args["downstep"]) .. "/",
			}
		end
	end

	return m_IPA.format_IPA_full(lang, results)
end

return export