-- Documentation for this module may be created at မဝ်ဂျူ:hak-pron/doc

local export = {}
local m_string_utils = require("Module:string utilities")

local gsub = m_string_utils.gsub
local sub = mw.ustring.sub
local match = m_string_utils.match
local find = m_string_utils.find
local len = m_string_utils.len
local lower = m_string_utils.lower
local toNFD = mw.ustring.toNFD

--- Build the wikitext that lists Hakka romanisations (and, in full mode, IPA).
-- `text` is a ';'-separated list of segments. A segment is processed by every
-- branch whose key ('pfs', 'hrs', 'gd', 'ct') occurs anywhere inside it, and
-- alternative readings inside a segment are separated by '/'.
-- When the first argument is a table, `text` and `convtype` are taken from
-- its `.args[1]` and `.args[2]` (the latter defaulting to '').
-- @param text     string, or a table carrying an `args` field
-- @param convtype '' selects the full nested list with IPA; any other value
--                 selects the compact, romanisation-only output
-- @return the accumulated wikitext string
function export.rom_display(text,convtype)
	if type(text) == 'table' then text,convtype = text.args[1],(text.args[2] or '') end
	local display = ''
	-- compact-mode strings, filled in per system and assembled after the loop
	local show = { ['pfs'] = '', ['gd'] = '', ['hrs'] = '', ['ct'] = '' }
	-- pad every '/' with spaces so readings can later be split on ' / '
	local decomp = mw.text.split(gsub(text,'/',' / '),';',true)
	local TableTools = require('Module:table')
	for i = 1,#decomp,1 do
		if match(decomp[i],'pfs') then
			decomp[i] = gsub(decomp[i],'pfs=','')
			-- per-variety lists ('n' and 's'), kept index-aligned by add()
			local pfs_readings = { ['n'] = {}, ['s'] = {} }
			local hrs_readings = { ['n'] = {}, ['s'] = {} }
			local hpy_readings = { ['n'] = {}, ['s'] = {} }
			local ipa_readings = { ['n'] = {}, ['s'] = {} }
			-- Render one Sixian sub-list. `style` is 'n', 's' or 'ns'; for 'ns'
			-- the romanisation rows use the 'n' lists (style:sub(1,1) is 'n').
			local function display_format(style)
				local label = { ['n'] = 'Northern ', ['s'] = 'Southern ', ['ns'] = '' }
				local city = {
					['n']	= '[[w:Miaoli City|Miaoli]]',
					['s']	= '[[w:Neipu|Neipu]]',
					['ns']	= '[[w:Miaoli City|Miaoli]] and [[w:Neipu|Neipu]]'
					}
				local text = string.format("\n** <small>(''[[w:Sixian dialect|%sSixian]], incl. %s'')</small>", label[style], city[style])
				text = text .. "\n*** <small>''[[w:Pha̍k-fa-sṳ|Pha̍k-fa-sṳ]]''</small>: <span style=\"font-family: Consolas, monospace;\">" .. table.concat(pfs_readings[style:sub(1,1)], ' / ') .. '</span>'
				text = text .. "\n*** <small>''[[w:Taiwanese Hakka Romanization System|Hakka Romanization System]]''</small>: <span style=\"font-family: Consolas, monospace;\">" .. table.concat(hrs_readings[style:sub(1,1)], ' / ') .. '</span>'
				text = text .. "\n*** <small>''[[w:Hagfa Pinyim|Hagfa Pinyim]]''</small>: <span style=\"font-family: Consolas, monospace;\">" .. table.concat(hpy_readings[style:sub(1,1)], ' / ') .. '</span>'
				local ipa = '\n*** <small>Sinological [[Wiktionary:International Phonetic Alphabet|IPA]]'
				local span = '</small>: <span class="IPA">/'
				text = text .. ipa
				if style == 'ns' then
					local north = table.concat(ipa_readings['n'], '/, /')
					local south = table.concat(ipa_readings['s'], '/, /')
					if north == south then
						-- identical IPA: a single unlabelled row
						text = text .. span .. north .. '/</span>'
					else
						-- IPA differs: one labelled row per variety
						text = text .. " (''Northern, incl. " .. city['n'] .. "'')" .. span .. north .. "/</span>"
						text = text .. ipa .. " (''Southern, incl. " .. city['s'] .. "'')" .. span .. south .. "/</span>"
					end
				else
					text = text .. span .. table.concat(ipa_readings[style], '/, /') .. '/</span>'
				end
				return text
			end
			-- Derive the southern spelling: e (plain or tone-marked) becomes the
			-- matching a before an optional U+030D and n/t, when it follows y,
			-- ngi, k(h)i, or hi at the start or after a hyphen/whitespace.
			local function southern(text)
				local function convert(a, b, c)
					local e_a = { ['e'] = 'a', ['ê'] = 'â', ['é'] = 'á', ['è'] = 'à' }
					return a .. e_a[b] .. c
				end
				text = gsub(text, '([yY])([eéèê])(̍?[nt])', convert)
				text = gsub(text, '([nN]gi)([eéèê])(̍?[nt])', convert)
				text = gsub(text, '([kK]h?i)([eéèê])(̍?[nt])', convert)
				text = gsub(text, '^([hH]i)([eéèê])(̍?[nt])', convert)
				text = gsub(text, '([%-%s][hH]i)([eéèê])(̍?[nt])', convert)
				return text
			end
			-- Append a reading and its three conversions to the `style` lists.
			local function add(style, reading)
				pfs_readings[style][#pfs_readings[style] + 1] = reading
				hrs_readings[style][#hrs_readings[style] + 1] = export.hrs(reading, style)
				hpy_readings[style][#hpy_readings[style] + 1] = export.pfs_to_hpy(reading)
				ipa_readings[style][#ipa_readings[style] + 1] = export.ipa(reading, style)
			end
			
			-- stays true only while every reading is shared by both varieties
			local ns = true
			for _, reading in ipairs(mw.text.split(decomp[i], ' / ')) do
				if match(reading,':') then
					-- explicit "<variety>:<reading>" form
					local pair = mw.text.split(reading, ':')
					if pair[1] == 'ns' then
						add('n', pair[2])
						add('s', pair[2])
					else
						ns = false
						add(pair[1], pair[2])
					end
				elseif match(reading,'[yY]') or reading ~= southern(reading) then
					-- contains y, or southern() alters it: list the varieties separately
					ns = false
					add('n', reading)
					add('s', southern(reading))
				else
					add('n', reading)
					add('s', reading)
				end
			end
		
			if convtype == '' then
				if ns then 
					display = display .. display_format('ns')
				else
					if #pfs_readings['n'] ~= 0 then display = display .. display_format('n') end
					if #pfs_readings['s'] ~= 0 then display = display .. display_format('s') end
				end
			else
				-- compact mode: merge southern into northern and drop duplicates
				-- (this inner `i` shadows the outer segment index within the loop only)
				for i, reading in ipairs(pfs_readings['s']) do
					pfs_readings['n'][#pfs_readings['n'] + 1] = pfs_readings['s'][i]
				end
				show['pfs'] = table.concat(TableTools.removeDuplicates(pfs_readings['n']), ' / ')
			end
		end
		if match(decomp[i],'hrs') then
			decomp[i] = gsub(decomp[i], 'hrs=', '')
			-- only the dialects listed here are rendered in full mode
			local supported = { "h" }
			local hrs_readings = { ['h'] = {}, ['d'] = {}, ['r'] = {}, ['z'] = {} }
			
			for _, reading in ipairs(mw.text.split(decomp[i], ' / ')) do
				if find(reading, ':') then
					-- "<dialect letters, comma-separated>:<romanisation>"
					local dialects, rom = match(reading, '^([hdrz,]+):(.+)$')
					for _, dialect in ipairs(mw.text.split(dialects, ',')) do
						table.insert(hrs_readings[dialect], rom)
					end
				else
					error("Missing dialect label for Hakka Romanization.")
				end
			end
			
			local dialect_link = {
				['h'] = "[[w:Hailu dialect|Hailu]], incl. [[w:Zhudong|Zhudong]]",
				['d'] = "[[w:zh:大埔話|Dabu]]",
				['r'] = "[[w:Raoping Hakka|Raoping]]",
				['z'] = "[[w:zh:詔安客語|Zhao'an]]",
			}
			
			if convtype == '' then
				for _, dialect in ipairs(supported) do
					display = display .. string.format("\n** <small>(''%s'')</small>", dialect_link[dialect])
					display = display .. "\n*** <small>''[[w:Taiwanese Hakka Romanization System|Hakka Romanization System]]''</small>: <span style=\"font-family: Consolas, monospace;\">" .. export.hrs_process(hrs_readings[dialect], dialect, "rom") .. '</span>'
					display = display .. '\n*** <small>Sinological [[Wiktionary:International Phonetic Alphabet|IPA]]</small>: <span class="IPA">/' .. export.hrs_process(hrs_readings[dialect], dialect, "ipa") .. "/</span>"
				end
			else
				show['hrs'] = export.hrs_process(hrs_readings['h'], 'h', "rom") --TO-DO: multiple dialects
			end
		end
		if match(decomp[i],'gd') then
			-- digits 1-6 are tone numbers; render them as superscripts
			local gd_formatted = gsub(gsub(decomp[i], 'gd=', ''), '([1-6])', '<sup>%1</sup>')
			if convtype == '' then
				display = display .. "\n** <small>(''[[w:Meixian dialect|Meixian]]'')</small>"
				display = display .. "\n*** <small>''[[w:Guangdong_Romanization#Hakka|Guangdong]]''</small>: <span style=\"font-family: Consolas, monospace;\">" .. gd_formatted .. '</span>'
				display = display .. '\n*** <small>Sinological [[Wiktionary:International Phonetic Alphabet|IPA]]</small>: <span class="IPA">/' .. export.gd_to_ipa(decomp[i]) .. "/</span>"
			else
				show['gd'] = gd_formatted
			end
		end
		if match(decomp[i],'ct') then
			-- digits 1-5 are tone numbers; render them as superscripts
			local ct_formatted = gsub(gsub(decomp[i], 'ct=', ''), '([1-5])', '<sup>%1</sup>')
			if convtype == '' then
				display = display .. "\n** <small>(''[[w:Changting dialect|Changting]]'')</small>"
				display = display .. "\n*** <small>''Changting Pinyin''</small>: <span style=\"font-family: Consolas, monospace;\">" .. ct_formatted .. '</span>'
				display = display .. '\n*** <small>Sinological [[Wiktionary:International Phonetic Alphabet|IPA]]</small>: <span class="IPA">/' .. export.ct_to_ipa(decomp[i]) .. "/</span>"
			else
				show['ct'] = ct_formatted
			end
		end
	end
	if convtype ~= '' then
		local pfs = " <small>(''[[w:Sixian dialect|Sixian]], [[w:Pha̍k-fa-sṳ|PFS]]'')</small>: <span style=\"font-family: Consolas, monospace;\">" .. show['pfs'] .. '</span>'
		local gd = " <small>(''[[w:Meixian dialect|Meixian]], [[w:Guangdong_Romanization#Hakka|Guangdong]]'')</small>: <span style=\"font-family: Consolas, monospace;\">" .. show['gd'] .. '</span>'
		local hrs = " <small>(''[[w:Hailu dialect|Hailu]], [[w:Taiwanese Hakka Romanization System|HRS]]'')</small>: <span style=\"font-family: Consolas, monospace;\">" .. show['hrs'] .. '</span>'
		local ct = " <small>(''[[w:Changting dialect|Changting]], Changting Pinyin'')</small>: <span style=\"font-family: Consolas, monospace;\">" .. show['ct'] .. '</span>'
		
		-- exactly one system present: append it inline; otherwise emit one
		-- "\n*:" line per non-empty system
		if show['pfs'] ~= '' and show['hrs'] == '' and show['gd'] == '' and show['ct'] == '' then
			display = display .. pfs
		elseif show['gd'] ~= '' and show['pfs'] == '' and show['hrs'] == '' and show['ct'] == '' then
			display = display .. gd
		elseif show['hrs'] ~= '' and show['pfs'] == '' and show['gd'] == '' and show['ct'] == '' then
			display = display .. hrs
		elseif show['ct'] ~= '' and show['pfs'] == '' and show['hrs'] == '' and show['gd'] == '' then
			display = display .. ct
		else
			display = display .. (show['pfs'] ~= '' and '\n*:' .. pfs or '')
			display = display .. (show['hrs'] ~= '' and '\n*:' .. hrs or '')
			display = display .. (show['gd'] ~= '' and '\n*:' .. gd or '')
			display = display .. (show['ct'] ~= '' and '\n*:' .. ct or '')
		end
	end
	return display
end

--- Work out the tone number (1-6) of a syllable from its spelling.
-- The syllable is decomposed to NFD first so that tone diacritics show up as
-- separate combining characters. Diacritics are tested in a fixed order; a
-- syllable without any of them is tone 5 when it ends in a stop consonant
-- that does not directly follow "n", and tone 4 otherwise.
-- @param text a single syllable
-- @return tone number
local function find_tone(text)
	local decomposed = toNFD(text)
	-- { combining mark, tone number }, in priority order
	local diacritic_tones = {
		{ '̂', 1 },
		{ '̀', 2 },
		{ '́', 3 },
		{ '̍', 6 },
	}
	for _, entry in ipairs(diacritic_tones) do
		if find(decomposed, entry[1]) then
			return entry[2]
		end
	end
	if find(decomposed, '[^n][ptkbdg]$') then
		return 5
	end
	return 4
end

--- Convert a Pha̍k-fa-sṳ reading to a Sinological IPA string.
-- The reading is lower-cased, full stops are removed, and whitespace is
-- turned into hyphens before it is split into syllables on "-". Each
-- syllable is split into initial and final, the tone is read from the final's
-- diacritics, and the pieces are joined again with spaces.
-- @param text    the reading, or a table whose `.args[1]` holds it
-- @param dialect 'n' or 's'; 's' renders a "y" initial as "(j)", and 'n'
--                enables the extra sandhi rule for the syllable "é"
-- @return exactly one value: the IPA string
function export.ipa(text, dialect)
	local syllables, initial, final, tone, tone_conv = {}, {}, {}, {}, {}
	local ipa = {}
	if type(text) == 'table' then text = text.args[1] end
	text = gsub(gsub(gsub(lower(text), '%.', ''), '%s+$', ''), '%s+', '-')
	syllables = mw.text.split(text, "-")
	for i, syllable in ipairs(syllables) do
		-- Work on a comma-free copy; syllables[i] keeps its comma because the
		-- second loop uses it to block sandhi across a pause.
		syllable = gsub(syllable, ",", "")
		-- "oa"/"oe" (with any tone mark on the o) are rewritten as "ua"/"ue",
		-- moving the tone mark onto the second vowel
		syllable = gsub(syllable,'o̍[ae]',{['o̍a']='ua̍',['o̍e']='ue̍'})
		syllable = gsub(syllable,'[oóòôō][ae]',{['oa']='ua',['óa']='uá',['òa']='uà',['ôa']='uâ',['ōa']='uā',['oe']='ue',['óe']='ué',['òe']='uè',['ôe']='uê',['ōe']='uē'})
		
		initial[i] = match(syllable, '^[mnptkcfvshyl]?[gh]?h?')
		final[i] = sub(syllable, len(initial[i]) + 1, -1)
		
		local initial_ipa = {
			['ng'] = 'ŋ',
			['ph'] = 'pʰ',
			['th'] = 'tʰ',
			['kh'] = 'kʰ',
			['ch'] = 't͡s',
			['chh'] = 't͡sʰ',
			['y'] = 'i'
		}
		-- initials not listed are already their own IPA symbol
		initial[i] = initial_ipa[initial[i]] or initial[i]
		-- read the tone before the diacritics are stripped below
		tone[i] = find_tone(final[i])
		local final_conv = {
			['á'] = 'a', ['é'] = 'e', ['í'] = 'i', ['ó'] = 'o', ['ú'] = 'u', ['́'] = '',
			['à'] = 'a', ['è'] = 'e', ['ì'] = 'i', ['ò'] = 'o', ['ù'] = 'u', ['̀'] = '',
			['â'] = 'a', ['ê'] = 'e', ['î'] = 'i', ['ô'] = 'o', ['û'] = 'u', ['̂'] = '',
			['ń'] = 'n', ['ǹ'] = 'n',
			['̍'] = '',
			['ṳ'] = 'ɨ',
		}
		final[i] = gsub(final[i], '[âêîôû̂àèìòù̀áéíóú́ńǹ̍ṳ]', final_conv)
		if initial[i] == 'i' then
			-- "y" initial: prefix the final with "i" unless it is already
			-- i / im / in / ip / it, then replace the initial itself
			final[i] = (find(final[i], '^i[mnpt]?$') and '' or 'i') .. final[i]
			initial[i] = dialect == 's' and '(j)' or ''
		end
		final[i] = gsub(final[i], '([ptk])$', '%1̚')
		final[i] = gsub(final[i], 'ng$', 'ŋ')
		-- a final consisting only of ŋ is syllabic
		final[i] = final[i] == 'ŋ' and 'ŋ̍' or final[i]
		final[i] = gsub(final[i], 'er$', 'ə')
		-- mark non-syllabic vowels in two- and three-vowel sequences
		final[i] = gsub(final[i], '([aeiouɨ])([aeiouɨ])([aeiouɨ]?)', function(first, second, third)
				if third ~= '' then
					first = first .. '̯'
					third = third .. '̯'
				elseif first == 'i' or first == 'u' then
					first = first .. '̯'
				elseif second == 'i' or second == 'u' then
					second = second .. '̯'
				end
			return first .. second .. third end)
	end
	for i, syllable in ipairs(syllables) do
		local tone_ipa = {
			[1] = '²⁴',
			[2] = '¹¹',
			[3] = '³¹',
			[4] = '⁵⁵',
			[5] = '²',
			[6] = '⁵',
		}
		tone_conv[i] = tone_ipa[tone[i]]
		-- Append the "⁻¹¹" sandhi value when either
		--  * a tone-1 syllable precedes tone 1, 4 or 6 with no comma between, or
		--  * dialect is 'n', the syllable is "é", the page title contains 仔,
		--    and the preceding syllable has tone 3 or 5.
		-- tostring() keeps find() safe at the edges, where the neighbouring
		-- tone is nil.
		if (tone[i] == 1 and find(tostring(tone[i+1]), '[146]') and not find(syllable, ",")) or (syllable == 'é' and dialect == 'n' and find(mw.title.getCurrentTitle().text, '仔') and find(text, '-é') and find(tostring(tone[i-1]), '[35]')) then
			tone_conv[i] = tone_conv[i] .. '⁻¹¹'
		end
		
		ipa[i] = initial[i] .. final[i] .. tone_conv[i]
	end
	-- The parentheses truncate the result to one value. If gsub also returns
	-- a replacement count (as string.gsub does), returning it directly would
	-- leak that number into the page when this function is used via #invoke,
	-- which concatenates every returned value.
	return (gsub(table.concat(ipa, " "), ",", ""))
end

--- Convert a Pha̍k-fa-sṳ reading into the Hakka Romanization System form
-- shown by rom_display, with each syllable's tone mark inside <sup> tags.
-- The reading is lower-cased and split on spaces and hyphens; the converted
-- syllables are joined with single spaces.
-- @param text    the reading, or a table whose `.args[1]` holds it
-- @param dialect when 's', a "y" is written "(r)i" instead of "i"
-- @return the converted reading
function export.hrs(text, dialect)
	if type(text) == 'table' then
		text = text.args[1]
	end

	-- lookup tables used for every syllable
	local plain_map = { ['p'] = 'b', ['t'] = 'd', ['k'] = 'g', ['y'] = dialect == 's' and '(r)i' or 'i' }
	local aspirated_map = { ['bh'] = 'p', ['dh'] = 't', ['gh'] = 'k', ['ch'] = 'z' }
	local palatal_map = { ['z'] = 'j', ['c'] = 'q', ['s'] = 'x', ['i'] = '' }
	local tone_marks = { [1] = 'ˊ', [2] = 'ˇ', [3] = 'ˋ', [5] = 'ˋ' }

	local pieces = mw.text.split(gsub(lower(text), ' ', '-'), "-")
	for idx, syl in ipairs(pieces) do
		-- a comma is taken to be the last character and is re-attached at the end
		local has_comma = find(syl, ',')
		local trailer = has_comma and ',' or ''
		if has_comma then
			syl = sub(syl, 1, -2)
		end

		-- consonants: plain stops and y first, then the aspirated digraphs
		-- that step produced, then "zh"
		syl = gsub(syl, '[ptky]', plain_map)
		syl = gsub(syl, '[bdgc]h', aspirated_map)
		syl = gsub(syl, 'zh', 'c')
		syl = gsub(syl, '([zcsi])([iíìî])', function(consonant, vowel)
			return palatal_map[consonant] .. vowel
		end)

		-- the tone has to be read while the diacritics are still present;
		-- tones without an entry in tone_marks get no mark
		local mark = tone_marks[find_tone(syl)] or ''

		-- vowels: ṳ -> ii, strip tone diacritics, oa/oe -> ua/ue
		syl = gsub(syl, 'ṳ', 'ii')
		syl = gsub(toNFD(syl), '[́̀̂̍]', '')
		syl = gsub(syl, 'o([ae])', 'u%1')

		pieces[idx] = syl .. "<sup>" .. mark .. "</sup>" .. trailer
	end
	return table.concat(pieces, " ")
end

--- Convert a Pha̍k-fa-sṳ reading into the form rom_display lists under
-- Hagfa Pinyim, with each syllable's tone number inside <sup> tags.
-- The reading is lower-cased and split on spaces and hyphens; a syllable that
-- is exactly "..." is passed through untouched. Syllables are joined with
-- single spaces.
-- @param text the reading, or a table whose `.args[1]` holds it
-- @return the converted reading
function export.pfs_to_hpy(text)
	if type(text) == 'table' then
		text = text.args[1]
	end

	-- lookup tables used for every syllable
	local plain_map = { ['p'] = 'b', ['t'] = 'd', ['k'] = 'g' }
	local aspirated_map = { ['bh'] = 'p', ['dh'] = 't', ['gh'] = 'k', ['ch'] = 'z' }
	local palatal_map = { ['z'] = 'j', ['c'] = 'q', ['s'] = 'x' }

	local pieces = mw.text.split(gsub(lower(text), ' ', '-'), "-")
	for idx, syl in ipairs(pieces) do
		if syl ~= '...' then
			-- a comma is taken to be the last character and is re-attached at the end
			local has_comma = find(syl, ',')
			local trailer = has_comma and ',' or ''
			if has_comma then
				syl = sub(syl, 1, -2)
			end

			-- consonants: plain stops first, then the aspirated digraphs that
			-- step produced, then "zh", then palatalisation before i
			syl = gsub(syl, '[ptk]', plain_map)
			syl = gsub(syl, '[bdgc]h', aspirated_map)
			syl = gsub(syl, 'zh', 'c')
			syl = gsub(syl, '([zcs])([iíìî])', function(consonant, vowel)
				return palatal_map[consonant] .. vowel
			end)

			-- the tone has to be read while the diacritics are still present
			local tone_number = find_tone(syl)

			-- vowels: ṳ -> i, strip tone diacritics, oa/oe -> ua/ue
			syl = gsub(syl, 'ṳ', 'i')
			syl = gsub(toNFD(syl), '[́̀̂̍]', '')
			syl = gsub(syl, 'o([ae])', 'u%1')
			if syl == 'yu' then
				syl = 'yiu'
			end
			syl = gsub(syl, '([iy])e([nd])', '%1a%2')

			pieces[idx] = syl .. '<sup>' .. tone_number .. '</sup>' .. trailer
		end
	end
	return table.concat(pieces, " ")
end

--- Convert Guangdong-romanisation readings to Sinological IPA.
-- `text` may hold several readings separated by " / "; each reading is a
-- whitespace-separated list of syllables of the form
-- <initial><final><tone digit 1-6>. A leading "gd=", full stops and commas
-- are ignored. The IPA of the individual readings is joined with "/, /".
-- @param text the readings, or a table whose `.args[1]` holds them
-- @return the IPA string
-- Raises an error for a zero-initial syllable starting with "i", a z/c/s vs
-- j/q/x spelling mismatch, an unrecognised initial or final, or a syllable
-- without a tone digit.
function export.gd_to_ipa(text)
	local initial_conv = {
		["b"] = "p", ["p"] = "pʰ", ["m"] = "m", ["f"] = "f", ["v"] = "ʋ",
		["d"] = "t", ["t"] = "tʰ", ["n"] = "n", ["l"] = "l", 
		["g"] = "k", ["k"] = "kʰ", ["ng"] = "ŋ", ["h"] = "h",
		["z"] = "t͡s", ["c"] = "t͡sʰ", ["s"] = "s",
		["j"] = "t͡s", ["q"] = "t͡sʰ", ["x"] = "s",
		[""] = "",
	}
	local final_conv = {
		["ii"] = "z̩", ["i"] = "i", ["u"] = "u",
		["a"] = "a", ["ia"] = "ia", ["ua"] = "ua",
		["ê"] = "e", ["iê"] = "ie", ["uê"] = "ue",
		["o"] = "o", ["io"] = "io", ["uo"] = "uo",
		["m"] = "m̩", ["n"] = "n̩",
		["ai"] = "aɪ", ["iai"] = "iaɪ", ["uai"] = "uaɪ",
		["oi"] = "oɪ",
		["ui"] = "uɪ", ["iui"] = "iuɪ",
		["au"] = "au", ["iau"] = "iau",
		["êu"] = "eu",
		["iu"] = "iu",
		["em"] = "əm", ["im"] = "im",
		["am"] = "am", ["iam"] = "iam",
		["êm"] = "ɛm",
		["en"] = "ən", ["in"] = "in",
		["an"] = "an", ["ian"] = "ian", ["uan"] = "uan",
		["ên"] = "ɛn", ["iên"] = "iɛn", ["uên"] = "uɛn",
		["on"] = "ɔn", ["ion"] = "iɔn", ["uon"] = "uɔn",
		["un"] = "un", ["iun"] = "iun",
		["ang"] = "aŋ", ["iang"] = "iaŋ", ["uang"] = "uaŋ",
		["ong"] = "ɔŋ", ["iong"] = "iɔŋ", ["uong"] = "uɔŋ",
		["ung"] = "ʊŋ", ["iung"] = "iʊŋ",
		["eb"] = "əp̚", ["ib"] = "ip̚",
		["ab"] = "ap̚", ["iab"] = "iap̚",
		["êb"] = "ɛp̚",
		["ed"] = "ət̚", ["id"] = "it̚",
		["ad"] = "at̚", ["iad"] = "iat̚", ["uad"] = "uat̚",
		["êd"] = "ɛt̚", ["iêd"] = "iɛt̚", ["uêd"] = "uɛt̚",
		["od"] = "ɔt̚",
		["ud"] = "ut̚", ["iud"] = "iut̚",
		["ag"] = "ak̚", ["iag"] = "iak̚", ["uag"] = "uak̚",
		["og"] = "ɔk̚", ["iog"] = "iɔk̚", ["uog"] = "uɔk̚",
		["ug"] = "ʊk̚", ["iug"] = "iʊk̚",
	}
	-- keys ending in "*" are the sandhi variants selected further down
	local tone_conv = {
		["1"] = "⁴⁴", ["2"] = "¹¹",
		["3"] = "³¹",
		["4"] = "⁵³",
		["5"] = "¹", ["6"] = "⁵",
		["1*"] = "⁴⁴⁻³⁵",
		["4*"] = "⁵³⁻⁵⁵",
	}
	
	if type(text) == 'table' then text = text.args[1] end
	local words = mw.text.split(text, " / ")
	local result = {}
	for _, word in ipairs(words) do
		word = gsub(gsub(gsub(word, 'gd=', ''), '[%.,]', ''), '%s+$', '')
		local syllables = mw.text.split(word, '%s+')
		local initial, final, tone, ipa = {}, {}, {}, {}
		-- Pass 1: split every syllable into raw initial / final / tone, so
		-- that pass 2 can look at the following syllable's tone.
		for i, syllable in ipairs(syllables) do
			initial[i] = match(syllable, "^[bpmfvdtnlgkhzcsjqx]?g?")
			final[i] = match(sub(syllable, len(initial[i]) + 1, -1), "^[^1-6]*")
			
			if initial[i] == "" and find(final[i], "^i") then
				error('Syllables starting with "i" need a "y" in front.')
			end
			
			-- "yi" and "y" at the start of a final both stand for "i"
			final[i] = gsub(gsub(final[i], "^yi", "i"), "^y", "i")
			
			-- after z/c/s a bare "i" is looked up as "ii"
			if find(initial[i], "[zcs]") and final[i] == "i" then
				final[i] = "ii"
			end
			-- nothing left after the initial: look the whole syllable up as
			-- a final (e.g. the "m" and "n" entries)
			if final[i] == "" then
				final[i] = initial[i]
				initial[i] = ""
			end
			tone[i] = match(syllable, "[1-6]$")
		end
		-- Pass 2: validate, convert to IPA and apply sandhi.
		for i, syllable in ipairs(syllables) do
			local ortho_pal = { --orthographic palatalization
				["z"] = "j",
				["c"] = "q",
				["s"] = "x",
			}
			local ortho_alv = { --orthographic alveolars
				["j"] = "z",
				["q"] = "c",
				["x"] = "s",
			}
			if find(initial[i], "^[zcs]$") and find(final[i], "^i[^i]") then
				error("Initial should be " .. ortho_pal[initial[i]] .. "?")
			end
			if find(initial[i], "^[jqx]$") and find(final[i], "^[^i]") then
				error("Initial should be " .. ortho_alv[initial[i]] .. "?")
			end
			
			initial[i] = initial_conv[initial[i]] or error(("Unrecognised initial: \"%s\""):format(initial[i]))
			final[i] = final_conv[final[i]] or error(("Unrecognised final: \"%s\""):format(final[i]))
			-- Without this check a syllable that lacks a tone digit would fail
			-- below with an opaque nil-argument / nil-concatenation error.
			if not tone[i] then
				error(("Missing tone number (1-6) at the end of syllable: \"%s\""):format(syllable))
			end
			-- tones 1 and 4 take their sandhi form before tones 2-5
			if match(tone[i], "[14]") and match(tone[i+1] or "", "[2345]") then
				tone[i] = tone[i] .. "*"
			end
			-- Zero-initial "e" in tone 3 on a page whose title contains 仔:
			-- pick a linking initial from the end of the previous syllable's
			-- final, which has already been converted to IPA at this point.
			if initial[i] == "" and final[i] == "e" and tone[i] == "3" and find(mw.title.getCurrentTitle().text, '仔') then
				initial[i] = match(final[i-1] or '', '([mnŋpti])̚?$') or initial[i]
				initial[i] = find(final[i-1] or '', 'u$') and 'ʋ' or initial[i]
				initial[i] = find(final[i-1] or '', '[ao]$') and '(ʋ)' or initial[i]
				initial[i] = find(final[i-1] or '', 'e$') and '(i)' or initial[i]
			end
			tone[i] = tone_conv[tone[i]]
			ipa[i] = initial[i] .. final[i] .. tone[i]
		end
		table.insert(result, table.concat(ipa, " "))
	end
	return table.concat(result, "/, /")
end

--- Render a list of Hakka Romanization System readings.
-- @param rom_list array of reading strings
-- @param dialect  one-letter dialect code: "h", "d", "r" or "z"
-- @param process  "rom": join the readings with " / " and wrap the tone marks
--                 in <sup> tags; "ipa": convert each reading with
--                 export.hrs_to_ipa and join the results with "/, /"
-- @return the rendered string; nothing for any other `process` value
function export.hrs_process(rom_list, dialect, process)
	if process == "rom" then
		local joined = table.concat(rom_list, " / ")
		return gsub(joined, "([ˊˇˋ˖])", "<sup>%1</sup>")
	end

	if process ~= "ipa" then
		return
	end

	-- hrs_to_ipa is given the dialect's full name rather than its code
	local full_names = {
		["h"] = "hailu",
		["d"] = "dabu",
		["r"] = "raoping",
		["z"] = "zhaoan",
	}
	local dialect_name = full_names[dialect]

	local converted = {}
	for idx, rom in ipairs(rom_list) do
		converted[idx] = export.hrs_to_ipa(rom, dialect_name)
	end
	return table.concat(converted, "/, /")
end

--- Convert one Hakka Romanization System reading to Sinological IPA.
-- `text` is a space-separated list of syllables, each optionally ending in
-- one of the tone marks ˊ ˇ ˋ ˖; commas are ignored. Initials and finals
-- that are not in the conversion tables are rendered as empty strings.
-- @param text    the reading
-- @param dialect full dialect name; tone values are only defined for "hailu"
-- @return exactly one value: the IPA string
-- Raises an error when the reading contains a look-alike character in place
-- of a proper tone mark, or when no tone values exist for `dialect`.
function export.hrs_to_ipa(text, dialect)
	local initial_conv = {
		["b"] = "p", ["p"] = "pʰ", ["m"] = "m", ["f"] = "f", ["v"] = "v", ["bb"] = "b",
		["d"] = "t", ["t"] = "tʰ", ["n"] = "n", ["l"] = "l", 
		["g"] = "k", ["k"] = "kʰ", ["ng"] = "ŋ", ["h"] = "h",
		["z"] = "t͡s", ["c"] = "t͡sʰ", ["s"] = "s",
		["j"] = "t͡s", ["q"] = "t͡sʰ", ["x"] = "s",
		["zh"] = "t͡ʃ", ["ch"] = "t͡ʃʰ", ["sh"] = "ʃ", ["rh"] = "ʒ",
		[""] = "",
	}
	local final_conv = {
		["ii"] = "ɨ",
		["i"] = "i", ["e"] = "e", ["a"] = "a", ["o"] = "o", ["u"] = "u",
		["ie"] = "ie", ["eu"] = "eu", ["ieu"] = "ieu",
		["ia"] = "ia", ["ua"] = "ua",
		["ai"] = "ai", ["iai"] = "iai", ["uai"] = "uai",
		["au"] = "au", ["iau"] = "iau",
		["io"] = "io", ["oi"] = "oi", ["ioi"] = "ioi",
		["iu"] = "iu", ["ui"] = "ui", ["iui"] = "iui",
		["ue"] = "ue",
		["iim"] = "ɨm", ["im"] = "im",
		["em"] = "em", ["iem"] = "iem",
		["am"] = "am", ["iam"] = "iam",
		["iin"] = "ɨn", ["in"] = "in",
		["en"] = "en", ["ien"] = "ien", ["uen"] = "uen",
		["an"] = "an", ["ian"] = "ian", ["uan"] = "uan",
		["on"] = "on", ["ion"] = "ion",
		["un"] = "un", ["iun"] = "iun",
		["ang"] = "aŋ", ["iang"] = "iaŋ", ["uang"] = "uaŋ",
		["ong"] = "oŋ", ["iong"] = "ioŋ",
		["ung"] = "uŋ", ["iung"] = "iuŋ",
		["er"] = "ə",
		["iib"] = "ɨp", ["ib"] = "ip",
		["eb"] = "ep", ["ieb"] = "iep",
		["ab"] = "ap", ["iab"] = "iap",
		["iid"] = "ɨt", ["id"] = "it",
		["ed"] = "et", ["ied"] = "iet", ["ued"] = "uet",
		["ad"] = "at", ["iad"] = "iat", ["uad"] = "uat",
		["od"] = "ot", ["iod"] = "iot",
		["ud"] = "ut", ["iud"] = "iut",
		["ag"] = "ak", ["iag"] = "iak", ["uag"] = "uak",
		["og"] = "ok", ["iog"] = "iok",
		["ug"] = "uk", ["iug"] = "iuk",
		["m"] = "m̩", ["n"] = "n̩", ["ng"] = "ŋ̍",
	}
	
	-- Tone value (as ASCII digits) for a converted final plus its tone mark.
	-- Finals ending in p/t/k use the "d"-prefixed keys.
	local function get_tone(final, tone_mark, dialect)
		local mark_to_value = {
			["hailu"] = {
				["ˋ"] = "53",
				[""] = "55",
				["ˊ"] = "24",
				["ˇ"] = "11",
				["˖"] = "33",
				["d"] = "5",
				["dˋ"] = "2",
			}
		}
		
		local values = mark_to_value[dialect]
		if not values then
			-- previously this surfaced as "attempt to index a nil value"
			error("No tone values are defined for dialect \"" .. tostring(dialect) .. "\"")
		end
		
		local mark = (find(final, "[ptk]$") and "d" or "") .. tone_mark
		
		return values[mark] or ""
	end
	
	-- Sandhi value for syllable `i` out of `syl_count`, or "" when there is
	-- none: in "hailu", every syllable except the last changes 24 to 33 and
	-- 5 to 2.
	local function get_sandhi(syl_count, i, tone, dialect)
		if dialect == "hailu" then
			if i < syl_count then
				if tone == "24" then
					return "33"
				elseif tone == "5" then
					return "2"
				end
			end
		end
		
		return ""
	end
	
	-- ASCII tone digits and "-" to their superscript forms
	local sup = {
		["1"] = "¹", ["2"] = "²", ["3"] = "³", ["4"] = "⁴", ["5"] = "⁵", ["-"] = "⁻",
	}
	
	-- Raise an error that suggests the corrected spelling when the reading
	-- contains a character commonly typed in place of a tone mark.
	local function hrs_check_invalid(text)
		if not text then
			return nil
		end
		local common_errors = "[´`+⁺^]"
		local error_correction = {
			["´"] = "ˊ",
			["`"] = "ˋ",
			["+"] = "˖",
			["⁺"] = "˖",
			["^"] = "ˆ",
		}
		local correct = gsub(text, common_errors, error_correction)
		if text ~= correct then
			error("Invalid Hakka Romanization \"" .. text .. "\": please change it to \"" .. correct .. "\"")
		end
	end
	
	--check for common errors in input
	hrs_check_invalid(text)
	
	local syllables, initial, final, tone, sandhi, ipa = {}, {}, {}, {}, {}, {}
	
	syllables = mw.text.split(text, " ")
	
	for i, syllable in ipairs(syllables) do
		syllable = gsub(syllable, ",", "")
		
		--find initial, final, tone
		initial[i] = match(syllable, "^([bpmfvdtnlgkhzcsjqxr][ghb]?)") or ""
		tone[i] = match(syllable, "([ˊˇˋ˖])$") or ""
		final[i] = sub(syllable, len(initial[i]) + 1, -1 - len(tone[i]))
		
		-- A syllabic nasal ("m", "n", "ng") is swallowed whole by the initial
		-- pattern, leaving no final, so the syllabic entries of final_conv
		-- were never used. Treat such a syllable as a bare final instead.
		if final[i] == "" and final_conv[initial[i]] then
			final[i] = initial[i]
			initial[i] = ""
		end
		
		--convert initial, final, tone
		initial[i] = initial_conv[initial[i]] or ""
		final[i] = final_conv[final[i]] or ""
		tone[i] = get_tone(final[i], tone[i], dialect)
		sandhi[i] = get_sandhi(#syllables, i, tone[i], dialect)
		
		ipa[i] = initial[i] .. final[i] ..
			gsub(tone[i] .. (sandhi[i] ~= "" and "-" or "") .. sandhi[i], "[12345%-]", sup)
	end
	
	-- parentheses truncate gsub's results to the string alone
	return (gsub(table.concat(ipa, " "), ",", ""))
end

--- Convert Changting Pinyin readings to Sinological IPA.
-- `text` may hold several readings separated by " / "; each reading is a
-- whitespace-separated list of syllables of the form
-- <initial><final><tone digit 1-5>. A leading "ct=", full stops and commas
-- are ignored. The IPA of the individual readings is joined with "/, /".
-- @param text the readings, or a table whose `.args[1]` holds them
-- @return the IPA string
-- Raises an error for "wung", a zero-initial syllable starting with "i" or
-- "u", an unrecognised initial or final, or a syllable without a tone digit.
function export.ct_to_ipa(text)
	local initial_conv = {
		["b"] = "p", ["p"] = "pʰ", ["m"] = "m", ["f"] = "f", ["v"] = "v",
		["d"] = "t", ["t"] = "tʰ", ["n"] = "n", ["l"] = "l", 
		["g"] = "k", ["k"] = "kʰ", ["ng"] = "ŋ", ["h"] = "h",
		["z"] = "t͡s", ["c"] = "t͡sʰ", ["s"] = "s",
		["j"] = "t͡ɕ", ["q"] = "t͡ɕʰ", ["x"] = "ɕ",
		["zh"] = "t͡ʃ", ["ch"] = "t͡ʃʰ", ["sh"] = "ʃ",
		[""] = "",
	}
	local final_conv = {
		["ï"] = "ʐ̩", ["i"] = "i", ["u"] = "u",
		["a"] = "a", ["ia"] = "ia", ["ua"] = "ua",
		["o"] = "o", ["io"] = "io",
		["e"] = "e", ["ie"] = "ie", ["ue"] = "ue",
		["ai"] = "ai", ["ui"] = "ui",
		["ao"] = "ɔ", ["iao"] = "iɔ",
		["eu"] = "əɯ", ["iu"] = "iəɯ", ["ieu"] = "iəɯ",
		["ang"] = "aŋ", ["iang"] = "iaŋ", ["uang"] = "uaŋ",
		["eng"] = "eŋ", ["ieng"] = "ieŋ", ["ueng"] = "ueŋ",
		["ing"] = "iŋ", ["ung"] = "uŋ",
		["ông"] = "ɔŋ", ["iông"] = "iɔŋ",
		["ong"] = "oŋ", ["iong"] = "ioŋ",
		["ng"] = "ŋ̍",
	}
	-- "3*" is the sandhi variant selected further down
	local tone_conv = {
		["1"] = "³³", ["2"] = "²⁴",
		["3"] = "⁴²",
		["4"] = "⁵⁴", ["5"] = "²¹",
		["3*"] = "⁴²⁻³³",
	}
	
	if type(text) == 'table' then text = text.args[1] end
	local words = mw.text.split(text, " / ")
	local result = {}
	for _, word in ipairs(words) do
		word = gsub(gsub(gsub(word, 'ct=', ''), '[%.,]', ''), '%s+$', '')
		local syllables = mw.text.split(word, '%s+')
		local initial, final, tone, ipa = {}, {}, {}, {}
		-- Pass 1: split every syllable into raw initial / final / tone, so
		-- that pass 2 can look at the following syllable's tone.
		for i, syllable in ipairs(syllables) do
			initial[i] = match(syllable, "^[bpmfvdtnlgkhzcsjqx]?[gh]?")
			final[i] = match(sub(syllable, len(initial[i]) + 1, -1), "^[^1-5]*")
			
			if initial[i] == "" and final[i] == "wung" then
				error('Please change "wung" to "ng".')
			end
			
			if initial[i] == "" and find(final[i], "^i") then
				error('Syllables starting with "i" need a "y" in front.')
			end
			-- "yi" and "y" at the start of a final both stand for "i"
			final[i] = gsub(gsub(final[i], "^yi", "i"), "^y", "i")
			
			if initial[i] == "" and find(final[i], "^u") then
				error('Syllables starting with "u" need a "w" in front.')
			end
			-- "wu" and "w" at the start of a final both stand for "u"
			final[i] = gsub(gsub(final[i], "^wu", "u"), "^w", "u")
			
			-- nothing left after the initial: look the whole syllable up as
			-- a final (e.g. the "ng" entry)
			if final[i] == "" then
				final[i] = initial[i]
				initial[i] = ""
			end
			tone[i] = match(syllable, "[1-5]$")
		end
		-- Pass 2: validate, convert to IPA and apply sandhi.
		for i, syllable in ipairs(syllables) do
			initial[i] = initial_conv[initial[i]] or error(("Unrecognised initial: \"%s\""):format(initial[i]))
			final[i] = final_conv[final[i]] or error(("Unrecognised final: \"%s\""):format(final[i]))
			-- initial[i] is already IPA here; when it contains z, c or s the
			-- syllabic ʐ of the final is written z instead
			if initial[i]:find("[zcs]") then
				final[i] = final[i]:gsub("ʐ", "z")
			end
			-- Without this check a syllable that lacks a tone digit would fail
			-- below with an opaque nil-concatenation error.
			if not tone[i] then
				error(("Missing tone number (1-5) at the end of syllable: \"%s\""):format(syllable))
			end
			-- tone 3 takes its sandhi form before another tone 3
			if (tone[i] == "3") and (tone[i+1] == "3") then
				tone[i] = tone[i] .. "*"
			end
			-- TODO: tone sandhi?
			tone[i] = tone_conv[tone[i]]
			ipa[i] = initial[i] .. final[i] .. tone[i]
		end
		table.insert(result, table.concat(ipa, " "))
	end
	return table.concat(result, "/, /")
end

return export