-- Documentation for this module may be created at မဝ်ဂျူ:fi-IPA/doc

-- Table of public functions; it is returned at the end of the module.
local export = {}

local m_IPA = require("Module:IPA")
local m_hyph = require("Module:fi-hyphenation")
local lang = require("Module:languages").getByCode("fi")

-- Local alias for the Unicode-aware gsub of the mw.ustring library.
local gsub = mw.ustring.gsub

-- Local alias for mw.ustring.char; used below to build combining marks from code points.
local U = mw.ustring.char
local nonsyllabic = U(0x32F)	-- inverted breve below; appended after the second vowel of a diphthong
local unreleased = U(0x31A)	-- mark appended after a plosive when "*" is resolved across a space or stress mark (see IPA_wordparts)
local long = "ː"	-- length mark used for long vowels and consonants

--[[	Spelling-to-phoneme table consulted by IPA_word.
		Keys are spellings of one or more letters, values are the IPA text
		they are replaced with. IPA_word always takes the longest key that
		prefixes the remaining input, so e.g. "nkk" is preferred over "nk".
		Letters that stand for themselves are added to this table by a loop
		further down the file.											]]
local letters_phonemes = {
	-- vowel letters whose IPA symbol differs from the letter
	["a"] = "ɑ",
	["ä"] = "æ",
	["ö"] = "ø",
	["å"] = "o",
	
	-- single consonant letters whose IPA symbol differs from the letter
	["g"] = "ɡ",
	["q"] = "k",
	["v"] = "ʋ",
	["š"] = "ʃ",
	["ž"] = "ʒ",
	
	-- letters and letter sequences that expand to (or merge into) other segments
	["x"] = "ks",
	["zz"] = "ts",
	["ng"] = "ŋː",
	["nk"] = "ŋk",
	["nkk"] = "ŋkː",
	["qu"] = "kʋ",
	-- "*" becomes a placeholder that IPA_wordparts resolves in narrow transcription
	["*"] = "ˣ",
	-- the apostrophe is rendered as a full stop
	["’"] = ".",
}

-- Added to the start index in has_later_heavy_syllable to bound how far ahead it scans.
local lookahead = 3 -- how many unstressed syllables at most in a single unit, thus max consecutive unstressed syllables

-- IPA vowel symbols, as a plain string and as a Lua character class.
local vowels = "ɑeiouyæø"
local vowel = "[" .. vowels .. "]"
-- IPA consonant symbols, as a plain string and as a character class.
-- "r" is listed twice, which is harmless inside a character class.
local consonants = "kptɡgbdfʔsnmŋlrhʋʃʒrjçɦx"
local consonant = "[" .. consonants .. "]"
-- Combining diacritics that patterns below allow after a segment.
local diacritics = "̝̞̠̪"
local diacritic = "[" .. diacritics .. "]"

-- Orthographic letters added on top of the IPA sets; the resulting classes are
-- used by is_light_syllable, which inspects spelled (not yet converted) syllables.
local spelled_consonants = "cšvwxzž"
local spelled_consonant = "[" .. consonants .. spelled_consonants .. "]"
local spelled_vowels = "aäö"
local spelled_vowel = "[" .. vowels .. spelled_vowels .. "]"

-- NOTE(review): this looks like the same glyph as the secondary stress mark used
-- elsewhere in this file; confirm whether the two are meant to be distinct characters.
local tertiary = "ˌ" -- "tertiary stress", a weaker secondary stress (either rhythmic or in some compound words). is there a better way to represent this?
-- Exposed on the export table so that code outside this module can read the same mark.
export.tertiary = tertiary

-- Character class of symbols that count as a stress/boundary indicator at the
-- start of a syllable: space, stress marks, "/" and "-".
local stress_indicator = "[ ˈˌ" .. tertiary .. "/-]"
-- Plosive symbols; IPA_wordparts uses this to decide whether to add the unreleased mark.
local plosives = "kptbdɡ"

-- stress_p: class matching any stress mark (primary, secondary or tertiary).
local stress_p = "[ˈˌ" .. tertiary .. "]"
-- stress_s: class matching only the non-primary stress marks.
local stress_s = "[ˌ" .. tertiary .. "]"

-- Segment-level substitutions applied by IPA_word in narrow transcription only:
-- each key is replaced everywhere by its value, which adds a combining
-- diacritic to the base symbol.
local replacements_narrow = {
	["e"] = "e̞",
	["ø"] = "ø̞",
	["o"] = "o̞",
	["t"] = "t̪",
	["s"] = "s̠"
}

--	Register every letter that is its own phoneme symbol, i.e. this sets
--	letters_phonemes["e"] = "e", letters_phonemes["i"] = "i", and so on.
do
	local self_mapped = "eiouydhfjklmnprstu"
	for ch in mw.ustring.gmatch(self_mapped, ".") do
		letters_phonemes[ch] = ch
	end
end

--[[	These Lua patterns find the diphthongs in the IPA transcription,
		so that the nonsyllabic diacritic can be added after their second
		vowel. handle_diphthongs applies the three lists with different
		positional restrictions, described on each list below.			]]
-- /_i/ diphthongs can appear in any syllable
local diphthongs_i = {
	"[ɑeouyæø]i"
}
-- /_U/ diphthongs can appear in the initial syllable or later open syllables (no consonantal coda)
local diphthongs_u = {
	"[ɑoei]u",
	"[eiæø]y",
}
-- rising diphthongs can only appear in the initial syllable (of a word, compound word part, etc.)
local diphthongs_rising = {
	"uo",
	"ie",
	"yø",
}

--[[	Clean-up rules applied by IPA_wordparts to the assembled transcription,
		for both the broad and the narrow output. Each key is a Lua pattern and
		each value its replacement string, passed to mw.ustring.gsub.
		NOTE(review): the rules are applied in pairs() order, which Lua leaves
		unspecified. Some rules can touch the same text (e.g. the "t̪s̠" and
		"nt̪" rules on the sequence n + t̪ + s̠), so the result may depend on
		that order -- confirm whether a fixed order is required.			]]
local post_fixes = {
	["t̪s̠"] = "ts̠",         -- t is alveolar in /ts/
	["nt̪"] = "n̪t̪",         -- n is dental in /nt/
	["ˈŋn"] = "ˈɡn",       -- initial <gn> is /gn/
						   -- ŋ is short before consonant (by default)
	["ŋ"..long.."("..consonant..")"] = "ŋ%1",
						   -- dissimilation of vowels by sandhi: when the same vowel
						   -- (optionally long) stands on both sides of a non-primary
						   -- stress mark, "(ʔ)" is inserted after the mark
	["("..vowel..diacritic.."*"..long.."?)("..stress_s..")%1"] = "%1%2(ʔ)%1"
}

--[[	Additional clean-up rules that IPA_wordparts applies only to the narrow
		transcription, after post_fixes. Keys are Lua patterns, values are
		replacement strings for mw.ustring.gsub; parentheses in a replacement
		are literal output characters.
		NOTE(review): as with post_fixes, the rules run in pairs() order, which
		is unspecified -- confirm that no rule relies on running before or
		after another.													]]
local post_fixes_narrow = {
						   -- long j, v after i, u diphthong
	["(i"..nonsyllabic..")j("..vowel..")"] = "%1j("..long..")%2",
	["(u"..nonsyllabic..")ʋ("..vowel..")"] = "%1ʋ("..long..")%2",
						   -- cleanup: drop a "." that directly follows a non-primary stress mark
	["("..stress_s..")%."] = "%1",
						   -- sandhi: nm > mm, np > mp, nb > mb, nk > ŋk, ng > ŋg
	["nm"] = "m" .. long,
	["n([pb])"] = "m%1",
						   -- the same assimilations across a stress mark and optional whitespace
	["n("..stress_p.."%s*)([ɡk])"] = "ŋ%1%2",
	["n("..stress_p.."%s*)([mpb])"] = "m%1%2",
						   -- handle potentially long consonants over secondary stresses
	["("..stress_s..")("..consonant..diacritic.."*)%("..long.."%)"] = "(%2)%1%2",
	["("..consonant..diacritic.."*)%("..long.."%)("..stress_s..")"] = "%2%1("..long..")",
						   -- ŋ + tertiary mark + ɡ: the ɡ is rewritten as ŋ
    ["(ŋ"..diacritic.."*)"..tertiary.."ɡ"] = "%1"..tertiary.."ŋ"
}

--[[	Tell whether a spelled syllable is light: an optional separator symbol,
		an optional single consonant and exactly one vowel.
		Returns false for syllables of four or more characters; otherwise the
		start index of the match (truthy) or nil when the shape does not fit. ]]
function export.is_light_syllable(syllable)
	if mw.ustring.len(syllable) >= 4 then
		return false
	end

	local light_shape = "^[" .. m_hyph.sep_symbols .. "]?" .. spelled_consonant .. "?" .. spelled_vowel .. "$"
	-- parentheses keep only the first value returned by find
	return (mw.ustring.find(mw.ustring.lower(syllable), light_shape))
end

--[[	Report whether any syllable from position `start` up to position
		`start + lookahead` is heavy (i.e. not light). The final syllable of
		`hyph` is never examined.
		hyph:	array of syllable strings
		start:	index of the first syllable to inspect
		Returns true or false.												]]
function export.has_later_heavy_syllable(hyph, start)
	-- scan window ends at whichever comes first: the lookahead limit or the penultimate syllable
	local limit = #hyph - 1
	if start + lookahead < limit then
		limit = start + lookahead
	end

	local position = start
	while position <= limit do
		if not export.is_light_syllable(hyph[position]) then
			return true
		end
		position = position + 1
	end

	return false
end

-- applied *before* IPA conversion
-- applied *before* IPA conversion
--[[	Insert rhythmic stress boundaries into a spelled word.
		The word is hyphenated with Module:fi-hyphenation (separator symbols
		kept), each syllable is classified as stressed or not, and a marker is
		put in front of every non-initial stressed syllable that does not
		already begin with a stress indicator.

		Returns two values:
		  1. the word with every inserted marker replaced by the tertiary
		     stress mark; "-" separators that were already present are kept;
		  2. a table indexed from 2 upwards, one entry per separator in order
		     of appearance: true for an inserted marker, false for a "-" that
		     was already there.

		All state is local; nothing is written to the global environment.	]]
local function add_secondary_stress(word)
	-- second argument: keep_sep_symbols = true
	local hyph = m_hyph.generate_hyphenation(word, true)
	local pieces = {}
	local last_index = #hyph
	-- whether the previous syllable ended up stressed
	local prev_stress = false

	local symbol_pattern = "^[" .. m_hyph.sep_symbols .. "ˈˌ" .. tertiary .. "]"
	local stress_pattern = "^" .. stress_indicator
	
	-- find stressed syllables and add secondary stress before each syllable
	for index, syllable in ipairs(hyph) do
		local stressed = false
		local has_symbol = mw.ustring.find(syllable, symbol_pattern)
		
		if has_symbol then
			-- check if symbol indicates stress; a leading symbol that does not
			-- indicate stress is treated as if there were no symbol at all
			stressed = mw.ustring.find(syllable, stress_pattern)
			has_symbol = stressed
		end
			
		if not stressed then
			if index == 1 then
				stressed = true
			elseif not prev_stress and index < last_index then
				-- shift stress if current syllable light and a heavy syllable occurs later (except as the last syllable)
				stressed = index == last_index - 1 or not export.is_light_syllable(syllable) or not export.has_later_heavy_syllable(hyph, index + 1)
			end
		end
		
		-- check if next syllable already stressed
		-- if is, do not stress this syllable
		if stressed and index < last_index then
			stressed = not mw.ustring.find(hyph[index + 1], stress_pattern)
		end

		if index > 1 and stressed and not has_symbol then
			-- temporary marker, resolved by the gsub below
			pieces[#pieces + 1] = "-$"
		end
		pieces[#pieces + 1] = syllable

		prev_stress = stressed
	end

	local res = table.concat(pieces)

	-- Walk over every "-" (with or without the "$" tag), record which kind it
	-- was, and turn tagged ones into the tertiary stress mark.
	local noninitial = {}
	local index = 1
	res = mw.ustring.gsub(res, "-([$]?)",
		function (dollar)
			index = index + 1
			noninitial[index] = #dollar > 0
			return #dollar > 0 and tertiary or "-"
		end)
	
	return res, noninitial
end

--[[	Append the nonsyllabic diacritic to the second vowel of every diphthong
		found in an IPA string.
		IPA:			transcription to process
		strict_initial:	when true, "initial syllable" means the very start of
						the string; otherwise it means the position right after
						any stress indicator
		Returns the processed string.										]]
local function handle_diphthongs(IPA, strict_initial)
	local with_mark = "%0" .. nonsyllabic

	-- /_i/ diphthongs are marked wherever they occur
	for _, pattern in pairs(diphthongs_i) do
		IPA = mw.ustring.gsub(IPA, pattern, with_mark)
	end

	-- prefix that pins a diphthong to the first vowel group of a unit
	local initial_prefix
	if strict_initial then
		initial_prefix = "^[^" .. vowels .. "]*"
	else
		initial_prefix = stress_indicator .. "[^" .. vowels .. "]*"
	end

	-- rising diphthongs: initial syllables only
	for _, pattern in pairs(diphthongs_rising) do
		IPA = mw.ustring.gsub(IPA, initial_prefix .. pattern, with_mark)
	end

	local starts_with_consonant = "^" .. consonant
	local starts_next_syllable = "^" .. consonant .. diacritic .. "*" .. vowel

	-- Decide whether a non-initial diphthong sits in an open syllable.
	-- A following consonant closes the syllable unless it is itself followed
	-- by a vowel (then it belongs to the next syllable). Open syllables get
	-- the diacritic, closed ones are returned unchanged.
	local function mark_if_open(diphthong, after)
		local closed = mw.ustring.find(after, starts_with_consonant)
			and not mw.ustring.find(after, starts_next_syllable)
		if closed then
			return diphthong .. after
		end
		return diphthong .. nonsyllabic .. after
	end

	for _, pattern in pairs(diphthongs_u) do
		-- initial syllables
		IPA = mw.ustring.gsub(IPA, initial_prefix .. pattern, with_mark)

		-- open non-initial syllables: first with text following, then at the very end
		IPA = mw.ustring.gsub(IPA, "(" .. pattern .. ")([^" .. nonsyllabic .. "].+)", mark_if_open)
		IPA = mw.ustring.gsub(IPA, "(" .. pattern .. ")($)", mark_if_open)
	end

	return IPA
end

--[[	Transcribe a single word part into IPA.
		term:			spelled text of the part
		is_narrow:		when true, apply the narrow-transcription steps
						(allophones of h, length marks, diphthongs, diacritics)
		has_initial:	in narrow mode, whether diphthongs are processed for
						this part at all
		Returns the transcription as a string.								]]
local function IPA_word(term, is_narrow, has_initial)
	local remaining = term
	local pieces = {}

	while mw.ustring.len(remaining) > 0 do
		-- longest spelling in letters_phonemes that prefixes the remaining text
		local best = ""
		for spelling in pairs(letters_phonemes) do
			local width = mw.ustring.len(spelling)
			if width > mw.ustring.len(best) and mw.ustring.sub(remaining, 1, width) == spelling then
				best = spelling
			end
		end

		if best == "" then
			-- unknown character: copy it through unchanged
			pieces[#pieces + 1] = mw.ustring.sub(remaining, 1, 1)
			remaining = mw.ustring.sub(remaining, 2)
		else
			pieces[#pieces + 1] = letters_phonemes[best]
			remaining = mw.ustring.sub(remaining, mw.ustring.len(best) + 1)
		end
	end

	local result = table.concat(pieces)

	if not is_narrow then
		return result
	end

	-- Pick the allophone of h from its neighbours. Returning nothing leaves
	-- the matched text as it was.
	local function narrow_h(before, after)
		if after == "" or before == "" or not vowels:find(before) then
			return
		end

		local allophone
		if consonants:find(after) then
			if before == "i" or before == "y" then
				-- vihma, yhtiö
				allophone = "ç"
			elseif before == "ɑ" or before == "o" or before == "u" then
				-- mahti, kohme, tuhka
				allophone = "x"
			end
		elseif vowels:find(after) then
			-- maha
			allophone = "ɦ"
		end

		if allophone then
			return before .. allophone .. after
		end
	end

	result = mw.ustring.gsub(result, "(.?)h(.?)", narrow_h)

	-- double letter replacement and diphthongs must be handled earlier here,
	-- i.e. before the diacritics below are attached
	result = mw.ustring.gsub(result, "(%a)%1", "%1" .. long)
	if has_initial then
		result = handle_diphthongs(result, true)
	end

	for plain, narrowed in pairs(replacements_narrow) do
		result = mw.ustring.gsub(result, plain, narrowed)
	end

	return result
end

--[[	Build the IPA transcription of a whole term.
		term:		spelled term; "-" separates word parts, "/" marks a part
					that always gets secondary stress, "*" is resolved in
					narrow transcription
		is_narrow:	true for the narrow transcription, false/nil for the broad one
		Returns the transcription as a string (without surrounding slashes or
		brackets).

		The term is lower-cased, optionally given rhythmic stress marks,
		split on "-", each part is transcribed with IPA_word and prefixed with
		a stress mark, and the joined result is passed through the post_fixes
		tables.

		NOTE(review): `notinitial` is indexed by every separator that
		add_secondary_stress saw (including ones it rewrote to the tertiary
		mark and any leading "-" stripped below), whereas `wordparts` is split
		on "-" only; the two index spaces may not line up -- confirm.		]]
function export.IPA_wordparts(term, is_narrow)
	term = mw.ustring.lower(term)
	local notinitial = {} -- true if the component is not an initial component
	local hyphenstress = "ˌ" -- secondary by default

	if mw.ustring.find(term, "%/") then
		hyphenstress = tertiary -- tertiary if we have slashes
	end
	
	if is_narrow then
		term, notinitial = add_secondary_stress(term)
	end
	
	-- drop leading and trailing hyphens so that split does not produce empty parts there
	term = mw.ustring.gsub(term, "^%-+", "")
	term = mw.ustring.gsub(term, "%-+$", "")
	
	-- make sure we keep slashes to figure out if secondary or tertiary
	term = mw.ustring.gsub(term, "%/", "-%1")
	local wordparts = mw.text.split(term, "-", true)

	for key, val in ipairs(wordparts) do
		local stress = key > 1 and hyphenstress or "ˈ"
		local part = val

		if mw.ustring.find(part, "^%/") then
			stress = "ˌ" -- always secondary
			part = part:sub(2) -- "/" is a single byte, so byte-wise sub is safe here
		end

		wordparts[key] = stress .. IPA_word(part, is_narrow, not notinitial[key])
	end
	
	-- local on purpose: the transcription must not leak into the global environment
	local IPA = table.concat(wordparts, "")
	
	if is_narrow then
		-- handle * in narrow transcription
		IPA = mw.ustring.gsub(IPA, "ˣ(%s*)("..stress_p.."?)((.?)" .. diacritic .. "*)",
			function (space, stress, after, potential_consonant)
				if potential_consonant == "" then
					-- nothing follows: optional glottal stop
					return space .. stress .. "(ʔ)" .. after
				elseif consonants:find(potential_consonant, 1, true) then
					-- plain search: the captured character may be a pattern
					-- magic character and must not be interpreted as one
					if #space > 0 or #stress > 0 then
						-- consonant is doubled across the boundary
						local amark = ""
						if plosives:find(mw.ustring.sub(after, 1, 1), 1, true) then
							amark = unreleased
						end
						return after .. amark .. space .. stress .. after
					else
						-- no boundary: the consonant is simply lengthened
						return space .. after .. long
					end
				else
					-- anything else (e.g. a vowel): glottal stop
					return space .. stress .. "ʔ" .. after
				end
			end)		
	else
		--	Replace double letters (vowels or consonants) with single letter plus length sign.
		IPA = gsub(IPA, "(%a)%1", "%1" .. long)
		IPA = handle_diphthongs(IPA, false)
	end
	
	for letter, phoneme in pairs(post_fixes) do
		IPA = mw.ustring.gsub(IPA, letter, phoneme)
	end
	
	if is_narrow then
		for letter, phoneme in pairs(post_fixes_narrow) do
			IPA = mw.ustring.gsub(IPA, letter, phoneme)
		end
	end
	
	return IPA
end

--[[	Entry point: produce the formatted pronunciation line for a term.
		term:	either the term as a string, or a table on which
				getParent().args[1] is read to obtain the term (presumably the
				Scribunto frame object when invoked from a template -- confirm).
				When no term is given the current page title is used; a term
				of exactly "*" means "page title followed by *".
		Returns whatever m_IPA.format_IPA_full returns for the broad
		transcription (in slashes) and the narrow one (in brackets).		]]
function export.IPA(term)
	if type(term) == "table" then
		term = term:getParent().args[1]
	end
	
	local title = mw.title.getCurrentTitle().text
	
	if not term then
		term = title
	elseif term == "*" then
		term = title .. "*"
	end
	
	-- truthy when the term contains a space; forwarded as the last argument below
	local no_count = mw.ustring.match(term, " ")
	
	-- both results are kept local so that nothing leaks into the global environment
	local IPA_narrow = export.IPA_wordparts(term, true)
	local IPA = export.IPA_wordparts(term, false)
	return m_IPA.format_IPA_full(lang, {{pron = "/" .. IPA .. "/"}, {pron = "[" .. IPA_narrow .. "]"}}, nil, nil, nil, no_count)
end

return export