-- Documentation for this module may be created at မဝ်ဂျူ:tl-pronunciation/doc

-- Module table; functions attached to it are returned at the end of the file.
local export = {}
-- Short aliases for the Unicode-aware string helpers.
-- NOTE(review): neither alias is referenced by the code visible in this file,
-- which calls mw.ustring.gsub directly — confirm before removing.
local gsub = mw.ustring.gsub
local len = mw.ustring.len

--- Convert a (re)spelled word into an IPA transcription string.
--
-- The word goes through an ordered pipeline of pattern substitutions:
-- input filtering, Spanish-style spelling fixes, letter-to-IPA mapping,
-- syllable division, stress marking, diphthong handling and, optionally,
-- phonemic-to-phonetic (allophonic) rewriting. The order of the steps matters;
-- later steps rely on the placeholder characters produced by earlier ones.
--
-- @param word      Either the respelling as a string, or a frame-like table;
--                  in the latter case `word.args[1]` is the respelling and
--                  `word.args[4]` replaces `do_debug`. When the respelling is
--                  nil or blank, the current page title is used instead.
-- @param phonetic  When truthy, the allophonic rules are applied as well.
-- @param do_debug  When equal to the string "yes", the intermediate snapshots
--                  are appended (with no separator) to the returned string.
-- @return          The transcription string.
function export.show(word, phonetic, do_debug)
	local ugsub = mw.ustring.gsub
	local ufind = mw.ustring.find

	-- Intermediate snapshots of `word`. Named `trace` so that the standard
	-- `debug` library name is not shadowed.
	local trace = {}

	if type(word) == 'table' then
		do_debug = word.args[4]
		word = word.args[1]
	end

	-- A blank positional argument arrives as "" (which is truthy in Lua), so
	-- test for emptiness explicitly before falling back to the page title.
	if word == nil or mw.text.trim(word) == "" then
		word = mw.title.getCurrentTitle().text
	end
	word = mw.ustring.lower(word)

	-- Keep only the supported letters. 7 is for glottal stop. Underscore is
	-- used as spaces or to break consonant pair that could be treated as digraph.
	word = ugsub(word, "[^abcdefghijklmnopqrstuvwxyzáâàéêèíîìóôòúûùñ7_.]", "")

	table.insert(trace, word)

	local V = "[aeiouáâàéèêíîìóôòúùû]" -- vowels
	local C = "[^aeiouáâàéèêíîìóôòúùû_.]" -- consonants (anything that is not a vowel, "_" or ".")

	-- Add glottal stop for words starting with a vowel.
	word = ugsub(word, "^([aáeéiíoóuú])", "ʔ%1")

	-- Substitute C, CH, QU, QUI and X.
	word = ugsub(word, "c([h])", "ts") -- not the real sound; turned into a placeholder below
	word = ugsub(word, "q([u])", "k") -- Spanish QU only
	word = ugsub(word, "x", "ks")

	-- C and G before I and E, and CU plus vowel (proper nouns from Spanish,
	-- native words spelled in Spanish only).
	-- If the original Spanish uses güe/i, please respell to gw.
	-- Only "gue" and "gui" are replaced to avoid affecting native "ge" and "gi".
	-- If spelled with "gi" and "ge", please respell to "hi" and "he".
	word = ugsub(word, "c([ieíé])", "s%1")
	word = ugsub(word, "cu([aeo])", "kw%1")
	word = ugsub(word, "gu([ieíé])", "g%1")

	-- Glottal stop in word boundary or other places.
	word = ugsub(word, "7", "ʔ")

	-- Underscore to break consonant cluster or add space.
	word = ugsub(word, "_", " ")

	table.insert(trace, word)

	-- Letter-to-IPA.
	word = ugsub(word, "c", "k")
	word = ugsub(word, "g", "ɡ")
	word = ugsub(word, "j", "ĵ") -- Please respell Spanish J to H. Placeholder, not the real sound.
	word = ugsub(word, "ñ", "ɲ")
	word = ugsub(word, "nɡ", "ŋ")
	word = ugsub(word, "r", "ɾ") -- Use this to respell Spanish "rr"
	word = ugsub(word, "v", "b")
	word = ugsub(word, "y", "j")
	word = ugsub(word, "z", "s")

	-- Digraphs: collapse "ts" into a single placeholder character so that the
	-- syllable division below treats it as one consonant.
	word = ugsub(word, "ts", "č")

	table.insert(trace, word)

	-- Syllable division. Each rule runs twice because matches found in one
	-- pass cannot overlap.
	for _ = 1, 2 do
		word = ugsub(word, "(" .. V .. ")(" .. C .. V .. ")", "%1.%2")
	end
	for _ = 1, 2 do
		word = ugsub(word, "(" .. V .. C .. ")(" .. C .. V .. ")", "%1.%2")
	end
	for _ = 1, 2 do
		word = ugsub(word, "(" .. V .. C .. ")(" .. C .. C .. V .. ")", "%1.%2")
	end
	-- Move an "s" that follows a syllable break back before the break when it
	-- sits between two consonants.
	word = ugsub(word, "(" .. C .. ")%.s(" .. C .. ")", "%1s.%2")
	-- Split identical-class vowel pairs (i+i, o/u+o/u). Other adjacent vowel
	-- pairs are left unsplit; the catch-all substitution that used to sit
	-- here replaced every pair with itself and has been dropped as a no-op.
	word = ugsub(word, "([ií])([ií])", "%1.%2")
	word = ugsub(word, "([oóuú])([oóuú])", "%1.%2")

	table.insert(trace, word)

	-- Accentuation.
	local syllables = mw.text.split(word, "%.")
	if ufind(word, "[áéíóúâêîôû]") then
		-- Explicit accent marks: stress every syllable that carries one.
		for i = 1, #syllables do
			if ufind(syllables[i], "[áéíóúâêîôû]") then
				syllables[i] = "ˈ" .. syllables[i]
			end
		end
	elseif ufind(word, "[^aàeèiìoòuùbdɡfjklmnŋpɾstw]$") then
		-- No accent mark and the word ends in a character outside the listed
		-- set: stress the last syllable.
		syllables[#syllables] = "ˈ" .. syllables[#syllables]
	elseif #syllables > 1 then
		-- Otherwise stress the penultimate syllable, if there is one.
		syllables[#syllables - 1] = "ˈ" .. syllables[#syllables - 1]
	end

	table.insert(trace, word)

	word = table.concat(syllables)

	-- Back-replace the single-character placeholders with the affricates.
	word = ugsub(word, "č", "t͡ʃ")
	word = ugsub(word, "ĵ", "d͡ʒ")

	-- Secondary stress: only the last stress mark stays primary.
	word = ugsub(word, "ˈ(.+)ˈ", "ˌ%1ˈ")
	word = ugsub(word, "ˈ(.+)ˌ", "ˌ%1ˌ")
	word = ugsub(word, "ˌ(.+)ˈ(.+)ˈ", "ˌ%1ˌ%2ˈ")

	table.insert(trace, word)

	-- Remove "j" and "w" inserted on vowel pair starting with "i" and "u".
	word = ugsub(word, "([ií])([ˈˌ]?)j([aáeéoóuú])", "%1%2%3")
	word = ugsub(word, "([uú])([ˈˌ]?)w([aáéeií])", "%1%2%3")

	table.insert(trace, word)

	-- Change the semivowels /j/ or /w/ to /i/ or /u/ (part of diphthongs).
	word = ugsub(word, "j([ˈˌ]?)([bdɡjklmnŋpɾstwʔ])", "i%1%2")
	word = ugsub(word, "j$", "i")
	word = ugsub(word, "w([ˈˌ]?)([bdɡjklmnŋpɾstwʔ])", "u%1%2")
	word = ugsub(word, "w$", "u")

	table.insert(trace, word)

	-- Corrections for diphthongs (the accent on the first vowel is dropped).
	word = ugsub(word, "([aá])([i])", "ai") -- ay
	word = ugsub(word, "([aá])([u])", "au") -- aw
	word = ugsub(word, "([ií])u", "iu") -- iw
	word = ugsub(word, "([oó])u", "ou") -- ow

	table.insert(trace, word)

	-- Phonemic to phonetic/allophonic transcription.
	if phonetic then

		table.insert(trace, word)

		-- Turn phonemic diphthongs to phonetic diphthongs.
		word = ugsub(word, "([aá])i", "aɪ̯") -- ay
		word = ugsub(word, "([aá])u", "aʊ̯") -- aw
		word = ugsub(word, "([oó])i", "oɪ̯") -- oy
		word = ugsub(word, "([eé])i", "eɪ̯") -- ey
		word = ugsub(word, "([ií])[u]", "ɪʊ̯") -- iw

		table.insert(trace, word)

		-- Replace unstressed vowels. Accented vowels are still distinct
		-- characters at this point, so they are not affected.
		word = ugsub(word, "a", "ɐ")
		word = ugsub(word, "i", "ɪ")
		word = ugsub(word, "u", "ʊ")

		-- Remove accents (circumflex and grave add a glottal stop).
		word = ugsub(word, "[áàâéèêíìîóòôúùû]", {
			["á"] = "a", ["â"] = "aʔ", ["à"] = "ɐʔ",
			["é"] = "e", ["ê"] = "eʔ", ["è"] = "eʔ",
			["í"] = "i", ["î"] = "iʔ", ["ì"] = "ɪʔ",
			["ó"] = "o", ["ô"] = "oʔ", ["ò"] = "oʔ",
			["ú"] = "u", ["û"] = "uʔ", ["ù"] = "ʊʔ"
		})

		table.insert(trace, word)

		-- Remove "j" and "w" inserted on vowel pair starting with "i" and "u".
		word = ugsub(word, "([ɪi])([ˈˌ]?)j([ɐaeoʊu])", "%1%2%3")
		word = ugsub(word, "([ʊu])([ˈˌ]?)w([aɐeɪi])", "%1%2%3")

		table.insert(trace, word)

		-- Combine consonants (except H) followed by I/U and certain stressed vowels.
		word = ugsub(word, "([bkdɡlmnpɾst])([ɪi])([ˈˌ])([ɐaeoʊu])", "%3%1j%4")
		word = ugsub(word, "([bkdɡlmnpɾst])([ʊu])([ˈˌ])([ɐaeɪi])", "%3%1w%4")

		table.insert(trace, word)

		-- NOTE(review): underscores were already turned into spaces above, so
		-- the optional "_" in the next pattern can only match the empty string.
		word = ugsub(word, "^([_]?)([ˈˌ]?)k([lɾ]?)([ɐaeɪiɔoʊu])", "%1%2kx%3%4") -- word-initial /k/
		word = ugsub(word, "([ɐaeɪɪ̯ioʊʊ̯u])([ˈˌ]?)k([ɐaeɪioʊu])", "%1%2x%3") -- /k/ between vowels
		word = ugsub(word, "([ɐaeɪɪ̯ioʊʊ̯u])([ˈˌ]?)ɡ([ɐaeɪioʊu])", "%1%2ɰ%3") -- /ɡ/ between vowels
		word = ugsub(word, "d([ˈˌ]?)j", "%1d͡ʒ") -- /d/ before /j/
		word = ugsub(word, "n([ˈˌ]?)j", "%1ɲ") -- /n/ before /j/
		word = ugsub(word, "n([ˈˌ]?)k", "ŋ%1k") -- /n/ before /k/ (some proper nouns)
		word = ugsub(word, "n([ˈˌ]?)ɡ", "ŋ%1ɡ") -- /n/ before /ɡ/ (some proper nouns and loanwords)
		word = ugsub(word, "n([ˈˌ]?)h", "ŋ%1h") -- /n/ before /h/ (some proper nouns)
		word = ugsub(word, "n([ˈˌ]?)m", "m%1m") -- /n/ before /m/
		word = ugsub(word, "s([ˈˌ]?)j", "%1ʃ") -- /s/ before /j/
		word = ugsub(word, "t([ˈˌ]?)j", "%1t͡ʃ") -- /t/ before /j/
		word = ugsub(word, "t([ˈˌ]?)s", "%1t͡s") -- /t/ before /s/ (where parameter has t.s)
		word = ugsub(word, "([ˈˌ]?)d([j])([ɐaeɪioʊu])", "%1d͡ʒ%3") -- /dj/ before any vowel following stress
		word = ugsub(word, "([ˈˌ]?)s([j])([ɐaeɪioʊu])", "%1ʃ%3") -- /sj/ before any vowel following stress
		word = ugsub(word, "([ˈˌ]?)t([j])([ɐaeɪioʊu])", "%1t͡ʃ%3") -- /tj/ before any vowel following stress
		word = ugsub(word, "([oʊ])([m])([ˈ]?)([pb])", "u%2%3%4") -- /o/ and /ʊ/ before /mb/

		-- Final fix for phonetic diphthongs.
		word = ugsub(word, "([aɐ])ɪ̯", "aɪ̯") -- ay
		word = ugsub(word, "([aɐ])ʊ̯", "aʊ̯") -- aw
		word = ugsub(word, "([e])ɪ̯", "eɪ̯") -- ey

		table.insert(trace, word)

		-- Change /ɐ/, /ɪ/ and /ʊ/ back to /a/, /i/ and /u/ in syllables that
		-- carry a stress mark (after one onset consonant, an optional second
		-- one, or an affricate).
		word = ugsub(word, "([ˈˌ])([ʔ bdfɡɰhklmnŋɲpɾstwjx])([lɾtx]?)ɐ", "%1%2%3a")
		word = ugsub(word, "([ˈˌ])(d͡ʒ)ɐ", "%1%2a")
		word = ugsub(word, "([ˈˌ])(t͡ʃ)ɐ", "%1%2a")
		word = ugsub(word, "([ˈˌ])([ʔ bdfɡɰhklmnŋɲpɾstwjx])([lɾtx]?)ɪ", "%1%2%3i")
		word = ugsub(word, "([ˈˌ])(d͡ʒ)ɪ", "%1%2i")
		word = ugsub(word, "([ˈˌ])(t͡ʃ)ɪ", "%1%2i")
		word = ugsub(word, "([ˈˌ])([ʔ bdfɡɰhklmnŋɲpɾstwjx])([lɾtx]?)ʊ", "%1%2%3u")
		word = ugsub(word, "([ˈˌ])(d͡ʒ)ʊ", "%1%2u")
		word = ugsub(word, "([ˈˌ])(t͡ʃ)ʊ", "%1%2u")

	end

	table.insert(trace, word)

	-- Remove accents (phonemic output; circumflex and grave add a glottal stop).
	word = ugsub(word, "[áàâéèêíìîóòôúùû]", {
		["á"] = "a", ["â"] = "aʔ", ["à"] = "aʔ",
		["é"] = "e", ["ê"] = "eʔ", ["è"] = "eʔ",
		["í"] = "i", ["î"] = "iʔ", ["ì"] = "iʔ",
		["ó"] = "o", ["ô"] = "oʔ", ["ò"] = "oʔ",
		["ú"] = "u", ["û"] = "uʔ", ["ù"] = "uʔ"
	})

	return word .. (do_debug == "yes" and table.concat(trace, "") or "")

end

--- Entry point that runs export.show with the phonetic flag switched on.
-- @param frame  Passed through unchanged as the first argument of export.show.
-- @return       Whatever export.show returns.
function export.phonetic(frame)
	local want_phonetic = true
	return export.show(frame, want_phonetic)
end

return export