မဝ်ဂျူ:tl-pronunciation
Documentation for this module may be created at မဝ်ဂျူ:tl-pronunciation/doc
-- Table collecting the functions this module exposes; it is returned at the
-- end of the file.
local export = {}
-- Short local names for the mw.ustring functions.
local gsub, len = mw.ustring.gsub, mw.ustring.len
--- Convert a Tagalog spelling (or respelling) into an IPA transcription.
--
-- Can be called directly from Lua or through `#invoke`. In the latter case
-- `word` is a frame table: the text to transcribe is read from `args[1]` and
-- the debug switch from `args[4]`.
--
-- Respelling conventions accepted in the input: `7` stands for a glottal
-- stop, `_` becomes a space (and can be used to break up a consonant pair
-- that would otherwise be read as a digraph), and `.` marks a syllable break.
--
-- @param word      string to transcribe, or a frame table carrying it in
--                  `args[1]`. When nil, the current page title is used.
-- @param phonetic  truthy to additionally apply the allophonic rules and
--                  produce a phonetic rather than phonemic transcription.
-- @param do_debug  when equal to "yes", every intermediate string recorded
--                  along the way is appended to the returned value.
-- @return the transcription string (followed by the debug trace if requested).
function export.show(word, phonetic, do_debug)
    -- Intermediate results, one entry per processing stage. Not named `debug`
    -- so that Lua's standard debug library is not shadowed.
    local trace = {}
    if type(word) == 'table' then
        do_debug = word.args[4]
        word = word.args[1]
    end
    word = mw.ustring.lower(word or mw.title.getCurrentTitle().text)
    -- Strip everything except the recognised letters and control symbols.
    -- 7 is for glottal stop. Underscore is used as spaces or to break a
    -- consonant pair that could be treated as a digraph.
    word = gsub(word,"[^abcdefghijklmnopqrstuvwxyzáâàéêèíîìóôòúûùñ7_.]","")
    table.insert(trace,word)

    local V = "[aeiouáâàéèêíîìóôòúùû]" -- vowels
    local C = "[^aeiouáâàéèêíîìóôòúùû_.]" -- consonants (anything that is not a vowel, "_" or ".")

    -- Add a glottal stop to words starting with a plain or acute-accented vowel.
    word = gsub(word, "^([aáeéiíoóuú])", "ʔ%1")

    -- Substitute CH, QU and X.
    word = gsub(word,"c([h])","ts") -- not the real sound; turned into an affricate further down
    word = gsub(word,"q([u])","k") -- Spanish QU only
    word = gsub(word,"x","ks")

    -- C and G before I and E, and CU plus vowel (proper nouns from Spanish,
    -- native words spelled in Spanish only).
    -- If the original Spanish uses güe/i, please respell to gw.
    -- Only "gue" and "gui" are replaced to avoid affecting native "ge" and
    -- "gi". If spelled with "gi" and "ge", please respell to "hi" and "he".
    word = gsub(word, "c([ieíé])", "s%1")
    word = gsub(word, "cu([aeo])", "kw%1")
    word = gsub(word, "gu([ieíé])", "g%1")

    -- Glottal stop at a word boundary or in other places.
    word = gsub(word,"7","ʔ")

    -- Underscore to break a consonant cluster or add a space.
    word = gsub(word,"_", " ")
    table.insert(trace,word)

    -- Letter-to-IPA. The order matters: "g" must already be "ɡ" before the
    -- "nɡ" rule runs, and "j" must become "ĵ" before "y" is rewritten to "j".
    word = gsub(word,"c","k")
    word = gsub(word,"g","ɡ")
    word = gsub(word,"j","ĵ") -- Please respell Spanish J to H. Not the real sound.
    word = gsub(word,"ñ", "ɲ")
    word = gsub(word,"nɡ","ŋ")
    word = gsub(word,"r","ɾ") -- Use this to respell Spanish "rr"
    word = gsub(word,"v","b")
    word = gsub(word,"y","j")
    word = gsub(word,"z","s")

    -- Digraphs: "ts" is held as the single placeholder "č" until back-replacement.
    word = gsub(word, "ts", "č")
    table.insert(trace, word)

    -- Syllable division. Each rule runs twice because matches cannot overlap,
    -- so a single pass can miss a boundary adjacent to one it just inserted.
    local v_cv = "(" .. V .. ")(" .. C .. V .. ")"
    local vc_cv = "(" .. V .. C .. ")(" .. C .. V .. ")"
    local vc_ccv = "(" .. V .. C .. ")(" .. C .. C .. V .. ")"
    for _ = 1, 2 do
        word = gsub(word, v_cv, "%1.%2")
    end
    for _ = 1, 2 do
        word = gsub(word, vc_cv, "%1.%2")
    end
    for _ = 1, 2 do
        word = gsub(word, vc_ccv, "%1.%2")
    end
    -- Move an "s" that follows a break between two consonants to the left of the break.
    word = gsub(word, "(" .. C .. ")%.s(" .. C .. ")", "%1s.%2")
    -- NOTE(review): the replacement below equals the matched text, so this
    -- substitution currently changes nothing; kept as in the original.
    word = gsub(word, "([aáeéiíoóuú])([aáeéiíoóuú])", "%1%2")
    word = gsub(word, "([ií])([ií])", "%1.%2")
    word = gsub(word, "([oóuú])([oóuú])", "%1.%2")
    table.insert(trace, word)

    -- Accentuation: put a stress mark in front of the stressed syllable(s).
    local syllables = mw.text.split(word,"%.")
    if mw.ustring.find(word,"[áéíóúâêîôû]") then
        -- Explicit acute/circumflex accents decide the stress.
        for i = 1, #syllables do
            if mw.ustring.find(syllables[i],"[áéíóúâêîôû]") then
                syllables[i] = "ˈ"..syllables[i]
            end
        end
    elseif mw.ustring.find(word,"[^aàeèiìoòuùbdɡfjklmnŋpɾstw]$") then
        -- No accent and the last character is outside the listed set: stress the last syllable.
        syllables[#syllables] = "ˈ"..syllables[#syllables]
    elseif #syllables > 1 then
        -- Otherwise stress the second-to-last syllable, when there is one.
        syllables[#syllables-1] = "ˈ"..syllables[#syllables-1]
    end
    table.insert(trace, word)
    -- Joining without a separator drops the syllable dots.
    word = table.concat(syllables)

    -- Back-replace the affricate placeholders.
    word = gsub(word,"č","t͡ʃ")
    word = gsub(word,"ĵ","d͡ʒ")

    -- Secondary stress: when several stress marks are present, all but the
    -- last one are turned into secondary stress marks.
    word = gsub(word, "ˈ(.+)ˈ", "ˌ%1ˈ")
    word = gsub(word, "ˈ(.+)ˌ", "ˌ%1ˌ")
    word = gsub(word, "ˌ(.+)ˈ(.+)ˈ", "ˌ%1ˌ%2ˈ")
    table.insert(trace,word)

    -- Remove "j" and "w" inserted on a vowel pair starting with "i" and "u".
    word = gsub(word,"([ií])([ˈˌ]?)j([aáeéoóuú])","%1%2%3")
    word = gsub(word,"([uú])([ˈˌ]?)w([aáéeií])","%1%2%3")
    table.insert(trace,word)

    -- Change the semivowels /j/ or /w/ to /i/ or /u/ (part of diphthongs)
    -- before a consonant or at the end of the string.
    word = gsub(word,"j([ˈˌ]?)([bdɡjklmnŋpɾstwʔ])","i%1%2")
    word = gsub(word,"j$","i")
    word = gsub(word,"w([ˈˌ]?)([bdɡjklmnŋpɾstwʔ])","u%1%2")
    word = gsub(word,"w$","u")
    table.insert(trace,word)

    -- Corrections for diphthongs (the first vowel loses its acute accent).
    word = gsub(word,"([aá])([i])","ai") -- ay
    word = gsub(word,"([aá])([u])","au") -- aw
    word = gsub(word,"([ií])u","iu") -- iw
    word = gsub(word,"([oó])u","ou") -- ow
    table.insert(trace,word)

    -- Phonemic to phonetic/allophonic transcription.
    if phonetic then
        table.insert(trace,word)

        -- Turn phonemic diphthongs into phonetic diphthongs.
        word = gsub(word,"([aá])i","aɪ̯") -- ay
        word = gsub(word,"([aá])u","aʊ̯") -- aw
        word = gsub(word,"([oó])i","oɪ̯") -- oy
        word = gsub(word,"([eé])i","eɪ̯") -- ey
        word = gsub(word,"([ií])[u]","ɪʊ̯") -- iw
        table.insert(trace, word)

        -- Replace unaccented vowels with their reduced variants. Accented
        -- vowels are separate characters and are not touched here.
        word = gsub(word,"a","ɐ")
        word = gsub(word,"i","ɪ")
        word = gsub(word,"u","ʊ")

        -- Remove accents: acute gives the full vowel, circumflex and grave
        -- add a glottal stop after the vowel.
        word = gsub(word,"[áàâéèêíìîóòôúùû]",{
            ["á"] = "a", ["â"] ="aʔ", ["à"] = "ɐʔ",
            ["é"] ="e", ["ê"] = "eʔ", ["è"] ="eʔ",
            ["í"]="i", ["î"] = "iʔ", ["ì"] = "ɪʔ",
            ["ó"] = "o", ["ô"] = "oʔ", ["ò"]="oʔ",
            ["ú"]="u", ["û"] ="uʔ", ["ù"] = "ʊʔ"
        })
        table.insert(trace,word)

        -- Remove "j" and "w" inserted on a vowel pair starting with "i" and "u".
        word = gsub(word,"([ɪi])([ˈˌ]?)j([ɐaeoʊu])","%1%2%3")
        word = gsub(word,"([ʊu])([ˈˌ]?)w([aɐeɪi])","%1%2%3")
        table.insert(trace,word)

        -- Combine consonants (except H) followed by I/U and a stress-marked
        -- vowel: the high vowel becomes a glide and the stress mark moves in
        -- front of the consonant.
        word = gsub(word,"([bkdɡlmnpɾst])([ɪi])([ˈˌ])([ɐaeoʊu])","%3%1j%4")
        word = gsub(word,"([bkdɡlmnpɾst])([ʊu])([ˈˌ])([ɐaeɪi])","%3%1w%4")
        table.insert(trace,word)

        word = gsub(word,"^([_]?)([ˈˌ]?)k([lɾ]?)([ɐaeɪiɔoʊu])", "%1%2kx%3%4") -- word-initial /k/ becomes "kx"
        word = gsub(word,"([ɐaeɪɪ̯ioʊʊ̯u])([ˈˌ]?)k([ɐaeɪioʊu])","%1%2x%3") -- /k/ between vowels
        word = gsub(word,"([ɐaeɪɪ̯ioʊʊ̯u])([ˈˌ]?)ɡ([ɐaeɪioʊu])","%1%2ɰ%3") -- /ɡ/ between vowels
        word = gsub(word,"d([ˈˌ]?)j","%1d͡ʒ") -- /d/ before /j/
        word = gsub(word,"n([ˈˌ]?)j","%1ɲ") -- /n/ before /j/
        word = gsub(word,"n([ˈˌ]?)k","ŋ%1k") -- /n/ before /k/ (some proper nouns)
        word = gsub(word,"n([ˈˌ]?)ɡ","ŋ%1ɡ") -- /n/ before /ɡ/ (some proper nouns and loanwords)
        word = gsub(word,"n([ˈˌ]?)h","ŋ%1h") -- /n/ before /h/ (some proper nouns)
        word = gsub(word,"n([ˈˌ]?)m","m%1m") -- /n/ before /m/
        word = gsub(word,"s([ˈˌ]?)j","%1ʃ") -- /s/ before /j/
        word = gsub(word,"t([ˈˌ]?)j","%1t͡ʃ") -- /t/ before /j/
        word = gsub(word,"t([ˈˌ]?)s","%1t͡s") -- /t/ before /s/ (where parameter has t.s)
        -- NOTE(review): the three rules below look unreachable, because the
        -- d+j, s+j and t+j rules above have already rewritten every adjacent
        -- "dj", "sj" and "tj" -- confirm before removing.
        word = gsub(word,"([ˈˌ]?)d([j])([ɐaeɪioʊu])","%1d͡ʒ%3") -- /dj/ before any vowel following stress
        word = gsub(word,"([ˈˌ]?)s([j])([ɐaeɪioʊu])","%1ʃ%3") -- /sj/ before any vowel following stress
        word = gsub(word,"([ˈˌ]?)t([j])([ɐaeɪioʊu])","%1t͡ʃ%3") -- /tj/ before any vowel following stress
        word = gsub(word,"([oʊ])([m])([ˈ]?)([pb])","u%2%3%4") -- /o/ and /ʊ/ before /mp/ or /mb/

        -- Final fix for phonetic diphthongs (undo the vowel reduction inside them).
        word = gsub(word,"([aɐ])ɪ̯","aɪ̯") -- ay
        word = gsub(word,"([aɐ])ʊ̯","aʊ̯") -- aw
        word = gsub(word,"([e])ɪ̯","eɪ̯") -- ey
        table.insert(trace,word)

        -- Change /ɐ/, /ɪ/ and /ʊ/ back to /a/, /i/ and /u/ when they directly
        -- follow the onset of a syllable that carries a stress mark.
        word = gsub(word,"([ˈˌ])([ʔ bdfɡɰhklmnŋɲpɾstwjx])([lɾtx]?)ɐ","%1%2%3a")
        word = gsub(word,"([ˈˌ])(d͡ʒ)ɐ","%1%2a")
        word = gsub(word,"([ˈˌ])(t͡ʃ)ɐ","%1%2a")
        word = gsub(word,"([ˈˌ])([ʔ bdfɡɰhklmnŋɲpɾstwjx])([lɾtx]?)ɪ","%1%2%3i")
        word = gsub(word,"([ˈˌ])(d͡ʒ)ɪ","%1%2i")
        word = gsub(word,"([ˈˌ])(t͡ʃ)ɪ","%1%2i")
        word = gsub(word,"([ˈˌ])([ʔ bdfɡɰhklmnŋɲpɾstwjx])([lɾtx]?)ʊ","%1%2%3u")
        word = gsub(word,"([ˈˌ])(d͡ʒ)ʊ","%1%2u")
        word = gsub(word,"([ˈˌ])(t͡ʃ)ʊ","%1%2u")
    end
    table.insert(trace,word)

    -- Remove accents (phonemic mapping). After the phonetic branch no
    -- accented vowel is left, so this only has an effect in phonemic mode.
    word = gsub(word,"[áàâéèêíìîóòôúùû]",{
        ["á"] = "a", ["â"] ="aʔ", ["à"] = "aʔ",
        ["é"] ="e", ["ê"] = "eʔ", ["è"] ="eʔ",
        ["í"] ="i", ["î"] = "iʔ", ["ì"] = "iʔ",
        ["ó"] = "o", ["ô"] = "oʔ", ["ò"] = "oʔ",
        ["ú"] ="u", ["û"] = "uʔ", ["ù"] = "uʔ"
    })

    -- The trace entries are appended back to back, without a separator.
    return word .. (do_debug == "yes" and table.concat(trace, "") or "")
end
--- Entry point for the phonetic (allophonic) transcription.
-- Passes `frame` on to export.show with the `phonetic` argument set to true.
-- @param frame  forwarded unchanged as the first argument of export.show
-- @return whatever export.show returns
function export.phonetic(frame)
    local want_phonetic = true
    return export.show(frame, want_phonetic)
end
-- Return the table holding the module's public functions (show, phonetic).
return export