Hindi IPA pronunciation module. See {{hi-IPA}}.

Module:hi-IPA/testcases:

12 tests failed. (refresh)

လိက် ဗွဲမရံၚ်လၟဳ မဇေတ်ဍာံ တၚ်လညာတ်ဂမၠိုၚ်
test_all:
Passed अशिष्ट॰ता (aśiṣṭ.tā) ə.ʃɪʂʈ.t̪ɑː ə.ʃɪʂʈ.t̪ɑː syllabification
Passed अशिष्ट-ता (aśiṣṭ-tā) ə.ʃɪʂʈ.t̪ɑː ə.ʃɪʂʈ.t̪ɑː syllabification
Passed अल्प्संख्यक (alpsaṅkhyak) əlp.səŋ.kʰjək əlp.səŋ.kʰjək syllabification
Passed अंडकोष (aṇḍkoṣ) əɳɖ.koːʂ əɳɖ.koːʂ syllabification
Passed अंग्रेज़ (aṅgrez) əŋ.ɡɾeːz əŋ.ɡɾeːz syllabification
Passed अंटर्क्टिका (aṇṭarkṭikā) əɳ.ʈəɾk.ʈɪ.kɑː əɳ.ʈəɾk.ʈɪ.kɑː syllabification
Passed मैं (ma͠i) mɛ̃ː mɛ̃ː
Passed देश (deś) d̪eːʃ d̪eːʃ
Passed मेरा (merā) meː.ɾɑː meː.ɾɑː
Passed खिलौना (khilaunā) kʰɪ.lɔː.nɑː kʰɪ.lɔː.nɑː
Passed नौटंकी (nauṭaṅkī) nɔː.ʈəŋ.kiː nɔː.ʈəŋ.kiː
Passed हौं (ha͠u) ɦɔ̃ː ɦɔ̃ː
Failed मुँह (mũh) mũːʱ mũːɦ
Failed माह (māh) mɑːʱ mɑːɦ
Failed बहना (bahnā) bəʱ.nɑː bəɦ.nɑː
Failed विवाह (vivāh) ʋɪ.ʋɑːʱ ʋɪ.ʋɑːɦ
Passed ग़म (ġam) ɣəm ɣəm
Passed ख़रगोश (xargoś) xəɾ.ɡoːʃ xəɾ.ɡoːʃ
Passed इकट्ठा (ikaṭṭhā) ɪ.kəʈ.ʈʰɑː ɪ.kəʈ.ʈʰɑː
Passed संस्थान (sansthān) sən.st̪ʰɑːn sən.st̪ʰɑːn
Passed मधु (madhu) mə.d̪ʱuː mə.d̪ʱuː final u is lengthened, aspiration should not be split in syllabification
Failed मियाँ (miyā̃) miː.jɑ̃ː mɪ.jɑ̃ː i + y lengthens i
Failed मुहाफ़ज़ाह (muhāfzāh) mʊ.ɦɑːf.zɑːʱ mʊ.ɦɑːf.zɑːɦ
Passed स्त्रीत्व (strītva) st̪ɾiːt̪.ʋᵊ st̪ɾiːt̪.ʋᵊ
Passed शास्त्र (śāstra) ʃɑːs.t̪ɾᵊ ʃɑːs.t̪ɾᵊ
Passed समाचार (samācār) sə.mɑː.t͡ʃɑːɾ sə.mɑː.t͡ʃɑːɾ
Passed श्रावण (śrāvaṇ) ʃɾɑː.ʋəɳ ʃɾɑː.ʋəɳ
Passed हमें (hamẽ) ɦə.mẽː ɦə.mẽː
Passed में (mẽ) mẽː mẽː
Failed भैया (bhaiyā) bʱə.iː.jɑː bʱə̯i.jɑː
Failed सुलह (sulah) sʊ.ləʱ sʊ.ləɦ
Passed दृष्टि (dŕṣṭi) d̪ɾɪʂ.ʈiː d̪ɾɪʂ.ʈiː
Passed सोई (soī) soː.iː soː.iː
Passed खाइए (khāie) kʰɑː.ɪ.eː kʰɑː.ɪ.eː
Passed शक्ति (śakti) ʃək.t̪iː ʃək.t̪iː
Passed उस्ताद (ustād) ʊs.t̪ɑːd̪ ʊs.t̪ɑːd̪
Passed पंकज (paṅkaj) pəŋ.kəd͡ʒ pəŋ.kəd͡ʒ
Passed माला (mālā) mɑː.lɑː mɑː.lɑː
Passed दीवार (dīvār) d̪iː.ʋɑːɾ d̪iː.ʋɑːɾ
Passed सुरुची (surucī) sʊ.ɾʊ.t͡ʃiː sʊ.ɾʊ.t͡ʃiː
Passed निरस्त्र (nirastra) nɪ.ɾəs.t̪ɾᵊ nɪ.ɾəs.t̪ɾᵊ
Passed निर्वृत्त (nirvŕtt) nɪɾ.ʋɾɪt̪t̪ nɪɾ.ʋɾɪt̪t̪
Passed मृत्युंजय (mŕtyuñjay) mɾɪt̪.jʊn.d͡ʒəj mɾɪt̪.jʊn.d͡ʒəj
Passed पितृओं (pitŕõ) pɪt̪.ɾõː pɪt̪.ɾõː
Passed गर्भ॰पात (garbh.pāt) ɡəɾbʱ.pɑːt̪ ɡəɾbʱ.pɑːt̪
Passed गर्भ (garbh) ɡəɾbʱ ɡəɾbʱ
Passed वस्त्र (vastra) ʋəs.t̪ɾᵊ ʋəs.t̪ɾᵊ
Passed यक्ष्मा (yakṣmā) jək.ʂmɑː jək.ʂmɑː
Passed उत्प्रेक्षा (utprekṣā) ʊt̪.pɾeːk.ʂɑː ʊt̪.pɾeːk.ʂɑː
Passed झुंझलाहट (jhuñjhlāhaṭ) d͡ʒʱʊn.d͡ʒʱlɑː.ɦəʈ d͡ʒʱʊn.d͡ʒʱlɑː.ɦəʈ
Passed संख्या (saṅkhyā) səŋ.kʰjɑː səŋ.kʰjɑː
Passed घुँघरू (ghuṅghrū) ɡʱʊŋ.ɡʱɾuː ɡʱʊŋ.ɡʱɾuː
Passed संभ्रांत (sambhrānt) səm.bʱɾɑːnt̪ səm.bʱɾɑːnt̪
Passed इन्फ़्लुएंज़ा (influenzā) ɪn.flʊ.eːn.zɑː ɪn.flʊ.eːn.zɑː
Failed इंफ़्लुएंज़ा (imfluenzā) ɪn.flʊ.eːn.zɑː ɪm.flʊ.eːn.zɑː
Failed हिमाचल प्रदेश (himācal pradeś) /ɦɪ.mɑː.t͡ʃəl pɾə.d̪eːʃ/ ɦɪ.mɑː.t͡ʃəl pɾə.d̪eːʃ
Failed तंक़ीद (taṅqīd) [t̪ɐ̃ɴ.qiːd̪] t̪əŋ.qiːd̪ anusvara before uvulars
Passed चेरापूंजी (cerāpūñjī) t͡ʃeː.ɾɑː.puːn.d͡ʒiː t͡ʃeː.ɾɑː.puːn.d͡ʒiː
Failed चेरापूंजी (cerāpūñjī) t͡ʃeː.ɾäː.pũːn.d͡ʒiː t͡ʃeː.ɾäː.pũːn.d͡ʒiː nasal allophone before postalveolar

local export = {}

local lang = require("Module:languages").getByCode("hi")
local sc = require("Module:scripts").getByCode("Deva")
local m_IPA = require("Module:IPA")
local m_a = require("Module:accent qualifier")

local m_str_utils = require("Module:string utilities")

local find = m_str_utils.find
local gcodepoint = m_str_utils.gcodepoint
local gmatch = m_str_utils.gmatch
local gsub = m_str_utils.gsub
local u = m_str_utils.char

-- Transliteration character -> IPA. export.toIPA applies this table with
-- gsub(translit, ".", correspondences), i.e. one character at a time;
-- characters without an entry are left unchanged by that call.
-- NOTE(review): keys that consist of more than one code point (a base letter
-- plus a combining tilde) do not look reachable through that per-character
-- lookup -- confirm how the keys are encoded before relying on them.
local correspondences = {
	-- consonants
	["ṅ"] = "ŋ", ["g"] = "ɡ", 
	["c"] = "t͡ʃ", ["j"] = "d͡ʒ", 
	["ṭ"] = "ʈ", ["ḍ"] = "ɖ", ["ṇ"] = "ɳ",
	["t"] = "t̪", ["d"] = "d̪",
	["y"] = "j", ["r"] = "ɾ", ["v"] = "ʋ",
	["ś"] = "ʃ", ["ṣ"] = "ʂ", ["ź"] = "ʒ", ["ž"] = "ʒ", ["h"] = "ɦ",
	["ṛ"] = "ɽ", ["ẓ"] = "ʒ", ["ḷ"] = "l", ["ḻ"] = "l", ["ġ"] = "ɣ", ["q"] = "q", ["x"] = "x", ["ṉ"] = "n", ["ṟ"] = "ɾ",

	-- oral vowels
	["a"] = "ə", ["ā"] = "ɑː", ["i"] = "ɪ",
	["ī"] = "iː", ["o"] = "oː", ["e"] = "eː",
	["u"] = "ʊ", ["ū"] = "uː", ["ŏ"] = "ɔ", ["ĕ"] = "æ",

	-- nasalized vowels
	["ẽ"] = "ẽː", ["ũ"] = "ʊ̃", ["õ"] = "õː", ["ã"] = "ə̃", ["ā̃"] = "ɑ̃ː",  ["ĩ"] = "ɪ̃", ["ī̃"] = "ĩː",

	-- special signs; the parenthesized output marks the sound as optional
	["ॐ"] = "oːm", ["ḥ"] = "(ɦ)", ["'"] = "(ʔ)",
}

-- Replacements applied by export.toIPA to the characters [xġqźzf'] when
-- style == "nonpersianized".
local perso_arabic = {
	["x"] = "kh", ["ġ"] = "g", ["q"] = "k", ["ź"] = "z", ["z"] = "j", ["f"] = "ph", ["'"] = "",
}

-- Replacements applied by export.toIPA to the characters [ṣṇ] when
-- style == "desanskritize".
local urdu = {
	["ṣ"] = "ʃ", ["ṇ"] = "n",
}

-- Replacement applied by export.toIPA to "q" when style == "dakhini".
local deccani = {
	["q"] = "x",
}

-- Short vowel -> long vowel (in transliteration).
-- NOTE(review): no use of this table is visible in this file -- confirm
-- whether it is still needed.
local lengthen = {
	["a"] = "ā", ["i"] = "ī", ["u"] = "ū",
}

-- Characters treated as vowel material; spliced into bracket classes below.
local vowels = "aāiīuūoǒŏěĕʊɪɔɔ̃ɛeæãā̃ẽĩī̃õũū̃ː"
-- Pattern for a single vowel character with an optional length mark.
local vowel = "[aāiīuūoǒŏěĕʊɪɔɔ̃ɛeæãā̃ẽĩī̃õũū̃]ː?"
-- A consonant from this set followed by "h"; export.toIPA rewrites the pair
-- as the consonant plus "ʱ".
local weak_h = "([gjdḍbṛnm])h"
-- A consonant from this set; export.toIPA appends "h" to the pattern and
-- rewrites the pair as the consonant plus "ʰ".
local aspirate = "([kctṭp])"
-- Used by syllabify: a vowel (optionally followed by a combining tilde), a
-- run of characters that are neither vowels nor "." nor "-", and another
-- vowel (optionally followed by a combining tilde).
local syllabify_pattern = "([" .. vowels .. "]̃?)([^" .. vowels .. "%.%-]+)([" .. vowels .. "]̃?)"

-- Splits `text` into a list of units, where a unit is either a single
-- recognised consonant letter, an aspirated digraph (stop + "h"), or any
-- other single character. A unit is emitted whenever the next character
-- cannot extend the one being built, so if the first character of `text` is
-- not a recognised consonant the list starts with an empty string.
local function find_consonants(text)
	local single_consonant = "^[kgṅcjñṭḍṇtdnpbmyrlvśṣshqxġzžḻṛṟfθṉḥ]$"
	local aspirated_digraph = "^[kgcjṭḍtdpbṛ]h$"

	local units = {}
	local pending = ""
	-- The appended space acts as a sentinel that flushes the last unit.
	for codepoint in gcodepoint(text .. " ") do
		local ch = u(codepoint)
		local candidate = pending .. ch
		if not (find(candidate, single_consonant) or find(candidate, aspirated_digraph)) then
			units[#units + 1] = pending
			pending = ch
		else
			pending = candidate
		end
	end
	return units
end

-- Inserts "." syllable boundaries into a transliterated string.
--
-- Step 1: for every vowel - consonant run - vowel sequence, the run is split
--         into consonant units and a "." is placed before the last unit of a
--         two-unit run, after the first unit of a longer run, and before the
--         only unit of a single-unit run.
-- Step 2: adjacent vowels are separated with ".".
-- Step 3: a handful of cluster-specific corrections move the boundary.
--
-- Each gsub in steps 1 and 2 is run twice because gsub matches cannot
-- overlap: the vowel that ends one match cannot also start the next, so a
-- second pass picks up the sequences skipped by the first.
local function syllabify(text)
	for _ = 1, 2 do
		text = gsub(text, syllabify_pattern, function(a, b, c)
			-- kept local so that repeated calls do not leak a global
			local b_set = find_consonants(b)
			table.insert(b_set, #b_set > 1 and 2 or 1, ".")
			return a .. table.concat(b_set) .. c
		end)
	end
	-- Vowel hiatus. Lua patterns have no lookahead, so the split is done by
	-- capturing both vowels and re-emitting them around the dot.
	for _ = 1, 2 do
		text = gsub(text, "(" .. vowel .. ")(" .. vowel .. ")", "%1.%2")
	end
	-- syllabification corrections
	-- ([^.]) is added in front, just in case one of the (unlikely) clusters 
	-- would occur after a blank space (temporarily reformatted as '..')
	text = gsub(text, '([^.])%.([kqgcjṭḍtdpb])(h?)([kqgcjṭḍtdpbxġfnɳmsśzź])', '%1%2%3.%4')
	text = gsub(text, '([^.])%.([qgcjṭḍtdpb])(h?)ṣ', '%1%2%3.ṣ')
	text = gsub(text, '([^.])%.khṣ', '%1kh.ṣ') 						-- not kṣ/क्ष 
	text = gsub(text, '([^.])%.([xġfnɳmzźyrlv])([kqgcjṭḍtdpbxġfnɳmsśṣzźh])', '%1%2.%3')
	text = gsub(text, '([^.])%.([sśṣ])([gjḍdbġsśṣzźh])', '%1%2.%3')
	return text
end

-- Letters whose IPA symbol is the same as their transliteration: register
-- each one in `correspondences` as mapping to itself.
local identical = "knlsfzθ"
for letter in gmatch(identical, ".") do
	correspondences[letter] = letter
end

-- Returns only the first value produced by lang:transliterate(text); any
-- further return values are discarded.
local function transliterate(text)
	local translit = lang:transliterate(text)
	return translit
end

-- Passes `term`, together with this module's language and script objects, to
-- full_link from Module:links and returns whatever that call returns.
function export.link(term)
	local m_links = require("Module:links")
	local data = { term = term, lang = lang, sc = sc }
	return m_links.full_link(data)
end

-- Converts a term to a broad IPA string (returned without slashes).
--
-- text:  the term; it is transliterated first when its detected script
--        reports isTransliterated(), otherwise it is used as given.
-- style: "nonpersianized", "dakhini" and "desanskritize" each enable an
--        extra substitution step; any other value (e.g. "persianized")
--        takes none of those branches.
--
-- Raises an error if transliteration yields no result.
--
-- The substitutions below are order-dependent. Spaces are encoded as ".."
-- from just before syllabification until the formatting step restores them.
function export.toIPA(text, style)
	text = gsub(text, '॰', '-')
	local translit = text
	if lang:findBestScript(text):isTransliterated() then
		translit = transliterate(text)
	end
	if not translit then
		error('The term "' .. text .. '" could not be transliterated.')
	end

	if style == "nonpersianized" then
		translit = gsub(translit, "[xġqźzf']", perso_arabic)
	end

	if style == "dakhini" then
		translit = gsub(translit, "[q]", deccani)
	end

	-- force final schwa for Hindi
	translit = gsub(translit, "a~$", "ə")

	if style == "desanskritize" then
		translit = gsub(translit, "(...)ə$", "%1ɑ(ː)")
		translit = gsub(translit, "[ṣṇ]", urdu)
	end

	-- vowels
	translit = gsub(translit, "͠", "̃")
	translit = gsub(translit, 'a(̃?)i', 'ɛ%1ː')
	translit = gsub(translit, 'a(̃?)u', 'ɔ%1ː')
	-- strip a leading or trailing hyphen
	translit = gsub(translit, "%-$", "")
	translit = gsub(translit, "^%-", "")
	-- ŕ: "r" at the end or before a vowel, otherwise "ri"
	translit = gsub(translit, "ŕ$", "r")
	translit = gsub(translit, "ŕ(" .. vowel .. ")", "r%1")
	translit = gsub(translit, "ŕ", "ri")

	translit = gsub(translit, 'jñ', 'gy')
	translit = gsub(translit, ",", "")
	-- encode word boundaries as ".." for the duration of syllabification
	translit = gsub(translit, " ", "..")
	translit = syllabify(translit)
	-- keep a length mark or tilde attached to the vowel before the boundary
	translit = gsub(translit, "%.ː", "ː.")
	translit = gsub(translit, "%.̃", "̃")

	translit = gsub(translit, aspirate .. "h", '%1ʰ')
	translit = gsub(translit, weak_h, '%1ʱ')

	local result = gsub(translit, ".", correspondences)

	-- remove final schwa (Pandey, 2014)
	-- actually weaken
	result = gsub(result, "(...)ə$", "%1ᵊ")
	-- A word boundary is still encoded as ".." at this point (it becomes a
	-- space again in the formatting step below), so the word-final rule has
	-- to look for ".." rather than for a literal space.
	result = gsub(result, "(...)ə%.%.", "%1ᵊ..")
	result = gsub(result, "(...)ə%.?%-", "%1ᵊ-")

	-- formatting
	result = gsub(result, "%.?%-", ".")
	result = gsub(result, "%.%.", " ")
	result = gsub(result, "ː̃", "̃ː")
	result = gsub(result, "ː%.̃", "̃ː.")
	result = gsub(result, "%.$", "")

	-- ñ
	result = gsub(result, "ñ", "n")

	-- i and u lengthening (only at the very end of the string)
	result = gsub(result, "ʊ(̃?)(ɦ?)$", "u%1ː%2")
	result = gsub(result, "ɪ(̃?)(ɦ?)$", "i%1ː%2")

	-- deaffricate first affricate in geminates
	result = gsub(result, "t͡ʃ(%.?)t͡ʃ", "t̪%1t͡ʃ")
	result = gsub(result, "d͡ʒ(%.?)d͡ʒ", "d̪%1d͡ʒ")

	-- silent h in 'lh-', 'vh-' (Ohala 1983, p.45)
	result = gsub(result, "^([lʋ])ɦ", "%1")
	result = gsub(result, "([ .])([lʋ])ɦ", "%1%2")

	-- ɛː before j and ɔː before ʋ become diphthongs; an intervening
	-- syllable dot, if any, is kept in place
	result = gsub(result, "ɛː(%.?)j", "ə̯i%1j")
	result = gsub(result, "ɔː(%.?)ʋ", "ə̯u%1ʋ")

	return result
end

-- Derives a narrow transcription from a broad IPA string by applying an
-- ordered list of pattern substitutions. The order matters: later rules see
-- the output of earlier ones. The result is returned without brackets.
function export.narrow_IPA(ipa)
	local rules = {
		-- what /ɑ/ and /ə/ really are
		{ 'ɑ', 'ä' },
		{ 'ə', 'ɐ' },
		-- uvular /x/, /ɣ/ ?? (deliberately not applied)
		-- { 'x', 'χ' },
		-- { 'ɣ', 'ʁ' },
		-- retroflex s rules
		{ 'ʂ(%.?)([^ʈɖ.])', 'ʃ%1%2' },
		{ 'ʂ$', 'ʃ' },
		-- nasal allophones
		{ 'ŋ(%.?)([qχʁ])', 'ɴ%1%2' },
		{ 'n%.j', 'ɲ.j' },
		-- this nasal is likely more front than before /j/, but not doing a
		-- too narrow transcription seems preferable
		{ '[nɳ](%.?)ʃ', 'ɲ%1ʃ' },
		{ 'n(%.?)([td])̪', 'n̪%1%2̪' },
		{ 'm(%.?)f', 'ɱ%1f' },
		-- nasals induce nasalization
		{ '([ɐäɪiʊueɛoɔæ])(ː?)([nɳɲŋɴmɱ])', '%1̃%2%3' },
		-- cc, jj
		{ 't̪(%.?)t͡ʃ', 't̚%1t͡ʃ' },
		{ 'd̪(%.?)d͡ʒ', 'd̚%1d͡ʒ' },
		-- syllable boundary consonants
		{ '([kɡ])%.([kɡ])', '%1̚.%2' },
		{ '([ʈɖ])%.([ʈɖ])', '%1̚.%2' },
		{ '([td]̪?)%.([tdn])', '%1̚.%2' },
		{ '([pb])%.([pb])', '%1̚.%2' },
		-- aspiration rules
		{ 'ɐɦ([%. ])', 'ɛɦ%1' },
		{ 'ɐɦ$', 'ɛɦ' },
		{ 'ɐ%.ɦɐ', 'ɛ.ɦɛ' },
		{ 'ɐ%(ɦ%)', 'ɛ(ɦ)' },
		{ 'ʊɦ%.', 'ɔɦ.' },
		{ 'ʊ%.ɦɐ', 'ɔ.ɦɔ' },
		{ 'ɐ%.ɦʊ', 'ɔ.ɦɔ' },
		{ '([ɐäɪiʊueɛoɔæ])(̃?)(ː?)ɦ', '%1%2%3ʱ' },
		-- v/w
		{ '([kɡŋtdɲʈɖɳnpbm]̪?%.?)ʋ', '%1w' },
		-- geminate /ɾ/ is trill
		{ "ɾ%.ɾ", "r.r" },
		-- for onomatopeic words ending on -र्र
		{ "ɾɾ", "rː" },
		-- final geminates often pronounced as singletons
		{ "([kɡʈɖɳtdnpbml]̪?)%1", "%1(ː)" },
		-- final cc, jj
		{ "t̚t͡ʃ", "(t̚)t͡ʃ" },
		{ "d̚d͡ʒ", "(d̚)d͡ʒ" },

		{ "ɪ%.j", "i.j" },
		-- words are linked with an undertie instead of a space
		{ " ", "‿" },
	}

	for _, rule in ipairs(rules) do
		ipa = gsub(ipa, rule[1], rule[2])
	end
	return ipa
end

-- Appends the phonemic form of `ipa` (wrapped in slashes) to `results`,
-- followed by its narrow form (wrapped in brackets) when the two differ.
local function add_broad_and_narrow(results, ipa)
	table.insert(results, { pron = "/" .. ipa .. "/" })
	local narrow = export.narrow_IPA(ipa)
	if narrow ~= ipa then
		table.insert(results, { pron = "[" .. narrow .. "]" })
	end
end

-- Template entry point. Reads the positional arguments of the parent frame
-- (empty strings are skipped); when no first argument is given, the current
-- page title is used instead. For each term the "persianized" pronunciation
-- is listed, and the "nonpersianized" one as well when it differs.
-- Returns the "Delhi" qualifier followed by the formatted IPA list.
function export.make(frame)
	local args = frame:getParent().args
	local pagetitle = mw.title.getCurrentTitle().text

	local terms, results = {}, {}

	if args[1] then
		for _, item in ipairs(args) do
			if item ~= "" then
				table.insert(terms, item)
			end
		end
	else
		terms = { pagetitle }
	end

	for _, Hindi in ipairs(terms) do
		local persianized = export.toIPA(Hindi, "persianized")
		local nonpersianized = export.toIPA(Hindi, "nonpersianized")
		add_broad_and_narrow(results, persianized)
		if persianized ~= nonpersianized then
			add_broad_and_narrow(results, nonpersianized)
		end
	end

	return m_a.format_qualifiers(lang, {"Delhi"}) .. " " .. m_IPA.format_IPA_full { lang = lang, items = results }
end

-- Template entry point for Urdu. Reads the positional arguments of the
-- parent frame (empty strings are skipped) and raises an error when no first
-- argument is given. Each term is passed through the Urdu language object's
-- transliterate method (falling back to the term itself when that returns
-- nothing) and then converted with the "desanskritize" style.
-- Returns the "ur" qualifier followed by the formatted IPA list.
function export.make_ur(frame)
	local args = frame:getParent().args
	-- shadows the module-level Hindi language object on purpose
	local lang = require("Module:languages").getByCode("ur")

	if not args[1] then
		error("No transliterations given.")
	end

	local terms, results = {}, {}
	for _, item in ipairs(args) do
		if item ~= "" then
			table.insert(terms, item)
		end
	end

	for _, Urdu in ipairs(terms) do
		Urdu = lang:transliterate(Urdu) or Urdu
		local desanskritize = export.toIPA(Urdu, "desanskritize")
		table.insert(results, { pron = "/" .. desanskritize .. "/" })
	end

	return m_a.format_qualifiers(lang, {"ur"}) .. ' ' .. m_IPA.format_IPA_full { lang = lang, items = results }
end

-- Template entry point for Deccani. Reads the positional arguments of the
-- parent frame (empty strings are skipped) and raises an error when no first
-- argument is given. Each term is converted with the "dakhini" style.
-- Returns the "Deccani" qualifier followed by the formatted IPA list.
function export.make_deccani(frame)
	local args = frame:getParent().args
	-- shadows the module-level Hindi language object on purpose
	local lang = require("Module:languages").getByCode("ur")

	if not args[1] then
		error("No transliterations given.")
	end

	local terms, results = {}, {}
	for _, item in ipairs(args) do
		if item ~= "" then
			table.insert(terms, item)
		end
	end

	for _, Urdu in ipairs(terms) do
		local dakhini = export.toIPA(Urdu, "dakhini")
		table.insert(results, { pron = "/" .. dakhini .. "/" })
	end

	return m_a.format_qualifiers(lang, {"Deccani"}) .. ' ' .. m_IPA.format_IPA_full { lang = lang, items = results }
end

return export