မဝ်ဂျူ:hi-IPA

Hindi IPA pronunciation module. See {{hi-IPA}}.

Testcases

12 tests failed. (refresh)

test_all:
လိက်	ဗွဲမရံၚ်လၟဳ	မဇေတ်ဍာံ	တၚ်လညာတ်ဂမၠိုၚ်
अशिष्ट॰ता (aśiṣṭ.tā)	ə.ʃɪʂʈ.t̪ɑː	ə.ʃɪʂʈ.t̪ɑː	syllabification
अशिष्ट-ता (aśiṣṭ-tā)	ə.ʃɪʂʈ.t̪ɑː	ə.ʃɪʂʈ.t̪ɑː	syllabification
अल्प्संख्यक (alpsaṅkhyak)	əlp.səŋ.kʰjək	əlp.səŋ.kʰjək	syllabification
अंडकोष (aṇḍkoṣ)	əɳɖ.koːʂ	əɳɖ.koːʂ	syllabification
अंग्रेज़ (aṅgrez)	əŋ.ɡɾeːz	əŋ.ɡɾeːz	syllabification
अंटर्क्टिका (aṇṭarkṭikā)	əɳ.ʈəɾk.ʈɪ.kɑː	əɳ.ʈəɾk.ʈɪ.kɑː	syllabification
मैं (ma͠i)	mɛ̃ː	mɛ̃ː
देश (deś)	d̪eːʃ	d̪eːʃ
मेरा (merā)	meː.ɾɑː	meː.ɾɑː
खिलौना (khilaunā)	kʰɪ.lɔː.nɑː	kʰɪ.lɔː.nɑː
नौटंकी (nauṭaṅkī)	nɔː.ʈəŋ.kiː	nɔː.ʈəŋ.kiː
हौं (ha͠u)	ɦɔ̃ː	ɦɔ̃ː
मुँह (mũh)	mũːʱ	mũːɦ
माह (māh)	mɑːʱ	mɑːɦ
बहना (bahnā)	bəʱ.nɑː	bəɦ.nɑː
विवाह (vivāh)	ʋɪ.ʋɑːʱ	ʋɪ.ʋɑːɦ
ग़म (ġam)	ɣəm	ɣəm
ख़रगोश (xargoś)	xəɾ.ɡoːʃ	xəɾ.ɡoːʃ
इकट्ठा (ikaṭṭhā)	ɪ.kəʈ.ʈʰɑː	ɪ.kəʈ.ʈʰɑː
संस्थान (sansthān)	sən.st̪ʰɑːn	sən.st̪ʰɑːn
मधु (madhu)	mə.d̪ʱuː	mə.d̪ʱuː	final u is lengthened, aspiration should not be split in syllabification
मियाँ (miyā̃)	miː.jɑ̃ː	mɪ.jɑ̃ː	i + y lengthens i
मुहाफ़ज़ाह (muhāfzāh)	mʊ.ɦɑːf.zɑːʱ	mʊ.ɦɑːf.zɑːɦ
स्त्रीत्व (strītva)	st̪ɾiːt̪.ʋᵊ	st̪ɾiːt̪.ʋᵊ
शास्त्र (śāstra)	ʃɑːs.t̪ɾᵊ	ʃɑːs.t̪ɾᵊ
समाचार (samācār)	sə.mɑː.t͡ʃɑːɾ	sə.mɑː.t͡ʃɑːɾ
श्रावण (śrāvaṇ)	ʃɾɑː.ʋəɳ	ʃɾɑː.ʋəɳ
हमें (hamẽ)	ɦə.mẽː	ɦə.mẽː
में (mẽ)	mẽː	mẽː
भैया (bhaiyā)	bʱə.iː.jɑː	bʱə̯i.jɑː
सुलह (sulah)	sʊ.ləʱ	sʊ.ləɦ
दृष्टि (dŕṣṭi)	d̪ɾɪʂ.ʈiː	d̪ɾɪʂ.ʈiː
सोई (soī)	soː.iː	soː.iː
खाइए (khāie)	kʰɑː.ɪ.eː	kʰɑː.ɪ.eː
शक्ति (śakti)	ʃək.t̪iː	ʃək.t̪iː
उस्ताद (ustād)	ʊs.t̪ɑːd̪	ʊs.t̪ɑːd̪
पंकज (paṅkaj)	pəŋ.kəd͡ʒ	pəŋ.kəd͡ʒ
माला (mālā)	mɑː.lɑː	mɑː.lɑː
दीवार (dīvār)	d̪iː.ʋɑːɾ	d̪iː.ʋɑːɾ
सुरुची (surucī)	sʊ.ɾʊ.t͡ʃiː	sʊ.ɾʊ.t͡ʃiː
निरस्त्र (nirastra)	nɪ.ɾəs.t̪ɾᵊ	nɪ.ɾəs.t̪ɾᵊ
निर्वृत्त (nirvŕtt)	nɪɾ.ʋɾɪt̪t̪	nɪɾ.ʋɾɪt̪t̪
मृत्युंजय (mŕtyuñjay)	mɾɪt̪.jʊn.d͡ʒəj	mɾɪt̪.jʊn.d͡ʒəj
पितृओं (pitŕõ)	pɪt̪.ɾõː	pɪt̪.ɾõː
गर्भ॰पात (garbh.pāt)	ɡəɾbʱ.pɑːt̪	ɡəɾbʱ.pɑːt̪
गर्भ (garbh)	ɡəɾbʱ	ɡəɾbʱ
वस्त्र (vastra)	ʋəs.t̪ɾᵊ	ʋəs.t̪ɾᵊ
यक्ष्मा (yakṣmā)	jək.ʂmɑː	jək.ʂmɑː
उत्प्रेक्षा (utprekṣā)	ʊt̪.pɾeːk.ʂɑː	ʊt̪.pɾeːk.ʂɑː
झुंझलाहट (jhuñjhlāhaṭ)	d͡ʒʱʊn.d͡ʒʱlɑː.ɦəʈ	d͡ʒʱʊn.d͡ʒʱlɑː.ɦəʈ
संख्या (saṅkhyā)	səŋ.kʰjɑː	səŋ.kʰjɑː
घुँघरू (ghuṅghrū)	ɡʱʊŋ.ɡʱɾuː	ɡʱʊŋ.ɡʱɾuː
संभ्रांत (sambhrānt)	səm.bʱɾɑːnt̪	səm.bʱɾɑːnt̪
इन्फ़्लुएंज़ा (influenzā)	ɪn.flʊ.eːn.zɑː	ɪn.flʊ.eːn.zɑː
इंफ़्लुएंज़ा (imfluenzā)	ɪn.flʊ.eːn.zɑː	ɪm.flʊ.eːn.zɑː
हिमाचल प्रदेश (himācal pradeś)	/ɦɪ.mɑː.t͡ʃəl pɾə.d̪eːʃ/	ɦɪ.mɑː.t͡ʃəl pɾə.d̪eːʃ
तंक़ीद (taṅqīd)	[t̪ɐ̃ɴ.qiːd̪]	t̪əŋ.qiːd̪	anusvara before uvulars
चेरापूंजी (cerāpūñjī)	t͡ʃeː.ɾɑː.puːn.d͡ʒiː	t͡ʃeː.ɾɑː.puːn.d͡ʒiː
चेरापूंजी (cerāpūñjī)	t͡ʃeː.ɾäː.pũːn.d͡ʒiː	t͡ʃeː.ɾäː.pũːn.d͡ʒiː	nasal allophone before postalveolar

local export = {}

local lang = require("Module:languages").getByCode("hi")
local sc = require("Module:scripts").getByCode("Deva")
local m_IPA = require("Module:IPA")
local m_a = require("Module:accent qualifier")

local m_str_utils = require("Module:string utilities")

local find = m_str_utils.find
local gcodepoint = m_str_utils.gcodepoint
local gmatch = m_str_utils.gmatch
local gsub = m_str_utils.gsub
local u = m_str_utils.char

-- Transliteration character -> IPA. toIPA applies this table one character at
-- a time via gsub(translit, ".", correspondences); a character with no entry
-- here is left unchanged by that substitution.
local correspondences = {
	-- consonants
	["ṅ"] = "ŋ", ["g"] = "ɡ", 
	["c"] = "t͡ʃ", ["j"] = "d͡ʒ", 
	["ṭ"] = "ʈ", ["ḍ"] = "ɖ", ["ṇ"] = "ɳ",
	["t"] = "t̪", ["d"] = "d̪",
	["y"] = "j", ["r"] = "ɾ", ["v"] = "ʋ",
	["ś"] = "ʃ", ["ṣ"] = "ʂ", ["ź"] = "ʒ", ["ž"] = "ʒ", ["h"] = "ɦ",
	["ṛ"] = "ɽ", ["ẓ"] = "ʒ", ["ḷ"] = "l", ["ḻ"] = "l", ["ġ"] = "ɣ", ["q"] = "q", ["x"] = "x", ["ṉ"] = "n", ["ṟ"] = "ɾ",

	-- oral vowels
	["a"] = "ə", ["ā"] = "ɑː", ["i"] = "ɪ",
	["ī"] = "iː", ["o"] = "oː", ["e"] = "eː",
	["u"] = "ʊ", ["ū"] = "uː", ["ŏ"] = "ɔ", ["ĕ"] = "æ",

	-- nasalized vowels
	["ẽ"] = "ẽː", ["ũ"] = "ʊ̃", ["õ"] = "õː", ["ã"] = "ə̃", ["ā̃"] = "ɑ̃ː",  ["ĩ"] = "ɪ̃", ["ī̃"] = "ĩː",

	-- special signs; the parenthesized values are emitted literally
	["ॐ"] = "oːm", ["ḥ"] = "(ɦ)", ["'"] = "(ʔ)",
}

-- Replacements applied to the transliteration by toIPA when
-- style == "nonpersianized" (for the characters x ġ q ź z f ').
local perso_arabic = {
	["x"] = "kh", ["ġ"] = "g", ["q"] = "k", ["ź"] = "z", ["z"] = "j", ["f"] = "ph", ["'"] = "",
}

-- Replacements applied to the transliteration by toIPA when
-- style == "desanskritize" (for the characters ṣ and ṇ).
local urdu = {
	["ṣ"] = "ʃ", ["ṇ"] = "n",
}

-- Replacement applied to the transliteration by toIPA when style == "dakhini".
local deccani = {
	["q"] = "x",
}

-- Short vowel -> long vowel (transliteration letters).
-- NOTE(review): not referenced anywhere in the visible part of this module.
local lengthen = {
	["a"] = "ā", ["i"] = "ī", ["u"] = "ū",
}

-- Characters treated as vowel material when building character classes
-- (includes the length mark ː so that it stays attached to its vowel).
local vowels = "aāiīuūoǒŏěĕʊɪɔɔ̃ɛeæãā̃ẽĩī̃õũū̃ː"
-- Pattern for a single vowel with an optional length mark.
local vowel = "[aāiīuūoǒŏěĕʊɪɔɔ̃ɛeæãā̃ẽĩī̃õũū̃]ː?"
-- Consonants after which toIPA rewrites a following h as ʱ.
local weak_h = "([gjdḍbṛnm])h"
-- Consonants after which toIPA rewrites a following h as ʰ
-- (toIPA appends the "h" to this pattern itself).
local aspirate = "([kctṭp])"
-- Three captures: a vowel character (optionally followed by a tilde), a run of
-- characters that are neither vowel characters nor "." nor "-", and another
-- vowel character (optionally followed by a tilde).
local syllabify_pattern = "([" .. vowels .. "]̃?)([^" .. vowels .. "%.%-]+)([" .. vowels .. "]̃?)"

-- Split a consonant run into a list of consonant units.
-- A unit is either a single consonant letter or one of k g c j ṭ ḍ t d p b ṛ
-- followed by "h", which is kept together as one unit. A trailing space is
-- appended as a sentinel so the last unit is flushed into the list; the
-- sentinel itself is never added.
local function find_consonants(text)
	local single_consonant = "^[kgṅcjñṭḍṇtdnpbmyrlvśṣshqxġzžḻṛṟfθṉḥ]$"
	local aspirated_consonant = "^[kgcjṭḍtdpbṛ]h$"

	local units = {}
	local pending = ""
	for codepoint in gcodepoint(text .. " ") do
		local char = u(codepoint)
		local candidate = pending .. char
		if find(candidate, single_consonant) or find(candidate, aspirated_consonant) then
			-- the unit can still grow (first letter, or "h" joining its stop)
			pending = candidate
		else
			-- the unit is complete; start a new one with this character
			units[#units + 1] = pending
			pending = char
		end
	end
	return units
end

-- Insert syllable breaks (".") into a transliterated string.
--
-- Step 1: for every vowel - consonant run - vowel sequence, put a break after
--         the first consonant unit of the run, or before the consonant when
--         the run has only one unit.
-- Step 2: put a break between two adjacent vowels.
-- Step 3: for specific consonant sequences, move the break one consonant unit
--         to the right (i.e. "X.C1C2" becomes "XC1.C2").
--
-- Returns the string with breaks inserted.
local function syllabify(text)
	-- Two passes: gsub matches cannot overlap, so the vowel that closes one
	-- match cannot also open the next match within the same pass.
	for _ = 1, 2 do
		text = gsub(text, syllabify_pattern, function(a, b, c)
			-- Must be local: the original assigned a global here, which leaks
			-- state between calls and errors out under strict-globals checks.
			local b_set = find_consonants(b)
			table.insert(b_set, #b_set > 1 and 2 or 1, ".")
			return a .. table.concat(b_set) .. c
		end)
		-- NOTE(review): Lua patterns have no lookahead; "(?=" appears to be
		-- read as a capture starting with the literal characters "?=", so this
		-- substitution likely never matches real input. Left unchanged to keep
		-- behavior; adjacent vowels are split by the loop below.
		text = gsub(text, "(" .. vowel .. ")(?=" .. vowel .. ")", "%1.")
	end
	-- Two passes for the same non-overlap reason (e.g. three vowels in a row).
	for _ = 1, 2 do
		text = gsub(text, "(" .. vowel .. ")(" .. vowel .. ")", "%1.%2")
	end
	-- syllabification corrections
	-- ([^.]) is added in front, just in case one of the (unlikely) clusters 
	-- would occur after a blank space (temporarily reformatted as '..')
	text =  gsub(text, '([^.])%.([kqgcjṭḍtdpb])(h?)([kqgcjṭḍtdpbxġfnɳmsśzź])', '%1%2%3.%4')
	text =  gsub(text, '([^.])%.([qgcjṭḍtdpb])(h?)ṣ', '%1%2%3.ṣ')
	text =  gsub(text, '([^.])%.khṣ', '%1kh.ṣ') 						-- not kṣ/क्ष 
	text =  gsub(text, '([^.])%.([xġfnɳmzźyrlv])([kqgcjṭḍtdpbxġfnɳmsśṣzźh])', '%1%2.%3')
	text =  gsub(text, '([^.])%.([sśṣ])([gjḍdbġsśṣzźh])', '%1%2.%3')
	return text	
end

-- Letters whose IPA symbol equals the transliteration letter: register each
-- one in `correspondences` as mapping to itself.
local identical = "knlsfzθ"
for letter in gmatch(identical, ".") do
	correspondences[letter] = letter
end

-- Transliterate `text` with the module-level language object.
-- Only the first value returned by lang:transliterate is passed on.
local function transliterate(text)
	local first_result = lang:transliterate(text)
	return first_result
end

-- Build a full link to `term` using the module-level language and script
-- objects, via Module:links.
function export.link(term)
	local m_links = require("Module:links")
	return m_links.full_link({ term = term, lang = lang, sc = sc })
end

-- Convert a term (native script or transliteration) to a broad IPA string.
--
-- text:  the term; "॰" is accepted as an alternative to "-".
-- style: selects optional rewrites. The values handled here are
--        "nonpersianized", "dakhini" and "desanskritize"; any other value
--        (e.g. "persianized") applies none of them.
--
-- Returns the IPA string without surrounding slashes. Raises an error if the
-- term could not be transliterated.
--
-- The substitutions below are applied strictly in sequence and many depend on
-- the output of earlier ones; do not reorder them.
function export.toIPA(text, style)
	text = gsub(text, '॰', '-')
	local translit = text
	-- only transliterate when the detected script is one that gets transliterated;
	-- otherwise the input is used as the transliteration itself
	if lang:findBestScript(text):isTransliterated() then
		translit = transliterate(text)
	end
	if not translit then
		error('The term "' .. text .. '" could not be transliterated.')
	end
	
	if style == "nonpersianized" then
		-- replace the listed letters using the perso_arabic table
		translit = gsub(translit, "[xġqźzf']", perso_arabic)
	end

	if style == "dakhini" then
		translit = gsub(translit, "[q]", deccani)
	end
	
	-- force final schwa for Hindi
	translit = gsub(translit, "a~$", "ə")

	if style == "desanskritize" then
		-- a string-final ə preceded by at least three characters becomes ɑ(ː)
		translit = gsub(translit, "(...)ə$", "%1ɑ(ː)")
		translit = gsub(translit, "[ṣṇ]", urdu)
	end
	
	-- vowels
	-- normalize the double tilde to a plain combining tilde
	translit = gsub(translit, "͠", "̃")
	-- ai / au (optionally nasalized) become the long monophthongs ɛː / ɔː
	translit = gsub(translit, 'a(̃?)i', 'ɛ%1ː')
	translit = gsub(translit, 'a(̃?)u', 'ɔ%1ː')
	-- drop a hyphen at either edge of the string
	translit = gsub(translit, "%-$", "")
	translit = gsub(translit, "^%-", "")
	-- ŕ: plain r at the end of the string or before a vowel, otherwise ri
	translit = gsub(translit, "ŕ$", "r")
	translit = gsub(translit, "ŕ(" .. vowel .. ")", "r%1")
	translit = gsub(translit, "ŕ", "ri")
    
	translit = gsub(translit, 'jñ', 'gy')
	translit = gsub(translit, ",", "")
	-- spaces are temporarily encoded as ".." (restored under "formatting" below)
	translit = gsub(translit, " ", "..")
	translit = syllabify(translit)
	-- move a break that landed before a length mark or tilde to after it
	translit = gsub(translit, "%.ː", "ː.")
	translit = gsub(translit, "%.̃", "̃")

	-- aspiration: h after k c t ṭ p becomes ʰ; h after g j d ḍ b ṛ n m becomes ʱ
	translit = gsub(translit, aspirate .. "h", '%1ʰ')
	translit = gsub(translit, weak_h, '%1ʱ')
	
	-- map every character through `correspondences`; characters without an
	-- entry are kept as they are
	local result = gsub(translit, ".", correspondences)
	
	-- remove final schwa (Pandey, 2014)
	-- actually weaken
	result = gsub(result, "(...)ə$", "%1ᵊ")
	-- NOTE(review): input spaces were rewritten to ".." above and are only
	-- turned back into spaces under "formatting" below, so this pattern looks
	-- like it cannot match an input word boundary at this point — confirm.
	result = gsub(result, "(...)ə ", "%1ᵊ ")
	result = gsub(result, "(...)ə%.?%-", "%1ᵊ-")
	
	-- formatting	
	-- hyphens become syllable breaks, ".." becomes a space again, tildes are
	-- placed before the length mark, and a trailing break is dropped
	result = gsub(result, "%.?%-", ".")
	result = gsub(result, "%.%.", " ")
	result = gsub(result, "ː̃", "̃ː")
	result = gsub(result, "ː%.̃", "̃ː.")
	result = gsub(result, "%.$", "")
    
    -- ñ
    result = gsub(result, "ñ", "n")

	-- i and u lengthening
	-- applies only at the very end of the string; an optional tilde and an
	-- optional final ɦ are carried over
	result = gsub(result, "ʊ(̃?)(ɦ?)$", "u%1ː%2")
	result = gsub(result, "ɪ(̃?)(ɦ?)$", "i%1ː%2")
	
	-- deaffricate first affricate in geminates
	result = gsub(result, "t͡ʃ(%.?)t͡ʃ", "t̪%1t͡ʃ")	
	result = gsub(result, "d͡ʒ(%.?)d͡ʒ", "d̪%1d͡ʒ")
	
	-- silent h in 'lh-', 'vh-' (Ohala 1983, p.45)
	-- handled at the start of the string and after a space or syllable break
	result = gsub(result, "^([lʋ])ɦ", "%1")  
    result = gsub(result, "([ .])([lʋ])ɦ", "%1%2")
    
	-- ɛː before j is rewritten as ə̯i (an intervening break is kept)
	result = gsub(result, "ɛː(%.?)j", function(a)
		local res = "ə̯i"
		res = res .. a .. "j"
		return res
	end)
	-- ɔː before ʋ is rewritten as ə̯u (an intervening break is kept)
	result = gsub(result, "ɔː(%.?)ʋ", function(a)
		local res = "ə̯u"
		res = res .. a .. "ʋ"
		return res
	end)
	
	return result
end

-- Derive a narrow transcription from a broad IPA string.
-- The rewrite rules are applied one after another, in exactly the order they
-- are listed; later rules rely on the output of earlier ones (for example the
-- ɐ-based rules rely on ə having already been rewritten to ɐ).
function export.narrow_IPA(ipa)
	-- each entry is { pattern, replacement }
	local rules = {
		-- what /ɑ/ and /ə/ really are
		{ 'ɑ', 'ä' },
		{ 'ə', 'ɐ' },
		-- uvular /x/, /ɣ/ ?? (kept disabled)
		-- { 'x', 'χ' },
		-- { 'ɣ', 'ʁ' },
		-- retroflex s rules
		{ 'ʂ(%.?)([^ʈɖ.])', 'ʃ%1%2' },
		{ 'ʂ$', 'ʃ' },
		-- nasal allophones
		{ 'ŋ(%.?)([qχʁ])', 'ɴ%1%2' },
		{ 'n%.j', 'ɲ.j' },
		-- this nasal is likely more front than before /j/, but not doing a
		-- too narrow transcription seems preferable
		{ '[nɳ](%.?)ʃ', 'ɲ%1ʃ' },
		{ 'n(%.?)([td])̪', 'n̪%1%2̪' },
		{ 'm(%.?)f', 'ɱ%1f' },
		-- nasals induce nasalization
		{ '([ɐäɪiʊueɛoɔæ])(ː?)([nɳɲŋɴmɱ])', '%1̃%2%3' },
		-- cc, jj
		{ 't̪(%.?)t͡ʃ', 't̚%1t͡ʃ' },
		{ 'd̪(%.?)d͡ʒ', 'd̚%1d͡ʒ' },
		-- syllable boundary consonants
		{ '([kɡ])%.([kɡ])', '%1̚.%2' },
		{ '([ʈɖ])%.([ʈɖ])', '%1̚.%2' },
		{ '([td]̪?)%.([tdn])', '%1̚.%2' },
		{ '([pb])%.([pb])', '%1̚.%2' },
		-- aspiration rules
		{ 'ɐɦ([%. ])', 'ɛɦ%1' },
		{ 'ɐɦ$', 'ɛɦ' },
		{ 'ɐ%.ɦɐ', 'ɛ.ɦɛ' },
		{ 'ɐ%(ɦ%)', 'ɛ(ɦ)' },
		{ 'ʊɦ%.', 'ɔɦ.' },
		{ 'ʊ%.ɦɐ', 'ɔ.ɦɔ' },
		{ 'ɐ%.ɦʊ', 'ɔ.ɦɔ' },
		{ '([ɐäɪiʊueɛoɔæ])(̃?)(ː?)ɦ', '%1%2%3ʱ' },
		-- v/w
		{ '([kɡŋtdɲʈɖɳnpbm]̪?%.?)ʋ', '%1w' },
		-- geminate /ɾ/ is trill
		{ "ɾ%.ɾ", "r.r" },
		-- for onomatopeic words ending on -र्र
		{ "ɾɾ", "rː" },
		-- final geminates often pronounced as singletons
		{ "([kɡʈɖɳtdnpbml]̪?)%1", "%1(ː)" },
		-- final cc, jj
		{ "t̚t͡ʃ", "(t̚)t͡ʃ" },
		{ "d̚d͡ʒ", "(d̚)d͡ʒ" },
		-- ɪ before a syllable break and j
		{ "ɪ%.j", "i.j" },
		-- word boundaries are shown with a tie
		{ " ", "‿" },
	}

	for i = 1, #rules do
		local pattern, replacement = rules[i][1], rules[i][2]
		ipa = gsub(ipa, pattern, replacement)
	end
	return ipa
end

-- Template entry point for Hindi.
-- Reads the positional arguments of the calling template (empty ones are
-- skipped); when no first argument is given, the current page title is used.
-- For each term it emits the "persianized" broad transcription in slashes,
-- its narrow form in brackets when that differs, and then the same pair for
-- the "nonpersianized" transcription when that differs from the first.
-- Returns the "Delhi" accent qualifier followed by the formatted IPA.
function export.make(frame)
	local args = frame:getParent().args

	local p, results = {}, {}

	if args[1] then
		for _, item in ipairs(args) do
			-- skip blank arguments explicitly
			if item ~= "" then
				table.insert(p, item)
			end
		end
	else
		p = { mw.title.getCurrentTitle().text }
	end

	-- Append /broad/ and, when the narrow form differs, [narrow].
	local function add_pron(broad)
		table.insert(results, { pron = "/" .. broad .. "/" })
		local narrow = export.narrow_IPA(broad)
		if narrow ~= broad then
			table.insert(results, { pron = "[" .. narrow .. "]" })
		end
	end

	for _, Hindi in ipairs(p) do
		local persianized = export.toIPA(Hindi, "persianized")
		local nonpersianized = export.toIPA(Hindi, "nonpersianized")
		add_pron(persianized)
		if persianized ~= nonpersianized then
			add_pron(nonpersianized)
		end
	end

	return m_a.format_qualifiers(lang, {"Delhi"}) .. " " .. m_IPA.format_IPA_full { lang = lang, items = results }
end

-- Template entry point for Urdu.
-- Reads the positional arguments of the calling template (empty ones are
-- skipped) and raises an error when no first argument is given. Each term is
-- transliterated with the Urdu language object (falling back to the term
-- itself when that yields nothing) and transcribed with the "desanskritize"
-- style. Returns the "ur" accent qualifier followed by the formatted IPA.
function export.make_ur(frame)
	local args = frame:getParent().args
	-- shadows the module-level `lang` inside this function only
	local lang = require("Module:languages").getByCode("ur")

	if not args[1] then
		error("No transliterations given.")
	end

	local p, results = {}, {}
	for _, item in ipairs(args) do
		-- skip blank arguments explicitly
		if item ~= "" then
			table.insert(p, item)
		end
	end

	for _, Urdu in ipairs(p) do
		Urdu = lang:transliterate(Urdu) or Urdu
		local desanskritize = export.toIPA(Urdu, "desanskritize")
		table.insert(results, { pron = "/" .. desanskritize .. "/" })
	end

	return m_a.format_qualifiers(lang, {"ur"}) .. ' ' .. m_IPA.format_IPA_full { lang = lang, items = results }
end

-- Template entry point for Deccani.
-- Reads the positional arguments of the calling template (empty ones are
-- skipped) and raises an error when no first argument is given. Each term is
-- passed as-is to toIPA with the "dakhini" style. Returns the "Deccani"
-- accent qualifier followed by the formatted IPA.
function export.make_deccani(frame)
	local args = frame:getParent().args
	-- shadows the module-level `lang` inside this function only
	local lang = require("Module:languages").getByCode("ur")

	if not args[1] then
		error("No transliterations given.")
	end

	local p, results = {}, {}
	for _, item in ipairs(args) do
		-- skip blank arguments explicitly
		if item ~= "" then
			table.insert(p, item)
		end
	end

	for _, Urdu in ipairs(p) do
		local dakhini = export.toIPA(Urdu, "dakhini")
		table.insert(results, { pron = "/" .. dakhini .. "/" })
	end

	return m_a.format_qualifiers(lang, {"Deccani"}) .. ' ' .. m_IPA.format_IPA_full { lang = lang, items = results }
end

return export