မဝ်ဂျူ:typing-aids/data/sa

Documentation for this module may be created at မဝ်ဂျူ:typing-aids/data/sa/doc
-- Table returned at the end of this module; it is filled in below under the
-- keys "sa" and "sa-tr".
local data = {}

-- NOTE(review): presumably turns one or more code points into a string --
-- confirm against Module:string/char.
local U = require("Module:string/char")

-- Devanagari signs that the replacement rules insert.
local anusvAra, visarga, virAma = U(0x902), U(0x903), U(0x94D)
local avagraha = "ऽ"

-- All Devanagari consonant letters known to the rules, followed by a pattern
-- character class matching any single one of them.
local consonants = "कखगघङचछजझञटठडढणतथदधनपफबभमयरलळवशषसह"
local consonant = string.format("[%s]", consonants)

-- Latin-script character classes. They are used by the first rules of
-- data["sa"] to decide whether a "ḷ" is a vowel or a consonant from the
-- letters around it.
local Lconsonants = "kgṅcjñṭḍṇtdnpbmyrlvśṣs" -- excludes h and ḷ
local Lcons1 = "[" .. Lconsonants .. "h]"	-- any consonant, h included
local Lcons2 = "[" .. Lconsonants .. "]"	-- any consonant except h
-- Every vowel must be listed here, "ū" included; a vowel missing from this
-- list is not recognised as a vowel context in front of "ḷ".
local Lvowels = "āeĕēiïīoŏōuŭūṛṝl̥̄l̥ḹ" -- excludes a
local Lvowel1 = "[" .. Lvowels .. "]"	-- any vowel except a
local Lvowel2 = "[" .. Lvowels .. "a]"	-- any vowel, a included
local accents = U(0x301, 0x306, 0x308) -- combining acute, breve and diaeresis
local accent = "[" .. accents .. "]"
local acute = U(0x301)		-- combining acute
-- Replacement rules stored under the key "sa". Every entry is a
-- {pattern, replacement} pair whose replacement is either a string (with %1,
-- %2 back-references) or a lookup table. The order of the entries matters, as
-- the notes on individual rules explain.
-- NOTE(review): presumably the consumer applies the entries one after another
-- with a Unicode-aware gsub -- confirm against the calling module.
data["sa"] = {
	-- Resolve ḷ where possible
	{"("..Lcons1..")(ḷ)", "%1l̥"}, -- It's a vowel next to a consonant
	{"(ḷ)("..Lcons2..")", "l̥%2"},
	{"("..Lvowel1..accent.."?)(ḷ)", "%1L"}, -- It's a consonant next to a vowel.
	{"(ḷ)(h?"..Lvowel2..")", "L%2"},

	-- Vowels and modifiers. Do the diphthongs and diaereses first.
	{"ai", "ऐ"},
	{"au", "औ"},

	-- Single-character vowels; any ḷ still unresolved at this point is taken as a vowel.
	{".", {
		["ä"] = "अ", ["ö"] = "ओ", ["ï"] = "इ", ["ü"] = "उ",
		["a"] = "अ", ["ā"] = "आ", ["i"] = "इ", ["ī"] = "ई",
		["u"] = "उ", ["ū"] = "ऊ", ["e"] = "ए", ["o"] = "ओ",
		["ṝ"] = "ॠ", ["ṛ"] = "ऋ", ["ḹ"] = "ॡ", ["ḷ"] = "ऌ",
	}},
	{"r̥", "ऋ"},
	-- The long vowel has to be tried before the short one: "l̥̄" starts with
	-- "l̥", so the shorter rule would otherwise consume it and leave a stray
	-- macron behind.
	{"l̥̄", "ॡ"},
	{"l̥", "ऌ"},
	{"(अ)[%-/]([इउ])", "%1%2"},		-- a-i, a-u for अइ, अउ; must follow rules for "ai", "au"

	-- Two-letter consonants must go before h.
	{".h", {
		["kh"] = "ख", ["gh"] = "घ", ["ch"] = "छ", ["jh"] = "झ",
		["ṭh"] = "ठ", ["ḍh"] = "ढ", ["th"] = "थ", ["dh"] = "ध",
		["ph"] = "फ", ["bh"] = "भ",
	}},
	-- Other letters
	{".", {
		h = "ह",
		-- Other stops.
		k = "क", g = "ग", c = "च", j = "ज",
		["ṭ"] = "ट", ["ḍ"] = "ड", t = "त", d = "द",
		p = "प", b = "ब",
		-- Nasals.
		["ṅ"] = "ङ", ["ñ"] = "ञ", ["ṇ"] = "ण",
		n = "न", m = "म",
		-- Remaining consonants.
		y = "य", r = "र", l = "ल", L = "ळ",
		v = "व", ["ś"] = "श", ["ṣ"] = "ष", s = "स",
		["ẏ"] = "य़", ["ṃ"] = anusvAra, ["ḥ"] = visarga,
		["'"] = avagraha,
	}},
	-- This rule must be applied twice because a consonant may only be in one capture per operation,
	-- so "CCC" will only recognize the first two consonants. Must follow all consonant conversions.
	{"(" .. consonant .. ")(" .. consonant .. ")", "%1" .. virAma .. "%2"},
	{"(" .. consonant .. ")(" .. consonant .. ")", "%1" .. virAma .. "%2"},
	-- A consonant at the very end of the text gets a virama.
	{"(" .. consonant .. ")$", "%1" .. virAma},
	-- Drop any combining acute accent.
	{acute, ""},
}

-- Lookup from each independent vowel letter to U() of a single code point: the
-- diacritic that stands in for that letter after a consonant. The table is
-- used further down both as the source of the letters matched (its keys) and
-- as the gsub replacement table (its values).
-- "अ" has no entry; the last rule appended to data["sa"] simply removes it
-- after a consonant.
local vowels = {
	["इ"] = U(0x93F),
	["उ"] = U(0x941),
	["ऋ"] = U(0x943),
	["ऌ"] = U(0x962),
	["ए"] = U(0x947),
	["ओ"] = U(0x94B),
	["आ"] = U(0x93E),
	["ई"] = U(0x940),
	["ऊ"] = U(0x942),
	["ॠ"] = U(0x944),
	["ॡ"] = U(0x963),
	["ऐ"] = U(0x948),
	["औ"] = U(0x94C),
}

-- After a consonant, an independent vowel letter is swapped for its diacritic.
-- This rule has to come after every consonant conversion.
local independentForms = {}
for letter, _ in pairs(vowels) do
	-- Each key is expected to be exactly one character (a check for this,
	-- mw.ustring.len(letter) == 1, was left disabled in the original).
	independentForms[#independentForms + 1] = letter
end
-- The frontier matches at a non-consonant that directly follows a consonant.
local afterConsonant = "%f[^" .. consonants .. "]"
local independentVowel = "([" .. table.concat(independentForms) .. "])"
data["sa"][#data["sa"] + 1] = {afterConsonant .. independentVowel, vowels}

-- Last rule of all: it must run after the vowel-to-diacritic rule above,
-- or "aï", "aü" won't work.
data["sa"][#data["sa"] + 1] = {"(" .. consonant .. ")अ", "%1"}

-- Shortcut table stored under "sa-tr": [[w:Harvard-Kyoto]] input mapped to
-- [[w:International Alphabet of Sanskrit Transliteration]] characters.
-- The entries are split into three numbered stages; each longer sequence
-- ("lRR", then "lR" / "RR") sits in an earlier stage than the shorter sequence
-- it contains ("R").
data["sa-tr"] = {
	[1] = {
		A = "ā",
		["ĕ"] = "e",
		I = "ī",
		U = "ū",
		["ŏ"] = "o",
		J = "ñ",
		T = "ṭ",
		D = "ḍ",
		N = "ṇ",
		G = "ṅ",
		z = "ś",
		S = "ṣ",
		M = "ṃ",
		H = "ḥ",
		lRR = "ḹ",
		["/"] = acute,	-- a slash becomes the combining acute
	},
	[2] = {
		lR = "l̥", -- was "ḷ",
		RR = "ṝ",
	},
	[3] = {
		R = "ṛ",
	},
}

-- Hand the finished rule tables to the caller.
return data