မဝ်ဂျူ:tengwar

Documentation for this module may be created at မဝ်ဂျူ:tengwar/doc
-- Module table; every function defined on it below is returned to the caller
-- at the end of the file.
local export = {}

-- `text` field of the current page's title object; export.convert2 falls back
-- to it when no text argument is supplied.
local PAGENAME = mw.title.getCurrentTitle().text

-- Frame-based entry point: reads the positional arguments from `frame.args`
-- and forwards them to export.convert.
--
-- Arguments read from `frame.args`:
--   [1] language code (required; see export.convert for the accepted codes)
--   [2] font name, defaults to 'annatar'
--   [3] text to convert, defaults to the current page name (PAGENAME)
--
-- Returns whatever export.convert returns for those arguments.
-- Raises an error when no language code is given.
--
-- Only `frame.args` is consulted. The parent frame is deliberately not
-- touched: its arguments were never used, and fetching them fails when the
-- frame has no parent.
function export.convert2(frame)
	local invoke_args = frame.args
	local lang = invoke_args[1] or error("Language code has not been specified.")
	local font = invoke_args[2] or 'annatar'
	local text = invoke_args[3] or PAGENAME
	return export.convert(lang, font, text)
end

-- Converts `text` to Tengwar for the given language mode and font.
--
-- Parameters:
--   lang  language/mode code: 'qya', 'sjn' or 'sjn2'
--   font  font name, passed through unchanged to the language-specific
--         converter
--   text  text to transcribe
--
-- Returns the converted string with the internal helper marks removed.
-- Raises an error when `lang` is none of the supported codes.
function export.convert(lang, font, text)
	text = mw.ustring.lower(text)
	text = mw.ustring.gsub(text, 'appendix:[^/]+/', '')	-- remove Appendix:<Language>/
	text = '␂' .. text .. '␃'	-- mark start and end of text
	-- '%.' is the pattern escape for a literal dot; the string escape '\.' is
	-- not valid Lua and is rejected by lexers newer than 5.1.
	text = mw.ustring.gsub(text, '%.', '‡')	-- unused character for syllable separator
	
	if lang == 'qya' then	-- Quenya
		text = export.conv_qya(font, text)
	elseif lang == 'sjn' then	-- Sindarin, Tehtar mode
		text = export.conv_sjn(font, text)
	elseif lang == 'sjn2' then	-- Sindarin, mode of Beleriand
		text = export.conv_sjn2(font, text)
	else
		error("Language code is not supported.")
	end
	-- assigning to a variable first drops gsub's second result (the match count)
	text = mw.ustring.gsub(text, '[‡␂␃]', '')	-- remove syllable separator and start/end marks
	return text
end

-- Re-encodes `text` from the Telcontar representation produced by the
-- conv_* functions into the character sequences listed in `replana`
-- (callers invoke this when the requested font is 'annatar').
--
-- Parameters:
--   text  string to re-encode
-- Returns the re-encoded string.
--
-- NOTE(review): most literals in `repltel` appear empty in this view;
-- presumably they are font-specific (Private Use Area) code points that are
-- not displayed -- confirm against the original page before editing.
function export.tel2ana(text)
	-- repltel[i] is the search pattern, replana[i] its replacement (same index)
	local repltel = {'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '-', ',', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''}
	local replana = {'1Í', '1[', '1\'', 'q[', 'q\'', 'a[', 'a\'', 'z[', '2Ì', '2{', 'w{', 's{', 'x{', 'xè', '3[', 'c[', 'cé', '4Ì', '5Ì', '5{', '5"', 't{', 't"', 'hÍ', '7Í', '7\'', 'j´', 'j¸', '9Ï', '\\', '¹', '1', 'q', 'a', 'z', '2', 'w', 's', 'x', '3', 'e', 'd', 'c', '4', 'r', 'f', 'v', '5', 't', 'g', 'b', '6', 'y', 'h', 'n', '7', 'u', 'j', 'm', '8', 'i', 'k', ',', '9', 'o', 'l', 'l', '.', '~', '½', '`', ']', 'E', 'Õ', 'T', 'R', 'Y', 'å', 'U', '\+', 'â', 'ë'}
	
	-- substitutions are applied one after another in table order, so each one
	-- operates on the output of all earlier ones
	for i=1, #repltel do
		text = mw.ustring.gsub(text, repltel[i], replana[i])
	end
		
	-- adjust diacritics to the letter width in Annatar
	-- vowdef holds the default marks; the other three tables hold the variant
	-- of the same mark (same index) used after wide, round and narrow letters.
	-- Only indices 1..#vowdef are read, so the 7th entry of vowwide is unused.
	local vowdef = {'E', 'R', 'T', 'Y', 'U', 'Õ'}
	local vowwide = {'#' ,'\$', '&#x25;', '\^', '&', 'Ô', ''}
	local vowround = {'D' ,'F', 'G', 'H', 'J', 'Ö'}
	local vownarr = {'C' ,'V', 'B', 'N', 'M', '×'}
	
	-- each pattern captures a base letter plus at most one following modifier
	-- character and swaps the default mark after it for the matching variant
	for i=1, #vowdef do
		text = mw.ustring.gsub(text, '([245rwtfsgvxbj][\'"´ÍÌÏ{[]?)'..vowdef[i], '%1'..vowwide[i])
		text = mw.ustring.gsub(text, '([9~`][\'"´ÍÌÏ{[]?)'..vowdef[i], '%1'..vownarr[i])
		text = mw.ustring.gsub(text, '([3ei,.][\'"´ÍÌÏ{[]?)'..vowdef[i], '%1'..vowround[i])
	end
	return text
end

-- Transcribes Quenya text (language code 'qya' in export.convert).
--
-- Parameters:
--   font  font name; when it equals 'annatar' the result is additionally
--         passed through export.tel2ana
--   text  text to transcribe; export.convert passes it lower-cased and
--         wrapped in its start/end marks
-- Returns the transcribed string.
--
-- NOTE(review): the glyph literals in `repltel`, `cons2find`, `cons2repl` and
-- inside several patterns appear empty in this view; presumably they are
-- font-specific (Private Use Area) code points that are not displayed --
-- confirm against the original page before editing.
function export.conv_qya(font, text)
	-- merge duplicate transcriptions (replaced by the easier processable one)
	local dublfind = {'x', 'z', 'qu', 'mb', 'nd', 'ng', 'nw', 'th', 'j', 'k', 'â', 'ê', 'î', 'ô', 'û'}
	local duplrepl = {'cs', 'ss', 'cw', 'b', 'd', 'g', 'ñw', 'þ', 'y', 'c', 'á', 'é', 'í', 'ó', 'ú'}
	
	for i=1, #dublfind do
		text = mw.ustring.gsub(text, dublfind[i], duplrepl[i])
	end
	
	-- define arrays for conversion
	-- arfind[i] is replaced by repltel[i]; longer sequences are listed before
	-- the single letters they contain, so they are consumed first
	local arfind = {'gw', 'ncw', 'ndy', 'nty', 'cc',  'dy', 'hl', 'hr', 'hw', 'hy', 'ld', 'll', 'ly', 'mm', 'mp', 'nc', 'nn', 'nt', 'ñw', 'ny', 'pp', 'ps', 'cs', 'cw', 'rd', 'rr', 'ry', 'ss', 'ts', 'tt', 'ty', 'b', 'c', 'd', 'f', 'g', 'h', 'l', 'm', 'n', 'ñ', 'p', 'r', 's', 't', 'v', 'w', 'y', 'þ', 'ai', 'au', 'eu', 'iu', 'oi', 'ui', 'á', 'é', 'í', 'ó', 'ú', 'a', 'e', 'i', 'o', 'u'}
	local repltel = {'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''}
	
	-- detect hiatus onsets
	-- the four patterns match adjacent vowel pairs; as displayed, each
	-- replacement re-emits both captures (see the review note above)
	text = mw.ustring.gsub(text, '([äöë])([aeiou])', '%1%2')
	text = mw.ustring.gsub(text, '([aeiou])([äöëaeo])', '%1%2')
	text = mw.ustring.gsub(text, '([ei])(i)', '%1%2')
	text = mw.ustring.gsub(text, '([ou])(u)', '%1%2')
	-- the diaeresis is no longer needed after this point, so drop it
	text = mw.ustring.gsub(text, 'ä', 'a')
	text = mw.ustring.gsub(text, 'ë', 'e')
	text = mw.ustring.gsub(text, 'ö', 'o')
	
	-- convert to Telcontar (base font, as it uses own codepoints)
	for i=1, #arfind do
		text = mw.ustring.gsub(text, arfind[i], repltel[i])
	end
	
	-- detect initial vowel
	text = mw.ustring.gsub(text, '([^-])([])', '%1%2')
	
	-- switch consonants, that have another form before vowels
	local cons2find = {'', '', '', ''}
	local cons2repl = {'',  '',  '', ''}
	-- before short, long vowels and diphthongs
	for i=1, 2 do
		text = mw.ustring.gsub(text, cons2find[i]..'([])', cons2repl[i]..'%1')
	end
	-- before short vowels only
	for i=3, 4 do
		text = mw.ustring.gsub(text, cons2find[i]..'([])', cons2repl[i]..'%1')
	end
	
	if font == 'annatar' then	-- convert to Annatar
		text = export.tel2ana(text)
	end
	return text
end

-- Transcribes Sindarin text in the Tehtar mode (language code 'sjn' in
-- export.convert).
--
-- Parameters:
--   font  font name; when it equals 'annatar' the result is additionally
--         passed through export.tel2ana
--   text  text to transcribe; export.convert passes it lower-cased and
--         wrapped in its start/end marks (the end mark is relied on below)
-- Returns the transcribed string.
--
-- NOTE(review): the glyph literals in `repltel`, `vowrepl`, `cons2find`,
-- `cons2repl` and inside several patterns appear empty in this view;
-- presumably they are font-specific (Private Use Area) code points that are
-- not displayed -- confirm against the original page before editing.
function export.conv_sjn(font, text)
	text = mw.ustring.gsub(text, 'f␃', 'v␃')	-- final f to v
	
	-- merge duplicate transcriptions (replaced by the easier processable one)
	local dublfind = {'dh', 'ph', 'th', 'k', 'â', 'ê', 'î', 'ô', 'û', 'ŷ', 'œ', 'ai', 'ae', 'au', 'ei', 'oe', 'ui'}
	local duplrepl = {'ð', 'f', 'þ', 'c', 'á', 'é', 'í', 'ó', 'ú', 'ý', 'e', 'aÿ', 'aj', 'aw', 'eÿ', 'oj', 'uÿ'}
	
	for i=1, #dublfind do
		text = mw.ustring.gsub(text, dublfind[i], duplrepl[i])
	end
	
	-- define arrays for conversion
	-- arfind[i] is replaced by repltel[i]; only indices 1..#arfind are read
	local arfind = {'chw', 'nch',  'nþ', 'ch', 'ð', 'gh', 'gw', 'hw', 'lh', 'll', 'mb', 'mm', 'mp', 'nc', 'nd', 'ng', 'nn', 'nt', 'rh', 'rr', 'ss', 'þ', 'b', 'c', 'd', 'f', 'g', 'h', 'j', 'l', 'm', 'n', 'ŋ', 'p', 'r', 's', 't', 'v', 'w', 'á', 'é', 'í', 'ó', 'ú', 'ý', 'ÿ', '\''}
	local repltel = {'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''}
	
	-- detect initial i glide
	text = mw.ustring.gsub(text, '([^a-zÿðþ])i([aeou])', '%1j%2')
	-- detect initial ng
	text = mw.ustring.gsub(text, '([^a-zÿðþ])ng([aeou])', '%1ŋ%2')
	-- detect hiatus onsets
	text = mw.ustring.gsub(text, '([äöë][aeiouy])', '%1')
	text = mw.ustring.gsub(text, '([aeiouy][äöëao])', '%1')
	text = mw.ustring.gsub(text, '([iy][iuy])', '%1')
	-- NOTE(review): Lua patterns have no alternation, so '|' below matches a
	-- literal bar character -- confirm whether that is intended
	text = mw.ustring.gsub(text, '([eu]e|oi|[eou]u)', '%1')
	-- the diaeresis is no longer needed after this point, so drop it
	text = mw.ustring.gsub(text, 'ä', 'a')
	text = mw.ustring.gsub(text, 'ë', 'e')
	text = mw.ustring.gsub(text, 'ö', 'o')
	-- detect final vowel
	text = mw.ustring.gsub(text, '([^])([aeiouy][^a-zÿðþ])', '%1%2')
	
	-- convert to Telcontar (base font, as it uses own codepoints)
	for i=1, #arfind do
		text = mw.ustring.gsub(text, arfind[i], repltel[i])
	end
	
	-- plain vowel letters still present at this point and their marks
	local vowfind  = {'a', 'e', 'i', 'o', 'u', 'y'}
	local vowrepl  = {'', '', '', '', '', ''}
	
	-- move vowel marks to the next consonant
	for i=1, #vowfind do
		text = mw.ustring.gsub(text, '([^])'..vowfind[i]..'([-][]?)', '%1%2'..vowrepl[i])
		text = mw.ustring.gsub(text, '([^])'..vowfind[i]..'([-][]?)', '%1%2'..vowrepl[i])	-- execute twice because of overlap
		-- any occurrence of this vowel that is left is replaced in place
		text = mw.ustring.gsub(text, vowfind[i], vowrepl[i])
	end
	
	-- switch consonants, that have another form before vowels
	local cons2find = {'', '', ''}
	local cons2repl = {'', '', ''}
	-- before short, long vowels and diphthongs
	text = mw.ustring.gsub(text, cons2find[1]..'([]?[-][])', cons2repl[1]..'%1')
	-- carrying vowel mark
	for i=2, 3 do
		text = mw.ustring.gsub(text, cons2find[i]..'([])', cons2repl[i]..'%1')
	end
	
	if font == 'annatar' then	-- convert to Annatar
		text = export.tel2ana(text)
	end
	
	return text
end

-- Transcribes Sindarin text in the mode of Beleriand (language code 'sjn2'
-- in export.convert).
--
-- Parameters:
--   font  font name; when it equals 'annatar' the result is additionally
--         passed through export.tel2ana
--   text  text to transcribe; export.convert passes it lower-cased and
--         wrapped in its start/end marks (the end mark is relied on below)
-- Returns the transcribed string.
--
-- NOTE(review): the glyph literals in `repltel` appear empty in this view;
-- presumably they are font-specific (Private Use Area) code points that are
-- not displayed -- confirm against the original page before editing.
function export.conv_sjn2(font, text)
	text = mw.ustring.gsub(text, 'f␃', 'v␃')	-- final f to v
	
	-- merge duplicate transcriptions (replaced by the easier processable one)
	local dublfind = {'dh', 'ph', 'th', 'k', 'â', 'ê', 'î', 'ô', 'û', 'ŷ'}
	local duplrepl = {'ð', 'f', 'þ', 'c', 'á', 'é', 'í', 'ó', 'ú', 'ý'}
	
	for i=1, #dublfind do
		text = mw.ustring.gsub(text, dublfind[i], duplrepl[i])
	end
	
	-- define arrays for conversion
	-- arfind[i] is replaced by repltel[i]; vowels and diphthongs are part of
	-- the same table here, with diphthongs listed before single vowels
	local arfind = {'chw', 'nch', 'nþ', 'ch', 'ð', 'gh', 'gw', 'hw', 'lh', 'mb', 'mm', 'mp', 'nc', 'nd', 'ng', 'nn', 'nt', 'rh', 'ss', 'þ', 'b', 'c', 'd', 'f', 'g', 'h', 'j', 'l', 'm', 'n', 'ŋ', 'p', 'r', 's', 't', 'v', 'w', 'ai', 'ae', 'au', 'ei', 'oe', 'ui', 'a', 'ä', 'e', 'ë', 'i', 'o', 'ö', 'u', 'y', 'œ', 'á', 'é', 'í', 'ó', 'ú', 'ý', '\''}
	local repltel = {'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''}
	
	-- detect initial ng
	text = mw.ustring.gsub(text, '([^a-zðþ])ng([aeou])', '%1ŋ%2')
	-- detect final au
	-- ('aw' followed by a character outside a-z, ð, þ is rewritten as 'au')
	text = mw.ustring.gsub(text, 'aw([^a-zðþ])', 'au%1')
	
	-- convert to Telcontar (base font, as it uses own codepoints)
	for i=1, #arfind do
		text = mw.ustring.gsub(text, arfind[i], repltel[i])
	end
	
	if font == 'annatar' then	-- convert to Annatar
		text = export.tel2ana(text)
	end
	
	return text
end

-- hand the module table (with all functions defined above) to the caller
return export