-- Documentation for this module may be created at မဝ်ဂျူ:ja-ojad/doc

-- Table of functions exposed by this module; it is returned at the end of the file.
local export = {}

local find = mw.ustring.find
local sub = mw.ustring.sub
local gsub = mw.ustring.gsub
local match = mw.ustring.match

local m_ja = require("Module:ja")
local m_ja_pron = require("Module:ja-pron")

-- Reports whether `item` equals (==) one of the elements list[1] .. list[#list].
-- Returns true on the first hit and false when nothing matches.
local function contains(list, item)
	local found = false
	for index = 1, #list do
		if list[index] == item then
			found = true
			break
		end
	end
	return found
end

-- Wraps `text` in a <span> tagged lang="ja" with the Jpan script class.
local function ja(text)
	local open_tag = '<span lang="ja" class="Jpan">'
	local close_tag = '</span>'
	return open_tag .. text .. close_tag
end

-- Rewrites the kana form `kana2` with the kanji spelling of the lemma.
--   kanji : lemma as written with kanji plus trailing hiragana (okurigana)
--   kana1 : kana reading of the lemma
--   kana2 : kana reading of the form to be respelled
-- Raises an error when kana1 does not end with the lemma's okurigana, or when
-- kana2 does not start with the reading of the lemma's stem.
-- interpolate_kanji('開く', 'あく', 'あきます') returns '開きます'
local function interpolate_kanji(kanji, kana1, kana2)
	-- Split the lemma into its leading part and its trailing run of hiragana.
	local stem, tail = match(kanji, '^(.-)([ぁ-ゖ]*)$')
	local tail_pattern = tail .. '$'
	if not find(kana1, tail_pattern) then
		error(kana1 .. ' does not end with ' .. tail)
	end

	-- Reading of the stem: the lemma reading with the okurigana removed.
	local reading = gsub(kana1, tail_pattern, '')
	if find(stem, '来$') and find(reading, 'く$') then
		-- For a stem ending in 来 read く, accept こ, き or く in the inflected form.
		reading = gsub(reading, 'く$', '[こきく]')
	end

	local head_pattern = '^' .. reading
	if not find(kana2, head_pattern) then
		error(kana2 .. ' does not begin with ' .. reading)
	end

	-- Parentheses drop gsub's second result (the substitution count).
	return (gsub(kana2, head_pattern, stem))
end

-- Renders one accented reading as kana and romaji markup.
-- A tick (') in pron marks a fall in pitch; a reading without a tick is heibangata.
-- format_accent("かえりま'す") returns overlined かえります and [kàérímáꜜsù]
local function format_accent(pron)
	-- The accent position is the mora count of whatever precedes the first tick
	-- (an empty string, hence the count for '', when there is no tick).
	local kana_and_ticks = gsub(pron, "[^ぁ-ゖァ-ヺー']", '')
	local before_tick = match(kana_and_ticks, "^(.-)'") or ''
	local accent_position = m_ja.count_morae(before_tick)

	-- Pass the tick-free reading to ja-pron, then pull the two spans out of its output.
	local plain = gsub(pron, "'", '')
	local rendered = m_ja_pron.accent(plain, accent_position)
	local kana_span, romaji_span = match(rendered, '(<span lang="ja" class="Jpan">.-) (<span class="Latn"><samp>.-</samp></span>)')
	return kana_span, romaji_span
end

-- Builds the wikitable markup for one row of the accent table.
--   label       : text of the row's header cell
--   lemma_kanji : lemma spelled with kanji
--   lemma_kana  : kana reading of the lemma
--   prons       : one or more accented readings separated by '/', or '-' when
--                 no accent is provided. An empty string or a missing (nil)
--                 field is treated the same way as '-'.
-- Returns the header cell, the kanji / kana / romaji cells and the row separator.
local function format_row(label, lemma_kanji, lemma_kana, prons)
	local kanji, kana, romaji
	if prons == nil or prons == '' or prons == '-' then
		-- No accent data for this form: show placeholders instead of trying to
		-- parse the sentinel as a reading (which would raise in interpolate_kanji).
		kanji = '-'
		kana = '-'
		romaji = '-'
	else
		kanji = {}
		kana = {}
		romaji = {}
		for pron in mw.text.gsplit(prons, '/') do
			-- Strip the accent ticks (and anything else that is not kana) before
			-- mapping the reading back onto the kanji spelling.
			local plain_kana = gsub(pron, "[^ぁ-ゖァ-ヺー]", '')
			local new_kanji = interpolate_kanji(lemma_kanji, lemma_kana, plain_kana)
			-- Variants that differ only in accent share one kanji spelling; list it once.
			if not contains(kanji, new_kanji) then table.insert(kanji, new_kanji) end
			local new_kana, new_romaji = format_accent(pron)
			table.insert(kana, new_kana)
			table.insert(romaji, new_romaji)
		end
		kanji = table.concat(kanji, '<br>')
		kana = table.concat(kana, '<br>')
		romaji = table.concat(romaji, '<br>')
		kanji = ja(kanji)
	end
	return '! ' .. label .. '\n| ' .. kanji .. '\n| ' .. kana .. '\n| ' .. romaji .. '\n|-\n'
end

-- Builds the collapsible accent table for a verb.
-- `key` is looked up in "Module:ja/data/ojad/る" when it ends in る and in
-- "Module:ja/data/ojad" otherwise. The stored entry is a comma-separated list
-- of accented readings, one field per form; the field numbers used for each
-- row are listed below. When there is no entry, a tracking call is made and an
-- HTML comment is returned instead of a table.
local function format_verb(key)
	local data_page = find(key, 'る$') and "Module:ja/data/ojad/る" or "Module:ja/data/ojad"
	local entry = mw.loadData(data_page)[key]
	if not entry then
		require("Module:debug").track("ja-ojad/no entry")
		return '<!-- no OJAD entry -->'
	end

	local fields = mw.text.split(entry, ',')
	-- Anything from the first ':' in the key onward is dropped to get the kanji spelling.
	local lemma_kanji = gsub(key, ':.*', '')
	-- The lemma reading is the first variant of field 1, reduced to kana only.
	local first_variant = gsub(fields[1], '/.*', '')
	local lemma_kana = gsub(first_variant, '[^ぁ-ゖァ-ヺー]', '')

	-- Each spec is { row label, field number in the entry }.
	local stem_forms = {
		{ 'Terminal (<span class="Jpan" lang="ja-Jpan">[[終止形#Japanese|終止形]]</span>)<br>Attributive (<span class="Jpan" lang="ja-Jpan">[[連体形#Japanese|連体形]]</span>)', 1 },
		{ 'Imperative (<span class="Jpan" lang="ja-Jpan">[[命令形#Japanese|命令形]]</span>)', 10 },
	}
	local key_constructions = {
		{ 'Passive', 9 },
		{ 'Causative', 8 },
		{ 'Potential', 11 },
		{ 'Volitional', 12 },
		{ 'Negative', 5 },
		{ 'Negative perfective', 6 },
		{ 'Formal', 2 },
		{ 'Perfective', 4 },
		{ 'Conjunctive', 3 },
		{ 'Hypothetical conditional', 7 },
	}

	local header = '<div class="vsSwitcher" data-toggle-category="accent">\n<div style="display: inline-block;">\n* Tokyo pitch accent of conjugated forms of "' .. ja(lemma_kanji) .. '"<span class="vsToggleElement" style="margin-left: 1em;"></span></div>\n<div class="vsHide">\n{| class="wikitable" style="white-space: nowrap;"\n| colspan="4" style="text-align: left; padding: .2em 1em" | Source: [http://www.gavo.t.u-tokyo.ac.jp/ojad/eng/search/index/word:' .. lemma_kanji .. ' Online Japanese Accent Dictionary]\n|-\n! colspan="4" | Stem forms\n|-\n'

	local out = { header }
	local function add_rows(specs)
		for i = 1, #specs do
			local spec = specs[i]
			out[#out + 1] = format_row(spec[1], lemma_kanji, lemma_kana, fields[spec[2]])
		end
	end

	add_rows(stem_forms)
	out[#out + 1] = '! colspan="4" | Key constructions\n|-\n'
	add_rows(key_constructions)
	out[#out + 1] = '|}</div></div>'
	return table.concat(out)
end

-- Builds the collapsible accent table for an adjective.
-- `key` is looked up in "Module:ja/data/ojad/い". The stored entry is a
-- comma-separated list of accented readings, one field per form; the field
-- numbers used for each row are listed below. When there is no entry, a
-- tracking call is made and an HTML comment is returned instead of a table.
local function format_adjective(key)
	local entry = mw.loadData("Module:ja/data/ojad/い")[key]
	if not entry then
		require("Module:debug").track("ja-ojad/no entry")
		return '<!-- no OJAD entry -->'
	end

	local fields = mw.text.split(entry, ',')
	-- Anything from the first ':' in the key onward is dropped to get the kanji spelling.
	local lemma_kanji = gsub(key, ':.*', '')
	-- The lemma reading is the first variant of field 1, reduced to kana only.
	local first_variant = gsub(fields[1], '/.*', '')
	local lemma_kana = gsub(first_variant, '[^ぁ-ゖァ-ヺー]', '')

	-- Each spec is { row label, field number in the entry }.
	local stem_forms = {
		{ 'Continuative (<span class="Jpan" lang="ja-Jpan">[[連用形#Japanese|連用形]]</span>)', 9 },
		{ 'Terminal (<span class="Jpan" lang="ja-Jpan">[[終止形#Japanese|終止形]]</span>)', 8 },
		{ 'Attributive (<span class="Jpan" lang="ja-Jpan">[[連体形#Japanese|連体形]]</span>)', 1 },
	}
	local key_constructions = {
		{ 'Informal negative', 5 },
		{ 'Informal past', 4 },
		{ 'Informal negative past', 6 },
		{ 'Formal', 2 },
		{ 'Conjunctive', 3 },
		{ 'Conditional', 7 },
	}

	local header = '<div class="vsSwitcher" data-toggle-category="accent">\n<div style="display: inline-block;">\n* Tokyo pitch accent of inflected forms of "' .. ja(lemma_kanji) .. '"<span class="vsToggleElement" style="margin-left: 1em;"></span></div>\n<div class="vsHide">\n{| class="wikitable" style="white-space: nowrap;"\n| colspan="4" style="text-align: left; padding: .2em 1em" | Source: [http://www.gavo.t.u-tokyo.ac.jp/ojad/eng/search/index/word:' .. lemma_kanji .. ' Online Japanese Accent Dictionary]\n|-\n! colspan="4" | Stem forms\n|-\n'

	local out = { header }
	local function add_rows(specs)
		for i = 1, #specs do
			local spec = specs[i]
			out[#out + 1] = format_row(spec[1], lemma_kanji, lemma_kana, fields[spec[2]])
		end
	end

	add_rows(stem_forms)
	out[#out + 1] = '! colspan="4" | Key constructions\n|-\n'
	add_rows(key_constructions)
	out[#out + 1] = '|}</div></div>'
	return table.concat(out)
end

-- Entry point called with a frame object.
-- The lookup key is the first positional argument of the parent frame, or the
-- current page title when that argument is absent. Keys ending in い are
-- rendered with format_adjective; every other key goes to format_verb.
function export.show(frame)
	local key = frame:getParent().args[1]
	if not key then
		key = mw.title.getCurrentTitle().text
	end

	local formatter = format_verb
	if find(key, 'い$') then
		formatter = format_adjective
	end
	return formatter(key)
end

-- Hand the table of exported functions back to the code that loads this module.
return export