-- Documentation for this module may be created at မဝ်ဂျူ:category tree/poscatboiler/data/affixes and compounds/doc

-- Module-level tables that are filled in below and returned at the end of the file:
-- label definitions, raw category definitions, and the list of handler functions.
local labels, raw_categories, handlers = {}, {}, {}



-----------------------------------------------------------------------------
--                                                                         --
--                                  LABELS                                 --
--                                                                         --
-----------------------------------------------------------------------------


-- Compounds whose stems alliterate. The second parent, "alliterative phrases", is not defined in this file
-- (presumably it lives in another data module -- verify).
-- NOTE(review): the description says "noun phrases" although the label is filed under "compound terms"; confirm the
-- wording is intended.
labels["alliterative compounds"] = {
	description = "{{{langname}}} noun phrases composed of two or more stems that alliterate.",
	parents = {"compound terms", "alliterative phrases"},
}

-- Compounds whose two parts are antonyms of each other; filed under "dvandva compounds" with the sort key "antonym".
labels["antonymous compounds"] = {
	description = "{{{langname}}} compounds in which one part is an antonym of the other.",
	-- The sort key has to be part of the parent spec itself. Written as a bare `sort = ...` field next to the parent
	-- name it becomes a key of the outer list instead of belonging to any parent, unlike every other sorted parent
	-- in this file, which uses the {name = ..., sort = ...} form.
	parents = {{name = "dvandva compounds", sort = "antonym"}},
}

-- Bahuvrihi compounds: the first part modifies the second and the whole refers to someone characterized by it.
-- Filed under both "compound terms" and "exocentric compounds".
labels["bahuvrihi compounds"] = {
	description = "{{{langname}}} compounds in which the first part (A) modifies the second (B), and whose meaning follows a [[metonymic]] pattern: “<person> having a B that is A.”",
	parents = {"compound terms", "exocentric compounds"},
}

-- Generate one "compound <POS>" label per part of speech listed below. Each generated label is filed under
-- "compound terms" (with a single space as sort key) and under the part-of-speech label itself.

local compound_poses = {
	"နာမဝိသေသန",
	"ကြိယာဝိသေသန",
	"conjunctions",
	"determiners",
	"interjections",
	"နာမ်",
	"numerals",
	"particles",
	"postpositions",
	"prefixes",
	"prepositions",
	"သဗ္ဗနာမ်",
	"နာမ်မကိတ်ညဳ",
	"suffixes",
	"ကြိယာ",
}

for i = 1, #compound_poses do
	local pos_name = compound_poses[i]
	labels["compound " .. pos_name] = {
		description = ("{{{langname}}} %s composed of two or more stems."):format(pos_name),
		parents = {{name = "compound terms", sort = " "}, pos_name},
	}
end

-- Declared by hand rather than through the generated "compound <POS>" labels: the label itself is
-- "compound determinatives" while its part-of-speech parent is "determiners".
labels["compound determinatives"] = {
	description = "{{{langname}}} determinatives composed of two or more stems.",
	parents = {"compound terms", "determiners"},
}

-- Root of the compound labels in this file: the other compound labels list it as a parent, directly or indirectly.
-- It sets its own umbrella_parents, so the default filled in at the end of the labels section does not apply to it.
labels["compound terms"] = {
	description = "{{{langname}}} terms composed of two or more stems.",
	umbrella_parents = "Terms by etymology subcategories by language",
	parents = {"terms by etymology"},
}

-- Coordinative compounds ("A and B"). Parent of the antonymous, synonymous, itaretara and samahara labels in this
-- file.
labels["dvandva compounds"] = {
	description = "{{{langname}}} terms composed of two or more stems whose stems could be connected by an 'and'.",
	parents = {"compound terms"},
}

-- Subtype of "tatpurusa compounds" in which the modifying member is a number.
-- NOTE(review): unlike most descriptions in this file, this one has no final period -- confirm whether that is
-- intended.
labels["dvigu compounds"] = {
	description = "{{{langname}}} [[tatpuruṣa]] compounds where the modifying member is a number",
	parents = {"tatpurusa compounds"},
}

-- Compounds in which one of the stems is the head.
labels["endocentric compounds"] = {
	description = "{{{langname}}} terms composed of two or more stems, one of which is the [[w:head (linguistics)|head]] of that compound.",
	parents = {"compound terms"},
}

-- Noun-noun subtype of the endocentric compounds; shown as "noun-noun" in the breadcrumb.
-- NOTE(review): the description is identical to that of "endocentric compounds" and does not mention the noun-noun
-- restriction -- confirm whether a more specific text is wanted.
labels["endocentric noun-noun compounds"] = {
	description = "{{{langname}}} terms composed of two or more stems, one of which is the [[w:head (linguistics)|head]] of that compound.",
	breadcrumb = "noun-noun",
	parents = {"endocentric compounds", "compound terms"},
}

-- Verb-noun compounds whose nominal second element is the head; shown as "verb-noun" in the breadcrumb.
labels["endocentric verb-noun compounds"] = {
	description = "{{{langname}}} compounds in which the first element is a verbal stem, the second a nominal stem and the head of the compound.",
	breadcrumb = "verb-noun",
	parents = {"endocentric compounds", "verb-noun compounds"},
}

-- Compounds in which none of the stems is the head.
labels["exocentric compounds"] = {
	description = "{{{langname}}} terms composed of two or more stems, none of which is the [[w:head (linguistics)|head]] of that compound.",
	parents = {"compound terms"},
}

-- Verb-noun compounds that denote the agent of the action; shown as "verb-noun" in the breadcrumb.
labels["exocentric verb-noun compounds"] = {
	description = "{{{langname}}} compounds in which the first element is a transitive verb, the second a noun functioning as its direct object, and whose referent is the person or thing doing the action.",
	breadcrumb = "verb-noun",
	parents = {"exocentric compounds", "verb-noun compounds"},
}

-- Subtype of "tatpurusa compounds" in which the main stem determines the case endings.
labels["karmadharaya compounds"] = {
	description = "{{{langname}}} terms composed of two or more stems in which the main stem determines the case endings.",
	parents = {"tatpurusa compounds"},
}

-- Subtype of "dvandva compounds"; shown as "itaretara" in the breadcrumb.
-- NOTE(review): the description is identical to that of "dvandva compounds" and does not say what distinguishes
-- this subtype -- confirm.
labels["itaretara dvandva compounds"] = {
	description = "{{{langname}}} terms composed of two or more stems whose stems could be connected by an 'and'.",
	breadcrumb = "itaretara",
	parents = {"dvandva compounds"},
}

-- Compounds whose stems rhyme. The second parent, "rhyming phrases", is not defined in this file (presumably it
-- lives in another data module -- verify).
labels["rhyming compounds"] = {
	description = "{{{langname}}} noun phrases composed of two or more stems that rhyme.",
	parents = {"compound terms", "rhyming phrases"},
}

-- Subtype of "dvandva compounds"; shown as "samahara" in the breadcrumb.
-- NOTE(review): the description is identical to that of "dvandva compounds" and does not say what distinguishes
-- this subtype -- confirm.
labels["samahara dvandva compounds"] = {
	description = "{{{langname}}} terms composed of two or more stems whose stems could be connected by an 'and'.",
	breadcrumb = "samahara",
	parents = {"dvandva compounds"},
}

-- Expletive-plus-trochee formations, filed under "endocentric compounds".
labels["shitgibbons"] = {
	description = "{{{langname}}} terms that consist of a single-syllable [[expletive]] followed by a two-syllable [[trochee]] that serves as a [[nominalizer]] or [[intensifier]].",
	parents = {"endocentric compounds"},
}

-- Compounds whose two parts are synonyms of each other; filed under "dvandva compounds" with the sort key "synonym".
labels["synonymous compounds"] = {
	description = "{{{langname}}} compounds in which one part is a synonym of the other.",
	-- The sort key has to be part of the parent spec itself. Written as a bare `sort = ...` field next to the parent
	-- name it becomes a key of the outer list instead of belonging to any parent, unlike every other sorted parent
	-- in this file, which uses the {name = ..., sort = ...} form.
	parents = {{name = "dvandva compounds", sort = "synonym"}},
}

-- Parent of the dvigu, karmadharaya and vyadhikarana labels in this file.
-- NOTE(review): the description has no final period and says nothing that distinguishes it from "compound terms"
-- -- it looks truncated; confirm the intended text.
labels["tatpurusa compounds"] = {
	description = "{{{langname}}} terms composed of two or more stems",
	parents = {"compound terms"},
}

-- Verb + noun-object compounds; a subtype of "verb-object compounds" and parent of the endocentric and exocentric
-- verb-noun labels in this file.
labels["verb-noun compounds"] = {
	description = "{{{langname}}} compounds in which the first element is a transitive verb, the second a noun functioning as its direct object, and whose referent is the person or thing doing the action, or an adjective describing such a person or thing.",
	parents = {"verb-object compounds"},
}

-- Verb + object compounds where the object need not be a noun. `additional` supplies English examples as extra
-- text alongside the description.
labels["verb-object compounds"] = {
	description = "{{{langname}}} compounds in which the first element is a transitive verb, the second a term (usually but not always a noun) functioning as its (normally direct) object, and whose referent is the person or thing doing the action, or an adjective describing such a person or thing.",
	additional = "Examples in English are {{m|en|pickpocket|lit=someone who picks pockets}} and {{m|en|catch-all|lit=something that catches everything}}.",
	parents = {"compound terms"},
}

-- Compounds made of two or more verbs in apposition.
labels["verb-verb compounds"] = {
	description = "{{{langname}}} compounds composed of two or more verbs in apposition, often either synonyms or antonyms, and whose referent refers to the result of performing those actions.",
	parents = {"compound terms"},
}

-- Vrddhi derivatives are filed directly under "terms by etymology" rather than under "compound terms".
-- NOTE(review): this label sets no umbrella_parents, so the loop at the end of the labels section gives it
-- "Types of compound terms by language" -- confirm that is wanted for a non-compound label.
labels["vrddhi derivatives"] = {
	description = "{{{langname}}} terms derived from a Proto-Indo-European root by the process of [[w:vṛddhi|vṛddhi]] derivation.",
	parents = {"terms by etymology"},
}

-- Gerundive subtype of "vrddhi derivatives".
labels["vrddhi gerundives"] = {
	description = "{{{langname}}} [[gerundive]]s derived from a Proto-Indo-European root by the process of [[w:vṛddhi|vṛddhi]] derivation.",
	parents = {"vrddhi derivatives"},
}

-- Subtype of "tatpurusa compounds" in which the non-main stem determines the case endings (the counterpart of
-- "karmadharaya compounds").
labels["vyadhikarana compounds"] = {
	description = "{{{langname}}} terms composed of two or more stems in which the non-main stem determines the case endings.",
	parents = {"tatpurusa compounds"},
}

-- Add a "terms by <affix type>" label for every supported affix type. Each one is filed under "terms by etymology"
-- (sorted by the affix type) and under the plural affix label, e.g. "prefixes".
do
	local fixtypes = {"circumfix", "infix", "interfix", "prefix", "suffix"}
	for i = 1, #fixtypes do
		local fixtype = fixtypes[i]
		local plural = fixtype .. "es"
		labels["terms by " .. fixtype] = {
			description = ("{{{langname}}} terms categorized by their %s."):format(plural),
			umbrella_parents = "Terms by etymology subcategories by language",
			parents = {{name = "terms by etymology", sort = fixtype}, plural},
		}
	end
end


-- Give every label that does not set 'umbrella_parents' itself the shared default umbrella category.
-- NOTE: umbrella.parents overrides umbrella_parents if both are given.
for _, label_data in pairs(labels) do
	label_data.umbrella_parents = label_data.umbrella_parents or "Types of compound terms by language"
end



-----------------------------------------------------------------------------
--                                                                         --
--                              RAW CATEGORIES                             --
--                                                                         --
-----------------------------------------------------------------------------


-- Umbrella category that the labels above receive as their default 'umbrella_parents'. One of its parents is the
-- "compound terms" label (marked with is_label = true); the other two are given as plain category names.
-- "{{{umbrella_meta_msg}}}" is a placeholder, presumably expanded by the category tree code -- verify.
raw_categories["Types of compound terms by language"] = {
	description = "Umbrella categories covering topics related to types of compound terms.",
	additional = "{{{umbrella_meta_msg}}}",
	parents = {
		"Umbrella metacategories",
		{name = "compound terms", is_label = true, sort = " "},
		{name = "Terms by etymology subcategories by language", sort = " "},
	},
}


-----------------------------------------------------------------------------
--                                                                         --
--                                 HANDLERS                                --
--                                                                         --
-----------------------------------------------------------------------------

-----------------------------------------------------------------------------
------------------------------ Affix handlers -------------------------------
-----------------------------------------------------------------------------

-- Handler for affix categories, i.e. labels of the form "<pos> <affixtype>ed with <term>" or
-- "<pos> <affixtype>ed with <term> (<id>)", such as "terms prefixed with un-".
--
-- Reads `data.label`, `data.lang`, `data.sc` and `data.args`.
--
-- Returns nothing/nil when the label does not have that form, when no language object is available, when the affix
-- type is not one of prefix/suffix/circumfix/infix/interfix, or when <pos> is "words". Otherwise returns the
-- category settings table followed by `true`, which signals that `data.args` has been processed here.
table.insert(handlers, function(data)
	local labelpref, pos, affixtype, term_and_id = data.label:match("^(([a-z -]+) ([a-z]+fix)ed with )(.+)$")
	if not affixtype then
		-- Not an affix label.
		return
	end

	-- Everything below needs a language object: the parent sort key, the affix link and the lookup of mapped
	-- variants all call methods on it. Without one, building the sort key would raise an indexing error, so treat
	-- the label as not handled instead.
	local lang = data.lang
	if not lang then
		return nil
	end

	-- Split an optional trailing " (ID)" off the term.
	local term, id = term_and_id:match("^(.+) %(([^()]+)%)$")
	term = term or term_and_id

	-- Description fragment for each supported affix type.
	local desc = {
		["prefix"]		= "beginning with the prefix",
		["suffix"]		= "ending with the suffix",
		["circumfix"]	= "bookended with the circumfix",
		["infix"]		= "spliced with the infix",
		["interfix"]	= "joined with the interfix",
		-- Transfixes not supported currently.
		-- ["transfix"]	= "patterned with the transfix",
	}
	if not desc[affixtype] then
		return nil
	end

	-- Here, {LANG} is replaced with the actual language, {TERM_AND_ID} with the actual term (or with 'TERM<id:ID>'
	-- if there is an ID), {BASE} with '<var>base</var>', {BASE2} with '<var>base2</var>', {BASE_EXPL} with an
	-- explanation of what "base" means, {BASE_BASE2_EXPL} with an explanation of what "base" and "base2" mean, and
	-- {POS} with '|pos=POS' if there is a `pos` other than "terms", otherwise a blank string.
	local what_categorizes = {
		["prefix"] = "{{tl|af|{LANG}|{TERM_AND_ID}|{BASE}{POS}}} or {{tl|affix|{LANG}|{TERM_AND_ID}|{BASE}{POS}}} (or the more specific and less-preferred equivalents {{tl|pre}} or {{tl|prefix}}), where {BASE_EXPL}",
		["suffix"] = "{{tl|af|{LANG}|{BASE}|{TERM_AND_ID}{POS}}} or {{tl|affix|{LANG}|{BASE}|{TERM_AND_ID}{POS}}} (or the more specific and less-preferred equivalents {{tl|suf}} or {{tl|suffix}}), where {BASE_EXPL}",
		["circumfix"] = "{{tl|af|{LANG}|{BASE}|{TERM_AND_ID}{POS}}} or {{tl|affix|{LANG}|{BASE}|{TERM_AND_ID}{POS}}}, where {BASE_EXPL}",
		["infix"] = "{{tl|infix|{LANG}|{BASE}|{TERM_AND_ID}{POS}}}, where {BASE_EXPL}",
		["interfix"] = "{{tl|af|{LANG}|{BASE}|{TERM_AND_ID}{POS}|{BASE2}}} or {{tl|affix|{LANG}|{BASE}|{TERM_AND_ID}|{BASE2}{POS}}}, where {BASE_BASE2_EXPL}",
	}

	local args = require("Module:parameters").process(data.args, {
		["alt"] = true,
		["sc"] = true,
		["sort"] = true,
		["tr"] = true,
		["ts"] = true,
	})
	local m_scripts = require("Module:scripts")
	-- An explicit script on `data` wins over the `sc` argument.
	local sc = data.sc or args.sc and m_scripts.getByCode(args.sc, "sc") or nil
	local m_affix = require("Module:affix")
	-- Call make_affix to add display hyphens if they're not already present.
	local _, display_term, lookup_term = m_affix.make_affix(term, lang, sc, affixtype, nil, true)
	local _, display_alt = m_affix.make_affix(args.alt, lang, sc, affixtype)
	-- Transliteration and transcription are processed with the Latn script object.
	local latn = m_scripts.getByCode("Latn")
	local _, display_tr = m_affix.make_affix(args.tr, lang, latn, affixtype)
	local _, display_ts = m_affix.make_affix(args.ts, lang, latn, affixtype)
	local m_script_utilities = require("Module:script utilities")
	local id_text = id and " (" .. id .. ")" or ""

	if pos == "words" then
		-- don't allow formerly-named categories with "words"
		return nil
	end

	-- Compute parents.
	local parents = {}
	if pos ~= "terms" then
		-- A POS-specific category is filed under the matching "terms ..." category (same term and ID).
		table.insert(parents, {
			name = "terms " .. affixtype .. "ed with " .. term_and_id,
			sort = id and (id .. ", " .. pos) or pos,
			args = args,
		})
	end
	if id then
		-- A category with an ID is filed under the same category without the ID.
		table.insert(parents, {name = labelpref .. term, sort = id, args = args})
	end
	table.insert(parents, {name = "terms by " .. affixtype, sort = (lang:makeSortKey((lang:makeEntryName(args.sort or term))))})

	local langcode = lang:getCode()

	-- If other affixes are mapped to this one, show them.
	local additional
	if m_affix.langs_with_lang_specific_data[langcode] then
		local langdata = mw.loadData(m_affix.affix_lang_data_module_prefix .. langcode)
		if langdata.affix_mappings then
			local variants = {}
			for variant, canonical in pairs(langdata.affix_mappings) do
				-- Above, we converted the stripped link term as we received it to the lookup form, so we
				-- can look up the variants that are mapped to this term. Once we find them, map them to
				-- display form.
				local is_variant = false
				if type(canonical) == "table" then
					for _, canonical_v in pairs(canonical) do
						if canonical_v == lookup_term then
							is_variant = true
							break
						end
					end
				else
					is_variant = canonical == lookup_term
				end
				if is_variant then
					local _, display_variant = m_affix.make_affix(variant, lang, sc, affixtype)
					table.insert(variants, "{{m|" .. langcode .. "|" .. display_variant .. "}}")
				end
			end
			if #variants > 0 then
				-- pairs() order is unspecified, so sort to get a stable listing.
				table.sort(variants)
				additional = ("This category also includes terms %sed with %s."):format(affixtype,
					require("Module:table").serialCommaJoin(variants))
			end
		end
	end

	-- Explain which template call places terms in this category.
	local what_categorizes_msg = what_categorizes[affixtype]
	if not what_categorizes_msg then
		error(("Internal error: No what_categorizes value for affixtype '%s' for label '%s', lang '%s'"):format(
			affixtype, data.label, langcode))
	end
	-- The term and POS are arbitrary text, so they are escaped before being used as gsub replacement strings.
	local replacement_escape = require("Module:string utilities").replacement_escape
	what_categorizes_msg = "Terms are placed in this category using " .. (what_categorizes_msg
		:gsub("{LANG}", langcode)
		:gsub("{TERM_AND_ID}", replacement_escape(id and ("%s<id:%s>"):format(term, id) or term))
		:gsub("{POS}", replacement_escape(pos == "terms" and "" or ("|pos=%s"):format(pos)))
		:gsub("{BASE}", "<var>base</var>")
		:gsub("{BASE2}", "<var>base2</var>")
		:gsub("{BASE_EXPL}", "<code><var>base</var></code> is the base lemma from which this term is derived")
		:gsub("{BASE_BASE2_EXPL}", "<code><var>base</var></code> and <code><var>base2</var></code> are the " ..
			"base lemmas from which this term is derived")
	) .. "."
	if additional then
		additional = additional .. "\n\n" .. what_categorizes_msg
	else
		additional = what_categorizes_msg
	end

	local affix_link = require("Module:links").full_link({
		lang = lang, term = display_term, alt = display_alt, sc = sc, id = id, tr = display_tr, ts = display_ts}, "term")

	return {
		description = "{{{langname}}} " .. pos .. " " .. desc[affixtype] .. " " .. affix_link .. ".",
		additional = additional,
		-- "terms" categories show the tagged affix (plus ID) in the breadcrumb; POS-specific ones show the POS.
		breadcrumb = pos == "terms" and m_script_utilities.tag_text(display_alt or display_term, lang, sc, "term") .. id_text or pos,
		displaytitle = "{{{langname}}} " .. labelpref .. m_script_utilities.tag_text(term, lang, sc, "term") .. id_text,
		parents = parents,
		umbrella = false,
	}, true -- true = args handled
end)


-- Export the three module tables built above.
return {
	LABELS = labels,
	RAW_CATEGORIES = raw_categories,
	HANDLERS = handlers,
}