Wikimore

The following documentation is located at Module:grc-translit/documentation. ^[edit] Categories were auto-generated by Module:module categorization. ^[edit]

Useful links: subpage list • links • transclusions • testcases • sandbox

This module will transliterate Ancient Greek language text per WT:GRC TR. It is also used to transliterate Proto-Brythonic, Gaulish, Messapic, Eteocretan, Demotic, Greek, Paeonian, Pre-Samnite, Oscan, Sicel, Thracian, Bactrian, Dacian, Galatian, Alanic, Elymian, Old Median, Ancient Macedonian, Phrygian, and Punic. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:grc-translit/testcases.

Functions

tr(text, lang, sc): Transliterates a given piece of text written in the script specified by the code sc and in the language specified by the code lang. When the transliteration fails, returns nil.

2 of 57 tests failed. (refresh)

testcases for `tr` function in Module:grc-translit:
	Text	Expected	Actual
	λόγος	lógos	lógos
	οἷαι	hoîai	hoîai
velar
	ἄγγελος	ángelos	ángelos
	ἔγκειμαι	énkeimai	énkeimai
	σφίγξ	sphínx	sphínx
	τυγχάνω	tunkhánō	tunkhánō
	Ἀγϙυλίων	Anqulíōn	Anqulíōn
archaic letters
	Ϙόρῐνθοϻ	Qórĭnthos	Qórĭnthos
	ϝάναξ	wánax	wánax
	ἀρκͱᾱγέτας	arkhāgétas	arkhāgétas
	*-ϳω	*-jō	*-jō
current problems
	Υἱός	'''Hu'''iós	'''U'''hiós
u/y
	ταῦρος	taûros	taûros
	νηῦς	nēûs	nēûs
	σῦς	sûs	sûs
	ὗς	hûs	hûs
	γυῖον	guîon	guîon
	ἀναῡ̈τέω	anaṻtéō	anaṻtéō
	δαΐφρων	daḯphrōn	daḯphrōn
	πρηῠ́ς	prēŭ́s	prēŭ́s
vowel length
	τῶν	tôn	tôn
	τοὶ	toì	toì
	τῷ	tōî	tōî
	τούτῳ	toútōi	toútōi
	σοφίᾳ	sophíāi	sophíāi
	Θρᾴκη	Thrāíkē	Thrāíkē
	προσηύδᾱ	prosēúdā	prosēúdā
	Καῖσᾰρ	Kaîsăr	Kaîsăr
	ᾰ̓γᾰ́πη	ăgắpē	ăgắpē
	μᾱ̆νός	mā̆nós	mā̆nós
	ὑπόγυͅον	hupógūion	hupógūion
breathing
	ὁ	ho	ho
	οἱ	hoi	hoi
	εὕρισκε	heúriske	heúriske
	ὑϊκός	huïkós	huïkós
	πυρρός	purrhós	purrhós
	ῥέω	rhéō	rhéō
	ῤάριον	rárion	rárion
	Ρ̓ᾶρος	Râros	Râros
	σάἁμον	sáhamon	sáhamon
	ϝ̔έ	whé	whé
	μύῤῥᾱ	múrrhā	múrrhā
	**ἔῥῥευσᾰ	**érhrheusă	**érhrheusă
	**Βοῤῤᾶς	**Borrâs	**Borrâs
capitals
	Ὀδυσσεύς	Odusseús	Odusseús
	Εἵλως	Heílōs	Heílōs
	ᾍδης	Hāídēs	Hāídēs
	ἡ Ἑλήνη	hē Helḗnē	hē Helḗnē
	ΙΧΘΥΣ	IKHTHUS	IKHTHUS
punctuation
	ἔχεις μοι εἰπεῖν, ὦ Σώκρατες, ἆρα διδακτὸν ἡ ἀρετή;	ékheis moi eipeîn, ô Sṓkrates, âra didaktòn hē aretḗ?	ékheis moi eipeîn, ô Sṓkrates, âra didaktòn hē aretḗ?
	τί τηνικάδε ἀφῖξαι, ὦ Κρίτων; ἢ οὐ πρῲ ἔτι ἐστίν;	tí tēnikáde aphîxai, ô Krítōn? ḕ ou prōì éti estín?	tí tēnikáde aphîxai, ô Krítōn? ḕ ou prōì éti estín?
	τούτων φωνήεντα μέν ἐστιν ἑπτά· α ε η ι ο υ ω.	toútōn phōnḗenta mén estin heptá; a e ē i o u ō.	toútōn phōnḗenta mén estin heptá; a e ē i o u ō.
	πήγ(νῡμῐ)	pḗg(nūmĭ)	pḗg(nūmĭ)
	ἄ(γ)γελος	á(n)gelos	á(n)gelos
	ἄγκυρ(ρ)α	ánkur(rh)a	ánkur(rh)a
HTML entities
	καλός καὶ ἀγαθός	kalós kaì agathós	kalós kaì agathós
	καλός καὶ ἀγαθός	kalós kaì agathós	kalós kaì agathós

local export = {}

local m_grc_utils = require("Module:grc-utilities")
local m_grc_utils_data = require("Module:grc-utilities/data")
local m_str_utils = require("Module:string utilities")

local tokenize = require("Module:grc-utilities").tokenize

local canonicalize = m_grc_utils.canonicalize
local concat = table.concat
local insert = table.insert
local split = m_str_utils.split
local u = m_str_utils.char
local ugsub = m_str_utils.gsub
local ulower = m_str_utils.lower
local umatch = mw.ustring.match
local uupper = m_str_utils.upper

-- Diacritics
local diacritic = m_grc_utils_data.diacritic
local diacritics = m_grc_utils_data.diacritics

-- Greek
local acute = diacritics.acute
local grave = diacritics.grave
local circumflex = diacritics.circum
local smooth = diacritics.smooth
local rough = diacritics.rough
local breve = diacritics.breve
local macron = diacritics.macron
local subscript = diacritics.subscript
local vowel = m_grc_utils_data.vowel

-- Latin
local hat = diacritics.Latin_circum

-- Pattern for a token that begins with α or υ and ends with the iota subscript;
-- export.tr adds a macron to the transliterated a/u of such tokens.
local au_subscript = "^[αυ].*" .. subscript .. "$"
-- The Greek question mark (code point 0x37E); export.tr replaces it with "?".
local question_mark = u(0x37E)
-- Letters before which γ is transliterated as <n> instead of <g> (see export.tr).
local velar = "[γκξχϙ]"

-- Letters whose transliteration gets a macron appended by translit_letter,
-- unless the letter's trailing diacritics contain a breve.
local long_vowels = { -- Macron will be added.
	["η"] = "e",
	["ω"] = "o",
}

-- Transliteration table: maps a single Greek letter or combining diacritic to
-- its Latin replacement. translit_letter uses it in two ways: as a direct
-- lookup for the base letter, and as the replacement table passed to gsub for
-- each character of the trailing diacritics. Anything without an entry is
-- left unchanged by both uses. η and ω are absent because they are handled by
-- long_vowels.
local tt = {
	-- Vowels
	["α"] = "a",
	["ε"] = "e",
	["ι"] = "i",
	["ο"] = "o",
	["υ"] = "u",

	-- Consonants
	["β"] = "b",
	["γ"] = "g",
	["δ"] = "d",
	["ζ"] = "z",
	["θ"] = "th",
	["κ"] = "k",
	["λ"] = "l",
	["μ"] = "m",
	["ν"] = "n",
	["ξ"] = "x",
	["π"] = "p",
	["ρ"] = "r",
	["σ"] = "s",
	["ς"] = "s",
	["τ"] = "t",
	["φ"] = "ph",
	["χ"] = "kh",
	["ψ"] = "ps",

	-- Other letters
	["ϛ"] = "st",
	["ϝ"] = "w",
	["ͱ"] = "h",
	["ϳ"] = "j",
	["ϙ"] = "q",
	["ϻ"] = "s",
	["ϸ"] = "š",
	["ͳ"] = "s",
	--["ͷ"] = "v", Differs by dialect.

	-- Diacritics
	-- unchanged: macron, diaeresis, grave, acute
	-- Breathings are deleted here; the "h" for a rough breathing is added
	-- separately in export.tr.
	[smooth] = "",
	[rough] = "",
	-- The Greek circumflex becomes the Latin circumflex.
	[circumflex] = hat,
	-- The iota subscript is written out as a full letter.
	[subscript] = "i",
}

-- Finds the next token after index `i` that contains no bracket character
-- (any of "(", ")", "[", "]", "{", "}").
-- Returns four values:
--   1. the index of that token (one past the end of `tokens` if none is left),
--   2. the token itself (nil if none is left),
--   3. the lowercased token (nil if none is left),
--   4. the concatenation of the bracket tokens that were skipped over.
local function get_next_token(tokens, i)
	local first_skipped = i + 1
	local pos = i
	local candidate
	repeat
		pos = pos + 1
		candidate = tokens[pos]
	until not (candidate and candidate:match("[()[%]{}]"))
	local lowered = candidate and ulower(candidate)
	local skipped = concat(tokens, nil, first_skipped, pos - 1)
	return pos, candidate, lowered, skipped
end

-- Transliterates one base letter together with the characters trailing it.
-- `letter` is the base character and `trail` the run of characters that
-- followed it (do_translit passes the "%W*" capture here).
-- Returns the Latin base followed by the converted trail.
local function translit_letter(letter, trail)
	local base
	local long_base = long_vowels[letter]
	if long_base then
		-- η/ω get a macron unless the trail explicitly marks them with a breve.
		if trail:find(breve) then
			base = long_base
		else
			base = long_base .. macron
		end
	else
		-- Letters without a table entry pass through unchanged.
		base = tt[letter] or letter
	end
	-- Convert the trail one UTF-8 character at a time (a lead byte plus its
	-- continuation bytes); characters missing from tt are kept as they are.
	local marks = trail:gsub(".[\128-\191]*", tt)
	return base .. marks
end

-- Transliterates a single token by handing every letter, together with the
-- non-alphanumeric characters that follow it, to translit_letter.
local function do_translit(token)
	-- Move the iota subscript in front of any accent marks, so that the
	-- accents end up on the "i" it is transliterated to.
	local accents = "([" .. acute .. grave .. circumflex .. "]+)"
	local reordered = ugsub(token, accents .. subscript, subscript .. "%1")
	return ugsub(reordered, "(.)(%W*)", translit_letter)
end

-- Given a run of characters `m`, strips every macron from it when the run also
-- contains the Latin circumflex; otherwise returns `m` untouched.
local function remove_macron_if_hat(m)
	if not m:find(hat) then
		return m
	end
	local stripped = m:gsub(macron, "")
	return stripped
end

-- Cleans up the transliteration of one token, applies capitalization, and
-- appends the result plus `suffix` to the `output` array.
--   token / lower_token: the original token and its lowercased form; they
--     differ when the token is capitalized.
--   next_token / next_token_lower: the same pair for the following token
--     (both nil at the end of the text).
--   suffix: text to append after the transliteration.
local function insert_translit(output, translit, token, lower_token, next_token, next_token_lower, suffix)
	-- Collapse repeated diacritics until none are left (this shouldn't
	-- really happen).
	local duplicate_pattern = "(" .. diacritic .. ")(%W-)%1"
	local replaced
	repeat
		translit, replaced = ugsub(translit, duplicate_pattern, "%1%2")
	until replaced == 0

	-- A vowel carrying a circumflex does not also get a macron.
	translit = ugsub(translit, "%W+", remove_macron_if_hat)

	local cased
	if token == lower_token then
		-- Not capitalized: keep as is.
		cased = translit
	elseif next_token == next_token_lower then
		-- Capitalized, but the following token is not: uppercase only the
		-- first character.
		cased = translit:gsub("^" .. ".[\128-\191]*", uupper)
	else
		-- The following token is capitalized as well: uppercase everything.
		cased = uupper(translit)
	end
	insert(output, cased .. suffix)
end

-- Transliterates `text` into Latin script.
--   text: the text to transliterate.
--   lang: language code; within this function it is only consulted to give
--     "xbc" its own transliteration of φ.
--   sc: script code; not used by this function.
-- Returns the transliteration as a single string. No path in this function
-- returns nil.
function export.tr(text, lang, sc)
	-- A lone rough-breathing sign is transliterated as "h".
	if text == "῾" then
		return "h"
	end
	--[[
		Replace semicolon or Greek question mark with regular question mark,
		except any that occur in HTML entities. Use split to separate out the
		chunks between any entities.
		NOTE(review): the loop below steps by 2, which assumes split returns
		the captured entities at the even indices and the text between them
		at the odd indices -- confirm against Module:string utilities.
	]]
	text = split(canonicalize(text), "(&#?%w+;)")
	for i = 1, #text, 2 do
		text[i] = text[i]:gsub(";", "?"):gsub(question_mark, "?")
	end
	text = concat(text)

	-- Handle the middle dot. It is equivalent to semicolon or colon, but semicolon is probably more common.
	-- This runs after the semicolon replacement above, so the ";" produced
	-- here is not turned into "?".
	text = text:gsub("·", ";")

	local tokens = tokenize(text)

	-- Now read the tokens. The loop always holds one token of look-ahead:
	-- next_* describe the token after the current one, and `suffix` is the
	-- bracket text lying between the current token and that next one.
	local next_i, next_token, next_token_lower, suffix = get_next_token(tokens, 0)
	-- Any brackets preceding the first token go straight to the output.
	local output = {suffix}
	while next_token do
		-- is_rough receives no value here and so starts out as nil.
		local i, token, lower_token, is_rough = next_i, next_token, next_token_lower
		local translit = do_translit(lower_token)
		next_i, next_token, next_token_lower, suffix = get_next_token(tokens, i)

		-- γ before a velar should be <n>
		if lower_token:find("γ") and next_token_lower and umatch(next_token_lower, velar) then
			translit = translit:gsub("g", "n")
		-- For lang "xbc", φ is <f> rather than <ph>.
		elseif lang == "xbc" and lower_token:find("φ") then
			translit = translit:gsub("ph", "f")
		-- A lowercase ρ carrying an explicit breathing is settled here, which
		-- keeps it out of the ρρ handling below.
		elseif token == "ρ"..rough then
			translit = "rh"
		elseif token == "ρ"..smooth then
			translit = "r"
		-- ρ after ρ should be <rh>
		elseif lower_token:find("ρ") then
			-- Keep adding ρs until they run out. Set is_rough, so that "h" will get appended.
			-- Each pass emits the current ρ and makes the following ρ the
			-- current token, so after the loop the current token is the last ρ
			-- of the run.
			while next_token_lower and next_token_lower:find("ρ") do
				insert_translit(output, translit, token, lower_token, next_token, next_token_lower, suffix)
				i, token, lower_token, is_rough = next_i, next_token, next_token_lower, true
				translit = do_translit(lower_token)
				next_i, next_token, next_token_lower, suffix = get_next_token(tokens, i)
			end
		-- add macron to α or υ that carries an iota subscript (e.g. ᾳ)
		elseif umatch(lower_token, au_subscript) then
			translit = translit:gsub("[au]", "%0" .. macron)
		end

		-- Rough breathing: "h" goes before a token that matches the vowel
		-- pattern; for any other token it goes after, unless the last
		-- alphanumeric character of the transliteration is already "h".
		if is_rough or lower_token:find(rough) then
			if umatch(lower_token, vowel) then
				translit = "h" .. translit
			else
				local final = umatch(translit, "(%w)%W*$")
				if final and final ~= "h" then
					translit = translit .. "h"
				end
			end
		end

		insert_translit(output, translit, token, lower_token, next_token, next_token_lower, suffix)
	end

	return concat(output)
end

return export