Module:grc-translit
- පහත දැක්වෙන උපදෙස්, Module:grc-translit/documentation හි පිහිටා ඇත. [සංස්කරණය] Categories were auto-generated by Module:module categorization. [edit]
- ප්රයෝජනවත් සබැඳි: උප පිටු ලැයිස්තුව • සබැඳි • transclusions • testcases • sandbox (වෙනස)
This module will transliterate පුරාතන ග්රීක භාෂාව text per WT:GRC TR. It is also used to transliterate Proto-Brythonic, Gaulish, Demotic, ග්රීක, Paeonian, Old Ossetic, Oscan, Sicel, Thracian, Dacian, Alanic, Old Median, Ancient Macedonian, සහ Phrygian.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{xlit}}.
Within a module, use Module:languages#Language:transliterate.
For testcases, see Module:grc-translit/testcases.
Functions
tr(text, lang, sc)
- Transliterates a given piece of `text` written in the script specified by the code `sc`, and language specified by the code `lang`.
- When the transliteration fails, returns `nil`.
10 of 45 tests failed. (refresh)
Text | Expected | Actual | |
---|---|---|---|
![]() | λόγος | lógos | lógos |
![]() | οἷαι | hoîai | hoîai |
velar | |||
![]() | ἄγγελος | ángelos | ángelos |
![]() | ἔγκειμαι | énkeimai | énkeimai |
![]() | σφίγξ | sphínx | sphínx |
![]() | τυγχάνω | tunkhánō | tunkhánō |
![]() | ἀγϙυλίων | anqulíōn | agqulíōn |
archaic letters | |||
![]() | ἄϛρον | ástron | áϛron |
![]() | ϝάναξ | wánax | wánax |
current problems | |||
![]() | Υἱός | '''Hu'''iós | '''U'''hiós |
u/y | |||
![]() | ταῦρος | taûros | taûros |
![]() | νηῦς | nēûs | nēûs |
![]() | σῦς | sûs | sûs |
![]() | ὗς | hûs | hûs |
![]() | γυῖον | guîon | guîon |
![]() | ἀναῡ̈τέω | anaṻtéō | anaṻtéō |
![]() | δαΐφρων | daḯphrōn | daḯphrōn |
![]() | πρηῠ́ς | prēŭ́s | prēús |
vowel length | |||
![]() | τῶν | tôn | tôn |
![]() | τοὶ | toì | toì |
![]() | τῷ | tôi | tôi |
![]() | τούτῳ | toútōi | toútōi |
![]() | σοφίᾳ | sophíāi | sophíāi |
![]() | ᾰ̓γᾰ́πη | ăgắpē | agápē |
![]() | μᾱ̆νός | mā̆nós | mānós |
h (rough breathing) | |||
![]() | ὁ | ho | ho |
![]() | οἱ | hoi | hoi |
![]() | εὕρισκε | heúriske | heúriske |
![]() | ὑϊκός | huïkós | huïkós |
![]() | πυρρός | purrhós | purrhós |
![]() | ῥέω | rhéō | rhéō |
![]() | σάἁμον | sáhamon | sáhamon |
capitals | |||
![]() | Ὀδυσσεύς | Odusseús | Odusseús |
![]() | Εἵλως | Heílōs | Heílōs |
![]() | ᾍδης | Hā́idēs | Hā́idēs |
![]() | ἡ Ἑλήνη | hē Helḗnē | hē Helḗnē |
![]() | ΙΧΘΥΣ | IKHTHUS | IKhThUS |
punctuation | |||
![]() | ἔχεις μοι εἰπεῖν, ὦ Σώκρατες, ἆρα διδακτὸν ἡ ἀρετή; | ékheis moi eipeîn, ô Sṓkrates, âra didaktòn hē aretḗ? | ékheis moi eipeîn, ô Sṓkrates, âra didaktòn hē aretḗ? |
![]() | τί τηνικάδε ἀφῖξαι, ὦ Κρίτων; ἢ οὐ πρῲ ἔτι ἐστίν; | tí tēnikáde aphîxai, ô Krítōn? ḕ ou prṑi éti estín? | tí tēnikáde aphîxai, ô Krítōn? ḕ ou prṑi éti estín? |
![]() | τούτων φωνήεντα μέν ἐστιν ἑπτά· α ε η ι ο υ ω. | toútōn phōnḗenta mén estin heptá; a e ē i o u ō. | toútōn phōnḗenta mén estin heptá; a e ē i o u ō. |
![]() | πήγ(νῡμῐ) | pḗg(nūmĭ) | pḗg(nūmi) |
![]() | ἄ(γ)γελος | á(n)gelos | á(g)gelos |
![]() | ἄγκυρ(ρ)α | ánkur(rh)a | ánkur(r)a |
HTML entities | |||
![]() | καλός καὶ ἀγαθός | kalós kaì agathós | kalós kaì agathós |
![]() | καλός καὶ ἀγαθός | kalós kaì agathós | kalós kaì agathós |
-- Module table returned to callers; export.tr is attached to it below.
local export = {}
local m_data = require('Module:grc-utilities/data')
local tokenize = require('Module:grc-utilities').tokenize
local ufind = mw.ustring.find
local ugsub = mw.ustring.gsub
local U = require("Module:string/char")
local ulower = mw.ustring.lower
local uupper = mw.ustring.upper
-- Byte-level Lua pattern for one UTF-8 encoded character: a byte outside the
-- continuation range \128-\191, followed by any number of continuation bytes.
local UTF8char = '[^\128-\191][\128-\191]*'
-- Diacritics
local diacritics = m_data.named
-- Greek
local acute = diacritics.acute
local grave = diacritics.grave
local circumflex = diacritics.circum
local diaeresis = diacritics.diaeresis
local smooth = diacritics.smooth
local rough = diacritics.rough
local macron = diacritics.macron
local breve = diacritics.breve
local subscript = diacritics.subscript
-- Latin
local hat = diacritics.Latin_circum
-- Pattern: a macron, an optional diaeresis, then the Latin circumflex.
-- export.tr uses it to find transliterations whose macron must be dropped.
local macron_diaeresis = macron .. diaeresis .. "?" .. hat
-- Pattern: a token that starts with alpha and ends with the subscript diacritic.
local a_subscript = '^[αΑ].*' .. subscript .. '$'
-- Letters before which γ is transliterated as <n> (searched with a plain find).
-- Koppa (ϙ) is included so that e.g. ἀγϙυλίων gives "anqulíōn", as the
-- module's testcases expect.
local velar = 'κγχξϙ'
-- Transliteration table: maps one lowercase Greek character (or one combining
-- diacritic) to its Latin replacement. export.tr applies it with
-- gsub(UTF8char, tt), so any character without an entry is left unchanged.
local tt = {
	-- Vowels
	["α"] = "a",
	["ε"] = "e",
	["η"] = "e" .. macron,
	["ι"] = "i",
	["ο"] = "o",
	["υ"] = "u",
	["ω"] = "o" .. macron,
	-- Consonants
	["β"] = "b",
	["γ"] = "g",
	["δ"] = "d",
	["ζ"] = "z",
	["θ"] = "th",
	["κ"] = "k",
	["λ"] = "l",
	["μ"] = "m",
	["ν"] = "n",
	["ξ"] = "x",
	["π"] = "p",
	["ρ"] = "r",
	["σ"] = "s",
	["ς"] = "s",
	["τ"] = "t",
	["φ"] = "ph",
	["χ"] = "kh",
	["ψ"] = "ps",
	-- Archaic letters
	["ϝ"] = "w",
	["ϻ"] = "ś",
	["ϙ"] = "q",
	["ϡ"] = "š",
	["ͷ"] = "v",
	-- Stigma: without this entry the letter passed through untransliterated
	-- (ἄϛρον gave "áϛron" where the testcases expect "ástron").
	["ϛ"] = "st",
	-- Incorrect characters: see [[Wiktionary:About Ancient Greek#Miscellaneous]].
	-- These are tracked by [[Module:script utilities]].
	["ϐ"] = "b",
	["ϑ"] = "th",
	["ϰ"] = "k",
	["ϱ"] = "r",
	["ϲ"] = "s",
	["ϕ"] = "ph",
	-- Diacritics
	-- unchanged: macron, diaeresis, grave, acute
	-- Breve and both breathings are deleted here; export.tr adds the <h> for
	-- the rough breathing separately.
	[breve] = '',
	[smooth] = '',
	[rough] = '',
	[circumflex] = hat,
	[subscript] = 'i',
}
-- Returns the nearest token to position `i` in direction `step` (+1 for the
-- following token, -1 for the preceding one), stepping over "(" and ")" so
-- that an optional letter written in parentheses still counts as a neighbour.
-- NOTE(review): assumes the tokenizer emits each parenthesis as its own
-- token; confirm against Module:grc-utilities.
local function neighbouring_token(tokens, i, step)
	local j = i + step
	while tokens[j] == '(' or tokens[j] == ')' do
		j = j + step
	end
	return tokens[j]
end

--[[
	Transliterates Greek-script text into Latin script.

	text: the string to transliterate.
	lang, sc: language and script codes; accepted for the common
		transliteration-module interface but not read by this function.

	Returns the transliteration as a string.
]]
function export.tr(text, lang, sc)
	-- A lone rough-breathing sign stands for the sound <h> itself.
	if text == '῾' then
		return 'h'
	end

	--[[
		Replace semicolon or Greek question mark with regular question mark,
		except after an ASCII alphanumeric character (to avoid converting
		semicolons in HTML entities).
	]]
	text = ugsub(text, "([^A-Za-z0-9])[;" .. U(0x37E) .. "]", "%1?")

	-- Handle the middle dot. It is equivalent to semicolon or colon, but semicolon is probably more common.
	text = text:gsub("·", ";")

	local tokens = tokenize(text)

	-- Read the tokens in sequence order. ipairs guarantees 1..n traversal;
	-- pairs leaves the order unspecified, and the output order matters here.
	local output = {}
	for i, token in ipairs(tokens) do
		-- Convert token to lowercase and substitute each character
		-- for its transliteration
		local translit = ulower(token):gsub(UTF8char, tt)

		-- Neighbours are looked up past parentheses so that ἄ(γ)γελος and
		-- ἄγκυρ(ρ)α are treated like ἄγγελος and ἄγκυρρα.
		local next_token = neighbouring_token(tokens, i, 1)

		if token == 'γ' and next_token and velar:find(next_token, 1, true) then
			-- γ before a velar should be <n>
			translit = 'n'
		elseif token == 'ρ' and neighbouring_token(tokens, i, -1) == 'ρ' then
			-- ρ after ρ should be <rh>
			translit = 'rh'
		elseif ufind(token, a_subscript) then
			-- add macron to ᾳ
			translit = ugsub(translit, '([aA])', '%1' .. macron)
		end

		-- The rough breathing was deleted by the table lookup; re-add it as
		-- <h>, after rho but before a vowel. Plain find: `rough` is a literal.
		if token:find(rough, 1, true) then
			if ufind(token, '^[Ρρ]') then
				translit = translit .. 'h'
			else -- vowel
				translit = 'h' .. translit
			end
		end

		-- Remove macron from a vowel that has a circumflex.
		if ufind(translit, macron_diaeresis) then
			translit = translit:gsub(macron, '')
		end

		-- Capitalize first character of transliteration.
		if token ~= ulower(token) then
			translit = translit:gsub("^" .. UTF8char, uupper)
		end

		table.insert(output, translit)
	end

	return table.concat(output)
end

return export
ප්රවර්ගය:Alanic මොඩියුල
ප්රවර්ගය:Ancient Macedonian මොඩියුල
ප්රවර්ගය:Dacian මොඩියුල
ප්රවර්ගය:Demotic මොඩියුල
ප්රවර්ගය:Failing testcase modules
ප්රවර්ගය:Gaulish මොඩියුල
ප්රවර්ගය:Old Median මොඩියුල
ප්රවර්ගය:Old Ossetic මොඩියුල
ප්රවර්ගය:Oscan මොඩියුල
ප්රවර්ගය:Paeonian මොඩියුල
ප්රවර්ගය:Phrygian මොඩියුල
ප්රවර්ගය:Proto-Brythonic මොඩියුල
ප්රවර්ගය:Sicel මොඩියුල
ප්රවර්ගය:Thracian මොඩියුල
ප්රවර්ගය:අක්ෂර පරිවර්තන මොඩියුල
ප්රවර්ගය:ග්රීක මොඩියුල
ප්රවර්ගය:පුරාතන ග්රීක මොඩියුල
ප්රවර්ගය:භාෂා 15 ක් මගින් භාවිතා වන අක්ෂර පරිවර්තන මොඩියුල