-- Table that is populated below and returned at the end of the module.
local data = {}
-- Local aliases for the table-library functions used when building strings.
local concat = table.concat
local insert = table.insert
-- NOTE(review): U is presumably a code point -> string converter; confirm
-- against Module:string/char. Every call below passes one code point.
local U = require("Module:string/char")
-- Length marks. The spacing/modifier forms are mapped onto the combining
-- macron and breve in data.diacritical_conversions.
local macron = U(0x304)
local spacing_macron = U(0xAF)
local modifier_macron = U(0x2C9)
local breve = U(0x306)
local spacing_breve = U(0x2D8)
-- Breathings and diaeresis (these three share a group in data.diacritic_groups).
local rough = U(0x314)
local smooth = U(0x313)
local diaeresis = U(0x308)
-- Accents. `circum` is the Greek circumflex (perispomeni); `Latin_circum` is
-- converted to it in data.diacritical_conversions.
local acute = U(0x301)
local grave = U(0x300)
local circum = U(0x342)
local Latin_circum = U(0x302)
local coronis = U(0x343)
local subscript = U(0x345)
-- Only used when building data.word_characters.
local undertie = U(0x35C) -- actually "combining double breve below"
-- Named diacritics: each key is the name of the local constant that holds
-- the corresponding character.
data.diacritics = {
	macron = macron,
	spacing_macron = spacing_macron,
	modifier_macron = modifier_macron,
	breve = breve,
	spacing_breve = spacing_breve,
	rough = rough,
	smooth = smooth,
	diaeresis = diaeresis,
	acute = acute,
	grave = grave,
	circum = circum,
	Latin_circum = Latin_circum,
	coronis = coronis,
	subscript = subscript,
}
-- Build two strings from the named diacritics: one containing every
-- diacritic, and one containing only the combining ones (i.e. everything
-- except the spacing/modifier macron and the spacing breve).
--
-- pairs() visits keys in an unspecified order, so the names are collected and
-- sorted first. The resulting strings hold the same set of characters as
-- before, but now in a reproducible order.
local spacing_forms = {
	[spacing_macron] = true,
	[modifier_macron] = true,
	[spacing_breve] = true,
}
local diacritic_names = {}
for name in pairs(data.diacritics) do
	insert(diacritic_names, name)
end
table.sort(diacritic_names)

local diacritics_all, diacritics_combining = {}, {}
for _, name in ipairs(diacritic_names) do
	local diacritic = data.diacritics[name]
	insert(diacritics_all, diacritic)
	if not spacing_forms[diacritic] then
		insert(diacritics_combining, diacritic)
	end
end
diacritics_all = concat(diacritics_all)
diacritics_combining = concat(diacritics_combining)

-- Stored after the loop, so "all" and "combining" are not part of the
-- strings themselves.
data.diacritics.all = diacritics_all
data.diacritics.combining = diacritics_combining

-- data.named is an alias of the same table as data.diacritics.
data.named = data.diacritics
-- Character-class patterns matching one diacritic.
data.diacritic = "[" .. diacritics_all .. "]"
data.combining_diacritic = "[" .. diacritics_combining .. "]"
data.all = data.diacritic
-- Diacritic groups, indexed 1-4: three character-class patterns followed by
-- the bare subscript character. "accents" is a second name for group 3.
local diacritic_groups = {
	"[" .. macron .. breve .. "]",
	"[" .. diaeresis .. smooth .. rough .. "]",
	"[" .. acute .. grave .. circum .. "]",
	subscript,
}
diacritic_groups.accents = diacritic_groups[3]
-- Both names refer to the same table.
data.diacritic_groups = diacritic_groups
data.groups = diacritic_groups
-- Patterns for length marks. "optional" matches an optional macron followed
-- by an optional breve; "mandatory" is the same pattern preceded by a
-- frontier (%f) on the set of those two characters.
local optional_length = macron .. "?" .. breve .. "?"
data.length = {
	optional = optional_length,
	mandatory = "%f[" .. macron .. breve .. "]" .. optional_length,
}
-- Rank assigned to each diacritic; diacritics listed together share a rank.
local ranked_diacritics = {
	{ macron },
	{ breve },
	{ rough, smooth, diaeresis },
	{ acute, grave, circum },
	{ subscript },
}
local diacritic_order = {}
for rank, members in ipairs(ranked_diacritics) do
	for _, diacritic in ipairs(members) do
		diacritic_order[diacritic] = rank
	end
end
data.diacritic_order = diacritic_order
-- Maps spacing, modifier-letter and look-alike diacritic characters to the
-- combining diacritics used throughout this module.
data["diacritical_conversions"] = {
	-- Convert spacing to combining diacritics
	[spacing_macron] = macron, -- macron
	[modifier_macron] = macron,
	[spacing_breve] = breve, -- breve
	["῾"] = rough, -- rough breathing, modifier letter reversed comma
	["ʽ"] = rough,
	["᾿"] = smooth, -- smooth breathing, modifier letter apostrophe, coronis, combining coronis
	["ʼ"] = smooth,
	-- The two coronis forms named in the comment above were not mapped
	-- before, so they were left unconverted.
	[U(0x1FBD)] = smooth, -- spacing coronis (U+1FBD)
	[coronis] = smooth, -- combining coronis (U+0343)
	["´"] = acute, -- acute
	["`"] = grave, -- grave
	["῀"] = circum, -- Greek circumflex (perispomeni), circumflex, combining circumflex
	["ˆ"] = circum,
	[Latin_circum] = circum,
	["¨"] = diaeresis,
}
-- Canonical letter forms: each key is a variant character (or a character
-- plus combining grave) and each value is the form it is replaced with.
data.canonical = {
	-- Variants that map to a single ordinary letter.
	["ϴ"] = "Θ",
	["ϑ"] = "θ",
	["ϐ"] = "β",
	["ϵ"] = "ε",
	["ϰ"] = "κ",
	["ϖ"] = "π",
	["ϱ"] = "ρ",
	["ϕ"] = "φ",
	["ϒ"] = "Υ",
	["ϓ"] = "Ύ",
	["ϔ"] = "Ϋ",
	-- Sigma: both the final form and the lunate small form map to σ.
	["Ϲ"] = "Σ",
	["ϲ"] = "σ",
	["ς"] = "σ",
	-- Variants that map to another non-basic letter.
	["Ϟ"] = "Ϙ",
	["ϟ"] = "ϙ",
	["Ϡ"] = "Ͳ",
	["ϡ"] = "ͳ",
	-- Single symbols that expand to a three-letter string.
	["Ϗ"] = "Καί",
	["ϗ"] = "καί",
	["Ϗ̀"] = "Καὶ",
	["ϗ̀"] = "καὶ",
}
-- Letter inventories as plain strings, plus character-class patterns that
-- match one letter from each inventory.
local consonant_letters = "ΒβΓγΔδϜϝͶͷϚϛΖζͰͱΘθͿϳΚκΛλΜμΝνΞξΠπϺϻϘϙΡρΣσςΤτΦφΧχΨψͲͳϷϸ"
local vowel_letters = "ΑαΕεΗηΙιΟοΥυΩω"

data.consonants = consonant_letters
data.consonant = "[" .. consonant_letters .. "]"
data.vowels = vowel_letters
data.vowel = "[" .. vowel_letters .. "]"
-- Characters allowed inside a word: a literal asterisk, the characters
-- reported for the "Polyt" script, the combining diacritics and the undertie.
local polytonic_characters = require("Module:scripts").getByCode("Polyt"):getCharacters()
local word_characters = "%*" .. polytonic_characters .. diacritics_combining .. undertie

data.word_characters = word_characters
-- Character-class pattern matching one word character.
data.word_character = "[" .. word_characters .. "]"

return data