Modulo:Unicode data/datasets
Dokumentado por ĉi tiu modulo povas esti kreata ĉe Modulo:Unicode data/datasets/dokumentado
local export = {}
--[==[
Fetches the tabular data set named `dataset_name` from Wikimedia Commons and converts it into a character-code lookup table.

The sets live in the "Data:" namespace and are listed at [[:c:Category:Unicode Module Datasets]]; an example is [[:c:Data:Unicode data/images/01E.tab]].

The schema of the data set must contain one field whose name contains "key" and another whose name contains "value" (both matched case-insensitively). Each row's key is converted with `tonumber`, so it is expected to be a numeric literal such as '0x1234'; the row's value is stored unchanged.

Returns a table that maps the numeric keys to the values, or {nil} if `mw.ext.data.get` yields no data set. Throws an error if either schema field is missing, or if the key of a row cannot be converted to a number.
]==]
function export.dataset(dataset_name)
	local dataset = mw.ext.data.get(dataset_name)
	if not dataset then return nil end

	-- Locate the key and value columns in the schema.
	local charcode_index, value_index
	for index, field in ipairs(dataset.schema.fields) do
		local field_name_lower = field.name:lower()
		-- Plain-text search: the needles are literal substrings, not patterns.
		if field_name_lower:find("key", 1, true) then
			charcode_index = index
		elseif field_name_lower:find("value", 1, true) then
			value_index = index
		end
	end
	if not charcode_index then error("Character code field not found in data schema.") end
	if not value_index then error("Value field not found in data schema.") end

	-- Build the lookup table from the rows of the data set.
	local result = {}
	for row, item in ipairs(dataset.data) do
		local raw_key = item[charcode_index]
		local charcode = tonumber(raw_key)
		if not charcode then
			-- Indexing a table with nil raises the opaque "table index is nil";
			-- report which row and which key are at fault instead.
			error(("Row %d of data set '%s' has a key that is not a number: %s"):format(
				row, tostring(dataset_name), tostring(raw_key)))
		end
		result[charcode] = item[value_index]
	end
	return result
end
return export