Modulo:Unicode data/datasets

Dokumentado por ĉi tiu modulo povas esti kreata ĉe Modulo:Unicode data/datasets/dokumentado

local export = {}

--[==[

The sets are at [[:c:Category:Unicode Module Datasets]] such as [[:c:Data:Unicode data/images/01E.tab]] in the namespace "Data:".

Fetches a data set from Wikimedia Commons by the name 'dataset_name', parses it as a Unicode character key-value table, and returns that table.

The data set is assumed to have at least two fields: one whose 'name' contains 'key' and one whose 'name' contains 'value' (matched case-insensitively). Both the key and the value of each row are expected to be strings. The key string is parsed as a hexadecimal numeric literal (e.g. '0x1234') and converted into a number.

The table returned will then have numbers as the keys and strings as the values.

]==]

-- Fetch the data set `dataset_name` through mw.ext.data.get and convert it
-- into a table that maps character codes (numbers) to values.
--
-- @param dataset_name  passed unchanged to mw.ext.data.get
-- @return a table keyed by number, or nil when mw.ext.data.get returns a
--         falsy value
-- Raises an error when the schema has no field whose name contains "key" or
-- no field whose name contains "value", or when a row's key cannot be
-- converted to a number.
function export.dataset(dataset_name)
	local dataset = mw.ext.data.get(dataset_name)
	
	if not dataset then return nil end
	
	-- Check schema: locate the key and value columns by (lower-cased) name.
	-- If several fields match, the last matching one wins.
	local charcode_index = nil
	local value_index = nil
	for index, field in ipairs(dataset.schema.fields) do
		local field_name_lower = field.name:lower()
		if field_name_lower:find("key") then
			charcode_index = index
		elseif field_name_lower:find("value") then
			value_index = index
		end
	end
	
	if not charcode_index then error("Character code field not found in data schema.") end
	if not value_index then error("Value field not found in data schema.") end
	
	-- Extract values from dataset data.
	local result = {}
	for _, item in ipairs(dataset.data) do
		local raw_key = item[charcode_index]
		local charcode = tonumber(raw_key)
		-- A nil table index would raise an opaque "table index is nil";
		-- fail with a message that names the offending key instead.
		if not charcode then
			error("Invalid character code in data set: " .. tostring(raw_key))
		end
		result[charcode] = item[value_index]
	end
	
	return result
end

return export