Module:Character list
此模塊使用Module:Unicode data的資料產生Appendix:Unicode及其子頁面。
local m_unicode = require("Module:Unicode data")
local m_uni_alias = require("Module:Unicode data/aliases")
local Array = require("Module:array")
local char_to_script = require("Module:scripts").charToScript
local concat = table.concat
local get_block_range = m_unicode.get_block_range
local get_category_long_name = m_unicode.get_category_long_name
local get_script_alias = m_unicode.get_script_alias
local html_create = mw.html.create
local insert = table.insert
local is_assigned = m_unicode.is_assigned
local list_to_text = mw.text.listToText
local lookup_category = m_unicode.lookup_category
local lookup_script = m_unicode.lookup_script
local max = math.max
local min = math.min
local new_title = mw.title.new
local nowiki = require("Module:string/nowiki")
local process_params = require("Module:parameters").process
local safe_require = require("Module:load").safe_require
local spell_number = require("Module:ConvertNumeric").spell_number
local u = require("Module:string utilities").char
local general_category_data = require("Module:Unicode data/category")
local general_category_aliases = general_category_data.long_names
local script_data = require("Module:Unicode data/scripts")
local content_lang = mw.language.getContentLanguage()
local export = {}
-- Version of the Unicode Standard named in the header text built by `export.char_list_header_t`.
local Unicode_version = "16.0"
-- Selects where the "edit" links in the image column headers of `export.char_list_t` point:
-- when true, to `Data:Unicode data/... .tab` pages on Commons; when false, to the local
-- `Module:Unicode data/images/...` and `Module:Unicode data/emoji images/...` pages.
local IMAGE_DATA_ON_COMMONS = true
-- Returns how many codepoints lie in the inclusive range `block_start`..`block_end`.
local function get_size(block_start, block_end)
	return block_end - block_start + 1
end
-- A block is "large" when it spans more than 0x1000 codepoints (1/16 of a plane).
local function is_large_block(block_start, block_end)
	local size = get_size(block_start, block_end)
	return size > 0x1000
end
-- Works out which Unicode block (and, for large blocks, which sub-range) a page describes,
-- so that template parameters can be derived from the page name alone.
--
-- Recognised page names (namespace 100 only):
--   "Appendix:Unicode/Block name"       - a block page
--   "Appendix:Unicode/Block name/X000"  - a range subpage of a large block, where "X000" is the
--                                         first codepoint of a sublist of (up to) 0x1000 codepoints
--
-- `title` is an optional page name; when it is absent, or `new_title` cannot parse it, the
-- current page is used instead.
--
-- Returns nil if the page does not match either format. Otherwise returns a table with:
--   `name`        - the block name
--   `block_start` - the first codepoint in the block
--   `block_end`   - the last codepoint in the block
-- and, for range subpages only:
--   `range_start` - the first codepoint in the range
--   `range_end`   - the last codepoint in the range
local function parse_page_name(title)
	local page = title and new_title(title)
	if not page then
		page = mw.title.getCurrentTitle()
	end
	if page.namespace ~= 100 then
		return
	end

	local parent_text = page.baseText
	-- Anything not directly under "Unicode" can only be a range subpage:
	-- Appendix:Unicode/Block name/0000.
	local is_range_subpage = parent_text ~= "Unicode"
	local block_name
	if is_range_subpage then
		local block_title = new_title(parent_text, 100)
		-- The block page itself must be a direct subpage of Appendix:Unicode.
		if block_title.baseText ~= "Unicode" then
			return
		end
		block_name = block_title.subpageText
	else
		-- Appendix:Unicode/Block name.
		block_name = page.subpageText
	end

	local block_start, block_end = get_block_range(block_name)
	if not block_start or not block_end then
		return
	end

	if not is_range_subpage then
		return {
			name = block_name,
			block_start = block_start,
			block_end = block_end
		}
	end

	local raw = page.subpageText
	local range_start = tonumber(raw, 16)
	if
		-- Must be a hex number.
		not range_start
		-- Must be written in canonical form (uppercase, zero-padded to at least 4 digits).
		or raw ~= ("%04X"):format(range_start)
		-- Must be within the block's range.
		or range_start < block_start
		or range_start > block_end
		-- Must be a large block; small blocks have no range subpages.
		or not is_large_block(block_start, block_end)
	then
		return
	end

	local offset = range_start % 0x1000
	-- Must be the start of the block or a X000 codepoint.
	if range_start ~= block_start and offset ~= 0 then
		return
	end

	return {
		name = block_name,
		block_start = block_start,
		block_end = block_end,
		range_start = range_start,
		-- The range runs to the end of its 0x1000-aligned chunk, clipped to the block.
		range_end = min(range_start - offset + 0xFFF, block_end)
	}
end
-- Gathers per-codepoint data for the inclusive range `block_start`..`block_end`.
-- If `filterer` is given, only codepoints for which it returns a truthy value are included.
-- Each entry in the returned array has the keys `aliases` (from the aliases data module),
-- `name`, `script`, `category`, `image`, `image_emoji` (from the matching `lookup_*` function
-- of the Unicode data module) and `cp` (the codepoint itself).
local function get_data_for_code_point_range(block_start, block_end, filterer)
	local lookup_keys = { "name", "script", "category", "image", "image_emoji" }
	local collected = {}
	for codepoint = block_start, block_end do
		if not filterer or filterer(codepoint) then
			local entry = { aliases = m_uni_alias[codepoint] }
			for i = 1, #lookup_keys do
				local key = lookup_keys[i]
				entry[key] = m_unicode["lookup_" .. key](codepoint)
			end
			entry.cp = codepoint
			collected[#collected + 1] = entry
		end
	end
	return collected
end
-- Template entry point: renders a wikitable of every Unicode block that lies entirely within
-- the codepoint range given by parent-frame parameters 1 (start) and 2 (end). Both parameters
-- are required numbers and may be given in hexadecimal. Each row shows the block's first and
-- last codepoint and a link to its Appendix:Unicode subpage.
function export.block_list_t(frame)
	local codepoint_param = {required = true, type = "number", allow_hex = true}
	local args = process_params(frame:getParent().args, {
		[1] = codepoint_param,
		[2] = codepoint_param,
	})
	local first_cp, last_cp = args[1], args[2]

	local row_format = "|-\n|U+%04X\n|U+%04X\n|[[Appendix:Unicode/%s|%s]]\n"
	local rows = {
		"{| class=\"wikitable\" style=\"width: 100%;\"\n! width=\"10%;\" | 始\n! width=\"10%;\" | 終\n ! 區段名稱\n",
	}
	for _, name, block_start, block_end in m_unicode.enum_blocks() do
		-- Blocks that only partially overlap the requested range are left out.
		local fully_inside = block_start >= first_cp and block_end <= last_cp
		if fully_inside then
			rows[#rows + 1] = row_format:format(block_start, block_end, name, name)
		end
	end
	rows[#rows + 1] = "|}"

	return concat(rows)
end
export.show_blocks = export.block_list_t
-- Checks whether every assigned codepoint from `i` to `j` (inclusive) yields the same result
-- from `lookup_func`. Returns that common value if so; returns nothing as soon as two assigned
-- codepoints disagree. Unassigned codepoints are skipped.
local function get_shared_value(i, j, lookup_func)
	local shared
	for codepoint = i, j do
		if is_assigned(codepoint) then
			local current = lookup_func(codepoint)
			if shared ~= nil and shared ~= current then
				return
			end
			if shared == nil then
				shared = current
			end
		end
	end
	return shared
end
-- Builds a piped wikilink to `target` whose label is `display` decorated with an arrow:
-- a leading "⟵" when `left_arrow` is truthy, otherwise a trailing "⟶".
local function navlink(target, display, left_arrow)
	local first, second
	if left_arrow then
		first, second = "⟵", display or "⟶"
	else
		first, second = display, "⟶"
	end
	return ("[[%s|%s %s]]"):format(target, first, second)
end
-- Returns a navigation link to the sibling block page `block_name`, or "" when there is no
-- such block. The relative target climbs one extra level ("../../Name") when `subpage` is
-- truthy, i.e. when the link is placed on a range subpage of a block page.
local function block_navlink(block_name, left_arrow, subpage)
	if not block_name then
		return ""
	end
	local extra_level = subpage and "../" or ""
	local target = ("%s../%s"):format(extra_level, block_name)
	return navlink(target, block_name, left_arrow)
end
-- Returns a navigation link to the sibling range subpage that starts at `range_start`,
-- labelled with the range it covers. Returns "" if either bound is missing (nil or false),
-- which is how callers signal that there is no previous/next range.
local function subpage_navlink(range_start, range_end, left_arrow)
	if not (range_start and range_end) then
		return ""
	end
	local target = ("../%04X"):format(range_start)
	local label = ("U+%04X 至 U+%04X"):format(range_start, range_end)
	return navlink(target, label, left_arrow)
end
-- Finalises the header: stringifies `text` and appends the block's categories (formatted with
-- `name` as the third argument to `format_categories`) and the module's TemplateStyles tag.
local function return_header(text, name)
	local rendered = tostring(text)
	local categories = require("Module:utilities").format_categories(
		{"Unicode區段", name .. "區段"},
		nil,
		name
	)
	local styles = require("Module:TemplateStyles")("Module:character list/styles.css")
	return rendered .. categories .. styles
end
-- Template entry point: builds the header for an Appendix:Unicode block page or range subpage.
--
-- Parent-frame parameters:
--   pagename - optional page name to use instead of the current page (demo use).
--
-- The header consists of a navigation table (previous/next block and, on range subpages,
-- previous/next range), a sentence describing the block, any General_Category / script value
-- shared by all assigned codepoints on the page and, on the main page of a large block, a list
-- of links to its range subpages. Categories and TemplateStyles are appended by `return_header`.
--
-- Raises an error if the page is not a valid subpage of Appendix:Unicode.
function export.char_list_header_t(frame)
	local pagename = process_params(frame:getParent().args, {
		["pagename"] = {demo = true}
	}).pagename
	local page_data = parse_page_name(pagename)
	if not page_data then
		error("Page is not a valid subpage of [[Appendix:Unicode]].")
	end
	local name = page_data.name

	-- Find the blocks immediately before and after this one in enumeration order.
	local prev_block, next_block, found
	for _, block_name in m_unicode.enum_blocks() do
		if block_name == name then
			found = true
		elseif found then
			next_block = block_name
			break
		else
			prev_block = block_name
		end
	end

	local block_start, block_end = page_data.block_start, page_data.block_end
	local block_size = get_size(block_start, block_end)
	local range_start, range_end = page_data.range_start, page_data.range_end
	-- "range" for a range subpage, "large block" for the main page of a large block,
	-- false for an ordinary (small) block page.
	local page_type = is_large_block(block_start, block_end) and (
		range_start and "range" or "large block"
	)
	local is_range = page_type == "range"
	-- The span of codepoints this particular page describes.
	local first_cp, last_cp = range_start or block_start, range_end or block_end

	local heading = html_create("td")
		:addClass("unicode-header-heading")
		:tag("h2")
			:wikitext(name)
			:done()
	if is_range then
		-- The heading cell spans both the block navigation row and the range navigation row.
		heading = heading:attr("rowspan", 2)
	end

	local tbl = html_create("table")
		:addClass("unicode-header-table")
		:tag("tr")
			:tag("td")
				:addClass("unicode-nav-button")
				:addClass("unicode-nav-button-left")
				:wikitext(block_navlink(prev_block, true, is_range))
				:done()
			:node(heading)
			:tag("td")
				:addClass("unicode-nav-button")
				:addClass("unicode-nav-button-right")
				:wikitext(block_navlink(next_block, false, is_range))
				:allDone()
	if is_range then
		-- A false bound makes subpage_navlink return "", so the first/last range gets an
		-- empty cell instead of a link.
		tbl = tbl:tag("tr")
			:tag("td")
				:addClass("unicode-nav-button")
				:addClass("unicode-nav-button-left")
				:wikitext(subpage_navlink(
					range_start ~= block_start and max(block_start, range_start - 0x1000),
					range_start - 1,
					true
				))
				:done()
			:tag("td")
				:addClass("unicode-nav-button")
				:addClass("unicode-nav-button-right")
				:wikitext(subpage_navlink(
					range_end + 1,
					range_end ~= block_end and min(block_end, range_end + 0x1000),
					false
				))
				:allDone()
	end

	local text = html_create():node(tbl)
	local div = text:tag("div")
		:wikitext("此附錄列出")
	if is_range then
		div:wikitext(("自 U+%04X 至 U+%04X 的碼位,其來源於"):format(range_start, range_end))
	else
		div:wikitext("的字元來源於")
	end
	div:wikitext(("[[w:Unicode|Unicode]]標準區段「[http://unicode.org/charts/PDF/U%04X.pdf %s]」(版本%s),覆蓋了自 U+%04X 至 U+%04X 的%s個碼位"):format(
		block_start, name, Unicode_version, block_start, block_end, content_lang:formatNum(block_size)
	))

	local general_category = get_shared_value(first_cp, last_cp, lookup_category)

	-- Surrogates: nothing further to list.
	if general_category == "Cs" then
		div:wikitext("。")
		local div2 = text:tag("div")
			:css("margin-top", "0.5em")
			:wikitext("該區塊不分配給任何字元,並保留供")
		if name:lower():match("私人") then
			div2:wikitext("使用[[w:UTF-16|UTF-16]]的個人應用程式")
		else
			div2:wikitext("[[w:UTF-16|UTF-16]]")
		end
		div2:wikitext("使用。")
		return return_header(text, name)
	end

	-- Count the assigned codepoints on this page (the range on a range subpage, else the block).
	local assigned = 0
	for cp = first_cp, last_cp do
		if is_assigned(cp) then
			assigned = assigned + 1
		end
	end
	if not is_range then
		div:wikitext((",當中%s已被分配"):format(
			assigned == block_size and "所有碼位都" or content_lang:formatNum(assigned) .. "個碼位"
		))
	end

	-- Private use.
	if general_category == "Co" then
		-- Supplementary Private Use Area-A & B. Kludgy, but fine unless more PUA areas are added.
		-- Only on non-range pages: there `assigned` covers the whole block and the "assigned"
		-- clause that this parenthetical qualifies has just been written. On a range subpage
		-- `assigned` counts only the range, so comparing it with `block_size` would always
		-- emit a dangling (and, for the BMP Private Use Area, incorrect) range.
		if not is_range and assigned ~= block_size then
			div:wikitext((" (U+%04X 至 U+%04X)"):format(block_start, block_end - 2))
		end
		div:wikitext("。")
		text = text:tag("div")
			:css("margin-top", "0.5em")
			:wikitext("此區段包含故意未指定解釋的碼位,僅供個人應用程式使用。")
			:done()
		return return_header(text, name)
	end
	div:wikitext("。")

	-- Each property is { Wikipedia article, property name, value code, value long form }.
	local properties = {}
	if general_category then
		insert(properties, {
			"Unicode 字元屬性",
			"General_Category",
			general_category,
			get_category_long_name(general_category)
		})
	end
	local script = get_shared_value(first_cp, last_cp, lookup_script)
	if script then
		local property = {
			"文字 (Unicode)",
			"文字",
			script,
		}
		local alias = get_script_alias(script)
		local script_obj = require("Module:scripts").getByCode(script)
		-- Link the alias to Wikipedia when the script is known to Module:scripts.
		insert(property, script_obj and ("[[w:%s|%s]]"):format(script_obj:getWikipediaArticle(), alias) or alias)
		insert(properties, property)
	end

	if #properties > 0 then
		local list = {}
		for i = 1, #properties do
			local property = properties[i]
			insert(list, tostring(html_create()
				:tag("code")
					:wikitext(("[[w:%s|%s]]"):format(property[1], property[2]))
					:done()
				:wikitext(" 值 ")
				:tag("code")
					:wikitext(property[3])
					:done()
				:wikitext("(")
				:tag("code")
					:wikitext(property[4])
					:done()
				:wikitext(")")
			))
		end
		text:tag("div")
			:css("margin-top", "0.5em")
			:wikitext(("此%s區段中所有%s字元都有%s。"):format(
				is_range and "部份" or "",
				-- Only say "assigned" when some codepoints on this page are unassigned.
				assigned == get_size(first_cp, last_cp) and "" or "已分配的",
				list_to_text(list)
			))
	end

	if page_type == "large block" then
		-- List one link per 0x1000-aligned chunk, clipped to the block's bounds.
		local list = html_create("ul")
		local r_start, n = block_start - block_start % 0x1000, 0
		while r_start <= block_end do
			n = n + 1
			local r_end = r_start + 0xFFF
			local actual_start, actual_end = max(block_start, r_start), min(block_end, r_end)
			list = list:tag("li")
				:wikitext(("[[/%04X|U+%04X (%s) 至 U+%04X (%s)]]"):format(
					actual_start, actual_start, u(actual_start), actual_end, u(actual_end))
				)
				:done()
			r_start = r_end + 1
		end
		text:tag("div")
			:css("margin-top", "0.5em")
			:wikitext(("由於區段過大,此列表被分為%s頁:"):format(spell_number(n)))
			:node(list)
			:done()
	end

	return return_header(text, name)
end
export.show_header = export.char_list_header_t
-- Renders the "character" cell for one codepoint: a sample of the character, linked to its
-- entry when the Unicode data module supplies an entry title. Non-printable codepoints get a
-- "(不可打印)" placeholder instead, linked to the character's page when that is a valid title.
local function present_codepoint(codepoint)
	if not m_unicode.is_printable(codepoint) then
		local character = u(codepoint)
		local text = "<small>(不可打印)</small>"
		if new_title(character) then
			return "[[" .. character .. "|" .. text .. "]]"
		end
		return text
	end

	local link_target = m_unicode.get_entry_title(codepoint)
	-- Combining characters are shown on a dotted-circle base.
	local display = ("%s&#%u;"):format(m_unicode.is_combining(codepoint) and "◌" or "", codepoint)
	if m_unicode.is_whitespace(codepoint) then
		-- Bracket whitespace so that its extent is visible.
		display = "]" .. display .. "["
	end

	local template = link_target
		and "[[:%s|<span class=\"character-sample %s\">%s</span>]]"
		or "<!-- %s --><span class=\"character-sample %s\">%s</span>"
	return template:format(link_target or "", char_to_script(codepoint), display)
end

-- Formats the name aliases of one codepoint as wikitext to append after its name.
-- `alias_list` is an array of { type, name } pairs (or nil, giving ""); the types handled are
-- "alternate", "control", "correction", "figment" and "abbreviation", emitted in that order.
local function format_alt_names(alias_list)
	if not alias_list then
		return ""
	end
	local aliases = {
		["correction"  ] = {},
		["control"     ] = {},
		["alternate"   ] = {},
		["figment"     ] = {},
		["abbreviation"] = {},
	}
	for _, info in ipairs(alias_list) do
		insert(aliases[info[1]], "<small>" .. info[2] .. "</small>")
	end

	local parts = {}
	for _, name in ipairs(aliases.alternate) do
		insert(parts, (" 又名 %s"):format(name))
	end
	if #aliases.control > 0 then
		insert(parts, ";控制字元名稱:" .. concat(aliases.control, " or "))
	end
	for _, name in ipairs(aliases.correction) do
		insert(parts, ("<br/>更正後名稱:%s"):format(name))
	end
	for _, name in ipairs(aliases.figment) do
		insert(parts, ("<br/>Figment name: %s"):format(name))
	end
	if #aliases.abbreviation > 0 then
		insert(parts, " (" .. concat(aliases.abbreviation, ", ") .. ")")
	end
	return concat(parts)
end

-- Returns one image column header line whose "編輯" link points at `edit_target`.
local function image_column_header(label, edit_target)
	return ("! width=\"5%%\" | %s<br><sup>[[%s|編輯]]</sup>\n"):format(label, edit_target)
end

-- Template entry point: renders a sortable wikitable of the assigned codepoints in a range.
--
-- Parameters (read from the parent frame unless the parent is the current page itself, in
-- which case the invoking frame's own arguments are used):
--   1, 2     - first and last codepoint of the range (numbers, hexadecimal allowed)
--   block    - name of a Unicode block; takes precedence over 1 and 2
--   pagename - page name to parse instead of the current page (demo use)
-- If neither `block` nor both of 1 and 2 are given, the range is derived from the page name.
--
-- Columns: codepoint, image (plus an emoji-style image column when any listed codepoint has an
-- emoji image), character, name. General category and script columns are added only when the
-- listed codepoints do not all share the same value.
--
-- Raises an error if no range can be determined.
function export.char_list_t(frame)
	local parent = frame:getParent()
	local num_param = {type = "number", allow_hex = true}
	local args = process_params((parent and parent:getTitle() ~= mw.title.getCurrentTitle().fullText and parent or frame).args, {
		[1] = num_param,
		[2] = num_param,
		["block"] = {convert = function(block_name, err)
			local block_start, block_end = get_block_range(block_name)
			if not (block_start and block_end) then
				err("Invalid Unicode block specified")
			end
			return {
				block_start = block_start,
				block_end = block_end
			}
		end},
		["pagename"] = {demo = true}
	})

	local block, range_start, range_end = args.block
	if block then
		range_start, range_end = block.block_start, block.block_end
	elseif args[1] and args[2] then
		range_start, range_end = args[1], args[2]
	else
		local page_data = parse_page_name(args.pagename)
		if not page_data then
			error("Must give a Unicode block or character range")
		end
		range_start = page_data.range_start or page_data.block_start
		range_end = page_data.range_end or page_data.block_end
	end

	local cps = get_data_for_code_point_range(range_start, range_end, is_assigned)

	-- Image data is split into pages of 0x1000 codepoints; the edit links target the page
	-- that contains the start of the range.
	local submodule = math.floor(range_start / 0x1000)
	local image_module = ("Module:Unicode data/images/%03X"):format(submodule)
	local emoji_image_module = ("Module:Unicode data/emoji images/%03X"):format(submodule)

	-- The emoji image column is shown only if the emoji image module loads and at least one
	-- listed codepoint has an emoji image.
	local emoji_image_exists = false
	if safe_require(emoji_image_module) then
		for _, data in ipairs(cps) do
			if data.image_emoji then
				emoji_image_exists = true
				break
			end
		end
	end

	local result = {}
	insert(result, "{| class=\"wikitable sortable\"\n! width=\"12%\" data-sort-type=\"number\" | 碼位\n")

	local edit_prefix, image_page, emoji_image_page
	if IMAGE_DATA_ON_COMMONS then
		edit_prefix = "commons:Special:EditPage/"
		image_page = ("Data:Unicode data/images/%03X.tab"):format(submodule)
		emoji_image_page = ("Data:Unicode data/emoji images/%03X.tab"):format(submodule)
	else
		edit_prefix = "Special:EditPage/"
		image_page, emoji_image_page = image_module, emoji_image_module
	end
	if emoji_image_exists then
		insert(result, image_column_header("文字風格的圖像", edit_prefix .. image_page))
		insert(result, image_column_header("表情符號風格的圖像", edit_prefix .. emoji_image_page))
	else
		insert(result, image_column_header("圖像", edit_prefix .. image_page))
	end
	insert(result, "! width=\"5%\" | 字元\n")

	local all_with_same_general_category = Array(cps)
		:all(function(data) return data.category == cps[1].category end)
	local all_with_same_script = Array(cps)
		:all(function(data) return data.script == cps[1].script end)
	if not all_with_same_general_category then
		insert(result, " ! [[:en:w:General Category|一般分類]]\n")
	end
	if not all_with_same_script then
		insert(result, " ! [[w:統一碼收錄的字符|文字]]\n")
	end
	insert(result, " ! 名稱\n")

	for _, data in ipairs(cps) do
		local cp = data.cp
		local current_image = data.image
			and ("[[File:%s|40x35px|class=skin-invert-image]]"):format(data.image)
			or ""
		insert(result, (
			" |- id=\"U-%04X\"\n" ..
			" | data-sort-value=\"%u\" | U+%04X <small>(%u)</small>\n" ..
			" | %s \n"
		):format(cp, cp, cp, cp, current_image))

		if emoji_image_exists then
			local current_image_emoji = data.image_emoji
				and ("[[File:%s|40x35px]]"):format(data.image_emoji)
				or ""
			insert(result, (" | %s \n"):format(current_image_emoji))
		end

		-- Only the character sample belongs in this cell; the category text is built below,
		-- and only when the category column is actually shown.
		insert(result, (" | %s \n"):format(present_codepoint(cp)))

		if not all_with_same_general_category then
			-- Parentheses drop gsub's second return value (the substitution count).
			local category_long_name = (general_category_aliases[data.category]:gsub("_", " "))
			insert(result, (" | %s<br />(%s) \n"):format(data.category, category_long_name))
		end
		if not all_with_same_script then
			insert(result, (" | %s<br />(%s) \n"):format(
				data.script,
				script_data.aliases[data.script]
			))
		end

		insert(result, (" | <small>%s</small>%s\n"):format(
			nowiki(data.name),
			format_alt_names(data.aliases)
		))
	end

	insert(result, " |}")
	insert(result, require("Module:TemplateStyles")("Template:character info/style.css"))
	return concat(result)
end
export.show = export.char_list_t
return export