Module:debug
Documentation for this module may be created at Module:debug/doc
local export = {}
local string_utilities_module = "Module:string utilities"
local table_module = "Module:table"
local byte = string.byte
local concat = table.concat
local escape -- defined below
local format = string.format
local gsub = string.gsub
local insert = table.insert
local match = string.match
local sub = string.sub
local toNFC = mw.ustring.toNFC
-- Lazy loader: the first call fetches `isArray` from the table module,
-- rebinds this local to it so later calls go straight to the real function,
-- and then forwards the current arguments.
local function is_array(...)
	local loaded = require(table_module).isArray
	is_array = loaded
	return loaded(...)
end
-- Lazy loader: the first call fetches `isutf8` from the string utilities
-- module, rebinds this local to it, and then forwards the current arguments.
local function isutf8(...)
	local loaded = require(string_utilities_module).isutf8
	isutf8 = loaded
	return loaded(...)
end
-- Lazy loader: the first call fetches `sortedPairs` from the table module,
-- rebinds this local to it, and then forwards the current arguments.
local function sorted_pairs(...)
	local loaded = require(table_module).sortedPairs
	sorted_pairs = loaded
	return loaded(...)
end
-- Lazy loader: the first call fetches `size` from the table module, rebinds
-- this local to it, and then forwards the current arguments.
local function table_size(...)
	local loaded = require(table_module).size
	table_size = loaded
	return loaded(...)
end
do
-- Map from a single byte to its backslash escape; built on first use.
local escapes
-- Builds the escape map, stores it in `escapes` and returns it. The function
-- clears its own binding afterwards, so it can only ever be called once;
-- callers are expected to test `escapes` first.
local function get_escapes()
	escapes = {
		["\a"] = [[\a]],
		["\b"] = [[\b]],
		["\t"] = [[\t]],
		["\n"] = [[\n]],
		["\v"] = [[\v]],
		["\f"] = [[\f]],
		["\r"] = [[\r]],
		["\""] = [[\"]],
		["'"] = [[\']],
		["\\"] = [[\\]],
	}
	get_escapes = nil
	return escapes
end
-- Escapes a single byte: uses the named escape from the `escapes` map when
-- there is one, otherwise a three-digit decimal escape such as \127.
local function escape_byte(ch)
	local named = (escapes or get_escapes())[ch]
	if named then
		return named
	end
	return format("\\%03d", byte(ch))
end
-- Escapes every byte of `ch` with escape_byte and returns only the
-- resulting string (the substitution count from gsub is discarded).
local function escape_bytes(ch)
	local escaped = gsub(ch, ".", escape_byte)
	return escaped
end
-- Escapes one match from export.escape: a leading byte followed by zero or
-- more bytes in the range \128-\191. Valid UTF-8 characters are returned
-- untouched; everything else (including surplus trailing bytes) is escaped
-- byte by byte.
local function escape_char(ch)
	local len = #ch
	if len == 1 then
		return escape_byte(ch)
	end
	local lead = byte(ch)
	-- Leading bytes below \128 are always escaped; \128 to \191 cannot lead a
	-- UTF-8 sequence; \192 and \193 only appear in overlong encodings; \245
	-- to \255 would encode codepoints above U+10FFFF.
	if lead < 194 or lead > 244 then
		return escape_bytes(ch)
	end
	-- U+0080 (\194\128) to U+009F (\194\159) are control characters and
	-- U+00A0 (\194\160) is the no-break space, so those are escaped as well.
	if lead == 194 and byte(ch, 2) < 161 then
		return escape_bytes(ch)
	end
	-- Every remaining 2-byte encoding (\194 to \223) is valid, so there is no
	-- need to call isutf8(); only surplus trailing bytes get escaped.
	if lead < 224 then
		if len == 2 then
			return ch
		end
		return sub(ch, 1, 2) .. escape_bytes(sub(ch, 3))
	end
	-- 3- and 4-byte encodings can still be invalid (overlong, or above
	-- U+10FFFF), so they are verified with isutf8().
	local expected = lead < 240 and 3 or 4
	if len < expected then
		-- Truncated sequence.
		return escape_bytes(ch)
	end
	local head = len == expected and ch or sub(ch, 1, expected)
	if not isutf8(head) then
		return escape_bytes(ch)
	end
	if len == expected then
		return ch
	end
	-- Valid character followed by surplus trailing bytes.
	return head .. escape_bytes(sub(ch, expected + 1))
end
-- Escapes the non-ASCII bytes of every part of `str` that toNFC() would
-- change, and returns the result. If `str` is already equal to its NFC form
-- it is returned unchanged.
--
-- The function walks `str` byte by byte, comparing each byte with the byte
-- at the corresponding position (`i + offset`) in the normalized string. On
-- a mismatch it finds the raw character sequence and the normalized sequence
-- it corresponds to, emits the raw sequence with its bytes >= 128 escaped,
-- and adjusts `offset` by the length difference so that the comparison can
-- resume after the sequence.
local function escape_non_NFC(str)
local normalized = toNFC(str)
if normalized == str then
return str
end
-- `i` is the current byte position in `str`, `start` is the beginning of
-- the not-yet-copied part of `str`, `offset` is added to `i` to get the
-- matching position in `normalized`, and `output` (initially nil) collects
-- the pieces of the result.
local str_len, i, start, offset, output = #str, 1, 1, 0
while i <= str_len do
local b = byte(str, i)
if b == byte(normalized, i + offset) then
i = i + 1
else
if output == nil then
output = {}
end
-- Backtrack to the start of the character.
while b >= 128 and b < 192 do
i = i - 1
b = byte(str, i)
end
-- Insert any intermediate characters up to this point.
if start ~= i then
insert(output, sub(str, start, i - 1))
end
-- Get the first character, then find the sequence of characters
-- which differs from the normalized string.
local seq = match(str, "^.[\128-\191]*", i)
-- Find the raw sequence and the normalized sequence by adding
-- a character at a time to the raw sequence, and checking if
-- it matches the current point in the normalized string.
-- This is necessary to ensure that the offset between the two
-- strings is correct, when comparing equivalent sections.
local seq_len, poss_seq, norm_seq = #seq, seq
while true do
if not norm_seq then
-- NOTE(review): the normalized candidate is used directly as a
-- Lua pattern, so any pattern-magic character in it would be
-- interpreted as such - confirm that cannot occur here.
norm_seq = match(normalized, "^" .. toNFC(poss_seq), i + offset)
-- Once a matching sequence has been found, check if it's
-- still possible to match the same normalized sequence with
-- a longer raw sequence, as form NFC will have taken the
-- longest sequence when normalizing the input.
elseif toNFC(poss_seq) ~= norm_seq then
break
end
-- The candidate is accepted as the raw sequence so far; try to extend
-- it by the next character, stopping at the end of the string.
seq, seq_len = poss_seq, #poss_seq
local nxt_ch = match(str, "^.[\128-\191]*", i + seq_len)
if nxt_ch == nil then
break
end
poss_seq = poss_seq .. nxt_ch
end
-- Modify the offset to account for the difference in length
-- between the two sequences. Usually, the NFC form will be
-- shorter, but in rare cases it is longer (e.g. U+0F73
-- normalizes to U+0F71 + U+0F72).
-- NOTE(review): if the loop above ran to the end of the string without
-- ever finding a match, norm_seq is still nil and `#norm_seq` raises -
-- presumably unreachable; confirm.
offset = offset + #norm_seq - seq_len
i = i + seq_len
start = i
-- Escape the non-ASCII portion of the sequence. This ensures
-- that escapes added by escape_char don't end up double-escaped
-- if they would otherwise be modified by form NFC; e.g. "\n" +
-- U+0303 ("\ñ") needs to avoid escaping the "n".
if seq ~= "" then
insert(output, (gsub(seq, "[\128-\255]", escape_byte)))
end
end
end
-- No differing sequence was recorded, so there is nothing to escape.
if output == nil then
return str
end
-- Append whatever follows the last escaped sequence.
insert(output, sub(str, start))
return concat(output)
end
-- Escapes control characters, backslash, single and double quotes, the
-- no-break space, bytes that aren't used in UTF-8, invalid UTF-8 character
-- sequences, and any bytes necessary to ensure that the output is in Unicode
-- form NFC. The last step exists because MediaWiki automatically converts
-- page content to form NFC: e.g. "e" + U+0301 ("é") results in "e\204\129",
-- because otherwise the sequence would be converted to "é" (U+00E9). This
-- ensures that results can be relied upon to be stable if saved as part of
-- page content.
function export.escape(str)
	-- First pass: per-character escaping. Second pass: NFC stabilisation.
	local escaped = gsub(str, "[%c\"'\\\128-\255][\128-\191]*", escape_char)
	return escape_non_NFC(escaped)
end
escape = export.escape
end
-- Converts a value to a string.
-- Strings are quoted and escaped, tables are expanded one entry per line
-- (keys visited via sorted_pairs with `tsort`), and anything else goes
-- through tostring(). `prefix` is the indentation already in effect and
-- defaults to "". Every line of a table starts with a space. Note that
-- `tsort` is applied to `value` itself only; nested tables are dumped
-- without it.
function export.dump(value, prefix, tsort)
	prefix = prefix or ""
	local kind = type(value)
	if kind == "string" then
		return '"' .. escape(value) .. '"'
	end
	if kind ~= "table" then
		return tostring(value)
	end
	local inner = prefix .. "\t"
	local lines = { " {" }
	for key, val in sorted_pairs(value, tsort) do
		local key_text = export.dump(key, inner)
		-- A nested table starts with a space, which is not wanted after "= ".
		local val_text = gsub(export.dump(val, inner), "^ ", "")
		lines[#lines + 1] = " " .. prefix .. "\t[" .. key_text .. "] = " .. val_text .. ","
	end
	lines[#lines + 1] = " " .. prefix .. "}"
	return concat(lines, "\n")
end
-- Dumps `value` and wraps the result with export.highlight.
-- `options.modified` selects export.modified_dump instead of export.dump;
-- `prefix` and `tsort` are passed through to the chosen dump function.
function export.highlight_dump(value, prefix, tsort, options)
	local dumper
	if options and options.modified then
		dumper = export.modified_dump
	else
		dumper = export.dump
	end
	local text = dumper(value, prefix, tsort)
	-- Remove spaces at beginnings of lines (which are simply to force a <pre></pre> tag).
	local stripped = gsub(text, "%f[^%z\n] ", "")
	return export.highlight(stripped)
end
-- Reports whether at least one value stored in `t` is itself a table.
local function containsTable(t)
	local found = false
	for _, item in pairs(t) do
		if type(item) == "table" then
			found = true
			break
		end
	end
	return found
end
-- Returns false as soon as a table value of `t` has a size (as measured by
-- table_size) other than `size`; non-table values are ignored. Returns true
-- otherwise, including when `t` holds no tables at all.
local function containsTablesWithSize(t, size)
	for _, item in pairs(t) do
		local wrong_size = type(item) == "table" and table_size(item) ~= size
		if wrong_size then
			return false
		end
	end
	return true
end
--[=[
	Convert a value to a string.
	Like dump above, with these differences:
	* a table is written in a compact form when it is an array that holds no
	  tables (all on one line), or when it holds tables and every one of
	  them has exactly three entries (one entry per line);
	* numbers greater than 46, and numeric values under numeric keys in the
	  compact form, are written as 0x-prefixed hexadecimal padded to five
	  digits.
	Strings are quoted and escaped exactly as in dump, so that quotes,
	backslashes and control characters cannot produce a malformed literal.
	`prefix` is the indentation already in effect (default ""); `tsort` is
	handed to sorted_pairs for `value` itself.
	Used by [[Module:User:Erutuon/script recognition]].
]=]
function export.modified_dump(value, prefix, tsort)
	local t = type(value)
	prefix = prefix or ""
	if t == "string" then
		return '"' .. escape(value) .. '"'
	elseif t == "table" then
		local str_table = {}
		-- Named differently from the containsTable function so that the
		-- function is not shadowed for the rest of this scope.
		local has_table = containsTable(value)
		local consecutive = is_array(value)
		if consecutive and not has_table or has_table and containsTablesWithSize(value, 3) then
			-- Compact form.
			insert(str_table, "{")
			for key, val in sorted_pairs(value, tsort) do
				if has_table then
					insert(str_table, "\n\t" .. prefix)
				else
					insert(str_table, " ")
				end
				-- Numeric keys are implicit; only string keys are written out.
				if type(key) == "string" then
					insert(str_table, "[" .. export.modified_dump(key) .. "] = ")
				end
				insert(str_table, type(key) == "number" and type(val) == "number" and format("0x%05X", val) or export.modified_dump(val))
				-- In a three-element array the comma after the last element
				-- is left out; everywhere else each entry gets one.
				if not (consecutive and #value == 3) or type(key) == "number" and value[key + 1] then
					insert(str_table, ",")
				end
			end
			if has_table then
				insert(str_table, "\n" .. prefix)
			else
				insert(str_table, " ")
			end
			insert(str_table, "}")
			return concat(str_table)
		end
		-- Expanded form, laid out like export.dump: one entry per line, each
		-- line starting with a space.
		insert(str_table, " {")
		for key, val in sorted_pairs(value, tsort) do
			insert(str_table, " " .. prefix .. "\t[" .. export.modified_dump(key, prefix .. "\t") .. "] = " .. gsub(export.modified_dump(val, prefix .. "\t"), "^ ", "") .. ",")
		end
		insert(str_table, " " .. prefix .. "}")
		return concat(str_table, "\n")
	elseif t == "number" and value > 46 then
		return format("0x%05X", value)
	else
		return tostring(value)
	end
end
export.track = require("Module:debug/track")
-- Trigger a script error from a template. The first argument of the frame
-- is the error message; a fixed fallback text is used when it is absent.
function export.error(frame)
	local message = frame.args[1]
	if not message then
		message = "(no message specified)"
	end
	error(message)
end
--[[
	Convenience function for generating syntaxhighlight tags.
	Display defaults to block and the language defaults to "lua".
	Options is a table. To display inline text with HTML highlighting:
		{ inline = true, lang = "html" }

	Two calling conventions are supported:
	* highlight(content, options) returns the highlighted content;
	* highlight(options), with a table as the only argument, returns a
	  function that takes the content and highlights it with those options.

	In both conventions `inline` is normalised to either true or nil, so a
	false value is never handed to extensionTag as an attribute.
]]
function export.highlight(content, options)
	-- Builds the attribute table for the tag from an optional options table.
	local function tag_args(opts)
		return {
			lang = opts and opts.lang or "lua",
			inline = opts and opts.inline and true or nil,
		}
	end
	if type(content) == "table" then
		-- The first argument is the options table; the attributes are built
		-- once and reused by every call of the returned function.
		local args = tag_args(content)
		return function(text)
			return mw.getCurrentFrame():extensionTag("syntaxhighlight", text, args)
		end
	end
	return mw.getCurrentFrame():extensionTag("syntaxhighlight", content, tag_args(options))
end
-- Records tracking entries for unrecognized template parameters and writes
-- a summary to the debug log.
-- `args` maps each unrecognized parameter name to its value; `template_name`
-- is used as the prefix of every tracking key and in the log message.
function export.track_unrecognized_args(args, template_name)
	local function track(code)
		export.track(template_name .. "/" .. code)
	end
	track("unrecognized arg")
	local arg_list = {}
	for arg, value in pairs(args) do
		track("unrecognized arg/" .. arg)
		-- tostring() keeps this diagnostic from raising on values that
		-- string.format's %s rejects under Lua 5.1 (booleans, tables, ...).
		insert(arg_list, format("|%s=%s", tostring(arg), tostring(value)))
	end
	-- Pluralise "parameter" when more than one argument was collected.
	mw.log(format("Unrecognized parameter%s in {{%s}}: %s.",
		arg_list[2] and "s" or "", template_name, concat(arg_list, ", ")
	))
end
do
local placeholder = "_message_"
-- A dummy function that throws an error with a placeholder message.
-- The error level is the frame's `level` argument (default 1) plus 6.
function export._placeholder_error(frame)
	local level = frame.args.level or 1
	error(placeholder, level + 6)
end
-- Throw an error via callParserFunction, which generates a real error with traceback, automatic categorization in [[CAT:E]] etc., but the error message is returned as a string. Then, replace the placeholder error message with `message`, which is preprocessed. This is necessary when preprocessing needs to be applied (e.g. when using <pre> tags), since otherwise strip markers and other half-processed text gets displayed instead.
-- `level` is forwarded to _placeholder_error as its `level` argument.
function export.formatted_error(message, level)
	local frame = mw.getCurrentFrame()
	local error_text = frame:callParserFunction("#invoke", {"debug", "_placeholder_error", level = level})
	local processed = frame:preprocess(message)
	-- Substitute through a function rather than a replacement string: in a
	-- replacement string "%" is an escape character, so a message containing
	-- "%" would be altered or rejected by gsub.
	return (gsub(error_text, placeholder, function()
		return processed
	end))
end
end
return export