Module:debug

Documentation for this module may be created at Module:debug/doc

local export = {}

local string_utilities_module = "Module:string utilities"
local table_module = "Module:table"

local byte = string.byte
local concat = table.concat
local escape -- defined below
local format = string.format
local gsub = string.gsub
local insert = table.insert
local match = string.match
local sub = string.sub
local toNFC = mw.ustring.toNFC

-- Lazy loader for `isArray` from the table module. The first call requires
-- the module and rebinds this local to the real function, so later calls go
-- straight to it without another require().
local function is_array(...)
	local impl = require(table_module).isArray
	is_array = impl
	return impl(...)
end

-- Lazy loader for `isutf8` from the string utilities module. The first call
-- requires the module and rebinds this local to the real function.
local function isutf8(...)
	local impl = require(string_utilities_module).isutf8
	isutf8 = impl
	return impl(...)
end

-- Lazy loader for `sortedPairs` from the table module. The first call
-- requires the module and rebinds this local to the real function.
local function sorted_pairs(...)
	local impl = require(table_module).sortedPairs
	sorted_pairs = impl
	return impl(...)
end

-- Lazy loader for `size` from the table module. The first call requires the
-- module and rebinds this local to the real function.
local function table_size(...)
	local impl = require(table_module).size
	table_size = impl
	return impl(...)
end

do
	-- Map from a single character to its backslash escape sequence. Stays nil
	-- until get_escapes() has been called once.
	local escapes
	-- Builds the map on first use and then sets itself to nil, so callers must
	-- test `escapes` before calling it.
	local function get_escapes()
		escapes = {
			["\a"] = "\\a",
			["\b"] = "\\b",
			["\t"] = "\\t",
			["\n"] = "\\n",
			["\v"] = "\\v",
			["\f"] = "\\f",
			["\r"] = "\\r",
			["\""] = "\\\"",
			["'"] = "\\'",
			["\\"] = "\\\\",
		}
		get_escapes = nil
		return escapes
	end

	-- Escapes a single byte: uses the named escape (e.g. \n) when one exists
	-- in the lookup table, otherwise a three-digit decimal escape (\ddd).
	local function escape_byte(ch)
		local named = (escapes or get_escapes())[ch]
		if named then
			return named
		end
		return format("\\%03d", byte(ch))
	end
	
	-- Escapes every byte of `ch` individually with escape_byte.
	local function escape_bytes(ch)
		-- Assign to a local so that gsub's second result (the substitution
		-- count) is discarded.
		local escaped = gsub(ch, ".", escape_byte)
		return escaped
	end

	-- Decides how to escape one match of the pattern used by export.escape:
	-- a leading byte followed by zero or more continuation bytes
	-- (\128-\191). Valid, printable UTF-8 characters are returned unchanged;
	-- everything else is escaped byte by byte.
	local function escape_char(ch)
		local len = #ch
		if len == 1 then
			return escape_byte(ch)
		end
		local lead = byte(ch)
		-- Leading bytes below \128 are all to be escaped, \128 to \191 can't
		-- be leading bytes in UTF-8, \192 and \193 could only occur in overlong
		-- encodings, U+0080 (\194\128) to U+009F (\194\159) are control
		-- characters, U+00A0 (\194\160) is the no-break space, and \245 to
		-- \255 could only encode codepoints above U+10FFFF.
		if lead < 194 or lead > 244 or (lead == 194 and byte(ch, 2) < 161) then
			return escape_bytes(ch)
		end
		-- 2-byte encodings starting \194 to \223 are all valid, so they are
		-- not checked with isutf8(). Surplus trailing bytes are escaped.
		if lead < 224 then
			if len == 2 then
				return ch
			end
			return sub(ch, 1, 2) .. escape_bytes(sub(ch, 3))
		end
		-- 3- and 4-byte encodings are checked with isutf8(), since they may
		-- be overlong or above U+10FFFF.
		local expected = lead < 240 and 3 or 4
		if len < expected then
			-- Truncated sequence.
			return escape_bytes(ch)
		end
		local head = len == expected and ch or sub(ch, 1, expected)
		if not isutf8(head) then
			return escape_bytes(ch)
		end
		if len == expected then
			return ch
		end
		-- Valid character followed by stray continuation bytes.
		return head .. escape_bytes(sub(ch, expected + 1))
	end
	
	-- Returns `str` with the non-ASCII bytes of any character sequence that
	-- toNFC() would alter replaced by escape_byte() escapes. If `str` is
	-- already equal to its NFC form, it is returned unchanged.
	--
	-- The raw string and its normalized form are walked in parallel, byte by
	-- byte; `offset` tracks how far the two positions have drifted apart
	-- because of earlier sequences whose normalized length differs.
	local function escape_non_NFC(str)
		local normalized = toNFC(str)
		if normalized == str then
			return str
		end
		-- `i` is the current position in `str`, `start` is the beginning of the
		-- pending unmodified run, and `output` stays nil until the first
		-- mismatch is found.
		local str_len, i, start, offset, output = #str, 1, 1, 0
		while i <= str_len do
			local b = byte(str, i)
			if b == byte(normalized, i + offset) then
				i = i + 1
			else
				if output == nil then
					output = {}
				end
				-- Backtrack to the start of the character.
				while b >= 128 and b < 192 do
					i = i - 1
					b = byte(str, i)
				end
				-- Insert any intermediate characters up to this point.
				if start ~= i then
					insert(output, sub(str, start, i - 1))
				end
				-- Get the first character, then find the sequence of characters
				-- which differs from the normalized string.
				local seq = match(str, "^.[\128-\191]*", i)
				-- Find the raw sequence and the normalized sequence by adding
				-- a character at a time to the raw sequence, and checking if
				-- it matches the current point in the normalized string.
				-- This is necessary to ensure that the offset between the two
				-- strings is correct, when comparing equivalent sections.
				local seq_len, poss_seq, norm_seq = #seq, seq
				while true do
					if not norm_seq then
						-- NOTE(review): toNFC(poss_seq) is used as a Lua
						-- pattern here, so any pattern-magic character in it
						-- would be interpreted as such; confirm that this
						-- cannot occur for the sequences reaching this point.
						norm_seq = match(normalized, "^" .. toNFC(poss_seq), i + offset)
					-- Once a matching sequence has been found, check if it's
					-- still possible to match the same normalized sequence with
					-- a longer raw sequence, as form NFC will have taken the
					-- longest sequence when normalizing the input.
					elseif toNFC(poss_seq) ~= norm_seq then
						break
					end
					seq, seq_len = poss_seq, #poss_seq
					local nxt_ch = match(str, "^.[\128-\191]*", i + seq_len)
					if nxt_ch == nil then
						break
					end
					poss_seq = poss_seq .. nxt_ch
				end
				-- Modify the offset to account for the difference in length
				-- between the two sequences. Usually, the NFC form will be
				-- shorter, but in rare cases it is longer (e.g. U+0F73
				-- normalizes to U+0F71 + U+0F72).
				-- NOTE(review): if the loop above ends because the string ran
				-- out before any match was found, norm_seq is still nil and
				-- `#norm_seq` raises; presumably unreachable, but confirm.
				offset = offset + #norm_seq - seq_len
				i = i + seq_len
				start = i
				-- Escape the non-ASCII portion of the sequence. This ensures
				-- that escapes added by escape_char don't end up double-escaped
				-- if they would otherwise be modified by form NFC; e.g. "\n" +
				-- U+0303 ("\ñ") needs to avoid escaping the "n".
				if seq ~= "" then
					insert(output, (gsub(seq, "[\128-\255]", escape_byte)))
				end
			end
		end
		if output == nil then
			return str
		end
		-- Append whatever follows the last escaped sequence.
		insert(output, sub(str, start))
		return concat(output)
	end

	-- Escapes control characters, backslash, single and double quotes, the
	-- no-break space, bytes that aren't used in UTF-8, invalid UTF-8 character
	-- sequences, and any bytes necessary to ensure that the output is Unicode
	-- form NFC. The last step exists because MediaWiki automatically converts
	-- page content to form NFC; e.g. "e" + U+0301 ("é") results in
	-- "e\204\129", because otherwise the sequence would be converted to "é"
	-- (U+00E9). This ensures that results can be relied upon to be stable if
	-- saved as part of page content.
	function export.escape(str)
		-- First pass: escape individual characters. Assigning to a local drops
		-- gsub's substitution count.
		local escaped = gsub(str, "[%c\"'\\\128-\255][\128-\191]*", escape_char)
		-- Second pass: escape whatever NFC normalization would still change.
		return escape_non_NFC(escaped)
	end
	-- Bind the forward-declared file-level local used by the dump functions.
	escape = export.escape
end

-- Convert a value to a string.
-- Strings are double-quoted and passed through escape(); tables are written
-- one `[key] = value,` entry per line, iterated with sorted_pairs(value,
-- tsort) and indented with `prefix` plus one tab per nesting level; any other
-- value goes through tostring(). Every line of a table dump starts with a
-- space.
-- NOTE(review): `tsort` is only applied to the table passed in; the recursive
-- calls for nested keys and values do not forward it. Confirm this is intended.
function export.dump(value, prefix, tsort)
	prefix = prefix or ""
	
	local value_type = type(value)
	if value_type == "string" then
		return '"' .. escape(value) .. '"'
	end
	if value_type ~= "table" then
		return tostring(value)
	end
	
	local inner_prefix = prefix .. "\t"
	local lines = {" {"}
	for key, val in sorted_pairs(value, tsort) do
		local key_str = export.dump(key, inner_prefix)
		-- A nested table dump starts with a space; drop it after "= ".
		local val_str = gsub(export.dump(val, inner_prefix), "^ ", "")
		insert(lines, " " .. inner_prefix .. "[" .. key_str .. "] = " .. val_str .. ",")
	end
	insert(lines, " " .. prefix .. "}")
	
	return concat(lines, "\n")
end


-- Dumps `value` with export.dump (or export.modified_dump when
-- `options.modified` is truthy) and returns the result wrapped by
-- export.highlight.
function export.highlight_dump(value, prefix, tsort, options)
	local dumper = "dump"
	if options and options.modified then
		dumper = "modified_dump"
	end
	
	local text = export[dumper](value, prefix, tsort)
	
	-- Remove spaces at beginnings of lines (which are simply to force a <pre></pre> tag).
	-- Assigning to a local drops gsub's substitution count, which must not
	-- reach export.highlight as its `options` argument.
	local stripped = gsub(text, "%f[^%z\n] ", "")
	
	return export.highlight(stripped)
end


-- Returns true if at least one value in `t` is itself a table, false
-- otherwise (including when `t` is empty).
local function containsTable(t)
	local found = false
	for _, v in pairs(t) do
		if type(v) == "table" then
			found = true
			break
		end
	end
	return found
end


-- Returns false if any table-valued entry of `t` has a size (as reported by
-- table_size) different from `size`; returns true otherwise. Non-table values
-- are ignored, so a table with no table values also yields true.
local function containsTablesWithSize(t, size)
	for _, v in pairs(t) do
		local is_tbl = type(v) == "table"
		if is_tbl and table_size(v) ~= size then
			return false
		end
	end
	return true
end


--[=[
	Convert a value to a string.
	Like export.dump above, but a table is written in a compact form when
	either (a) it is an array (per is_array) with no table values, in which
	case it is placed on a single line, or (b) it has table values and every
	one of them has size 3, in which case each entry gets its own line.
	In the compact form, number values under number keys are written as
	0x%05X hexadecimal; elsewhere, numbers greater than 46 are written that
	way. Strings are double-quoted and escaped with escape(), as in
	export.dump, so that the output stays valid Lua when a string contains
	quotes, backslashes or control characters.
	Used by [[Module:User:Erutuon/script recognition]].
]=]
function export.modified_dump(value, prefix, tsort)
	local t = type(value)
	
	prefix = prefix or ""
	
	if t == "string" then
		return '"' .. escape(value) .. '"'
	elseif t == "table" then
		local str_table = {}
		
		-- Named has_table so that it does not shadow the containsTable helper.
		local has_table = containsTable(value)
		local consecutive = is_array(value)
		if consecutive and not has_table or has_table and containsTablesWithSize(value, 3) then
			insert(str_table, "{")
			
			for key, val in sorted_pairs(value, tsort) do
				-- One entry per line when there are nested tables, otherwise
				-- everything stays on a single line.
				if has_table then
					insert(str_table, "\n\t" .. prefix)
				else
					insert(str_table, " ")
				end
				
				-- Only string keys are written out explicitly.
				if type(key) == "string" then
					insert(str_table, "[" .. export.modified_dump(key) .. "] = ")
				end
				
				insert(str_table, type(key) == "number" and type(val) == "number" and format("0x%05X", val) or export.modified_dump(val))
				
				-- For a 3-element array the comma is only added when a next
				-- element exists; in every other case it is always added.
				if not (consecutive and #value == 3) or type(key) == "number" and value[key + 1] then
					insert(str_table, ",")
				end
			end
			
			if has_table then
				insert(str_table, "\n" .. prefix)
			else
				insert(str_table, " ")
			end
			
			insert(str_table, "}")
			return concat(str_table)
		end
		
		-- Fallback: same multi-line layout as export.dump.
		insert(str_table, " {")
		
		for key, val in sorted_pairs(value, tsort) do
			insert(str_table, " " .. prefix .. "\t[" .. export.modified_dump(key, prefix .. "\t") .. "] = " .. gsub(export.modified_dump(val, prefix .. "\t"), "^ ", "") .. ",")
		end
		
		insert(str_table, " " .. prefix .. "}")
		
		return concat(str_table, "\n")
	elseif t == "number" and value > 46 then
		return format("0x%05X", value)
	else
		return tostring(value)
	end
end


export.track = require("Module:debug/track")


-- Trigger a script error from a template. The first positional argument of
-- the frame is the error message; a fixed fallback text is used when it is
-- absent.
function export.error(frame)
	local message = frame.args[1]
	if not message then
		message = "(no message specified)"
	end
	error(message)
end

--[[
	Convenience function for generating syntaxhighlight tags.
	Display defaults to block; language defaults to "lua".
	Options is a table. To display inline text with HTML highlighting:
		{ inline = true, lang = "html" }
	
	Two calling forms:
		export.highlight(content, options)	returns the highlighted content
		export.highlight(options)		returns a function(content) that
							highlights with those options
]]
function export.highlight(content, options)
	-- Builds the attribute table for the tag. `inline` must be nil rather
	-- than false when inline display is not wanted, so that the attribute is
	-- not passed to the tag at all; both calling forms share this
	-- normalization.
	local function tag_args(opts)
		return {
			lang = opts and opts.lang or "lua",
			inline = opts and opts.inline and true or nil,
		}
	end
	
	if type(content) == "table" then
		-- Curried form: the first argument is the options table.
		local args = tag_args(content)
		return function(text)
			return mw.getCurrentFrame():extensionTag("syntaxhighlight", text, args)
		end
	end
	
	return mw.getCurrentFrame():extensionTag("syntaxhighlight", content, tag_args(options))
end

-- Tracks and logs unrecognized template parameters.
-- `args` is a table of parameter name -> value; `template_name` is used as
-- the tracking prefix and in the log message. One tracking entry is added for
-- the template as a whole and one per parameter name, then a single summary
-- line is written with mw.log.
function export.track_unrecognized_args(args, template_name)
	local function track(code)
		export.track(template_name .. "/" .. code)
	end
	
	track("unrecognized arg")
	
	local arg_list = {}
	for arg, value in pairs(args) do
		-- tostring() keeps format("%s") from raising on values that are
		-- neither strings nor numbers (e.g. booleans or tables).
		local arg_name = tostring(arg)
		track("unrecognized arg/" .. arg_name)
		insert(arg_list, format("|%s=%s", arg_name, tostring(value)))
	end
	
	-- Pluralize "parameter" when more than one was collected.
	mw.log(format("Unrecognized parameter%s in {{%s}}: %s.",
		arg_list[2] and "s" or "", template_name, concat(arg_list, ", ")
	))
end

do
	-- Text thrown by _placeholder_error and later swapped for the real message.
	local placeholder = "_message_"
	
	-- A dummy function that throws an error with a placeholder message.
	-- `frame.args.level` (default 1) is the error level requested by the
	-- caller of formatted_error; 6 is added to it before calling error().
	function export._placeholder_error(frame)
		error(placeholder, (tonumber(frame.args.level) or 1) + 6)
	end
	
	-- Throw an error via callParserFunction, which generates a real error with
	-- traceback, automatic categorization in [[CAT:E]] etc., but the error
	-- message is returned as a string. Then, replace the placeholder error
	-- message with `message`, which is preprocessed. This is necessary when
	-- preprocessing needs to be applied (e.g. when using <pre> tags), since
	-- otherwise strip markers and other half-processed text gets displayed
	-- instead.
	function export.formatted_error(message, level)
		local frame = mw.getCurrentFrame()
		local rendered = frame:callParserFunction("#invoke", {"debug", "_placeholder_error", level = level})
		local replacement = frame:preprocess(message)
		-- A function replacement is used so that "%" in the message is
		-- inserted literally; in a replacement string, "%" is an escape
		-- character and e.g. "%2" would raise "invalid capture index".
		-- The outer parentheses drop gsub's substitution count.
		return (rendered:gsub(placeholder, function()
			return replacement
		end))
	end
end

return export