[go: up one dir, main page]


Faster reimplementation of mw.text.nowiki, which also has some minor changes to match the PHP equivalent used by the native parser.


local byte = string.byte
local concat = table.concat
local gsub = string.gsub
local lower = string.lower
local match = string.match
local sub = string.sub

local data = mw.loadData("Module:string/nowiki/data")
local absolute = data.absolute
local after_newline = data.after_newline
local after_magic_link = data.after_magic_link
local first = data.first
local last = data.last
local uri_schemes = data.uri_schemes

--- Replacement callback for gsub with the pattern "([%w_]+)(:?)".
-- Escapes the colon that follows a recognised URI scheme so that the
-- scheme and colon are no longer adjacent in the output.
-- @param uri   the run of word characters that was matched
-- @param colon ":" if the run is immediately followed by a colon, else ""
-- @return `uri` alone when there is no colon; `uri .. "&#58;"` when the
--         lowercased `uri` is a key of `uri_schemes`; `uri .. ":"` otherwise
local function escape_uri(uri, colon)
	-- No trailing colon: nothing to escape.
	if colon == "" then
		return uri
	end
	-- The scheme lookup is done on the lowercased name.
	if uri_schemes[lower(uri)] then
		return uri .. "&#58;"
	end
	return uri .. ":"
end

--- Escapes wikitext syntax in a string by replacing selected characters with
-- decimal character references of the form "&#NN;".
--
-- The input is scanned for candidate characters. Text that needs no escaping
-- is copied through in slices: `start` is the first byte not yet copied into
-- `ret`, and `head` is the position at which the next search begins.
--
-- @param str the string to escape
-- @return the escaped string
return function(str)
	local ret, head, n, ch_first = {}, 1, 0, sub(str, 1, 1)
	-- The first character is checked against `after_newline` as well as
	-- `first`, i.e. the start of the string is handled like a line start.
	if after_newline[ch_first] or first[ch_first] then
		n = n + 1
		ret[n] = "&#" .. byte(ch_first) .. ";"
		head = 2
	elseif sub(str, 1, 4) == "----" then
		-- Escape the first hyphen of a leading "----", exactly as is done
		-- for "----" following a newline further down.
		n = n + 1
		ret[n] = "&#45;---"
		head = 5
	end
	local start = head
	while true do
		-- Position and value of the next character that may need escaping.
		local loc, this = match(str, "()([\n\r!\"&':;<=>IPR[%]_{|}~])", head)
		if not loc then
			-- No candidates left: flush the remaining unescaped text.
			n = n + 1
			ret[n] = sub(str, start)
			-- (:?) is an optimization that resets the match head to the end of
			-- the [%w_]+ string, instead of backtracking to the next character.
			str = gsub(concat(ret), "([%w_]+)(:?)", escape_uri)
			-- A final character listed in `last` is escaped as well.
			local ch_last = sub(str, -1)
			return last[ch_last] and sub(str, 1, -2) .. "&#" .. byte(ch_last) .. ";" or str
		elseif absolute[this] then
			-- Characters in `absolute` are escaped wherever they occur.
			n = n + 1
			ret[n] = sub(str, start, loc - 1) .. "&#" .. byte(this) .. ";"
			head = loc + 1
			start = head
		elseif this == "\n" or this == "\r" then
			local nxt = loc + 1
			nxt = sub(str, nxt, nxt)
			if after_newline[nxt] then
				-- Keep the line break, escape the character after it.
				n = n + 1
				ret[n] = sub(str, start, loc) .. "&#" .. byte(nxt) .. ";"
				head = loc + 2
				start = head
			elseif sub(str, loc + 1, loc + 4) == "----" then
				-- Keep the line break, escape the first hyphen of "----".
				n = n + 1
				ret[n] = sub(str, start, loc) .. "&#45;---"
				head = loc + 5
				start = head
			else
				-- Nothing to escape; resume directly after this character
				-- so the same candidate is not searched for again.
				head = loc + 1
			end
		elseif this == "!" then
			local nxt = loc + 1
			if sub(str, nxt, nxt) == "!" then
				-- "!!": escape the first "!".
				n = n + 1
				ret[n] = sub(str, start, loc - 1) .. "&#33;!"
				head = loc + 2
				start = head
			else
				head = loc + 1
			end
		elseif this == "_" then
			local nxt = loc + 1
			if sub(str, nxt, nxt) == "_" then
				-- "__": keep the first "_", escape the second.
				n = n + 1
				ret[n] = sub(str, start, loc) .. "&#95;"
				head = loc + 2
				start = head
			else
				head = loc + 1
			end
		elseif this == ":" and sub(str, loc + 1, loc + 2) == "//" then
			-- "://": escape the colon.
			n = n + 1
			ret[n] = sub(str, start, loc - 1) .. "&#58;//"
			head = loc + 3
			start = head
		elseif this == "~" and sub(str, loc + 1, loc + 2) == "~~" then
			-- "~~~": keep the first two tildes, escape the third.
			n = n + 1
			ret[n] = sub(str, start, loc + 1) .. "&#126;"
			head = loc + 3
			start = head
		elseif (
			this == "I" and sub(str, loc + 1, loc + 3) == "SBN" or
			this == "P" and sub(str, loc + 1, loc + 3) == "MID"
		) then
			-- "ISBN" / "PMID": escape the following character if it is
			-- listed in `after_magic_link`.
			local nxt = loc + 4
			nxt = sub(str, nxt, nxt)
			if after_magic_link[nxt] then
				n = n + 1
				ret[n] = sub(str, start, loc + 3)  .. "&#" .. byte(nxt) .. ";"
				head = loc + 5
				start = head
			else
				head = loc + 1
			end
		elseif this == "R" and sub(str, loc + 1, loc + 2) == "FC" then
			-- "RFC": same treatment as "ISBN" / "PMID", one character shorter.
			local nxt = loc + 3
			nxt = sub(str, nxt, nxt)
			if after_magic_link[nxt] then
				n = n + 1
				ret[n] = sub(str, start, loc + 2)  .. "&#" .. byte(nxt) .. ";"
				head = loc + 4
				start = head
			else
				head = loc + 1
			end
		else
			-- Candidate character in a context that needs no escaping.
			head = loc + 1
		end
	end
end