Editing Module:Transcluder

From WoopMC
Warning: You are not logged in. Your IP address will be publicly visible if you make any edits. If you log in or create an account, your edits will be attributed to your username, along with other benefits.

The edit can be undone. Please check the comparison below to verify that this is what you want to do, and then publish the changes below to finish undoing the edit.

Latest revision Your text
Line 1: Line 1:
-- Module:Transcluder is a general-purpose transclusion engine
-- Documentation and master version: https://en.wikipedia.org/wiki/Module:Transcluder
-- Authors: User:Sophivorus, User:Certes & others
-- License: CC-BY-SA-3.0
local p = {}
local p = {}


-- Helper function to test for truthy and falsy values
-- Helper function to test for truthy and falsy values
-- @todo Somehow internationalize it
local function truthy(value)
local function truthy(value)
if not value or value == '' or value == 0 or value == '0' or value == 'false' or value == 'no' then
if not value or value == '' or value == 0 or value == '0' or value == 'false' or value == 'no' or value == 'non' then
return false
return false
end
end
Line 9: Line 15:
end
end


-- Helper function to match from a list regular expressions
-- Helper function to match from a list of regular expressions
-- Like so: match pre..list[1]..post or pre..list[2]..post or ...
-- Like so: match pre..list[1]..post or pre..list[2]..post or ...
local function matchAny(text, pre, list, post, init)
local function matchAny(text, pre, list, post, init)
Line 20: Line 26:
end
end


-- Like matchAny but for Category/File links with less overhead
-- @param text Wikitext of a single link, for example '[[File:Foo.png|thumb]]'
-- @param list Sequence of namespace names and aliases, for example {'File','Image'}
-- @return The matched link text for the first namespace in the list that matches, or nil if none does
local function matchAnyLink(text, list)
	for _, namespace in ipairs(list) do
		-- Escape pattern magic characters so that a namespace such as 'A-B' or 'A.B'
		-- is matched literally instead of being interpreted as a Lua pattern
		local escaped = string.gsub(namespace, '[%^%$%(%)%.%[%]%*%+%-%?%%]', '%%%0')
		local match = string.match(text, '%[%[%s*' .. escaped .. '%s*:.*%]%]')
		if match then return match end
	end
	return nil
end
-- Helper function to escape a string for use in regexes
local function escapeString(str)
local function escapeString(str)
return mw.ustring.gsub(str, '[%^%$%(%)%.%[%]%*%+%-%?%%]', '%%%0')
return string.gsub(str, '[%^%$%(%)%.%[%]%*%+%-%?%%]', '%%%0')
end
end


Line 30: Line 47:
pattern = escapeString(mw.ustring.sub(str, 1, 999)) .. '.-' .. escapeString(mw.ustring.sub(str, -999))
pattern = escapeString(mw.ustring.sub(str, 1, 999)) .. '.-' .. escapeString(mw.ustring.sub(str, -999))
end
end
return mw.ustring.gsub(text, pattern, '')
return string.gsub(text, pattern, '')
end
end


Line 36: Line 53:
-- @param flags Comma-separated list of numbers or min-max ranges, for example '1,3-5'
-- @param flags Comma-separated list of numbers or min-max ranges, for example '1,3-5'
-- @return Map from integers to booleans, for example {1=true,2=false,3=true,4=true,5=true}
-- @return Map from integers to booleans, for example {1=true,2=false,3=true,4=true,5=true}
-- @return Boolean indicating wether the flags should be treated as a blacklist or not
-- @return Boolean indicating whether the flags should be treated as a blacklist or not
local function parseFlags(value)
local function parseFlags(value)
local flags = {}
local flags = {}
Line 45: Line 62:
if type(value) == 'number' then
if type(value) == 'number' then
if value < 0 then
if value < 0 then
value = value * -1
value = -value
blacklist = true
blacklist = true
end
end
Line 51: Line 68:


elseif type(value) == 'string' then
elseif type(value) == 'string' then
if mw.ustring.sub(value, 1, 1) == '-' then
if string.sub(value, 1, 1) == '-' then
blacklist = true
blacklist = true
value = mw.ustring.sub(value, 2)
value = string.sub(value, 2)
end
end
local ranges = mw.text.split(value, ',') -- split ranges: '1,3-5' to {'1','3-5'}
local ranges = mw.text.split(value, ',') -- split ranges: '1,3-5' to {'1','3-5'}
for _, range in pairs(ranges) do
for _, range in pairs(ranges) do
range = mw.text.trim(range)
range = mw.text.trim(range)
local min, max = mw.ustring.match(range, '^(%d+)%s*%-%s*(%d+)$') -- '3-5' to min=3 max=5
local min, max = mw.ustring.match(range, '^(%d+)%s*[-–—]%s*(%d+)$') -- '3-5' to min=3 max=5
if not max then min, max = mw.ustring.match(range, '^((%d+))$') end -- '1' to min=1 max=1
if not max then min, max = string.match(range, '^((%d+))$') end -- '1' to min=1 max=1
if max then
if max then
for p = min, max do flags[p] = true end
for i = min, max do flags[i] = true end
else
else
flags[range] = true -- if we reach this point, the string had the form 'a,b,c' rather than '1,2,3'
flags[range] = true -- if we reach this point, the string had the form 'a,b,c' rather than '1,2,3'
Line 85: Line 102:
value = tostring(value)
value = tostring(value)
local lang = mw.language.getContentLanguage()
local lang = mw.language.getContentLanguage()
local lcvalue = lang:lcfirst(value)
local ucvalue = lang:ucfirst(value)
for flag in pairs(flags) do
for flag in pairs(flags) do
if value == tostring(flag)
if value == tostring(flag)
or lang:lcfirst(value) == flag
or lcvalue == flag
or lang:ucfirst(value) == flag
or ucvalue == flag
or ( not tonumber(flag) and mw.ustring.match(value, flag) ) then
or ( not tonumber(flag) and mw.ustring.match(value, flag) ) then
return true
return true
Line 126: Line 145:
-- @return Local name of the namespace and all aliases, for example {'File','Image','Archivo','Imagen'}
-- @return Local name of the namespace and all aliases, for example {'File','Image','Archivo','Imagen'}
local function getNamespaces(name)
local function getNamespaces(name)
local namespaces = mw.site.namespaces[name].aliases
local namespaces = mw.clone(mw.site.namespaces[name].aliases) -- Clone because https://en.wikipedia.org/w/index.php?diff=1056921358
table.insert(namespaces, mw.site.namespaces[name].name)
table.insert(namespaces, mw.site.namespaces[name].name)
table.insert(namespaces, mw.site.namespaces[name].canonicalName)
table.insert(namespaces, mw.site.namespaces[name].canonicalName)
Line 146: Line 165:


-- Remove <noinclude> tags
-- Remove <noinclude> tags
text = mw.ustring.gsub(text, '<[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.-</[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>', '') -- remove noinclude bits
text = string.gsub(text, '<[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.-</[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>', '') -- remove noinclude bits


-- Keep <onlyinclude> tags
-- Keep <onlyinclude> tags
if mw.ustring.find(text, '[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]') then -- avoid expensive search if possible
if string.find(text, 'onlyinclude') then -- avoid expensive search if possible
text = mw.ustring.gsub(text, '</[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.-<[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>', '') -- remove text between onlyinclude sections
text = text
text = mw.ustring.gsub(text, '^.-<[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>', '') -- remove text before first onlyinclude section
:gsub('</onlyinclude>.-<onlyinclude>', '') -- remove text between onlyinclude sections
text = mw.ustring.gsub(text, '</[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.*', '') -- remove text after last onlyinclude section
:gsub('^.-<onlyinclude>', '') -- remove text before first onlyinclude section
:gsub('</onlyinclude>.*', '') -- remove text after last onlyinclude section
end
end


Line 158: Line 178:
end
end


-- Get the requested files out of the given wikitext.
-- Get the requested files from the given wikitext.
-- @param text Required. Wikitext to parse.
-- @param text Required. Wikitext to parse.
-- @param flags Range of files to return, for example 2 or '1,3-5'. Omit to return all files.
-- @param flags Range of files to return, for example 2 or '1,3-5'. Omit to return all files.
Line 169: Line 189:
local name
local name
local count = 0
local count = 0
for file in mw.ustring.gmatch(text, '%b[]') do
for file in string.gmatch(text, '%b[]') do
if matchAny(file, '%[%[%s*', fileNamespaces, '%s*:.*%]%]') then
if matchAnyLink(file, fileNamespaces) then
name = mw.ustring.match(file, '%[%[[^:]-:([^]|]+)')
name = string.match(file, '%[%[[^:]-:([^]|]+)')
count = count + 1
count = count + 1
if not blacklist and ( not flags or flags[count] or matchFlag(name, flags) )
if not blacklist and ( not flags or flags[count] or matchFlag(name, flags) )
Line 185: Line 205:
end
end


-- Get the requested tables out of the given wikitext.
-- Get the requested tables from the given wikitext.
-- @param text Required. Wikitext to parse.
-- @param text Required. Wikitext to parse.
-- @param flags Range of tables to return, for example 2 or '1,3-5'. Omit to return all tables.
-- @param flags Range of tables to return, for example 2 or '1,3-5'. Omit to return all tables.
Line 195: Line 215:
local id
local id
local count = 0
local count = 0
for t in mw.ustring.gmatch('\n' .. text, '\n%b{}') do
for t in string.gmatch('\n' .. text, '\n%b{}') do
if mw.ustring.sub(t, 1, 3) == '\n{|' then
if string.sub(t, 1, 3) == '\n{|' then
id = mw.ustring.match(t, '\n{|[^\n]-id%s*=%s*["\']?([^"\'\n]+)["\']?[^\n]*\n')
id = string.match(t, '\n{|[^\n]-id%s*=%s*["\']?([^"\'\n]+)["\']?[^\n]*\n')
count = count + 1
count = count + 1
if not blacklist and ( not flags or flags[count] or flags[id] )
if not blacklist and ( not flags or flags[count] or flags[id] )
Line 210: Line 230:
end
end


-- Get the requested templates out of the given wikitext.
-- Get the requested templates from the given wikitext.
-- @param text Required. Wikitext to parse.
-- @param text Required. Wikitext to parse.
-- @param flags Range of templates to return, for example 2 or '1,3-5'. Omit to return all templates.
-- @param flags Range of templates to return, for example 2 or '1,3-5'. Omit to return all templates.
Line 220: Line 240:
local name
local name
local count = 0
local count = 0
for template in mw.ustring.gmatch(text, '{%b{}}') do
for template in string.gmatch(text, '{%b{}}') do
if mw.ustring.sub(template, 1, 3) ~= '{{#' then -- skip parser functions like #if
if string.sub(template, 1, 3) ~= '{{#' then -- skip parser functions like #if
name = mw.text.trim( mw.ustring.match(template, '{{([^}|\n]+)') ) -- get the template name
name = mw.text.trim( string.match(template, '{{([^}|\n]+)') ) -- get the template name
count = count + 1
count = count + 1
if not blacklist and ( not flags or flags[count] or matchFlag(name, flags) )
if not blacklist and ( not flags or flags[count] or matchFlag(name, flags) )
Line 235: Line 255:
end
end


-- Get the requested template parameters out of the given wikitext.
-- Get the requested template parameters from the given wikitext.
-- @param text Required. Wikitext to parse.
-- @param text Required. Wikitext to parse.
-- @param flags Range of parameters to return, for example 2 or '1,3-5'. Omit to return all parameters.
-- @param flags Range of parameters to return, for example 2 or '1,3-5'. Omit to return all parameters.
-- @return Map from parameter name to value, NOT IN THE ORIGINAL ORDER
-- @return Map from parameter name to value, NOT IN THE ORIGINAL ORDER
-- @return Original wikitext minus requested parameters.
-- @return Original wikitext minus requested parameters.
-- @return Order in which the parameters were parsed.
local function getParameters(text, flags)
local function getParameters(text, flags)
local parameters = {}
local parameters, parameterOrder = {}, {}
local flags, blacklist = parseFlags(flags)
local flags, blacklist = parseFlags(flags)
local params, count, parts, key, value
local params, count, parts, key, value
for template in mw.ustring.gmatch(text, '{%b{}}') do
for template in string.gmatch(text, '{%b{}}') do
params = mw.ustring.match(template, '{{[^|}]-|(.+)}}')
params = string.match(template, '{{[^|}]-|(.*)}}')
if params then
if params then
count = 0
count = 0
-- Temporarily replace pipes in subtemplates, tables and links to avoid chaos
-- Temporarily replace pipes in subtemplates and links to avoid chaos
for subtemplate in mw.ustring.gmatch(params, '%b{}') do
for subtemplate in string.gmatch(params, '{%b{}}') do
params = mw.ustring.gsub(params, escapeString(subtemplate), mw.ustring.gsub(mw.ustring.gsub(subtemplate, '%%', '%%%'), '|', '@@@') )
params = string.gsub(params, escapeString(subtemplate), string.gsub(subtemplate, ".", {["%"]="%%", ["|"]="@@:@@", ["="]="@@_@@"}) )
end
end
for link in mw.ustring.gmatch(params, '%b[]') do
for link in string.gmatch(params, '%b[]') do
params = mw.ustring.gsub(params, escapeString(link), mw.ustring.gsub(link, '|', '@@@') )
params = string.gsub(params, escapeString(link), string.gsub(link, ".", {["%"]="%%", ["|"]="@@:@@", ["="]="@@_@@"}) )
end
end
for parameter in mw.text.gsplit(params, '|') do
for parameter in mw.text.gsplit(params, '|') do
parts = mw.text.split(parameter, '=')
parts = mw.text.split(parameter, '=')
key = mw.text.trim(parts[1])
key = mw.text.trim(parts[1])
value = table.concat(parts, '=', 2)
if #parts == 1 then
if value == '' then
value = key
value = key
count = count + 1
count = count + 1
key = count
key = count
else
else
value = mw.text.trim(value)
value = mw.text.trim(table.concat(parts, '=', 2))
end
end
value = mw.ustring.gsub(value, '@@@', '|')
value = string.gsub(string.gsub(value, '@@:@@', '|'), '@@_@@', '=')
if not blacklist and ( not flags or matchFlag(key, flags) )
if not blacklist and ( not flags or matchFlag(key, flags) )
or blacklist and flags and not matchFlag(key, flags) then
or blacklist and flags and not matchFlag(key, flags) then
table.insert(parameterOrder, key)
parameters[key] = value
parameters[key] = value
else
else
Line 276: Line 297:
end
end
end
end
return parameters, text
return parameters, text, parameterOrder
end
end


-- Get the requested lists out of the given wikitext.
-- Get the requested lists from the given wikitext.
-- @param text Required. Wikitext to parse.
-- @param text Required. Wikitext to parse.
-- @param flags Range of lists to return, for example 2 or '1,3-5'. Omit to return all lists.
-- @param flags Range of lists to return, for example 2 or '1,3-5'. Omit to return all lists.
Line 288: Line 309:
local flags, blacklist = parseFlags(flags)
local flags, blacklist = parseFlags(flags)
local count = 0
local count = 0
for list in mw.ustring.gmatch('\n' .. text .. '\n\n', '\n([*#].-)\n[^*#]') do
for list in string.gmatch('\n' .. text .. '\n\n', '\n([*#].-)\n[^*#]') do
count = count + 1
count = count + 1
if not blacklist and ( not flags or flags[count] )
if not blacklist and ( not flags or flags[count] )
Line 300: Line 321:
end
end


-- Get the requested paragraphs out of the given wikitext.
-- Get the requested paragraphs from the given wikitext.
-- @param text Required. Wikitext to parse.
-- @param text Required. Wikitext to parse.
-- @param flags Range of paragraphs to return, for example 2 or '1,3-5'. Omit to return all paragraphs.
-- @param flags Range of paragraphs to return, for example 2 or '1,3-5'. Omit to return all paragraphs.
Line 314: Line 335:
elements, temp = getLists(temp, 0) -- remove lists
elements, temp = getLists(temp, 0) -- remove lists
elements, temp = getFiles(temp, 0) -- remove files
elements, temp = getFiles(temp, 0) -- remove files
temp = mw.ustring.gsub(temp, '\n%b{}\n', '\n%0\n') -- add spacing between tables and block templates
temp = mw.text.trim((temp
temp = mw.ustring.gsub(temp, '\n%b{}\n', '\n') -- remove tables and block templates
:gsub('\n%b{} *\n', '\n%0\n') -- add spacing between tables and block templates
temp = mw.ustring.gsub(temp, '\n==+[^=]+==+\n', '\n') -- remove section titles
:gsub('\n%b{} *\n', '\n') -- remove tables and block templates
temp = mw.text.trim(temp)
:gsub('\n==+[^=]+==+ *\n', '\n') -- remove section titles
))


-- Assume that anything remaining is a paragraph
-- Assume that anything remaining is a paragraph
Line 336: Line 358:
end
end


-- Get the requested categories out of the given wikitext.
-- Get the requested categories from the given wikitext.
-- @param text Required. Wikitext to parse.
-- @param text Required. Wikitext to parse.
-- @param flags Range of categories to return, for example 2 or '1,3-5'. Omit to return all categories.
-- @param flags Range of categories to return, for example 2 or '1,3-5'. Omit to return all categories.
Line 347: Line 369:
local name
local name
local count = 0
local count = 0
for category in mw.ustring.gmatch(text, '%b[]') do
for category in string.gmatch(text, '%b[]') do
if matchAny(category, '%[%[%s*', categoryNamespaces, '%s*:.*%]%]') then
if matchAnyLink(category, categoryNamespaces) then
name = mw.ustring.match(category, '%[%[[^:]-:([^]|]+)')
name = string.match(category, '%[%[[^:]-:([^]|]+)')
count = count + 1
count = count + 1
if not blacklist and ( not flags or flags[count] or matchFlag(name, flags) )
if not blacklist and ( not flags or flags[count] or matchFlag(name, flags) )
Line 362: Line 384:
end
end


-- Get the requested references out of the given wikitext.
-- Get the requested references from the given wikitext.
-- @param text Required. Wikitext to parse.
-- @param text Required. Wikitext to parse.
-- @param flags Range of references to return, for example 2 or '1,3-5'. Omit to return all references.
-- @param flags Range of references to return, for example 2 or '1,3-5'. Omit to return all references.
Line 369: Line 391:
local function getReferences(text, flags)
local function getReferences(text, flags)
local references = {}
local references = {}
-- Remove all references, including citations, when 0 references are requested
-- This is kind of hacky but currently necessary because the rest of the code
-- doesn't remove citations like <ref name="Foo" /> if Foo is defined elsewhere
if flags and not truthy(flags) then
text = string.gsub(text, '<%s*[Rr][Ee][Ff][^>/]*>.-<%s*/%s*[Rr][Ee][Ff]%s*>', '')
text = string.gsub(text, '<%s*[Rr][Ee][Ff][^>/]*/%s*>', '')
return references, text
end
local flags, blacklist = parseFlags(flags)
local flags, blacklist = parseFlags(flags)
local name
local name
local count = 0
local count = 0
for reference in mw.ustring.gmatch(text, '<%s*[Rr][Ee][Ff][^>/]*>.-<%s*/%s*[Rr][Ee][Ff]%s*>') do
for reference in string.gmatch(text, '<%s*[Rr][Ee][Ff][^>/]*>.-<%s*/%s*[Rr][Ee][Ff]%s*>') do
name = mw.ustring.match(reference, '<%s*[Rr][Ee][Ff][^>]*name%s*=%s*["\']?([^"\'>/]+)["\']?[^>]*%s*>')
name = string.match(reference, '<%s*[Rr][Ee][Ff][^>]*name%s*=%s*["\']?([^"\'>/]+)["\']?[^>]*%s*>')
count = count + 1
count = count + 1
if not blacklist and ( not flags or flags[count] or matchFlag(name, flags) )
if not blacklist and ( not flags or flags[count] or matchFlag(name, flags) )
Line 381: Line 413:
text = removeString(text, reference)
text = removeString(text, reference)
if name then
if name then
for citation in mw.ustring.gmatch(text, '<%s*[Rr][Ee][Ff][^>]*name%s*=%s*["\']?' .. escapeString(name) .. '["\']?[^/>]*/%s*>') do
for citation in string.gmatch(text, '<%s*[Rr][Ee][Ff][^>]*name%s*=%s*["\']?' .. escapeString(name) .. '["\']?[^/>]*/%s*>') do
text = removeString(text, citation)
text = removeString(text, citation)
end
end
Line 390: Line 422:
end
end


-- Get the lead section out of the given wikitext.
-- Get the lead section from the given wikitext.
-- @param text Required. Wikitext to parse.
-- @param text Required. Wikitext to parse.
-- @return Wikitext of the lead section.
-- @return Wikitext of the lead section.
local function getLead(text)
local function getLead(text)
text = mw.ustring.gsub('\n' .. text, '\n==.*', '')
text = string.gsub('\n' .. text, '\n==.*', '')
text = mw.text.trim(text)
text = mw.text.trim(text)
if not text then return throwError('lead-empty') end
if not text then return throwError('lead-empty') end
Line 400: Line 432:
end
end


-- Get the wikitext of the requested sections
-- Get the requested sections from the given wikitext.
-- @param text Required. Wikitext to parse.
-- @param text Required. Wikitext to parse.
-- @param flags Range of sections to return, for example 2 or '1,3-5'. Omit to return all references.
-- @param flags Range of sections to return, for example 2 or '1,3-5'. Omit to return all sections.
-- @return Sequence of strings containing the wikitext of the requested sections.
-- @return Sequence of strings containing the wikitext of the requested sections.
-- @return Original wikitext minus requested sections.
-- @return Original wikitext minus requested sections.
Line 410: Line 442:
local count = 0
local count = 0
local prefix, section, suffix
local prefix, section, suffix
for title in mw.ustring.gmatch('\n' .. text .. '\n==', '\n==+%s*([^=]+)%s*==+\n') do
for title in string.gmatch('\n' .. text .. '\n==', '\n==+%s*([^=]+)%s*==+') do
count = count + 1
count = count + 1
prefix, section, suffix = mw.ustring.match('\n' .. text .. '\n==', '\n()==+%s*' .. escapeString(title) .. '%s*==+(.-)()\n==')
prefix, section, suffix = string.match('\n' .. text .. '\n==', '\n()==+%s*' .. escapeString(title) .. '%s*==+(.-)()\n==')
if not blacklist and ( not flags or flags[count] or matchFlag(title, flags) )
if not blacklist and ( not flags or flags[count] or matchFlag(title, flags) )
or blacklist and flags and not flags[count] and not matchFlag(title, flags) then
or blacklist and flags and not flags[count] and not matchFlag(title, flags) then
sections[title] = section
sections[title] = section
else
else
text = mw.ustring.sub(text, 1, prefix) .. mw.ustring.sub(text, suffix)
text = string.sub(text, 1, prefix) .. string.sub(text, suffix)
text = mw.ustring.gsub(text, '\n?==$', '') -- remove the trailing \n==
text = string.gsub(text, '\n?==$', '') -- remove the trailing \n==
end
end
end
end
Line 424: Line 456:
end
end


-- Get the requested section out of the given wikitext (including subsections).
-- Get the requested section or <section> tag from the given wikitext (including subsections).
-- @param text Required. Wikitext to parse.
-- @param text Required. Wikitext to parse.
-- @param section Required. Title of the section to get (in wikitext), for example 'History' or 'History of [[Athens]]'.
-- @param section Required. Title of the section to get (in wikitext), for example 'History' or 'History of [[Athens]]'.
Line 432: Line 464:
local escapedSection = escapeString(section)
local escapedSection = escapeString(section)
-- First check if the section title matches a <section> tag
-- First check if the section title matches a <section> tag
if mw.ustring.find(text, '<%s*[Ss]ection%s+begin%s*=%s*["\']?%s*' .. escapedSection .. '%s*["\']?%s*/>') then -- avoid expensive search if possible
if string.find(text, '<%s*[Ss]ection%s+begin%s*=%s*["\']?%s*' .. escapedSection .. '%s*["\']?%s*/>') then -- avoid expensive search if possible
text = mw.ustring.gsub(text, '<%s*[Ss]ection%s+end=%s*["\']?%s*'.. escapedSection ..'%s*["\']?%s*/>.-<%s*[Ss]ection%s+begin%s*=%s*["\']?%s*' .. escapedSection .. '%s*["\']?%s*/>', '') -- remove text between section tags
text = mw.text.trim((text
text = mw.ustring.gsub(text, '^.-<%s*[Ss]ection%s+begin%s*=%s*["\']?%s*' .. escapedSection .. '%s*["\']?%s*/>', '') -- remove text before first section tag
:gsub('<%s*[Ss]ection%s+end=%s*["\']?%s*'.. escapedSection ..'%s*["\']?%s*/>.-<%s*[Ss]ection%s+begin%s*=%s*["\']?%s*' .. escapedSection .. '%s*["\']?%s*/>', '') -- remove text between section tags
text = mw.ustring.gsub(text, '<%s*[Ss]ection%s+end=%s*["\']?%s*'.. escapedSection ..'%s*["\']?%s*/>.*', '') -- remove text after last section tag
:gsub('^.-<%s*[Ss]ection%s+begin%s*=%s*["\']?%s*' .. escapedSection .. '%s*["\']?%s*/>', '') -- remove text before first section tag
text = mw.text.trim(text)
:gsub('<%s*[Ss]ection%s+end=%s*["\']?%s*'.. escapedSection ..'%s*["\']?%s*/>.*', '') -- remove text after last section tag
))
if text == '' then return throwError('section-tag-empty', section) end
if text == '' then return throwError('section-tag-empty', section) end
return text
return text
end
end
local level, text = mw.ustring.match('\n' .. text .. '\n', '\n(==+)%s*' .. escapedSection .. '%s*==.-\n(.*)')
local level, text = string.match('\n' .. text .. '\n', '\n(==+)%s*' .. escapedSection .. '%s*==.-\n(.*)')
if not text then return throwError('section-not-found', section) end
if not text then return throwError('section-not-found', section) end
local nextSection = '\n==' .. mw.ustring.rep('=?', #level - 2) .. '[^=].*'
local nextSection = '\n==' .. string.rep('=?', #level - 2) .. '[^=].*'
text = mw.ustring.gsub(text, nextSection, '') -- remove later sections with headings at this level or higher
text = string.gsub(text, nextSection, '') -- remove later sections with headings at this level or higher
text = mw.text.trim(text)
text = mw.text.trim(text)
if text == '' then return throwError('section-empty', section) end
if text == '' then return throwError('section-empty', section) end
Line 481: Line 514:
end
end
end
end
page = string.gsub(page, '"', '') -- remove any quotation marks from the page title
text = mw.ustring.gsub(text, '<%s*[Rr][Ee][Ff][^>]*name%s*=%s*["\']?([^"\'>/]+)["\']?[^>/]*(/?)%s*>', '<ref name="' .. page .. ' %1"%2>')
text = mw.ustring.gsub(text, '<%s*[Rr][Ee][Ff][^>]*name%s*=%s*["\']?([^"\'>/]+)["\']?[^>/]*(/?)%s*>', '<ref name="' .. page .. ' %1"%2>')
text = mw.ustring.gsub(text, '<%s*[Rr][Ee][Ff]%s*group%s*=%s*["\']?[^"\'>/]+["\']%s*>', '<ref>')
text = mw.ustring.gsub(text, '<%s*[Rr][Ee][Ff]%s*group%s*=%s*["\']?[^"\'>/]+["\']%s*>', '<ref>')
Line 487: Line 521:


-- Replace the bold title or synonym near the start of the page by a link to the page
-- Replace the bold title or synonym near the start of the page by a link to the page
function linkBold(text, page)
local function linkBold(text, page)
local lang = mw.language.getContentLanguage()
local lang = mw.language.getContentLanguage()
local position = mw.ustring.find(text, "'''" .. lang:ucfirst(page) .. "'''", 1, true) -- look for "'''Foo''' is..." (uc) or "A '''foo''' is..." (lc)
local position = mw.ustring.find(text, "'''" .. lang:ucfirst(page) .. "'''", 1, true) -- look for "'''Foo''' is..." (uc) or "A '''foo''' is..." (lc)
Line 501: Line 535:
return nil -- instruct gsub to make no change
return nil -- instruct gsub to make no change
end
end
end, 1) -- "end" here terminates the anonymous replacement function(a, b) passed to gsub
end, 1) -- "end" here terminates the anonymous replacement function(a, b) passed to gsub
end
end
return text
return text
Line 514: Line 548:
local fileDescription
local fileDescription
local frame = mw.getCurrentFrame()
local frame = mw.getCurrentFrame()
for file in mw.ustring.gmatch(text, '%b[]') do
for file in string.gmatch(text, '%b[]') do
if matchAny(file, '%[%[%s*', fileNamespaces, '%s*:.*%]%]') then
if matchAnyLink(file, fileNamespaces) then
fileName = 'File:' .. mw.ustring.match(file, '%[%[[^:]-:([^]|]+)')
fileName = 'File:' .. string.match(file, '%[%[[^:]-:([^]|]+)')
fileDescription, fileName = getText(fileName)
fileDescription, fileName = getText(fileName)
if fileName then
if fileName then
Line 522: Line 556:
fileDescription = frame:preprocess('{{' .. fileName .. '}}') -- try Commons
fileDescription = frame:preprocess('{{' .. fileName .. '}}') -- try Commons
end
end
if fileDescription and mw.ustring.match(fileDescription, '[Nn]on%-free') then
if fileDescription and string.match(fileDescription, '[Nn]on%-free') then
text = removeString(text, file)
text = removeString(text, file)
end
end
Line 532: Line 566:


-- Remove any self links
-- Remove any self links
function removeSelfLinks(text)
local function removeSelfLinks(text)
local lang = mw.language.getContentLanguage()
local lang = mw.language.getContentLanguage()
local page = escapeString( mw.title.getCurrentTitle().prefixedText )
local page = escapeString(mw.title.getCurrentTitle().prefixedText)
text = mw.ustring.gsub(text, '%[%[(' .. lang:ucfirst(page) .. ')%]%]', '%1')
local ucpage = lang:ucfirst(page)
text = mw.ustring.gsub(text, '%[%[(' .. lang:lcfirst(page) .. ')%]%]', '%1')
local lcpage = lang:lcfirst(page)
text = mw.ustring.gsub(text, '%[%[' .. lang:ucfirst(page) .. '|([^]]+)%]%]', '%1')
text = text
text = mw.ustring.gsub(text, '%[%[' .. lang:lcfirst(page) .. '|([^]]+)%]%]', '%1')
:gsub('%[%[(' .. ucpage .. ')%]%]', '%1')
:gsub('%[%[(' .. lcpage .. ')%]%]', '%1')
:gsub('%[%[' .. ucpage .. '|([^]]+)%]%]', '%1')
:gsub('%[%[' .. lcpage .. '|([^]]+)%]%]', '%1')
return text
return text
end
end


-- Remove all wikilinks
-- Remove all wikilinks
function removeLinks(text)
local function removeLinks(text)
text = mw.ustring.gsub(text, '%[%[[^|]+|([^]]+)%]%]', '%1')
text = text
text = mw.ustring.gsub(text, '%[%[([^]]+)%]%]', '%1')
:gsub('%[%[[^%]|]+|([^]]+)%]%]', '%1')
:gsub('%[%[([^]]+)%]%]', '%1')
:gsub('%[[^ ]+ ([^]]+)%]', '%1')
:gsub('%[([^]]+)%]', '%1')
return text
return text
end
end


-- Remove HTML comments
-- Remove HTML comments
function removeComments(text)
local function removeComments(text)
text = mw.ustring.gsub(text, '<!%-%-.-%-%->', '')
text = string.gsub(text, '<!%-%-.-%-%->', '')
return text
return text
end
end


-- Remove behavior switches, such as __NOTOC__
-- Remove behavior switches, such as __NOTOC__
function removeBehaviorSwitches(text)
local function removeBehaviorSwitches(text)
text = mw.ustring.gsub(text, '__[A-Z]+__', '')
text = string.gsub(text, '__[A-Z]+__', '')
return text
return text
end
end


-- Remove bold text
-- Remove bold text
function removeBold(text)
local function removeBold(text)
text = mw.ustring.gsub(text, "'''", '')
text = string.gsub(text, "'''", '')
return text
return text
end
end
Line 571: Line 611:
if not options then options = {} end
if not options then options = {} end


-- Make sure the page exists
if not page then return throwError('no-page') end
if not page then return throwError('no-page') end
page = mw.text.trim(page)
page = mw.text.trim(page)
if page == '' then return throwError('no-page') end
if page == '' then return throwError('no-page') end
local page, hash, section = mw.ustring.match(page, '([^#]+)(#?)([^#]*)')
local page, hash, section = string.match(page, '([^#]+)(#?)(.*)')
local text, page = getText(page, options.noFollow)
local text, temp = getText(page, options.noFollow)
if not page then return throwError('no-page') end
if not temp then return throwError('invalid-title', page) end
page = temp
if not text then return throwError('page-not-found', page) end
if not text then return throwError('page-not-found', page) end
local full = text -- save the full text for fixReferences below
local full = text -- save the full text for fixReferences below
Line 620: Line 662:
-- Misc options
-- Misc options
if truthy(options.fixReferences) then text = fixReferences(text, page, full) end
if truthy(options.fixReferences) then text = fixReferences(text, page, full) end
if truthy(options.linkBold) then text = linkBold(text, page) end
if truthy(options.linkBold) and not truthy(section) then text = linkBold(text, page) end
if truthy(options.noBold) then text = removeBold(text) end
if truthy(options.noBold) then text = removeBold(text) end
if truthy(options.noLinks) then text = removeLinks(text) end
if truthy(options.noLinks) then text = removeLinks(text) end
Line 629: Line 671:


-- Remove multiple newlines left over from removing elements
-- Remove multiple newlines left over from removing elements
text = mw.ustring.gsub(text, '\n\n\n+', '\n\n')
text = string.gsub(text, '\n\n\n+', '\n\n')
text = mw.text.trim(text)
text = mw.text.trim(text)


Please note that all contributions to WoopMC may be edited, altered, or removed by other contributors. If you do not want your writing to be edited mercilessly, then do not submit it here.
You are also promising us that you wrote this yourself, or copied it from a public domain or similar free resource (see WoopMC:Copyrights for details). Do not submit copyrighted work without permission!
Cancel Editing help (opens in new window)