Module:Plain text: Difference between revisions
Jump to navigation
Jump to search
en>Johnuniq (since we're optimizing this, omit unnecessary mw.ustring.gsub; will explain on talk) |
m (1 revision imported) |
||
(16 intermediate revisions by 6 users not shown) | |||
Line 1: | Line 1: | ||
--converts text with wikilinks to plain text, e.g "[[foo|gah]] is [[bar]]" to "gah is bar" | --converts text with wikilinks to plain text, e.g "[[foo|gah]] is [[bar]]" to "gah is bar" | ||
--removes anything enclosed in tags that isn't nested, mediawiki strip markers (references etc), files, italic and bold markup | --removes anything enclosed in tags that isn't nested, mediawiki strip markers (references etc), files, italic and bold markup | ||
require[[strict]] | |||
local p = {} | local p = {} | ||
function p.main(frame) | function p.main(frame) | ||
local text = frame.args[1] | local text = frame.args[1] | ||
local encode = require('Module:yesno')(frame.args.encode) | |||
return p._main(text, encode) | |||
end | |||
function p._main(text, encode) | |||
if not text then return end | |||
text = mw.text.killMarkers(text) | text = mw.text.killMarkers(text) | ||
:gsub('<.->.-<.->', '') --strip out tags | :gsub(' ', ' ') --replace nbsp spaces with regular spaces | ||
:gsub('%[%[%s* | :gsub('<br ?/?>', ', ') --replace br with commas | ||
:gsub('%[%[.-|', '') --strip out piped link text | :gsub('<span.->(.-)</span>', '%1') --remove spans while keeping text inside | ||
:gsub('<i.->(.-)</i>', '%1') --remove italics while keeping text inside | |||
:gsub('<b.->(.-)</b>', '%1') --remove bold while keeping text inside | |||
:gsub('<em.->(.-)</em>', '%1') --remove emphasis while keeping text inside | |||
:gsub('<strong.->(.-)</strong>', '%1') --remove strong while keeping text inside | |||
:gsub('<sub.->(.-)</sub>', '%1') --remove subscript markup; retain contents | |||
:gsub('<sup.->(.-)</sup>', '%1') --remove superscript markup; retain contents | |||
:gsub('<u.->(.-)</u>', '%1') --remove underline markup; retain contents | |||
:gsub('<.->.-<.->', '') --strip out remaining tags and the text inside | |||
:gsub('<.->', '') --remove any other tag markup | |||
:gsub('%[%[%s*[Ff][Ii][Ll][Ee]%s*:.-%]%]', '') --strip out files | |||
:gsub('%[%[%s*[Ii][Mm][Aa][Gg][Ee]%s*:.-%]%]', '') --strip out use of image: | |||
:gsub('%[%[%s*[Cc][Aa][Tt][Ee][Gg][Oo][Rr][Yy]%s*:.-%]%]', '') --strip out categories | |||
:gsub('%[%[[^%]]-|', '') --strip out piped link text | |||
:gsub('([^%[])%[[^%[%]][^%]]-%s', '%1') --strip out external link text | |||
:gsub('^%[[^%[%]][^%]]-%s', '') --strip out external link text | |||
:gsub('[%[%]]', '') --then strip out remaining [ and ] | :gsub('[%[%]]', '') --then strip out remaining [ and ] | ||
:gsub("'''''", "") --strip out bold italic markup | :gsub("'''''", "") --strip out bold italic markup | ||
:gsub("'''?", "") --not stripping out '''' gives correct output for bolded text in quotes | :gsub("'''?", "") --not stripping out '''' gives correct output for bolded text in quotes | ||
return text | :gsub('----+', '') --remove ---- lines | ||
:gsub("^%s+", "") --strip leading | |||
:gsub("%s+$", "") --and trailing spaces | |||
:gsub("%s+", " ") --strip redundant spaces | |||
if encode then | |||
return mw.text.encode(text) | |||
else | |||
return text | |||
end | |||
end | end | ||
return p | return p |
Latest revision as of 21:06, 16 April 2024
Documentation for this module may be created at Module:Plain text/doc
--converts text with wikilinks to plain text, e.g "[[foo|gah]] is [[bar]]" to "gah is bar"
--removes anything enclosed in tags that isn't nested, mediawiki strip markers (references etc), files, italic and bold markup
require[[strict]]
local p = {}

--- Entry point for use from wikitext via #invoke.
-- Reads the text from the first positional argument and the optional
-- `encode` argument (passed through Module:yesno), then delegates to p._main.
-- @param frame the frame object; frame.args[1] and frame.args.encode are read
-- @return whatever p._main returns for those arguments
function p.main(frame)
	local text = frame.args[1]
	local encode = require('Module:yesno')(frame.args.encode)
	return p._main(text, encode)
end

--- Strips wiki and HTML markup from `text`.
-- The substitutions run strictly in the order written; several of them rely
-- on earlier ones having already run (e.g. the generic tag removal must come
-- after the tags whose contents are kept, and the final bracket removal must
-- come after the link handling).
-- @param text   string to clean; when nil/false the function returns nothing
-- @param encode when truthy, the result is passed through mw.text.encode
-- @return the cleaned string (encoded if requested), or nil when text is missing
function p._main(text, encode)
	if not text then return end
	-- The assignment keeps only the first return value of the last gsub
	-- (the string) and drops the substitution count.
	text = mw.text.killMarkers(text)
		-- The pattern had degraded to a space-for-space no-op; handle both the
		-- HTML entity and the literal U+00A0 character (UTF-8 bytes C2 A0).
		:gsub('&nbsp;', ' ') --replace nbsp entities with regular spaces
		:gsub('\194\160', ' ') --replace literal nbsp characters with regular spaces
		:gsub('<br ?/?>', ', ') --replace br with commas
		:gsub('<span.->(.-)</span>', '%1') --remove spans while keeping text inside
		:gsub('<i.->(.-)</i>', '%1') --remove italics while keeping text inside
		:gsub('<b.->(.-)</b>', '%1') --remove bold while keeping text inside
		:gsub('<em.->(.-)</em>', '%1') --remove emphasis while keeping text inside
		:gsub('<strong.->(.-)</strong>', '%1') --remove strong while keeping text inside
		:gsub('<sub.->(.-)</sub>', '%1') --remove subscript markup; retain contents
		:gsub('<sup.->(.-)</sup>', '%1') --remove superscript markup; retain contents
		:gsub('<u.->(.-)</u>', '%1') --remove underline markup; retain contents
		:gsub('<.->.-<.->', '') --strip out remaining tags and the text inside
		:gsub('<.->', '') --remove any other tag markup
		:gsub('%[%[%s*[Ff][Ii][Ll][Ee]%s*:.-%]%]', '') --strip out files
		:gsub('%[%[%s*[Ii][Mm][Aa][Gg][Ee]%s*:.-%]%]', '') --strip out use of image:
		:gsub('%[%[%s*[Cc][Aa][Tt][Ee][Gg][Oo][Rr][Yy]%s*:.-%]%]', '') --strip out categories
		:gsub('%[%[[^%]]-|', '') --strip out piped link text
		:gsub('([^%[])%[[^%[%]][^%]]-%s', '%1') --strip out external link text
		:gsub('^%[[^%[%]][^%]]-%s', '') --strip out external link text
		:gsub('[%[%]]', '') --then strip out remaining [ and ]
		:gsub("'''''", "") --strip out bold italic markup
		:gsub("'''?", "") --not stripping out '''' gives correct output for bolded text in quotes
		:gsub('----+', '') --remove ---- lines
		:gsub("^%s+", "") --strip leading
		:gsub("%s+$", "") --and trailing spaces
		:gsub("%s+", " ") --strip redundant spaces
	if encode then
		return mw.text.encode(text)
	else
		return text
	end
end

return p