Module:String2: Difference between revisions
Undid revision 1027671352 by Hike395 (talk)- please obtain consensus before creating new functions in this core string-handling module |
Richardpruen (talk | contribs) m 1 revision imported: template update |
||
(8 intermediate revisions by 5 users not shown) | |||
Line 1: | Line 1: | ||
require ('strict'); | |||
local p = {} | local p = {} | ||
Line 12: | Line 13: | ||
--[[--------------------------< U C F I R S T >----------------------------------------------------------------

Uppercases the first alphanumeric character of frame.args[1].

Leading markup that must not be upcased (stripmarkers, list markup, bold / italic
markup, html-like tags, html entities, whitespace, miscellaneous punctuation) is
removed first, remembered, and reattached unchanged in front of the result.

When the remaining text begins with a wikilink or a labelled external link, the
first alphanumeric character of the displayed text is upcased; an external link
without a label is returned unchanged.

@param frame  frame object; frame.args[1] is the text to process
@return       the processed text; when frame.args[1] is nil, empty, or only
              whitespace it is returned as-is

]]
p.ucfirst = function (frame )
	local s = frame.args[1];
	if not s or '' == s or s:match ('^%s+$') then								-- when <s> is nil, empty, or only whitespace
		return s;																-- abandon because nothing to do
	end

	s = mw.text.trim (s);

	local prefix_patterns_t = {													-- sequence of prefix patterns
		'^\127[^\127]*UNIQ%-%-%a+%-%x+%-QINU[^\127]*\127',						-- stripmarker
		'^([%*;:#]+)',															-- various list markup
		'^(\'\'\'*)',															-- bold / italic markup
		'^(%b<>)',																-- html-like tags because some templates render these
		'^(&%a+;)',																-- html character entities because some templates render these
		'^(&#%d+;)',															-- html numeric (decimal) entities because some templates render these
		'^(&#x%x+;)',															-- html numeric (hexadecimal) entities because some templates render these
		'^(%s+)',																-- any whitespace characters
		'^([%(%)%-%+%?%.%%!~!@%$%^&_={}/`,‘’„“”ʻ|\"\'\\]+)',					-- miscellaneous punctuation
	}

	local prefixes_t = {};														-- list, bold/italic, and html-like markup, & whitespace saved here

	-- Strip at most one prefix from <str>.  Returns the shortened string and true
	-- when a prefix was removed, or <str> alone (nil flag) when none matched.
	-- Returning after the first hit forces the caller to restart at the top of the
	-- pattern sequence because misc punct removal can break a stripmarker.
	local function prefix_strip (str)
		for _, pattern in ipairs (prefix_patterns_t) do
			-- mw.ustring.match() so that the multibyte characters in the punctuation
			-- class are compared as whole characters; a byte-wise match could cut
			-- another UTF-8 character that shares their lead bytes in half
			local prefix = mw.ustring.match (str, pattern);
			if prefix then
				table.insert (prefixes_t, prefix);								-- save it
				return str:sub (prefix:len() + 1), true;						-- byte length is correct here: the match is anchored at the start
			end
		end
		return str;																-- no prefix found
	end

	-- Upcase the first alphanumeric character that follows <lead_pattern> at the
	-- start of <str>.  A function replacement is used so that a '%' in the matched
	-- text is not interpreted as a capture reference, and so that <str> is simply
	-- returned unchanged when there is no alphanumeric character to upcase.
	local function upcase_after (str, lead_pattern)
		return (mw.ustring.gsub (str, '^(' .. lead_pattern .. ')(%w)', function (lead, letter)
			return lead .. mw.ustring.upper (letter);
		end));
	end

	local prefix_removed;														-- flag; true as long as prefix_strip() finds and removes a prefix
	repeat																		-- one by one remove list, bold/italic, html-like markup, whitespace, etc from start of <s>
		s, prefix_removed = prefix_strip (s);
	until (not prefix_removed);

	local s1 = table.concat (prefixes_t);										-- recreate the prefix string for later reattachment

	local first_text = mw.ustring.match (s, '^%[%[[^%]]+%]%]');				-- extract wikilink at start of string if present

	if first_text then
		if first_text:match ('^%[%[[^|]+|[^%]]+%]%]') then						-- if <first_text> is a piped link
			s = upcase_after (s, '%[%[[^|]+|%W*');								-- upcase first letter of the label
		else																	-- a wikilink but not a piped link
			s = upcase_after (s, '%[%[%W*');
		end
	elseif s:match ('^%[%S+%s+[^%]]+%]') then									-- if <s> is a ext link of some sort; must have label text
		s = upcase_after (s, '%[%S+%s+%W*');
	elseif s:match ('^%[%S+%s*%]') then											-- if <s> is a ext link without label text; nothing to do
		return s1 .. s;															-- reattach prefix string (if present) and done
	else																		-- <s> is not a wikilink or ext link; assume plain text
		s = upcase_after (s, '%W*');
	end

	return s1 .. s;																-- reattach prefix string (if present) and done
end
p.title = function (frame ) | p.title = function (frame ) | ||
Line 94: | Line 141: | ||
local str = mw.text.trim(frame.args[1] or "") | local str = mw.text.trim(frame.args[1] or "") | ||
return mw.text.nowiki(str) | return mw.text.nowiki(str) | ||
end | end | ||
Line 189: | Line 213: | ||
if plain:sub(1, 1) == "f" then plain = false else plain = true end | if plain:sub(1, 1) == "f" then plain = false else plain = true end | ||
-- get the page content and look for 'text' - return position or nomatch | -- get the page content and look for 'text' - return position or nomatch | ||
local content = titleobj:getContent() | local content = titleobj and titleobj:getContent() | ||
return mw.ustring.find(content, text, 1, plain) or nomatch | return content and mw.ustring.find(content, text, 1, plain) or nomatch | ||
end | end | ||
p.findpagetext = function(frame) | p.findpagetext = function(frame) | ||
Line 227: | Line 251: | ||
-- Escape Pattern helper function so that all characters are treated as plain text, as per Module:String
-- @param pattern_str  string that may contain Lua pattern magic characters
-- @return             <pattern_str> with each of ( ) . % + - * ? [ ^ $ ] prefixed by '%'
-- The outer parentheses discard gsub's second return value (the replacement count) so
-- that it cannot leak into a caller's argument list when this call is the last argument.
function p._escapePattern( pattern_str )
	return ( mw.ustring.gsub( pattern_str, "([%(%)%.%%%+%-%*%?%[%^%$%]])", "%%%1" ) )
end
Line 251: | Line 275: | ||
-- Removes every occurrence of the characters listed in <chars> from <source>.
-- Arguments are read through p._getParameters():
--   source  the text to process; trimmed before use
--   chars   the characters to remove
--   plain   when true (the default) <chars> is escaped with p._escapePattern() so that
--           it is taken literally; otherwise it is used as-is inside a pattern set
-- Returns the trimmed source unchanged when either source or chars is empty.
function p.strip( frame )
	local new_args = p._getParameters( frame.args, {'source', 'chars', 'plain'} )
	local source_str = mw.text.trim( new_args['source'] or '' )
	local chars = new_args['chars'] or ''										-- the former trailing "or 'characters'" could never be reached
	if source_str == '' or chars == '' then
		return source_str
	end

	local plain_arg = new_args['plain']
	if plain_arg == nil then													-- default only when absent; "x or true" would also override an explicit false
		plain_arg = true
	end
	if p._getBoolean( plain_arg ) then
		chars = p._escapePattern( chars )
	end

	local result = mw.ustring.gsub( source_str, "[" .. chars .. "]", '' )		-- assignment to one local drops gsub's count
	return result
end
Line 289: | Line 313: | ||
end | end | ||
end | end | ||
end | |||
--[[--------------------------< H Y P H E N _ T O _ D A S H >--------------------------------------------------

Converts a hyphen to a dash under certain conditions.  The hyphen must separate
like items; unlike items are returned unmodified.  These forms are modified:
	letter - letter (A - B)
	digit - digit (4-5)
	digit separator digit - digit separator digit (4.1-4.5 or 4-1-4-5)
	letterdigit - letterdigit (A1-A5) (an optional separator between letter and
		digit is supported – a.1-a.5 or a-1-a-5)
	digitletter - digitletter (5a - 5d) (an optional separator between letter and
		digit is supported – 5.a-5.d or 5-a-5-d)

any other forms are returned unmodified.

str may be a comma- or semicolon-separated list; the items are rejoined with a
comma followed by <spacing>.

An item, or the whole of str, wrapped in ((...)) is accepted as written: the
doubled parentheses are removed and no hyphen conversion is applied to it.

@param str      text to convert; nil or the empty string is returned as-is
@param spacing  text placed after each comma when the list is rejoined; defaults
                to a single space when omitted
@return         the converted text

]]
function p.hyphen_to_dash( str, spacing )
	if (str == nil or str == '') then
		return str
	end

	spacing = spacing or ' '													-- callers may omit <spacing>; without a default the concatenation below fails on nil

	local accept

	str = mw.text.decode(str, true )											-- replace html entities with their characters; semicolon mucks up the text.split

	local out = {}
	local list = mw.text.split (str, '%s*[,;]%s*')								-- split str at comma or semicolon separators if there are any

	for _, item in ipairs (list) do												-- for each item in the list
		item = mw.text.trim(item)												-- trim whitespace
		item, accept = item:gsub ('^%(%((.+)%)%)$', '%1')						-- <accept> is non-zero when the item carried accept-this-as-written markup
		if accept == 0 and mw.ustring.match (item, '^%w*[%.%-]?%w+%s*[%-–—]%s*%w*[%.%-]?%w+$') then	-- if a hyphenated range or has endash or emdash separators
			if item:match ('^%a+[%.%-]?%d+%s*%-%s*%a+[%.%-]?%d+$') or			-- letterdigit hyphen letterdigit (optional separator between letter and digit)
				item:match ('^%d+[%.%-]?%a+%s*%-%s*%d+[%.%-]?%a+$') or			-- digitletter hyphen digitletter (optional separator between digit and letter)
				item:match ('^%d+[%.%-]%d+%s*%-%s*%d+[%.%-]%d+$') or			-- digit separator digit hyphen digit separator digit
				item:match ('^%d+%s*%-%s*%d+$') or								-- digit hyphen digit
				item:match ('^%a+%s*%-%s*%a+$') then							-- letter hyphen letter
					item = item:gsub ('(%w*[%.%-]?%w+)%s*%-%s*(%w*[%.%-]?%w+)', '%1–%2')	-- replace hyphen, remove extraneous space characters
			else
				item = mw.ustring.gsub (item, '%s*[–—]%s*', '–')				-- for endash or emdash separated ranges, replace em with en, remove extraneous whitespace
			end
		end
		table.insert (out, item)												-- add the (possibly modified) item to the output table
	end

	local temp_str = table.concat (out, ',' .. spacing)							-- concatenate the output table into a comma separated string
	temp_str, accept = temp_str:gsub ('^%(%((.+)%)%)$', '%1')					-- remove accept-this-as-written markup when it wraps all of concatenated out
	if accept ~= 0 then
		temp_str = str:gsub ('^%(%((.+)%)%)$', '%1')							-- when global markup removed, return original str; assigning to one variable drops gsub's replacement count
	end
	return temp_str
end
-- Template entry point for p.hyphen_to_dash().
--   frame.args[1]  text to convert; treated as the empty string when absent
--   frame.args[2]  text placed after each list comma; a single space when absent
--                  (templates such as r/rp/ran may ask for a narrower spacing)
function p.hyphen2dash( frame )
	local args = frame.args
	local separator_space = args[2]
	if not separator_space then
		separator_space = ' '													-- space is part of the standard separator for normal spacing
	end
	return p.hyphen_to_dash(args[1] or '', separator_space)
end
-- Similar to [[Module:String#endswith]]
-- Returns 'yes' when frame.args[1] begins with frame.args[2], otherwise the empty string.
-- A missing argument is treated as the empty string so that an omitted parameter
-- yields a result instead of an "attempt to index a nil value" script error.
-- The comparison is byte-wise and literal (no pattern matching is involved).
function p.startswith(frame)
	local text = frame.args[1] or ''
	local prefix = frame.args[2] or ''
	return (text:sub(1, prefix:len()) == prefix) and 'yes' or ''
end
-- Implements [[Template:Isnumeric]]
-- Reads positional argument 1 and the named argument 'boolean' from the frame,
-- falling back to the parent frame for whichever of them the frame does not supply.
-- When argument 1 is a string that the content language's parseFormattedNumber()
-- accepts, returns 1 if boolean == 'true', otherwise the argument itself.
-- In every other case returns 0 if boolean == 'true', otherwise the empty string.
function p.isnumeric(frame)
	-- getParent() yields nil for a frame that has no parent; fall back to an empty
	-- argument table instead of indexing nil
	local parent = frame.getParent and frame:getParent()
	local parent_args = parent and parent.args or {}

	local s = frame.args[1] or parent_args[1]
	local boolean = (frame.args.boolean or parent_args.boolean) == 'true'
	if type(s) == 'string' and mw.getContentLanguage():parseFormattedNumber( s ) then
		return boolean and 1 or s
	end
	return boolean and 0 or ''
end
return p | return p |