Anonymous user
Module:Text: Difference between revisions
update date
Richardpruen (talk | contribs) m (1 revision imported) |
(update date) |
||
Line 1: | Line 1: | ||
local Text = { serial = " | local yesNo = require("Module:Yesno") | ||
local Text = { serial = "2022-07-21", | |||
suite = "Text" } | suite = "Text" } | ||
--[=[ | --[=[ | ||
Line 17: | Line 18: | ||
local SeekQuote = false | local SeekQuote = false | ||
local function initLatinData()
    -- Lazily prepare the shared Latin data.
    --     RangesLatin  -- sequence of { low, high } codepoint pairs
    --     PatternLatin -- anchored character-class pattern assembled
    --                     from those pairs
    -- Each value is only created while it is still unset, so repeated
    -- calls are cheap.
    if not RangesLatin then
        RangesLatin = { { 7, 687 },
                        { 7531, 7578 },
                        { 7680, 7935 },
                        { 8194, 8250 } }
    end
    if PatternLatin then
        return
    end
    local parts = { "^[" }
    for _, span in ipairs( RangesLatin ) do
        -- 45 is "-", yielding "<low>-<high>" inside the character class
        parts[ #parts + 1 ] = mw.ustring.char( span[ 1 ], 45, span[ 2 ] )
    end
    parts[ #parts + 1 ] = "]*$"
    PatternLatin = table.concat( parts )
end
local function initQuoteData() | |||
local function | |||
-- Create quote definitions | -- Create quote definitions | ||
QuoteLang = { af = "bd", | if not QuoteLang then | ||
QuoteLang = | |||
{ af = "bd", | |||
ar = "la", | ar = "la", | ||
be = "labd", | be = "labd", | ||
Line 76: | Line 96: | ||
["zh-tw"] = "x300C", | ["zh-tw"] = "x300C", | ||
["zh-cn"] = "ld" } | ["zh-cn"] = "ld" } | ||
QuoteType = { bd = { { 8222, 8220 }, { 8218, 8217 } }, | end | ||
if not QuoteType then | |||
QuoteType = | |||
{ bd = { { 8222, 8220 }, { 8218, 8217 } }, | |||
bdla = { { 8222, 8220 }, { 171, 187 } }, | bdla = { { 8222, 8220 }, { 171, 187 } }, | ||
bx = { { 8222, 8221 }, { 8218, 8217 } }, | bx = { { 8222, 8221 }, { 8218, 8217 } }, | ||
Line 89: | Line 112: | ||
x300C = { { 0x300C, 0x300D }, | x300C = { { 0x300C, 0x300D }, | ||
{ 0x300E, 0x300F } } } | { 0x300E, 0x300F } } } | ||
end | |||
end -- | end -- initQuoteData() | ||
Line 100: | Line 123: | ||
-- alien -- string, with language code | -- alien -- string, with language code | ||
-- advance -- number, with level 1 or 2 | -- advance -- number, with level 1 or 2 | ||
local r = apply | local r = apply and tostring(apply) or "" | ||
alien = alien or "en" | |||
advance = tonumber(advance) or 0 | |||
local suite | local suite | ||
initQuoteData() | |||
local slang = alien:match( "^(%l+)-" ) | |||
suite = QuoteLang[alien] or slang and QuoteLang[slang] or QuoteLang["en"] | |||
if suite then | if suite then | ||
local quotes = QuoteType[ suite ] | local quotes = QuoteType[ suite ] | ||
Line 149: | Line 164: | ||
-- accept -- true, if no error messages to be appended | -- accept -- true, if no error messages to be appended | ||
-- Returns: string | -- Returns: string | ||
local r | local r = "" | ||
apply = type(apply) == "table" and apply or {} | |||
again = math.floor(tonumber(again) or 1) | |||
if again < 1 then | |||
return "" | |||
end | |||
local bad = { } | |||
local codes = { } | |||
for _, v in ipairs( apply ) do | |||
local n = tonumber(v) | |||
if not n or (n < 32 and n ~= 9 and n ~= 10) then | |||
table.insert(bad, tostring(v)) | |||
else | |||
table.insert(codes, math.floor(n)) | |||
end | |||
end | |||
if #bad > 0 then | |||
if not accept then | |||
r = tostring( mw.html.create( "span" ) | |||
:addClass( "error" ) | |||
:wikitext( "bad codepoints: " .. table.concat( bad, " " )) ) | |||
end | |||
return r | |||
end | end | ||
return r | if #codes > 0 then | ||
r = mw.ustring.char( unpack( codes ) ) | |||
if again > 1 then | |||
r = r:rep(again) | |||
end | |||
end | |||
return r | |||
end -- Text.char() | end -- Text.char() | ||
local function trimAndFormat(args, fmt)
    -- Trim every sequence entry of args, drop entries that are empty
    -- after trimming, and optionally wrap each remaining entry.
    -- Arguments:
    --     args = table (sequence part is used), or a single value that
    --            is treated as a one-element list
    --     fmt  = format string handed to mw.ustring.format (optional)
    -- Returns: new sequence table of strings
    local items = args
    if type(items) ~= 'table' then
        items = { items }
    end
    local kept = {}
    for _, raw in ipairs(items) do
        local piece = mw.text.trim(tostring(raw))
        if piece ~= "" then
            if fmt then
                piece = mw.ustring.format(fmt, piece)
            end
            kept[#kept + 1] = piece
        end
    end
    return kept
end
Text.concatParams = function ( args, apply, adapt ) | Text.concatParams = function ( args, apply, adapt ) | ||
Line 210: | Line 219: | ||
-- Returns: string | -- Returns: string | ||
local collect = { } | local collect = { } | ||
return table.concat(trimAndFormat(args,adapt), apply or "|") | |||
end -- Text.concatParams() | end -- Text.concatParams() | ||
Text.containsCJK = function ( s )
    -- Does the text contain at least one CJK character?
    -- Parameter:
    --     s  -- string; other values are converted with tostring(),
    --           nil and false are treated as ""
    -- Returns: true, if CJK detected
    local text = ""
    if s then
        text = tostring( s )
    end
    if not patternCJK then
        -- Built once: 91 is "[", 45 is "-", 93 is "]", so the result
        -- is a character class of codepoint ranges.
        patternCJK = mw.ustring.char( 91,
                                      4352, 45, 4607,
                                      11904, 45, 42191,
                                      43072, 45, 43135,
                                      44032, 45, 55215,
                                      63744, 45, 64255,
                                      65072, 45, 65103,
                                      65381, 45, 65500,
                                      131072, 45, 196607,
                                      93 )
    end
    local hit = mw.ustring.find( text, patternCJK )
    return hit ~= nil
end -- Text.containsCJK()
Text.removeDelimited = function (s, prefix, suffix)
    -- Remove all text in s delimited by prefix and suffix (inclusive).
    -- Both delimiters are matched literally; Lua pattern magic
    -- characters in them have no special meaning.
    -- A prefix without a following suffix removes the rest of the string.
    -- Arguments:
    --     s      = string to process
    --     prefix = initial delimiter
    --     suffix = ending delimiter
    -- Returns: stripped string; s unchanged if either delimiter is empty
    s = s and tostring(s) or ""
    prefix = prefix and tostring(prefix) or ""
    suffix = suffix and tostring(suffix) or ""
    -- Byte lengths: string.find / string.sub below work on byte offsets,
    -- so codepoint counts would be wrong for non-ASCII delimiters.
    local prefixLen = #prefix
    local suffixLen = #suffix
    if prefixLen == 0 or suffixLen == 0 then
        return s
    end
    local r = s
    local i = r:find(prefix, 1, true)
    local j
    while i do
        -- Plain search: a suffix such as "-->" would otherwise be read
        -- as a pattern and match the first bare ">".
        j = r:find(suffix, i + prefixLen, true)
        if j then
            r = r:sub(1, i - 1) .. r:sub(j + suffixLen)
        else
            r = r:sub(1, i - 1)
        end
        -- The text before i contained no prefix, so a new occurrence
        -- (possibly formed by joining both sides of the cut) cannot
        -- start earlier than prefixLen - 1 bytes before the cut.
        i = r:find(prefix, math.max(1, i - prefixLen + 1), true)
    end
    return r
end
Text.getPlain = function ( adjust ) | Text.getPlain = function ( adjust ) | ||
Line 253: | Line 280: | ||
-- adjust -- string | -- adjust -- string | ||
-- Returns: string | -- Returns: string | ||
local | local r = Text.removeDelimited(adjust,"<!--","-->") | ||
r = r:gsub( "(</?%l[^>]*>)", "" ) | r = r:gsub( "(</?%l[^>]*>)", "" ) | ||
:gsub( " | :gsub( "'''", "" ) | ||
:gsub( " | :gsub( "''", "" ) | ||
:gsub( " ", " " ) | :gsub( " ", " " ) | ||
return r | return r | ||
end -- Text.getPlain() | end -- Text.getPlain() | ||
Text.isLatinRange = function (s)
    -- Are characters expected to be latin or symbols within latin texts?
    -- Arguments:
    --     s = string to analyze; other values are converted with
    --         tostring(), nil and false are treated as ""
    -- Returns: true, if valid for latin only
    local text = ""
    if s then
        text = tostring(s)
    end
    initLatinData()   -- makes sure PatternLatin has been built
    if mw.ustring.match(text, PatternLatin) then
        -- an empty match is still a match ("" is truthy in Lua)
        return true
    end
    return false
end -- Text.isLatinRange()
Text.isQuote = function ( | Text.isQuote = function ( s ) | ||
-- Is this character any quotation mark? | -- Is this character any quotation mark? | ||
-- Parameter: | -- Parameter: | ||
-- | -- s = single character to analyze | ||
-- Returns: true, if | -- Returns: true, if s is quotation mark | ||
s = s and tostring(s) or "" | |||
if s == "" then | |||
return false | |||
end | |||
if not SeekQuote then | if not SeekQuote then | ||
SeekQuote = mw.ustring.char( 34, -- " | SeekQuote = mw.ustring.char( 34, -- " | ||
Line 332: | Line 327: | ||
0x300F ) -- CJK | 0x300F ) -- CJK | ||
end | end | ||
return mw.ustring.find( SeekQuote, s, 1, true ) ~= nil | |||
end -- Text.isQuote() | end -- Text.isQuote() | ||
Line 350: | Line 338: | ||
-- adapt -- string (optional); format including "%s" | -- adapt -- string (optional); format including "%s" | ||
-- Returns: string | -- Returns: string | ||
return mw.text.listToText(trimAndFormat(args, adapt)) | |||
end -- Text.listToText() | end -- Text.listToText() | ||
Line 374: | Line 350: | ||
-- advance -- number, with level 1 or 2, or nil | -- advance -- number, with level 1 or 2, or nil | ||
-- Returns: quoted string | -- Returns: quoted string | ||
apply = apply and tostring(apply) or "" | |||
local mode, slang | local mode, slang | ||
if type( alien ) == "string" then | if type( alien ) == "string" then | ||
Line 401: | Line 378: | ||
-- advance -- number, with level 1 or 2, or nil | -- advance -- number, with level 1 or 2, or nil | ||
-- Returns: string; possibly quoted | -- Returns: string; possibly quoted | ||
local r = mw.text.trim( apply ) | local r = mw.text.trim( apply and tostring(apply) or "" ) | ||
local s = mw.ustring.sub( r, 1, 1 ) | local s = mw.ustring.sub( r, 1, 1 ) | ||
if s ~= "" and not Text.isQuote( s, advance ) then | if s ~= "" and not Text.isQuote( s, advance ) then | ||
Line 429: | Line 406: | ||
93 ) | 93 ) | ||
end | end | ||
decomposed = mw.ustring.toNFD( adjust ) | decomposed = mw.ustring.toNFD( adjust and tostring(adjust) or "" ) | ||
cleanup = mw.ustring.gsub( decomposed, PatternCombined, "" ) | cleanup = mw.ustring.gsub( decomposed, PatternCombined, "" ) | ||
return mw.ustring.toNFC( cleanup ) | return mw.ustring.toNFC( cleanup ) | ||
Line 461: | Line 438: | ||
Text.ucfirstAll = function ( adjust ) | Text.ucfirstAll = function ( adjust) | ||
-- Capitalize all words | -- Capitalize all words | ||
-- | -- Arguments: | ||
-- adjust | -- adjust = string to adjust | ||
-- Returns: string with all first letters in upper case | -- Returns: string with all first letters in upper case | ||
local r = | adjust = adjust and tostring(adjust) or "" | ||
local r = mw.text.decode(adjust,true) | |||
local i = 1 | local i = 1 | ||
local c, j, m | local c, j, m | ||
m = (r ~= adjust) | |||
r = " "..r | |||
while i do | while i do | ||
i = mw.ustring.find( r, "%W%l", i ) | i = mw.ustring.find( r, "%W%l", i ) | ||
Line 495: | Line 463: | ||
r = r:sub( 2 ) | r = r:sub( 2 ) | ||
if m then | if m then | ||
r = mw.text.encode(r) | |||
end | end | ||
return r | return r | ||
end -- Text.ucfirstAll() | end -- Text.ucfirstAll() | ||
Line 518: | Line 476: | ||
-- Returns: string with non-latin parts enclosed in <span> | -- Returns: string with non-latin parts enclosed in <span> | ||
local r | local r | ||
initLatinData() | |||
if mw.ustring.match( adjust, PatternLatin ) then | if mw.ustring.match( adjust, PatternLatin ) then | ||
-- latin only, horizontal dashes, quotes | -- latin only, horizontal dashes, quotes | ||
Line 606: | Line 564: | ||
return r | return r | ||
end -- Text.uprightNonlatin() | end -- Text.uprightNonlatin() | ||
Line 612: | Line 569: | ||
local r | local r | ||
if about == "quote" then | if about == "quote" then | ||
initQuoteData() | |||
r = { } | r = { } | ||
r.QuoteLang = QuoteLang | r.QuoteLang = QuoteLang | ||
Line 624: | Line 581: | ||
-- Export | -- Export | ||
local p = { } | local p = { } | ||
-- Template-facing wrappers for boolean checks:
-- the first unnamed argument is tested, "1" stands for true, "" for false.
for _, name in ipairs({'containsCJK','isLatinRange','isQuote','sentenceTerminated'}) do
    p[name] = function (frame)
        local subject = frame.args[ 1 ] or ""
        if Text[name]( subject ) then
            return "1"
        end
        return ""
    end
end

-- Template-facing wrappers for string transformations:
-- the first unnamed argument is passed through the library function
-- and its result is returned unchanged.
for _, name in ipairs({'getPlain','removeDiacritics','ucfirstAll','uprightNonlatin'}) do
    p[name] = function (frame)
        local subject = frame.args[ 1 ] or ""
        return Text[name]( subject )
    end
end
function p.char( frame ) | function p.char( frame ) | ||
Line 634: | Line 603: | ||
end | end | ||
if story then | if story then | ||
local items = mw.text.split( story, "%s+" ) | local items = mw.text.split( mw.text.trim(story), "%s+" ) | ||
if #items > 0 then | if #items > 0 then | ||
local j | local j | ||
lenient = ( params.errors == | lenient = (yesNo(params.errors) == false) | ||
codes = { } | codes = { } | ||
multiple = tonumber( params[ "*" ] ) | multiple = tonumber( params[ "*" ] ) | ||
for | for _, v in ipairs( items ) do | ||
j = tonumber((v:sub( 1, 1 ) == "x" and "0" or "") .. v) | |||
table.insert( codes, j or v ) | |||
end | |||
end | |||
end | end | ||
end | end | ||
Line 673: | Line 634: | ||
frame.args.format ) | frame.args.format ) | ||
end | end | ||
Line 770: | Line 714: | ||
tonumber( frame.args[3] ) ) | tonumber( frame.args[3] ) ) | ||
end | end | ||