@@ Line 1: / Line 1: @@
-local Text = { serial = "2017-11-01",
+local yesNo = require("Module:Yesno")
+local Text = { serial = "2022-07-21",
                 suite  = "Text" }
 --[=[
@@ Line 17: / Line 18: @@
 local SeekQuote         = false
+local function initLatinData()
+    -- Lazily prepare the data used for Latin range checks:
+    --     RangesLatin   -- list of { first, last } code point pairs
+    --     PatternLatin  -- anchored pattern "^[...]*$" built from those pairs
+    -- Either value is left alone when it has already been set.
+    if not RangesLatin then
+        RangesLatin = { {    7,  687 },
+                        { 7531, 7578 },
+                        { 7680, 7935 },
+                        { 8194, 8250 } }
+    end
+    if not PatternLatin then
+        -- Collect the pieces first and join them once at the end.
+        local parts = { "^[" }
+        for _, bounds in ipairs( RangesLatin ) do
+            -- 45 is the code point of "-", giving "first-last" in the set
+            parts[ #parts + 1 ] = mw.ustring.char( bounds[ 1 ],
+                                                   45,
+                                                   bounds[ 2 ] )
+        end    -- for _, bounds
+        parts[ #parts + 1 ] = "]*$"
+        PatternLatin = table.concat( parts )
+    end
+end
+local function initQuoteData()
-local function factoryQuote()
      -- Create quote definitions
-     QuoteLang = { af        = "bd",
+     if not QuoteLang then
+    	QuoteLang =
+    	        { af        = "bd",
                    ar        = "la",
                    be        = "labd",
@@ Line 76: / Line 96: @@
                    ["zh-tw"] = "x300C",
                    ["zh-cn"] = "ld" }
-     QuoteType = { bd    = { { 8222, 8220 },  { 8218, 8217 } },
+     end
+    if not QuoteType then
+    	QuoteType =
+    	        { bd    = { { 8222, 8220 },  { 8218, 8217 } },
                    bdla  = { { 8222, 8220 },  {  171,  187 } },
                    bx    = { { 8222, 8221 },  { 8218, 8217 } },
@@ Line 89: / Line 112: @@
                    x300C = { { 0x300C, 0x300D },
                              { 0x300E, 0x300F } } }
-     return r
+     end
-end -- factoryQuote()
+end -- initQuoteData()
@@ Line 100: / Line 123: @@
      --     alien    -- string, with language code
      --     advance  -- number, with level 1 or 2
-     local r = apply
+     local r = apply and tostring(apply) or ""
+    alien = alien or "en"
+    advance = tonumber(advance) or 0
      local suite
-     if not QuoteLang then
+     initQuoteData()
-        factoryQuote()
+     local slang = alien:match( "^(%l+)-" )
-     end
+    suite = QuoteLang[alien] or slang and QuoteLang[slang] or QuoteLang["en"]
-    suite = QuoteLang[ alien ]
-    if not suite then
-        local slang = alien:match( "^(%l+)-" )
-        if slang then
-            suite = QuoteLang[ slang ]
-        end
-        if not suite then
-            suite = QuoteLang[ "en" ]
-        end
-    end
      if suite then
          local quotes = QuoteType[ suite ]
@@ Line 149: / Line 164: @@
      --     accept  -- true, if no error messages to be appended
      -- Returns: string
-     local r
+     local r = ""
-     if type( apply ) == "table" then
+     apply = type(apply) == "table" and apply or {}
-        local bad   = { }
+    again = math.floor(tonumber(again) or 1)
-        local codes = { }
+    if again < 1 then
-        local s
+    	return ""
-        for k, v in pairs( apply ) do
+    end
-            s = type( v )
+    local bad   = { }
-            if s == "number" then
+    local codes = { }
-                if v < 32  and  v ~= 9  and  v ~= 10 then
+    for _, v in ipairs( apply ) do
-                    v = tostring( v )
+    	local n = tonumber(v)
-                else
+    	if not n or (n < 32 and n ~= 9 and n ~= 10) then
-                    v = math.floor( v )
+    		table.insert(bad, tostring(v))
-                    s = false
+    	else
-                end
+    		table.insert(codes, math.floor(n))
-            elseif s ~= "string" then
+		end
-                v = tostring( v )
+    end
-            end
+    if #bad > 0 then
-            if s then
+    	if not accept then
-                table.insert( bad, v )
+    		r = tostring(  mw.html.create( "span" )
-            else
+                    		:addClass( "error" )
-                table.insert( codes, v )
+                    		:wikitext( "bad codepoints: " .. table.concat( bad, " " )) )
-            end
+    	end
-        end -- for k, v
+    	return r
-        if #bad == 0 then
-            if #codes > 0 then
-                r = mw.ustring.char( unpack( codes ) )
-                if again then
-                    if type( again ) == "number" then
-                        local n = math.floor( again )
-                        if n > 1 then
-                            r = r:rep( n )
-                        elseif n < 1 then
-                            r = ""
-                        end
-                    else
-                        s = "bad repetitions: " .. tostring( again )
-                    end
-                end
-            end
-        else
-            s = "bad codepoints: " .. table.concat( bad, " " )
-        end
-        if s  and  not accept then
-            r = tostring(  mw.html.create( "span" )
-                                  :addClass( "error" )
-                                  :wikitext( s ) )
-        end
      end
-     return r or ""
+    if #codes > 0 then
+    	r = mw.ustring.char( unpack( codes ) )
+    	if again > 1 then
+    		r = r:rep(again)
+    	end
+	end
+     return r
 end -- Text.char()
+local function trimAndFormat(args, fmt)
+	-- Trim every entry of a sequence, drop entries that are empty after
+	-- trimming, and optionally pass each remaining entry through a format.
+	-- Arguments:
+	--    args = sequence table; a non-table value is wrapped in a table first
+	--    fmt  = format string for mw.ustring.format (optional)
+	-- Returns: new sequence table with the resulting strings
+	local items = args
+	if type(items) ~= 'table' then
+		items = {items}
+	end
+	local out = {}
+	for _, raw in ipairs(items) do
+		local text = mw.text.trim(tostring(raw))
+		if text ~= "" then
+			if fmt then
+				text = mw.ustring.format(fmt, text)
+			end
+			out[#out + 1] = text
+		end
+	end
+	return out
+end
 Text.concatParams = function ( args, apply, adapt )
@@ Line 210: / Line 219: @@
      -- Returns: string
      local collect = { }
-     for k, v in pairs( args ) do
+     return table.concat(trimAndFormat(args,adapt), apply or "|")
-        if type( k ) == "number" then
-            v = mw.text.trim( v )
-            if v ~= "" then
-                if adapt then
-                    v = mw.ustring.format( adapt, v )
-                end
-                table.insert( collect, v )
-            end
-        end
-    end -- for k, v
-    return table.concat( collect,  apply or "|" )
 end -- Text.concatParams()
-Text.containsCJK = function ( analyse )
+Text.containsCJK = function ( s )
      -- Is any CJK code within?
      -- Parameter:
-     --     analyse  -- string
+     --     s  -- string
      -- Returns: true, if CJK detected
-     local r
+     s = s and tostring(s) or ""
      if not patternCJK then
          patternCJK = mw.ustring.char( 91,
-, 45,  40959,
+, 45,   4607,
-, 45, 178207,
+, 45,  42191,
+, 45,  43135,
+, 45,  55215,
+, 45,  64255,
+, 45,  65103,
+, 45,  65500,
+, 45, 196607,
 )
      end
-     if mw.ustring.find( analyse, patternCJK ) then
+     return mw.ustring.find( s, patternCJK ) ~= nil
-        r = true
-    else
-        r = false
-    end
-    return r
 end -- Text.containsCJK()
+Text.removeDelimited = function (s, prefix, suffix)
+	-- Remove all text in s delimited by prefix and suffix (inclusive).
+	-- Both delimiters are matched literally, never as Lua patterns.
+	-- A prefix without a following suffix removes everything from the
+	-- prefix up to the end of the string.
+	-- Arguments:
+	--    s = string to process
+	--    prefix = initial delimiter
+	--    suffix = ending delimiter
+	-- Returns: stripped string; s unchanged if either delimiter is empty
+	s = s and tostring(s) or ""
+	prefix = prefix and tostring(prefix) or ""
+	suffix = suffix and tostring(suffix) or ""
+	-- Byte lengths: string.find and string.sub below use byte offsets,
+	-- so the delimiter lengths have to be measured in bytes as well.
+	local prefixLen = #prefix
+	local suffixLen = #suffix
+	if prefixLen == 0 or suffixLen == 0 then
+		return s
+	end
+	local r = s
+	local i = r:find(prefix, 1, true)
+	local j
+	while i do
+		-- Plain search: a delimiter such as "-->" contains pattern magic
+		-- characters and would otherwise match a lone ">".
+		j = r:find(suffix, i + prefixLen, true)
+		if j then
+			r = r:sub(1, i - 1) .. r:sub(j + suffixLen)
+		else
+			r = r:sub(1, i - 1)
+		end
+		-- Search again from the start: joining the two remainders may
+		-- have formed a new prefix across the cut.
+		i = r:find(prefix, 1, true)
+	end
+	return r
+end
 Text.getPlain = function ( adjust )
@@ Line 253: / Line 280: @@
      --     adjust  -- string
      -- Returns: string
-     local i = adjust:find( "<!--", 1, true )
+     local r = Text.removeDelimited(adjust,"<!--","-->")
-    local r = adjust
-    local j
-    while i do
-        j = r:find( "-->",  i + 3,  true )
-        if j then
-            r = r:sub( 1, i ) .. r:sub( j + 3 )
-        else
-            r = r:sub( 1, i )
-        end
-        i = r:find( "<!--", i, true )
-    end    -- "<!--"
      r = r:gsub( "(</?%l[^>]*>)", "" )
-          :gsub( "'''(.+)'''", "%1" )
+          :gsub( "'''", "" )
-          :gsub( "''(.+)''", "%1" )
+          :gsub( "''", "" )
           :gsub( "&nbsp;", " " )
      return r
 end -- Text.getPlain()
+Text.isLatinRange = function (s)
-Text.isLatinRange = function ( adjust )
      -- Are characters expected to be latin or symbols within latin texts?
-     -- Precondition:
+     -- Arguments:
-     --     adjust  -- string, or nil for initialization
+     --  s = string to analyze
      -- Returns: true, if valid for latin only
-     local r
+     s = s and tostring(s) or ""  --- ensure input is always string
-    if not RangesLatin then
+     initLatinData()
-        RangesLatin = { {    7,  687 },
+     return mw.ustring.match(s, PatternLatin) ~= nil
-                        { 7531, 7578 },
-                        { 7680, 7935 },
-                        { 8194, 8250 } }
-    end
-    if not PatternLatin then
-        local range
-        PatternLatin = "^["
-        for i = 1, #RangesLatin do
-            range = RangesLatin[ i ]
-            PatternLatin = PatternLatin ..
-                           mw.ustring.char( range[ 1 ], 45, range[ 2 ] )
-        end    -- for i
-        PatternLatin = PatternLatin .. "]*$"
-     end
-     if adjust then
-        if mw.ustring.match( adjust, PatternLatin ) then
-            r = true
-        else
-            r = false
-        end
-    end
-    return r
 end -- Text.isLatinRange()
-Text.isQuote = function ( ask )
+Text.isQuote = function ( s )
      -- Is this character any quotation mark?
      -- Parameter:
-     --     ask  -- string, with single character
+     --     s = single character to analyze
-     -- Returns: true, if ask is quotation mark
+     -- Returns: true, if s is quotation mark
-     local r
+     s = s and tostring(s) or ""
+    if s == "" then
+    	return false
+    end
      if not SeekQuote then
          SeekQuote = mw.ustring.char(   34,       -- "
@@ Line 332: / Line 327: @@
 x300F )    -- CJK
      end
-     if ask == "" then
+     return mw.ustring.find( SeekQuote, s, 1, true ) ~= nil
-        r = false
-    elseif mw.ustring.find( SeekQuote, ask, 1, true ) then
-        r = true
-    else
-        r = false
-    end
-    return r
 end -- Text.isQuote()
@@ Line 350: / Line 338: @@
      --     adapt  -- string (optional); format including "%s"
      -- Returns: string
-     local collect = { }
+     return mw.text.listToText(trimAndFormat(args, adapt))
-    for k, v in pairs( args ) do
-        if type( k ) == "number" then
-            v = mw.text.trim( v )
-            if v ~= "" then
-                if adapt then
-                    v = mw.ustring.format( adapt, v )
-                end
-                table.insert( collect, v )
-            end
-        end
-    end -- for k, v
-    return mw.text.listToText( collect )
 end -- Text.listToText()
@@ Line 374: / Line 350: @@
      --     advance  -- number, with level 1 or 2, or nil
      -- Returns: quoted string
+    apply = apply and tostring(apply) or ""
      local mode, slang
      if type( alien ) == "string" then
@@ Line 401: / Line 378: @@
      --     advance  -- number, with level 1 or 2, or nil
      -- Returns: string; possibly quoted
-     local r = mw.text.trim( apply )
+     local r = mw.text.trim( apply and tostring(apply) or "" )
      local s = mw.ustring.sub( r, 1, 1 )
      if s ~= ""  and  not Text.isQuote( s, advance ) then
@@ Line 429: / Line 406: @@
 )
      end
-     decomposed = mw.ustring.toNFD( adjust )
+     decomposed = mw.ustring.toNFD( adjust and tostring(adjust) or "" )
      cleanup    = mw.ustring.gsub( decomposed, PatternCombined, "" )
      return mw.ustring.toNFC( cleanup )
@@ Line 461: / Line 438: @@
-Text.ucfirstAll = function ( adjust )
+Text.ucfirstAll = function ( adjust)
      -- Capitalize all words
-     -- Precondition:
+     -- Arguments:
-     --     adjust  -- string
+     --     adjust = string to adjust
      -- Returns: string with all first letters in upper case
-     local r = " " .. adjust
+    adjust = adjust and tostring(adjust) or ""
+     local r = mw.text.decode(adjust,true)
      local i = 1
      local c, j, m
-     if adjust:find( "&" ) then
+     m = (r ~= adjust)
-        r = r:gsub( "&amp;",      "&#38;" )
+    r = " "..r
-             :gsub( "&lt;",       "&#60;" )
-             :gsub( "&gt;",       "&#62;" )
-             :gsub( "&nbsp;",    "&#160;" )
-             :gsub( "&thinsp;", "&#8201;" )
-             :gsub( "&zwnj;",   "&#8204;" )
-             :gsub( "&zwj;",    "&#8205;" )
-             :gsub( "&lrm;",    "&#8206;" )
-             :gsub( "&rlm;",    "&#8207;" )
-        m = true
-    end
      while i do
          i = mw.ustring.find( r, "%W%l", i )
@@ Line 495: / Line 463: @@
      r = r:sub( 2 )
      if m then
-        r = r:gsub(     "&#38;", "&amp;" )
+    	r = mw.text.encode(r)
-             :gsub(     "&#60;", "&lt;" )
-             :gsub(     "&#62;", "&gt;" )
-             :gsub(    "&#160;", "&nbsp;" )
-             :gsub(   "&#8201;", "&thinsp;" )
-             :gsub(   "&#8204;", "&zwnj;" )
-             :gsub(   "&#8205;", "&zwj;" )
-             :gsub(   "&#8206;", "&lrm;" )
-             :gsub(   "&#8207;", "&rlm;" )
-             :gsub( "&#X(%x+);", "&#x%1;" )
      end
      return r
 end -- Text.ucfirstAll()
@@ Line 518: / Line 476: @@
      -- Returns: string with non-latin parts enclosed in <span>
      local r
-     Text.isLatinRange()
+     initLatinData()
      if mw.ustring.match( adjust, PatternLatin ) then
          -- latin only, horizontal dashes, quotes
@@ Line 606: / Line 564: @@
      return r
 end -- Text.uprightNonlatin()
@@ Line 612: / Line 569: @@
      local r
      if about == "quote" then
-         factoryQuote()
+         initQuoteData()
          r = { }
          r.QuoteLang = QuoteLang
@@ Line 624: / Line 581: @@
 -- Export
 local p = { }
+-- Predicate wrappers: return "1" if the Text function of the same name
+-- yields a true value for the first unnamed parameter, else "".
+for _, name in ipairs({'containsCJK','isLatinRange','isQuote','sentenceTerminated'}) do
+	p[name] = function (frame)
+		local handler = Text[name]
+		if handler( frame.args[ 1 ] or "" ) then
+			return "1"
+		end
+		return ""
+	end
+end
+-- Transformation wrappers: hand the first unnamed parameter (or "" when
+-- missing) to the Text function of the same name and return its result.
+for _, name in ipairs({'getPlain','removeDiacritics','ucfirstAll','uprightNonlatin'}) do
+	p[name] = function (frame)
+		local handler = Text[name]
+		return handler( frame.args[ 1 ] or "" )
+	end
+end
 function p.char( frame )
@@ Line 634: / Line 603: @@
      end
      if story then
-         local items = mw.text.split( story, "%s+" )
+         local items = mw.text.split( mw.text.trim(story), "%s+" )
          if #items > 0 then
              local j
-             lenient  = ( params.errors == "0" )
+             lenient  = (yesNo(params.errors) == false)
              codes    = { }
              multiple = tonumber( params[ "*" ] )
-             for k, v in pairs( items ) do
+             for _, v in ipairs( items ) do
-                if v:sub( 1, 1 ) == "x" then
+            	j = tonumber((v:sub( 1, 1 ) == "x" and "0" or "") .. v)
-                    j = tonumber( "0" .. v )
+                 table.insert( codes,  j or v )
-                 elseif v == "" then
+             end
-                    v = false
-                else
-                    j = tonumber( v )
-                end
-                if v then
-                    table.insert( codes,  j or v )
-                end
-             end -- for k, v
          end
      end
@@ Line 673: / Line 634: @@
                                frame.args.format )
 end
-function p.containsCJK( frame )
-    return Text.containsCJK( frame.args[ 1 ] or "" ) and "1" or ""
-end
-function p.getPlain( frame )
-    return Text.getPlain( frame.args[ 1 ] or "" )
-end
-function p.isLatinRange( frame )
-    return Text.isLatinRange( frame.args[ 1 ] or "" ) and "1" or ""
-end
-function p.isQuote( frame )
-    return Text.isQuote( frame.args[ 1 ] or "" ) and "1" or ""
-end
@@ Line 770: / Line 714: @@
                                 tonumber( frame.args[3] ) )
 end
-function p.removeDiacritics( frame )
-    return Text.removeDiacritics( frame.args[ 1 ] or "" )
-end
-function p.sentenceTerminated( frame )
-    return Text.sentenceTerminated( frame.args[ 1 ] or "" ) and "1" or ""
-end
-function p.ucfirstAll( frame )
-    return Text.ucfirstAll( frame.args[ 1 ] or "" )
-end
-function p.uprightNonlatin( frame )
-    return Text.uprightNonlatin( frame.args[ 1 ] or "" )
-end

Module:Text: Difference between revisions

Module:Text (view source)

Revision as of 17:43, 21 July 2022