Module:Unicode data: Difference between revisions
Richardpruen (talk | contribs) m 1 revision imported: Templates and CSS files |
Richardpruen (talk | contribs) m 1 revision imported: template update |
||
(3 intermediate revisions by 3 users not shown) | |||
Line 98: | Line 98: | ||
{ 0x30000, 0x3134A, "CJK UNIFIED IDEOGRAPH-%04X" }, -- CJK Ideograph Extension G | { 0x30000, 0x3134A, "CJK UNIFIED IDEOGRAPH-%04X" }, -- CJK Ideograph Extension G | ||
{ 0x31350, 0x323AF, "CJK UNIFIED IDEOGRAPH-%04X" }, -- CJK Ideograph Extension H | { 0x31350, 0x323AF, "CJK UNIFIED IDEOGRAPH-%04X" }, -- CJK Ideograph Extension H | ||
{ 0x2EBF0, 0x2EE5D, "CJK UNIFIED IDEOGRAPH-%04X" }, -- CJK Ideograph Extension I | |||
{ 0xF0000, 0xFFFFD, "<private-use-%04X>" }, -- Plane 15 Private Use | { 0xF0000, 0xFFFFD, "<private-use-%04X>" }, -- Plane 15 Private Use | ||
{ 0x100000, 0x10FFFD, "<private-use-%04X>" } -- Plane 16 Private Use | { 0x100000, 0x10FFFD, "<private-use-%04X>" } -- Plane 16 Private Use | ||
Line 125: | Line 126: | ||
--]] | --]] | ||
function p.is_noncharacter(codepoint) | |||
function p. | |||
-- U+FDD0-U+FDEF and all code points ending in FFFE or FFFF are Unassigned | -- U+FDD0-U+FDEF and all code points ending in FFFE or FFFF are Unassigned | ||
-- (Cn) and specifically noncharacters: | -- (Cn) and specifically noncharacters: | ||
-- https://www.unicode.org/faq/private_use.html#nonchar4 | -- https://www.unicode.org/faq/private_use.html#nonchar4 | ||
return 0xFDD0 <= codepoint and (codepoint <= 0xFDEF | |||
or floor(codepoint % 0x10000) >= 0xFFFE) then | or floor(codepoint % 0x10000) >= 0xFFFE) | ||
end | |||
-- https://www.unicode.org/versions/Unicode11.0.0/ch04.pdf, section 4.8 | |||
function p.lookup_name(codepoint) | |||
if p.is_noncharacter(codepoint) then | |||
return ("<noncharacter-%04X>"):format(codepoint) | return ("<noncharacter-%04X>"):format(codepoint) | ||
end | end | ||
Line 161: | Line 167: | ||
end | end | ||
function p.lookup_image(codepoint) | function p.lookup_image(codepoint) | ||
local data = loader[('images/%03X'):format(codepoint / 0x1000)] | local data = loader[('images/%03X'):format(codepoint / 0x1000)] | ||
Line 170: | Line 174: | ||
end | end | ||
end | end | ||
local planes = { | local planes = { | ||
Line 437: | Line 440: | ||
local Latn = false | local Latn = false | ||
local i = 0; -- indexer for use in error messages | |||
for codepoint in mw.ustring.gcodepoint(str) do | for codepoint in mw.ustring.gcodepoint(str) do | ||
i = i + 1; -- bump the indexer | |||
local script = lookup_script(codepoint) | local script = lookup_script(codepoint) | ||
Line 445: | Line 450: | ||
elseif not (script == "Zyyy" or script == "Zinh" | elseif not (script == "Zyyy" or script == "Zinh" | ||
or script == "Zzzz") then | or script == "Zzzz") then | ||
return false | return false, i -- abandon as not Latn; identify the offending character's position | ||
end | end | ||
end | end | ||
return Latn | return Latn, (not Latn and i) or nil -- when <Latn> false, return offending character's position as second return value; nil else | ||
end | end | ||
Line 481: | Line 486: | ||
return result | return result | ||
end | end | ||
--[[--------------------------< I S _ R T L _ F R A M E >------------------------------------------------------

External entry point for {{#invoke:}}: determines whether the text given as the first positional parameter is rtl.

Html and html-like tags (anything matched by the balanced pattern '%b<>', e.g. <br />) are stripped before the
test so that those tags don't corrupt the is-rtl-is-not-rtl determination.

A missing first parameter is treated like an empty one instead of raising a script error.

@param frame  frame object supplied by {{#invoke:}}; only frame.args[1] is read
@return whatever p.is_rtl() returns for the stripped text

]]

function p.is_rtl_frame (frame)
	local str = frame.args[1] or '';	-- missing parameter: behave as for an empty string
	str = str:gsub ('%b<>', '');	-- strip any html and html-like tags; only gsub's first result is kept
	return p.is_rtl (str);	-- hand the remaining text to the rtl test
end
local function get_codepoint(args, arg) | local function get_codepoint(args, arg) |