Module:Chem2: Difference between revisions

Safer nicotine wiki Tobacco Harm Reduction
Jump to navigation Jump to search
m (1 revision imported)
imported>Pppery
(Per edit request)
 
(4 intermediate revisions by 3 users not shown)
Line 2: Line 2:
local p = {} -- module's table
local p = {} -- module's table


local am = {}  -- Elements with wiki links
-- Elements with wiki links
am.H="[[Hydrogen|H]]";am.He="[[Helium|He]]";
local am = {
am.Li="[[Lithium|Li]]";am.Be="[[Beryllium|Be]]";am.B="[[Boron|B]]";am.C="[[Carbon|C]]";am.N="[[Nitrogen|N]]";am.O="[[Oxygen|O]]";am.F="[[Fluorine|F]]";am.Ne="[[Neon|Ne]]";
H = "Hydrogen",
am.Na="[[Sodium|Na]]";am.Mg="[[Magnesium|Mg]]";am.Al="[[Aluminium |Al]]";am.Si="[[Silicon|Si]]";am.P="[[Phosphorus|P]]";am.S="[[Sulfur|S]]";am.Cl="[[Chlorine|Cl]]";am.Ar="[[Argon|Ar]]";
He = "Helium",
am.K="[[Potassium|K]]";am.Ca="[[Calcium|Ca]]";am.Sc="[[Scandium|Sc]]";am.Ti="[[Titanium|Ti]]";am.V="[[Vanadium|V]]";am.Cr="[[Chromium|Cr]]";am.Mn="[[Manganese|Mn]]";am.Fe="[[Iron|Fe]]";am.Co="[[Cobalt|Co]]";am.Ni="[[Nickel|Ni]]";am.Cu="[[Copper|Cu]]";am.Zn="[[Zinc|Zn]]";am.Ga="[[Gallium|Ga]]";am.Ge="[[Germanium|Ge]]";am.As="[[Arsenic|As]]";am.Se="[[Selenium|Se]]";am.Br="[[Bromine|Br]]";am.Kr="[[Krypton|Kr]]";am.Rb="[[Rubidium|Rb]]";
Li = "Lithium",
am.Sr="[[Strontium|Sr]]";am.Y="[[Yttrium|Y]]";am.Zr="[[Zirconium|Zr]]";am.Nb="[[Niobium|Nb]]";am.Mo="[[Molybdenum|Mo]]";am.Tc="[[Technetium|Tc]]";am.Ru="[[Ruthenium|Ru]]";am.Rh="[[Rhodium|Rh]]";am.Pd="[[Palladium|Pd]]";am.Ag="[[Silver|Ag]]";am.Cd="[[Cadmium|Cd]]";am.In="[[Indium|In]]";am.Sn="[[Tin|Sn]]";am.Sb="[[Antimony|Sb]]";am.Te="[[Tellurium|Te]]";am.I="[[Iodine|I]]";am.Xe="[[Xenon|Xe]]";
Be = "Beryllium",
am.Cs="[[Caesium|Cs]]";am.Ba="[[Barium|Ba]]";am.La="[[Lanthanum|La]]";am.Ce="[[Cerium|Ce]]";am.Pr="[[Praseodymium|Pr]]";am.Nd="[[Neodymium|Nd]]";am.Pm="[[Promethium|Pm]]";am.Sm="[[Samarium|Sm]]";am.Eu="[[Europium|Eu]]";am.Gd="[[Gadolinium|Gd]]";am.Tb="[[Terbium|Tb]]";am.Dy="[[Dysprosium|Dy]]";am.Ho="[[Holmium|Ho]]";am.Er="[[Erbium|Er]]";am.Tm="[[Thulium|Tm]]";am.Yb="[[Ytterbium|Yb]]";am.Lu="[[Lutetium|Lu]]";am.Hf="[[Hafnium|Hf]]";am.Ta="[[Tantalum|Ta]]";am.W="[[Tungsten|W]]";am.Re="[[Rhenium|Re]]";am.Os="[[Osmium|Os]]";am.Ir="[[Iridium|Ir]]";am.Pt="[[Platinum|Pt]]";am.Au="[[Gold|Au]]";am.Hg="[[Mercury (element)|Hg]]";am.Tl="[[Thallium|Tl]]";am.Pb="[[Lead|Pb]]";am.Bi="[[Bismuth|Bi]]";am.Po="[[Polonium|Po]]";am.At="[[Astatine|At]]";am.Rn="[[Radon|Rn]]";
B = "Boron",
am.Fr="[[Francium|Fr]]";am.Ra="[[Radium|Ra]]";am.Ac="[[Actinium|Ac]]";am.Th="[[Thorium|Th]]";am.Pa="[[Protactinium|Pa]]";am.U="[[Uranium|U]]";am.Np="[[Neptunium|Np]]";am.Pu="[[Plutonium|Pu]]";am.Am="[[Americium|Am]]";am.Cm="[[Curium|Cm]]";am.Bk="[[Berkelium|Bk]]";am.Cf="[[Californium|Cf]]";am.Es="[[Einsteinium|Es]]";am.Fm="[[Fermium|Fm]]";am.Md="[[Mendelevium|Md]]";am.No="[[Nobelium|No]]";am.Lr="[[Lawrencium|Lr]]";am.Rf="[[Rutherfordium|Rf]]";am.Db="[[Dubnium|Db]]";am.Sg="[[Seaborgium|Sg]]";am.Bh="[[Bohrium|Bh]]";am.Hs="[[Hassium|Hs]]";am.Mt="[[Meitnerium|Mt]]";am.Ds="[[Darmstadtium|Ds]]";am.Rg="[[Roentgenium|Rg]]";am.Cp="[[Copernicium|Cp]]";am.Nh="[[Nihonium|Nh]]";am.Fl="[[Flerovium|Fl]]";am.Mc="[[Moscovium|Mc]]";am.Lv="[[Livermorium|Lv]]";am.Ts="[[Tennessine|Ts]]";am.Og="[[Oganesson|Og]]";
C = "Carbon",
N = "Nitrogen",
O = "Oxygen",
F = "Fluorine",
Ne = "Neon",
Na = "Sodium",
Mg = "Magnesium",
Al = "Aluminium",
Si = "Silicon",
P = "Phosphorus",
S = "Sulfur",
Cl = "Chlorine",
Ar = "Argon",
K = "Potassium",
Ca = "Calcium",
Sc = "Scandium",
Ti = "Titanium",
V = "Vanadium",
Cr = "Chromium",
Mn = "Manganese",
Fe = "Iron",
Co = "Cobalt",
Ni = "Nickel",
Cu = "Copper",
Zn = "Zinc",
Ga = "Gallium",
Ge = "Germanium",
As = "Arsenic",
Se = "Selenium",
Br = "Bromine",
Kr = "Krypton",
Rb = "Rubidium",
Sr = "Strontium",
Y = "Yttrium",
Zr = "Zirconium",
Nb = "Niobium",
Mo = "Molybdenum",
Tc = "Technetium",
Ru = "Ruthenium",
Rh = "Rhodium",
Pd = "Palladium",
Ag = "Silver",
Cd = "Cadmium",
In = "Indium",
Sn = "Tin",
Sb = "Antimony",
Te = "Tellurium",
I = "Iodine",
Xe = "Xenon",
Cs = "Caesium",
Ba = "Barium",
La = "Lanthanum",
Ce = "Cerium",
Pr = "Praseodymium",
Nd = "Neodymium",
Pm = "Promethium",
Sm = "Samarium",
Eu = "Europium",
Gd = "Gadolinium",
Tb = "Terbium",
Dy = "Dysprosium",
Ho = "Holmium",
Er = "Erbium",
Tm = "Thulium",
Yb = "Ytterbium",
Lu = "Lutetium",
Hf = "Hafnium",
Ta = "Tantalum",
W = "Tungsten",
Re = "Rhenium",
Os = "Osmium",
Ir = "Iridium",
Pt = "Platinum",
Au = "Gold",
Hg = "Mercury (element)",
Tl = "Thallium",
Pb = "Lead",
Bi = "Bismuth",
Po = "Polonium",
At = "Astatine",
Rn = "Radon",
Fr = "Francium",
Ra = "Radium",
Ac = "Actinium",
Th = "Thorium",
Pa = "Protactinium",
U = "Uranium",
Np = "Neptunium",
Pu = "Plutonium",
Am = "Americium",
Cm = "Curium",
Bk = "Berkelium",
Cf = "Californium",
Es = "Einsteinium",
Fm = "Fermium",
Md = "Mendelevium",
No = "Nobelium",
Lr = "Lawrencium",
Rf = "Rutherfordium",
Db = "Dubnium",
Sg = "Seaborgium",
Bh = "Bohrium",
Hs = "Hassium",
Mt = "Meitnerium",
Ds = "Darmstadtium",
Rg = "Roentgenium",
Cp = "Copernicium",
Nh = "Nihonium",
Fl = "Flerovium",
Mc = "Moscovium",
Lv = "Livermorium",
Ts = "Tennessine",
Og = "Oganesson",
-- Groups etc with element-like names
Bn = 'Benzyl group',
Bz = 'Benzoyl group',
D = 'Deuterium',
Et = 'Ethyl group',
Ln = 'Lanthanide',
Nu = 'Nucleophile',
Ph = 'Phenyl group',
R = 'Substituent',
T = 'Tritium',
Tf = 'Trifluoromethylsulfonyl group',
X = 'Halogen',
}
 
-- Groups which are redirected from their normal target if wikilinked; never
-- autolinked.
local groups = {
CH3 = 'Methyl group',
CO3 = 'Carbonate',
COOH = 'Carboxyl group',
ClO = 'Hypochlorite',
ClO2 = 'Chlorite',
ClO3 = 'Chlorate',
ClO4 = 'Perchlorate',
H2O = 'Water of crystallization',
H3O = 'Hydronium',
NH2 = 'Amine group',
NH4 = 'Ammonium',
NO3 = 'Nitrate',
PO3 = 'Phosphite',
PO4 = 'Phosphate',
SH = 'Thiol group',
SO3 = 'Sulfite',
SO4 = 'Sulfate',
SeH = 'Selenol group'
}


local T_ELEM = 0        -- token types
local T_ELEM = 0        -- token types
Line 27: Line 176:
local T_UNDERSCORE = 19  -- _{ ... }
local T_UNDERSCORE = 19  -- _{ ... }
local T_CARET = 20      -- ^{ ... }
local T_CARET = 20      -- ^{ ... }
local T_NOCHANGE = 30       -- Anything else like ☃
local T_LINKOPEN = 21    -- Opening of link, always like "[[target|" even if the source wasn't
local T_NOCHANGE = 30   -- Anything else like ☃


function su(up, down) -- like template:su
function su(up, down)
  if (down == "") then  
if up == "" then
    return "<span style=\"display:inline-block; margin-bottom:-0.3em; vertical-align:0.8em; line-height:1.2em; font-size:70%; text-align:left;\">" .. up .. "<br /></span>";
return ('<sub class="template-chem2-sub">%s</sub>'):format(down)
  else
end
     return "<span style=\"display:inline-block; margin-bottom:-0.3em; vertical-align:-0.4em; line-height:1.2em; font-size:70%; text-align:left;\">" .. up .. "<br />" .. down .. "</span>";
if down == "" then
  end
return ('<sup class="template-chem2-sup">%s</sup>'):format(up)
end
     return ('<span class="template-chem2-su"><span>%s</span><span>%s</span></span>'):format(up, down)
end
end


function DotIt()
function DotIt()
  return '&middot;'
    return '&middot;'
end
end


function item(f) -- (iterator) returns one token (type, value) at a time from the formula 'f'
function item(f) -- (iterator) returns one token (type, value) at a time from the formula 'f'
  local i = 1
    local i = 1
  local first = "true";


  return function ()
    return function ()
local t, x = nil, nil
        local t, x = nil, nil


         if (first == "true" and f:match('^[0-9]', i)) then  
         if (i == 1) and f:match('^[0-9]', i) then  
                x = f:match('^[%d.]+', i); t = T_NOCHANGE; i = i + x:len();  -- matching coefficient (need a space first)
            x = f:match('^[%d.]+', i); t = T_NOCHANGE; i = i + x:len();  -- matching coefficient (need a space first)


         elseif i <= f:len() then
         elseif i <= f:len() then
                              x = f:match('^%s+[%d.]+', i); t = T_NOCHANGE;  -- matching coefficient (need a space first)
            x = f:match('^%s+[%d.]+', i); t = T_NOCHANGE;  -- matching coefficient (need a space first)
if not x then x = f:match('^%s[+]', i); t = T_NOCHANGE; end      -- matching + (H2O + H2O)
            if not x then x = f:match('^%s[+]', i); t = T_NOCHANGE; end      -- matching + (H2O + H2O)
if not x then x = f:match('^%&%#[%w%d]+%;', i); t = T_NOCHANGE; end      -- &#...;
            if not x then x = f:match('^%&%#[%w%d]+%;', i); t = T_NOCHANGE; end      -- &#...;
if not x then x = f:match('^%<%-%>', i); t = T_ARROW_EQ; end      -- matching <->
            if not x then x = f:match('^%<%-%>', i); t = T_ARROW_EQ; end      -- matching <->
if not x then x = f:match('^%-%>', i); t = T_ARROW_R; end      -- matching ->
            if not x then x = f:match('^%-%>', i); t = T_ARROW_R; end      -- matching ->
if not x then x = f:match('^%u%l*', i); t = T_ELEM; end        -- matching symbols like Aaaaa
            if not x then x = f:match('^%u%l*', i); t = T_ELEM; end        -- matching symbols like Aaaaa
if not x then x = f:match('^%d+[+-]', i); t = T_SUF_CHARGE; end        -- matching x+, x-
            if not x then x = f:match('^%d+[+-]', i); t = T_SUF_CHARGE; end        -- matching x+, x-
if not x then x = f:match('^%d+%(%d*[+-]%)', i); t = T_SUF_CHARGE2; end        -- matching x(y+/-), x(+/-)
            if not x then x = f:match('^%d+%(%d*[+-]%)', i); t = T_SUF_CHARGE2; end        -- matching x(y+/-), x(+/-)
if not x then x = f:match('^%(%d*[+-]%)', i); t = T_CHARGE; end        -- matching (x+) (xx+), (x-) (xx-)
            if not x then x = f:match('^%(%d*[+-]%)', i); t = T_CHARGE; end        -- matching (x+) (xx+), (x-) (xx-)
if not x then x = f:match('^[%d.]+', i); t = T_NUM; end        -- matching number
            if not x then x = f:match('^[%d.]+', i); t = T_NUM; end        -- matching number
if not x then x = f:match('^[(|{|%[]', i); t = T_OPEN; end    -- matching ({[
            if not x and (f:match('^%[%[%[[^[]', i) or f:match('^%[[^[]', i)) then
if not x then x = f:match('^[)|}|%]]', i); t = T_CLOSE; end           -- matching )}]
            i = i + 1; return T_OPEN, '&#91;' end -- escape [[[X or [X (relevant to auto-linking)
if not x then x = f:match('^[+-]', i); t = T_PM_CHARGE; end       -- matching + or -
            if not x and f:sub(i, i + 1) == '[[' then
if not x then x = f:match('^%*[%d.]*H2O', i); t = T_WATER; end -- Crystal water
            x = f:match('^%[%[([^]|]*)', i) -- link target
if not x then x = f:match('^%*[%d.]*', i); t = T_CRYSTAL; end -- Crystal
            local len = x:len() + 3
if not x then x = f:match('^[\\].{%d+}', i); t = T_SPECIAL2; end -- \y{x}
            x = '[[' .. (groups[x] or am[x] or x) .. '|'  -- override link target for common groups
if not x then x = f:match('^[\\].', i); t = T_SPECIAL; end -- \x
 
if not x then x = f:match('^_{[^}]*}', i); t = T_UNDERSCORE; end -- _{...}
            if f:sub(len + i, len + i) == ']' then
if not x then x = f:match('^\^{[^}]*}', i); t = T_CARET; end -- ^{...}
            -- We're going to read the link twice, once as target and once as
if not x then x = f:match('^.', i); t = T_NOCHANGE; end  --the rest - one by one
            -- chemical markup, e.g. [[CH3]] => "[[CH3|", "CH3]]"
if x then i = i + x:len(); else i = i + 999; error("Invalid character in formula!!!!!!! : "..f) end
            i = i + 2
end
            else
         first = "false"
            i = i + len
return t, x
            end
end
            return T_LINKOPEN, x
  end
            end
            if not x then x = f:match('^[(|{|%[]', i); t = T_OPEN; end    -- matching ({[
            if not x then x = f:match('^[)|}|%]]', i); t = T_CLOSE; end   -- matching )}]
            if not x then x = f:match('^[+-]', i); t = T_PM_CHARGE; end   -- matching + or -
            if not x then x = f:match('^%*[%d.]*H2O', i); t = T_WATER; end -- Crystal water
            if not x then x = f:match('^%*[%d.]*', i); t = T_CRYSTAL; end -- Crystal
            if not x then x = f:match('^[\\].{%d+}', i); t = T_SPECIAL2; end -- \y{x}
            if not x then x = f:match('^[\\].', i); t = T_SPECIAL; end -- \x
            if not x then x = f:match('^_{[^}]*}', i); t = T_UNDERSCORE; end -- _{...}
            if not x then x = f:match('^^{[^}]*}', i); t = T_CARET; end   -- ^{...}
            if not x then x = f:match('^.', i); t = T_NOCHANGE; end  --the rest - one by one
            if x then i = i + x:len(); else i = i + 999; error("Invalid character in formula! : "..f) end
        end
         return t, x
    end
end


function p._chem(args)
function p._chem(args)
 
local f = args[1] or ''
    local f = args[1] or ''


  f = string.gsub(f, "–", "-")  -- replace – with - (hyphen not ndash)
f = mw.text.decode( f, true ) -- handle entity input (like &minus;): decode right away
  f = string.gsub(f, "−", "-")  -- replace – with - (hyphen not minus sign)
    f = string.gsub(f, "–", "-")  -- replace – with - (hyphen not ndash)
    f = string.gsub(f, "−", "-")  -- replace – with - (hyphen not minus sign)


  local sumO = 0
    local formula = ''
  local formula = ''
    local t, x
  local t, x


  local link = args['link'] or ""
    local link = args['link'] or ""
  local auto = args['auto'] or ""
    local auto = args['auto'] or ""
    local seen = {}
    local _debug = false


  if not (link == '') then formula = formula .. "[[" .. link .. "|"; end  -- wikilink start [[link|
    if not (link == '') then formula = formula .. "[[" .. link .. "|"; end  -- wikilink start [[link|
   
   
  for t, x in item(f) do  
    for t, x in item(f) do  
      if     t == T_ELEM then if (auto == '') then formula = formula .. x elseif am[x] then formula = formula .. am[x]; am[x] = x else formula = formula .. x end  
    if _debug then
      elseif t == T_COEFFICIENT then formula = formula .. x
    formula = ("%s\n* %d %s"):format(formula, t, x)
      elseif t == T_NUM   then formula = formula .. su("", x);
        elseif t == T_ELEM then
      elseif t == T_OPEN  then formula = formula .. x; sumO = sumO + 1;       -- ( {
            if (auto == '') or (not am[x]) or seen[x] then formula = formula .. x
      elseif t == T_CLOSE then formula = formula .. x; sumO = sumO -1;        -- ) }
            else formula = ("%s[[%s|%s]]"):format(formula, am[x], x); seen[x] = true
      elseif t == T_PM_CHARGE   then formula = formula .. su(string.gsub(x, "-", "−"), "");
            end
      elseif t == T_SUF_CHARGE then  
        elseif t == T_COEFFICIENT then formula = formula .. x
          formula = formula .. su(string.gsub(string.match(x, "[+-]"), "-", "−"), string.match(x, "%d+"), "");
        elseif t == T_NUM       then formula = formula .. su("", x);
      elseif t == T_SUF_CHARGE2 then  
        elseif t == T_LINKOPEN  then formula = formula .. x;     -- [[Link|
          formula = formula .. su(string.sub(string.gsub(string.match(x, "%(%d*[+-]"), "-", "−"), 2, -1), string.match(x, "%d+"))
        elseif t == T_OPEN      then formula = formula .. x;         -- ([{
      elseif t == T_CHARGE then formula = formula .. "<sup>"; if string.match(x, "%d+") then formula = formula .. string.match(x, "%d+"); end formula = formula .. string.gsub(string.match(x, "[%+-]"), "-", "−") .. "</sup>"; -- can not concatenat a nil value from string.match(x, "%d+");
        elseif t == T_CLOSE     then formula = formula .. x;         -- )]}
      elseif t == T_CRYSTAL then formula = formula .. DotIt() .. string.gsub( x, "*", '', 1 );
        elseif t == T_PM_CHARGE then formula = formula .. su(x:gsub("-", "−"), "");
      elseif t == T_SPECIAL then
        elseif t == T_SUF_CHARGE then  
          parameter = string.sub(x, 2, 2) -- x fra \x   
            formula = formula .. su(x:match("[+-]"):gsub("-", "−"), x:match("%d+"), "");
          if       parameter == "s" then formula = formula .. "−"  -- single bond
        elseif t == T_SUF_CHARGE2 then  
            formula = formula .. su(x:match("%(%d*[+-]"):gsub("-", "−"):sub(2, -1), x:match("%d+"))
        elseif t == T_CHARGE then
        formula = formula .. "<sup>"
        if x:match("%d+") then formula = formula .. x:match("%d+"); end
        formula = formula .. x:match("[%+-]"):gsub("-", "−") .. "</sup>";
        -- Cannot concatenat a nil value from x:match("%d+");
        elseif t == T_CRYSTAL then formula = formula .. DotIt() .. string.gsub( x, "*", '', 1 );
        elseif t == T_SPECIAL then
            parameter = x:sub(2, 2) -- x fra \x   
            if     parameter == "s" then formula = formula .. "−"  -- single bond
             elseif parameter == "d" then formula = formula .. "="  -- double bond
             elseif parameter == "d" then formula = formula .. "="  -- double bond
             elseif parameter == "t" then formula = formula .. "≡"  -- tripple bond
             elseif parameter == "t" then formula = formula .. "≡"  -- tripple bond
Line 120: Line 297:
             elseif parameter == "\\" then formula = formula .. "\\"  -- \
             elseif parameter == "\\" then formula = formula .. "\\"  -- \
             elseif parameter == "\'" then formula = formula .. "&#39;"  -- html-code for '
             elseif parameter == "\'" then formula = formula .. "&#39;"  -- html-code for '
          end
            end
      elseif t == T_SPECIAL2 then  -- \y{x}
        elseif t == T_SPECIAL2 then  -- \y{x}
        parameter = string.sub(x, 2, 2) -- y fra \y{x}  
            parameter = x:sub(2, 2) -- y fra \y{x}  
          if parameter  == "h" then --[[Hapticity]]
            if parameter  == "h" then --[[Hapticity]]
            if (auto == '') then formula = formula .. "η<sup>" .. string.match(x, '%d+') .. "</sup>-"
                if (auto == '') then formula = formula .. "η<sup>" .. x:match('%d+') .. "</sup>-"
              else
                else
            formula = formula .. "[[Hapticity|η<sup>" .. string.match(x, '%d+') .. "</sup>]]-"
                    formula = formula .. "[[Hapticity|η<sup>" .. x:match('%d+') .. "</sup>]]-"
            end
                end
          elseif parameter == "m" then formula = formula .. "μ<sub>" .. string.match(x, '%d+') .. "</sub>-" -- mu ([[bridging ligand]])
            elseif parameter == "m" then formula = formula .. "μ<sub>" .. x:match('%d+') .. "</sub>-" -- mu ([[bridging ligand]])
          end
            end
      elseif t == T_WATER then  
        elseif t == T_WATER then  
        if string.match(x, "^%*[%d.]") then  
            if x:match("^%*[%d.]") then  
            formula = formula .. DotIt() .. string.match(x, "%f[%.%d]%d*%.?%d*%f[^%.%d%]]") .. "H<sub>2</sub>O";
                formula = formula .. DotIt() .. x:match("%f[%.%d]%d*%.?%d*%f[^%.%d%]]") .. "H<sub>2</sub>O";
        else
            else
          formula = formula .. DotIt() .. "H<sub>2</sub>O";
                formula = formula .. DotIt() .. "H<sub>2</sub>O";
        end
            end
      elseif t == T_UNDERSCORE  then formula = formula .. su("", string.sub(x,3,-2)) -- x contains _{string}
        elseif t == T_UNDERSCORE  then formula = formula .. su("", x:gsub("-", "−"):sub(3, -2)) -- x contains _{string}
      elseif t == T_CARET then formula = formula .. su(string.sub(x,3,-2), "") -- x contains ^{string}
        elseif t == T_CARET       then formula = formula .. su(x:gsub("-", "−"):sub(3, -2), "") -- x contains ^{string}
      elseif t == T_ARROW_R then formula = formula .. " → "
        elseif t == T_ARROW_R     then formula = formula .. " → "
      elseif t == T_ARROW_EQ then formula = formula .. " ⇌ "
        elseif t == T_ARROW_EQ   then formula = formula .. " ⇌ "
      elseif t == T_NOCHANGE then formula = formula .. x;  -- The rest - everything which isn't captured by the regular expresions. E.g. wikilinks and pipes
        elseif t == T_NOCHANGE   then formula = formula .. x;  -- The rest - everything which isn't captured by the regular expresions.
   
        else error('unreachable - ???') end -- in fact, unreachable
      else error('unreachable - ???') end -- in fact, unreachable
    end


    if not (link == nil or link == '') then formula = formula .. "]]"; end  -- wikilink closing ]]
    formula = mw.getCurrentFrame():preprocess('<templatestyles src="Module:Chem2/styles.css"/>') ..
        '<span class="chemf nowrap">' .. formula .. '</span>'
    if args[2] or args[3] or args[4] then
        formula = formula .. require('Module:If preview')._warning{
'&#123;&#123;chem2&#125;&#125; was called with multiple positional arguments. It should have just one, e.g. &#123;&#123;chem2&#124;H2O&#125;&#125;.'
        }
    end
    return formula
end
end


  if not (link == nil or link == '') then formula = formula .. "]]"; end  -- wikilink closing ]]
function p.chem(frame)
 
    local args = getArgs(frame)
  return '<span class="chemf nowrap">' .. formula .. '</span>'
    return p._chem(args)
end
end


function p.chem(frame)
-- PRIVATE function to generate documentation.
local args = getArgs(frame)
function p._autodoc(frame)
return p._chem(args)
local TableTools = require('Module:TableTools') -- we don't want to load this on articles for no reason
local result = {
'===Elements and element-style symbols===\nThese may be automatically linked or used as if they were redirects.\n',
'{| class="wikitable"\n! Symbol !! Link target\n'
}
for symbol, target in TableTools.sortedPairs(am) do
result[#result + 1] = ('|-\n| %s || [[%s]]\n'):format(symbol, target)
end
result[#result + 1] = '|}\n===Groups===\nThese must be linked manually; they work as if they were redirects.\n'
result[#result + 1] = '{| class="wikitable"\n! Symbol !! Link target\n'
for symbol, target in TableTools.sortedPairs(groups) do
result[#result + 1] = ('|-\n| %s || [[%s]]\n'):format(symbol, target)
end
result[#result + 1] = '|}'
return table.concat(result)
end
end


return p
return p

Latest revision as of 23:10, 4 March 2023

Documentation for this module may be created at Module:Chem2/doc

local getArgs = require('Module:Arguments').getArgs
local p = {} -- module's table

-- Symbol -> link target for elements and element-like symbols.
-- p._chem uses this table to auto-link symbols when the "auto" argument is
-- set, item() uses it to override the target of links such as [[Fe]], and
-- p._autodoc lists it in the generated documentation.
local am = {
	H = "Hydrogen",
	He = "Helium",
	Li = "Lithium",
	Be = "Beryllium",
	B = "Boron",
	C = "Carbon",
	N = "Nitrogen",
	O = "Oxygen",
	F = "Fluorine",
	Ne = "Neon",
	Na = "Sodium",
	Mg = "Magnesium",
	Al = "Aluminium",
	Si = "Silicon",
	P = "Phosphorus",
	S = "Sulfur",
	Cl = "Chlorine",
	Ar = "Argon",
	K = "Potassium",
	Ca = "Calcium",
	Sc = "Scandium",
	Ti = "Titanium",
	V = "Vanadium",
	Cr = "Chromium",
	Mn = "Manganese",
	Fe = "Iron",
	Co = "Cobalt",
	Ni = "Nickel",
	Cu = "Copper",
	Zn = "Zinc",
	Ga = "Gallium",
	Ge = "Germanium",
	As = "Arsenic",
	Se = "Selenium",
	Br = "Bromine",
	Kr = "Krypton",
	Rb = "Rubidium",
	Sr = "Strontium",
	Y = "Yttrium",
	Zr = "Zirconium",
	Nb = "Niobium",
	Mo = "Molybdenum",
	Tc = "Technetium",
	Ru = "Ruthenium",
	Rh = "Rhodium",
	Pd = "Palladium",
	Ag = "Silver",
	Cd = "Cadmium",
	In = "Indium",
	Sn = "Tin",
	Sb = "Antimony",
	Te = "Tellurium",
	I = "Iodine",
	Xe = "Xenon",
	Cs = "Caesium",
	Ba = "Barium",
	La = "Lanthanum",
	Ce = "Cerium",
	Pr = "Praseodymium",
	Nd = "Neodymium",
	Pm = "Promethium",
	Sm = "Samarium",
	Eu = "Europium",
	Gd = "Gadolinium",
	Tb = "Terbium",
	Dy = "Dysprosium",
	Ho = "Holmium",
	Er = "Erbium",
	Tm = "Thulium",
	Yb = "Ytterbium",
	Lu = "Lutetium",
	Hf = "Hafnium",
	Ta = "Tantalum",
	W = "Tungsten",
	Re = "Rhenium",
	Os = "Osmium",
	Ir = "Iridium",
	Pt = "Platinum",
	Au = "Gold",
	Hg = "Mercury (element)",
	Tl = "Thallium",
	Pb = "Lead",
	Bi = "Bismuth",
	Po = "Polonium",
	At = "Astatine",
	Rn = "Radon",
	Fr = "Francium",
	Ra = "Radium",
	Ac = "Actinium",
	Th = "Thorium",
	Pa = "Protactinium",
	U = "Uranium",
	Np = "Neptunium",
	Pu = "Plutonium",
	Am = "Americium",
	Cm = "Curium",
	Bk = "Berkelium",
	Cf = "Californium",
	Es = "Einsteinium",
	Fm = "Fermium",
	Md = "Mendelevium",
	No = "Nobelium",
	Lr = "Lawrencium",
	Rf = "Rutherfordium",
	Db = "Dubnium",
	Sg = "Seaborgium",
	Bh = "Bohrium",
	Hs = "Hassium",
	Mt = "Meitnerium",
	Ds = "Darmstadtium",
	Rg = "Roentgenium",
	Cn = "Copernicium", -- IUPAC symbol; previously only reachable through "Cp"
	-- NOTE(review): "Cp" is not the IUPAC symbol for copernicium (it is "Cn").
	-- The entry is kept so existing formulas that rely on it keep their link;
	-- consider removing or retargeting it once callers have been checked.
	Cp = "Copernicium",
	Nh = "Nihonium",
	Fl = "Flerovium",
	Mc = "Moscovium",
	Lv = "Livermorium",
	Ts = "Tennessine",
	Og = "Oganesson",

	-- Groups etc with element-like names
	Bn = 'Benzyl group',
	Bz = 'Benzoyl group',
	D = 'Deuterium',
	Et = 'Ethyl group',
	Ln = 'Lanthanide',
	Nu = 'Nucleophile',
	Ph = 'Phenyl group',
	R = 'Substituent',
	T = 'Tritium',
	Tf = 'Trifluoromethylsulfonyl group',
	X = 'Halogen',
}

-- Multi-character group formulas and the page each one should link to.
-- item() consults this table (before `am`) when it rewrites the target of an
-- explicit wikilink; p._chem never auto-links these.
local groups = {
	-- C...
	['CH3'] = 'Methyl group',
	['CO3'] = 'Carbonate',
	['COOH'] = 'Carboxyl group',
	['ClO'] = 'Hypochlorite',
	['ClO2'] = 'Chlorite',
	['ClO3'] = 'Chlorate',
	['ClO4'] = 'Perchlorate',
	-- H...
	['H2O'] = 'Water of crystallization',
	['H3O'] = 'Hydronium',
	-- N...
	['NH2'] = 'Amine group',
	['NH4'] = 'Ammonium',
	['NO3'] = 'Nitrate',
	-- P...
	['PO3'] = 'Phosphite',
	['PO4'] = 'Phosphate',
	-- S...
	['SH'] = 'Thiol group',
	['SO3'] = 'Sulfite',
	['SO4'] = 'Sulfate',
	['SeH'] = 'Selenol group',
}

-- Token types returned by item() and dispatched on in p._chem.
local T_ELEM = 0         -- element-style symbol: one uppercase letter followed by lowercase letters
local T_NUM = 1          -- number (digits and dots), rendered as a subscript
local T_OPEN = 2         -- opening bracket: ( { [
local T_CLOSE = 3        -- closing bracket: ) } ]
local T_PM_CHARGE = 4    -- bare + or -, rendered as a superscript
local T_WATER = 6        -- *xH2O, where x is an optional number (crystal water)
local T_CRYSTAL = 9      -- *x, where x is an optional number
local T_CHARGE = 8       -- charge (x+), (x-)
local T_SUF_CHARGE = 10  -- suffix and charge e.g. 2+ from H2+
local T_SUF_CHARGE2 = 12 -- suffix and (charge) e.g. 2(2+) from He2(2+)
local T_SPECIAL = 14     -- starting with \ e.g. \d for double bond (=)
local T_SPECIAL2 = 16    -- \y{x} with x a number; p._chem handles \h{x} (hapticity) and \m{x} (mu)
local T_ARROW_R = 17     -- match: ->
local T_ARROW_EQ = 18    -- match: <->
local T_UNDERSCORE = 19  -- _{ ... }
local T_CARET = 20       -- ^{ ... }
local T_LINKOPEN = 21    -- Opening of link, always like "[[target|" even if the source wasn't
local T_NOCHANGE = 30    -- Anything else like ☃

-- Build the HTML for a superscript/subscript pair.
--   up   : text to raise; "" when there is no superscript
--   down : text to lower; "" when there is no subscript
-- An empty `up` yields a plain <sub>, otherwise an empty `down` yields a plain
-- <sup>; when both are present the two are wrapped together in a
-- "template-chem2-su" span.
function su(up, down)
	local markup
	if up == "" then
		markup = string.format('<sub class="template-chem2-sub">%s</sub>', down)
	elseif down == "" then
		markup = string.format('<sup class="template-chem2-sup">%s</sup>', up)
	else
		markup = string.format(
			'<span class="template-chem2-su"><span>%s</span><span>%s</span></span>',
			up, down
		)
	end
	return markup
end

-- Returns the HTML entity used as the separator in front of crystal-water and
-- crystal parts of a formula.
function DotIt()
    local middle_dot = '&middot;'
    return middle_dot
end

-- Tokenizer for a formula string.
-- Returns an iterator function; each call yields the next (token type, text)
-- pair read from `f`, and (nil, nil) once the whole string has been consumed.
-- All patterns start with '^' and are matched from the current read position.
-- The rules are tried strictly in the order listed; the first hit wins.
function item(f)
    local pos = 1 -- index of the next unread byte of f

    -- Rules tried before the wikilink handling: {pattern, token type}.
    local rules_before_link = {
        {'^%s+[%d.]+', T_NOCHANGE},          -- whitespace followed by a coefficient
        {'^%s[+]', T_NOCHANGE},              -- whitespace followed by + (H2O + H2O)
        {'^%&%#[%w%d]+%;', T_NOCHANGE},      -- &#...;
        {'^%<%-%>', T_ARROW_EQ},             -- <->
        {'^%-%>', T_ARROW_R},                -- ->
        {'^%u%l*', T_ELEM},                  -- symbols like Aaaaa
        {'^%d+[+-]', T_SUF_CHARGE},          -- x+, x-
        {'^%d+%(%d*[+-]%)', T_SUF_CHARGE2},  -- x(y+/-), x(+/-)
        {'^%(%d*[+-]%)', T_CHARGE},          -- (x+) (xx+), (x-) (xx-)
        {'^[%d.]+', T_NUM},                  -- number
    }

    -- Rules tried after the wikilink handling. Note that the two bracket sets
    -- also contain '|', so a pipe is reported as T_OPEN.
    local rules_after_link = {
        {'^[(|{|%[]', T_OPEN},               -- ({[
        {'^[)|}|%]]', T_CLOSE},              -- )}]
        {'^[+-]', T_PM_CHARGE},              -- + or -
        {'^%*[%d.]*H2O', T_WATER},           -- crystal water
        {'^%*[%d.]*', T_CRYSTAL},            -- crystal
        {'^[\\].{%d+}', T_SPECIAL2},         -- \y{x}
        {'^[\\].', T_SPECIAL},               -- \x
        {'^_{[^}]*}', T_UNDERSCORE},         -- _{...}
        {'^^{[^}]*}', T_CARET},              -- ^{...}
        {'^.', T_NOCHANGE},                  -- the rest, one byte at a time
    }

    -- Return (type, text) for the first rule in `rules` matching at `pos`,
    -- or nothing when none matches.
    local function first_match(rules)
        for _, rule in ipairs(rules) do
            local text = f:match(rule[1], pos)
            if text then
                return rule[2], text
            end
        end
    end

    return function ()
        -- A number at the very start of the formula is a coefficient and is
        -- passed through unchanged.
        if pos == 1 and f:match('^[0-9]', pos) then
            local coefficient = f:match('^[%d.]+', pos)
            pos = pos + coefficient:len()
            return T_NOCHANGE, coefficient
        end

        if pos > f:len() then
            return nil, nil -- input exhausted
        end

        local kind, text = first_match(rules_before_link)

        if not text then
            -- escape [[[X or [X (relevant to auto-linking)
            if f:match('^%[%[%[[^[]', pos) or f:match('^%[[^[]', pos) then
                pos = pos + 1
                return T_OPEN, '&#91;'
            end

            if f:sub(pos, pos + 1) == '[[' then
                local target = f:match('^%[%[([^]|]*)', pos) -- link target
                local len = target:len() + 3
                -- override link target for common groups
                local opener = '[[' .. (groups[target] or am[target] or target) .. '|'

                if f:sub(len + pos, len + pos) == ']' then
                    -- The link has no label: read it twice, once as target and
                    -- once as chemical markup, e.g. [[CH3]] => "[[CH3|", "CH3]]"
                    pos = pos + 2
                else
                    pos = pos + len
                end
                return T_LINKOPEN, opener
            end

            kind, text = first_match(rules_after_link)
        end

        if not text then
            pos = pos + 999
            error("Invalid character in formula! : "..f)
        end

        pos = pos + text:len()
        return kind, text
    end
end

-- Render a chemical formula as wikitext/HTML.
--   args[1]      : the formula source text (defaults to '')
--   args['link'] : optional page title; when non-empty the whole formula is
--                  wrapped in a wikilink to that page
--   args['auto'] : when non-empty, the first occurrence of each symbol found
--                  in `am` is linked to its target, and \h{n} links to Hapticity
-- Returns the TemplateStyles tag followed by the formula wrapped in
-- <span class="chemf nowrap">. A preview warning is appended when more than
-- one positional argument was supplied.
function p._chem(args)
    local f = args[1] or ''

    f = mw.text.decode( f, true ) -- handle entity input (like &minus;): decode right away
    f = string.gsub(f, "–", "-")  -- replace en dash with hyphen
    f = string.gsub(f, "−", "-")  -- replace minus sign with hyphen

    local link = args['link'] or ""
    local auto = args['auto'] or ""
    local seen = {}      -- symbols that have already been auto-linked
    local _debug = false -- set to true to dump the token stream instead of rendering

    -- Output for \x escapes; any other letter after the backslash emits nothing.
    local specials = {
        ["s"] = "−",      -- single bond
        ["d"] = "=",      -- double bond
        ["t"] = "≡",      -- triple bond
        ["q"] = "≣",      -- quadruple bond
        ["h"] = "η",      -- η, hapticity
        ["*"] = "*",      -- literal *
        ["-"] = "-",      -- literal -
        ["\\"] = "\\",    -- literal \
        ["'"] = "&#39;",  -- html-code for '
    }

    -- Pieces are collected here and joined once at the end instead of
    -- re-concatenating the growing string for every token.
    local out = {}
    local function emit(s) out[#out + 1] = s end

    if link ~= '' then emit("[[" .. link .. "|") end -- wikilink start [[link|

    for t, x in item(f) do
        if _debug then
            emit(("\n* %d %s"):format(t, x))
        elseif t == T_ELEM then
            if auto == '' or not am[x] or seen[x] then
                emit(x)
            else
                emit(("[[%s|%s]]"):format(am[x], x))
                seen[x] = true
            end
        elseif t == T_NUM then
            emit(su("", x))
        elseif t == T_LINKOPEN or t == T_OPEN or t == T_CLOSE or t == T_NOCHANGE then
            -- "[[Link|", brackets, and anything the tokenizer left unchanged
            emit(x)
        elseif t == T_PM_CHARGE then
            emit(su((x:gsub("-", "−")), ""))
        elseif t == T_SUF_CHARGE then
            -- e.g. "2+": the sign goes up, the count goes down
            local sign = x:match("[+-]"):gsub("-", "−")
            emit(su(sign, x:match("%d+")))
        elseif t == T_SUF_CHARGE2 then
            -- e.g. "2(2+)": strip the "(" from the charge, first digits go down
            local charge = x:match("%(%d*[+-]"):gsub("-", "−"):sub(2, -1)
            emit(su(charge, x:match("%d+")))
        elseif t == T_CHARGE then
            local digits = x:match("%d+") or "" -- the count is optional: (+), (2+)
            local sign = x:match("[%+-]"):gsub("-", "−")
            emit("<sup>" .. digits .. sign .. "</sup>")
        elseif t == T_CRYSTAL then
            emit(DotIt() .. (x:gsub("%*", "", 1))) -- drop the leading *
        elseif t == T_SPECIAL then
            local replacement = specials[x:sub(2, 2)] -- x from \x
            if replacement then emit(replacement) end
        elseif t == T_SPECIAL2 then -- \y{x}
            local parameter = x:sub(2, 2) -- y from \y{x}
            if parameter == "h" then -- hapticity
                local eta = "η<sup>" .. x:match('%d+') .. "</sup>"
                if auto == '' then
                    emit(eta .. "-")
                else
                    emit("[[Hapticity|" .. eta .. "]]-")
                end
            elseif parameter == "m" then -- mu (bridging ligand)
                emit("μ<sub>" .. x:match('%d+') .. "</sub>-")
            end
        elseif t == T_WATER then
            if x:match("^%*[%d.]") then
                emit(DotIt() .. x:match("%f[%.%d]%d*%.?%d*%f[^%.%d%]]") .. "H<sub>2</sub>O")
            else
                emit(DotIt() .. "H<sub>2</sub>O")
            end
        elseif t == T_UNDERSCORE then
            emit(su("", x:gsub("-", "−"):sub(3, -2))) -- x contains _{string}
        elseif t == T_CARET then
            emit(su(x:gsub("-", "−"):sub(3, -2), "")) -- x contains ^{string}
        elseif t == T_ARROW_R then
            emit(" → ")
        elseif t == T_ARROW_EQ then
            emit(" ⇌ ")
        else
            error('unreachable - ???') -- in fact, unreachable
        end
    end

    if link ~= '' then emit("]]") end -- wikilink closing ]]

    local formula = mw.getCurrentFrame():preprocess('<templatestyles src="Module:Chem2/styles.css"/>') ..
        '<span class="chemf nowrap">' .. table.concat(out) .. '</span>'
    if args[2] or args[3] or args[4] then
        formula = formula .. require('Module:If preview')._warning{
'&#123;&#123;chem2&#125;&#125; was called with multiple positional arguments. It should have just one, e.g. &#123;&#123;chem2&#124;H2O&#125;&#125;.'
        }
    end
    return formula
end

-- Frame-based entry point: extracts the arguments from `frame` with getArgs
-- and hands them to p._chem, returning its result.
function p.chem(frame)
    return p._chem(getArgs(frame))
end

-- PRIVATE function to generate documentation.
-- Builds wikitext with two tables, one listing every entry of `am` and one
-- listing every entry of `groups`, each row showing the symbol and a link to
-- its target. Rows follow the order given by TableTools.sortedPairs.
function p._autodoc(frame)
	-- loaded here rather than at module level: we don't want to load this on articles for no reason
	local sortedPairs = require('Module:TableTools').sortedPairs
	local tableHead = '{| class="wikitable"\n! Symbol !! Link target\n'
	local rowFormat = '|-\n| %s || [[%s]]\n'

	local out = {}
	local function push(text)
		out[#out + 1] = text
	end
	-- Append one table row per (symbol, target) entry of `map`.
	local function pushRows(map)
		for symbol, target in sortedPairs(map) do
			push(rowFormat:format(symbol, target))
		end
	end

	push('===Elements and element-style symbols===\nThese may be automatically linked or used as if they were redirects.\n')
	push(tableHead)
	pushRows(am)
	push('|}\n===Groups===\nThese must be linked manually; they work as if they were redirects.\n')
	push(tableHead)
	pushRows(groups)
	push('|}')

	return table.concat(out)
end

return p