Module:Chem2: Difference between revisions

Jump to navigation Jump to search
m (1 revision imported)
(From the sandbox; remove special recognition of OH and O2 as groups due to ambiguity)
Line 4: Line 4:
-- Element symbols (and a few element-like placeholders) mapped to the title
-- of the article each one links to. Values are bare page titles; the caller
-- builds the actual "[[title|symbol]]" wikilink when auto-linking is on.
local am = {
    H = "Hydrogen",
    He = "Helium",
    Li = "Lithium",
    Be = "Beryllium",
    B = "Boron",
    C = "Carbon",
    N = "Nitrogen",
    O = "Oxygen",
    F = "Fluorine",
    Ne = "Neon",
    Na = "Sodium",
    Mg = "Magnesium",
    Al = "Aluminium",
    Si = "Silicon",
    P = "Phosphorus",
    S = "Sulfur",
    Cl = "Chlorine",
    Ar = "Argon",
    K = "Potassium",
    Ca = "Calcium",
    Sc = "Scandium",
    Ti = "Titanium",
    V = "Vanadium",
    Cr = "Chromium",
    Mn = "Manganese",
    Fe = "Iron",
    Co = "Cobalt",
    Ni = "Nickel",
    Cu = "Copper",
    Zn = "Zinc",
    Ga = "Gallium",
    Ge = "Germanium",
    As = "Arsenic",
    Se = "Selenium",
    Br = "Bromine",
    Kr = "Krypton",
    Rb = "Rubidium",
    Sr = "Strontium",
    Y = "Yttrium",
    Zr = "Zirconium",
    Nb = "Niobium",
    Mo = "Molybdenum",
    Tc = "Technetium",
    Ru = "Ruthenium",
    Rh = "Rhodium",
    Pd = "Palladium",
    Ag = "Silver",
    Cd = "Cadmium",
    In = "Indium",
    Sn = "Tin",
    Sb = "Antimony",
    Te = "Tellurium",
    I = "Iodine",
    Xe = "Xenon",
    Cs = "Caesium",
    Ba = "Barium",
    La = "Lanthanum",
    Ce = "Cerium",
    Pr = "Praseodymium",
    Nd = "Neodymium",
    Pm = "Promethium",
    Sm = "Samarium",
    Eu = "Europium",
    Gd = "Gadolinium",
    Tb = "Terbium",
    Dy = "Dysprosium",
    Ho = "Holmium",
    Er = "Erbium",
    Tm = "Thulium",
    Yb = "Ytterbium",
    Lu = "Lutetium",
    Hf = "Hafnium",
    Ta = "Tantalum",
    W = "Tungsten",
    Re = "Rhenium",
    Os = "Osmium",
    Ir = "Iridium",
    Pt = "Platinum",
    Au = "Gold",
    Hg = "Mercury (element)",
    Tl = "Thallium",
    Pb = "Lead",
    Bi = "Bismuth",
    Po = "Polonium",
    At = "Astatine",
    Rn = "Radon",
    Fr = "Francium",
    Ra = "Radium",
    Ac = "Actinium",
    Th = "Thorium",
    Pa = "Protactinium",
    U = "Uranium",
    Np = "Neptunium",
    Pu = "Plutonium",
    Am = "Americium",
    Cm = "Curium",
    Bk = "Berkelium",
    Cf = "Californium",
    Es = "Einsteinium",
    Fm = "Fermium",
    Md = "Mendelevium",
    No = "Nobelium",
    Lr = "Lawrencium",
    Rf = "Rutherfordium",
    Db = "Dubnium",
    Sg = "Seaborgium",
    Bh = "Bohrium",
    Hs = "Hassium",
    Mt = "Meitnerium",
    Ds = "Darmstadtium",
    Rg = "Roentgenium",
    -- Cn is the IUPAC-approved symbol for copernicium. The Cp key is not an
    -- official symbol; it is kept only so formulas already written with it
    -- keep resolving to the same article.
    Cn = "Copernicium",
    Cp = "Copernicium",
    Nh = "Nihonium",
    Fl = "Flerovium",
    Mc = "Moscovium",
    Lv = "Livermorium",
    Ts = "Tennessine",
    Og = "Oganesson",
    -- Groups etc with element-like names
    Bn = 'Benzyl group',
    Bz = 'Benzoyl group',
    D = 'Deuterium',
    Et = 'Ethyl group',
    Ln = 'Lanthanide',
    Nu = 'Nucleophile',
    Ph = 'Phenyl group',
    R = 'Substituent',
    T = 'Tritium',
    Tf = 'Trifluoromethylsulfonyl group',
    X = 'Halogen',
}
 
-- Multi-atom groups, keyed by their formula, mapped to the article that a
-- hand-written wikilink on that formula is redirected to. These are never
-- linked automatically.
local groups = {
    ["CH3"]  = "Methyl group",
    ["CO3"]  = "Carbonate",
    ["COOH"] = "Carboxyl group",
    ["ClO"]  = "Hypochlorite",
    ["ClO2"] = "Chlorite",
    ["ClO3"] = "Chlorate",
    ["ClO4"] = "Perchlorate",
    ["H2O"]  = "Water of crystallization",
    ["H3O"]  = "Hydronium",
    ["NH2"]  = "Amine group",
    ["NH4"]  = "Ammonium",
    ["NO3"]  = "Nitrate",
    ["PO3"]  = "Phosphite",
    ["PO4"]  = "Phosphate",
    ["SH"]   = "Thiol group",
    ["SO3"]  = "Sulfite",
    ["SO4"]  = "Sulfate",
    ["SeH"]  = "Selenol group",
}
}


Line 140: Line 176:
local T_UNDERSCORE = 19  -- _{ ... }
local T_UNDERSCORE = 19  -- _{ ... }
local T_CARET = 20      -- ^{ ... }
local T_CARET = 20      -- ^{ ... }
local T_LINKOPEN = 21    -- Opening of link, always like "[[target|" even if the source wasn't
local T_NOCHANGE = 30    -- Anything else like ☃
local T_NOCHANGE = 30    -- Anything else like ☃


Line 158: Line 195:
function item(f) -- (iterator) returns one token (type, value) at a time from the formula 'f'
function item(f) -- (iterator) returns one token (type, value) at a time from the formula 'f'
     local i = 1
     local i = 1
    local first = true


     return function ()
     return function ()
         local t, x = nil, nil
         local t, x = nil, nil


         if (first and f:match('^[0-9]', i)) then  
         if (i == 1) and f:match('^[0-9]', i) then  
             x = f:match('^[%d.]+', i); t = T_NOCHANGE; i = i + x:len();  -- matching coefficient (need a space first)
             x = f:match('^[%d.]+', i); t = T_NOCHANGE; i = i + x:len();  -- matching coefficient (need a space first)


Line 177: Line 213:
             if not x then x = f:match('^%(%d*[+-]%)', i); t = T_CHARGE; end        -- matching (x+) (xx+), (x-) (xx-)
             if not x then x = f:match('^%(%d*[+-]%)', i); t = T_CHARGE; end        -- matching (x+) (xx+), (x-) (xx-)
             if not x then x = f:match('^[%d.]+', i); t = T_NUM; end        -- matching number
             if not x then x = f:match('^[%d.]+', i); t = T_NUM; end        -- matching number
            if not x and (f:match('^%[%[%[[^[]', i) or f:match('^%[[^[]', i)) then
            i = i + 1; return T_OPEN, '[' end -- escape [[[X or [X (relevant to auto-linking)
            if not x and f:sub(i, i + 1) == '[[' then
            x = f:match('^%[%[([^]|]*)', i) -- link target
            local len = x:len() + 3
            x = '[[' .. (groups[x] or am[x] or x) .. '|'  -- override link target for common groups
            if f:sub(len + i, len + i) == ']' then
            -- We're going to read the link twice, once as target and once as
            -- chemical markup, e.g. [[CH3]] => "[[CH3|", "CH3]]"
            i = i + 2
            else
            i = i + len
            end
            return T_LINKOPEN, x
            end
             if not x then x = f:match('^[(|{|%[]', i); t = T_OPEN; end    -- matching ({[
             if not x then x = f:match('^[(|{|%[]', i); t = T_OPEN; end    -- matching ({[
             if not x then x = f:match('^[)|}|%]]', i); t = T_CLOSE; end           -- matching )}]
             if not x then x = f:match('^[)|}|%]]', i); t = T_CLOSE; end   -- matching )}]
             if not x then x = f:match('^[+-]', i); t = T_PM_CHARGE; end       -- matching + or -
             if not x then x = f:match('^[+-]', i); t = T_PM_CHARGE; end   -- matching + or -
             if not x then x = f:match('^%*[%d.]*H2O', i); t = T_WATER; end -- Crystal water
             if not x then x = f:match('^%*[%d.]*H2O', i); t = T_WATER; end -- Crystal water
             if not x then x = f:match('^%*[%d.]*', i); t = T_CRYSTAL; end -- Crystal
             if not x then x = f:match('^%*[%d.]*', i); t = T_CRYSTAL; end -- Crystal
             if not x then x = f:match('^[\\].{%d+}', i); t = T_SPECIAL2; end -- \y{x}
             if not x then x = f:match('^[\\].{%d+}', i); t = T_SPECIAL2; end -- \y{x}
             if not x then x = f:match('^[\\].', i); t = T_SPECIAL; end -- \x
             if not x then x = f:match('^[\\].', i); t = T_SPECIAL; end -- \x
             if not x then x = f:match('^_{[^}]*}', i); t = T_UNDERSCORE; end -- _{...}
             if not x then x = f:match('^_{[^}]*}', i); t = T_UNDERSCORE; end -- _{...}
             if not x then x = f:match('^\^{[^}]*}', i); t = T_CARET; end -- ^{...}
             if not x then x = f:match('^\^{[^}]*}', i); t = T_CARET; end   -- ^{...}
             if not x then x = f:match('^.', i); t = T_NOCHANGE; end  --the rest - one by one
             if not x then x = f:match('^.', i); t = T_NOCHANGE; end  --the rest - one by one
             if x then i = i + x:len(); else i = i + 999; error("Invalid character in formula! : "..f) end
             if x then i = i + x:len(); else i = i + 999; error("Invalid character in formula! : "..f) end
         end
         end
        first = false
         return t, x
         return t, x
     end
     end
Line 200: Line 251:
     f = string.gsub(f, "−", "-")  -- replace – with - (hyphen not minus sign)
     f = string.gsub(f, "−", "-")  -- replace – with - (hyphen not minus sign)


    local sumO = 0
     local formula = ''
     local formula = ''
     local t, x
     local t, x
Line 206: Line 256:
     local link = args['link'] or ""
     local link = args['link'] or ""
     local auto = args['auto'] or ""
     local auto = args['auto'] or ""
    local seen = {}
    local _debug = false


     if not (link == '') then formula = formula .. "[[" .. link .. "|"; end  -- wikilink start [[link|
     if not (link == '') then formula = formula .. "[[" .. link .. "|"; end  -- wikilink start [[link|
   
   
     for t, x in item(f) do  
     for t, x in item(f) do  
         if t == T_ELEM then
    if _debug then
              if (auto == '') then formula = formula .. x
    formula = ("%s\n* %d %s"):format(formula, t, x)
              elseif am[x] then formula = formula .. am[x]; am[x] = x
         elseif t == T_ELEM then
              else formula = formula .. x
            if (auto == '') or (not am[x]) or seen[x] then formula = formula .. x
            else formula = ("%s[[%s|%s]]"):format(formula, am[x], x); seen[x] = true
             end
             end
         elseif t == T_COEFFICIENT then formula = formula .. x
         elseif t == T_COEFFICIENT then formula = formula .. x
         elseif t == T_NUM  then formula = formula .. su("", x);
         elseif t == T_NUM  then formula = formula .. su("", x);
         elseif t == T_OPEN then formula = formula .. x; sumO = sumO + 1;       -- ( {
         elseif t == T_LINKOPEN then formula = formula .. x;     -- [[Link|
         elseif t == T_CLOSE then formula = formula .. x; sumO = sumO -1;        -- ) }
        elseif t == T_OPEN  then formula = formula .. x;         -- ([{
         elseif t == T_PM_CHARGE    then formula = formula .. su(string.gsub(x, "-", "−"), "");
         elseif t == T_CLOSE then formula = formula .. x;         -- )]}
         elseif t == T_PM_CHARGE    then formula = formula .. su(x:gsub("-", "−"), "");
         elseif t == T_SUF_CHARGE then  
         elseif t == T_SUF_CHARGE then  
             formula = formula .. su(string.gsub(string.match(x, "[+-]"), "-", "−"), string.match(x, "%d+"), "");
             formula = formula .. su(x:match("[+-]"):gsub("-", "−"), x:match("%d+"), "");
         elseif t == T_SUF_CHARGE2 then  
         elseif t == T_SUF_CHARGE2 then  
             formula = formula .. su(string.sub(string.gsub(string.match(x, "%(%d*[+-]"), "-", "−"), 2, -1), string.match(x, "%d+"))
             formula = formula .. su(x:match("%(%d*[+-]"):gsub("-", "−"):sub(2, -1), x:match("%d+"))
         elseif t == T_CHARGE then formula = formula .. "<sup>"; if string.match(x, "%d+") then formula = formula .. string.match(x, "%d+"); end formula = formula .. string.gsub(string.match(x, "[%+-]"), "-", "−") .. "</sup>"; -- can not concatenat a nil value from string.match(x, "%d+");
         elseif t == T_CHARGE then
        formula = formula .. "<sup>"
        if x:match("%d+") then formula = formula .. x:match("%d+"); end
        formula = formula .. x:match("[%+-]"):gsub("-", "−") .. "</sup>";
        -- Cannot concatenate a nil value from x:match("%d+");
         elseif t == T_CRYSTAL then formula = formula .. DotIt() .. string.gsub( x, "*", '', 1 );
         elseif t == T_CRYSTAL then formula = formula .. DotIt() .. string.gsub( x, "*", '', 1 );
         elseif t == T_SPECIAL then
         elseif t == T_SPECIAL then
             parameter = string.sub(x, 2, 2) -- x fra \x   
             parameter = x:sub(2, 2) -- x fra \x   
             if    parameter == "s" then formula = formula .. "−"  -- single bond
             if    parameter == "s" then formula = formula .. "−"  -- single bond
             elseif parameter == "d" then formula = formula .. "="  -- double bond
             elseif parameter == "d" then formula = formula .. "="  -- double bond
Line 239: Line 297:
             end
             end
         elseif t == T_SPECIAL2 then  -- \y{x}
         elseif t == T_SPECIAL2 then  -- \y{x}
             parameter = string.sub(x, 2, 2) -- y fra \y{x}  
             parameter = x:sub(2, 2) -- y fra \y{x}  
             if parameter  == "h" then --[[Hapticity]]
             if parameter  == "h" then --[[Hapticity]]
                 if (auto == '') then formula = formula .. "η<sup>" .. string.match(x, '%d+') .. "</sup>-"
                 if (auto == '') then formula = formula .. "η<sup>" .. x:match('%d+') .. "</sup>-"
                 else
                 else
                     formula = formula .. "[[Hapticity|η<sup>" .. string.match(x, '%d+') .. "</sup>]]-"
                     formula = formula .. "[[Hapticity|η<sup>" .. x:match('%d+') .. "</sup>]]-"
                 end
                 end
             elseif parameter == "m" then formula = formula .. "μ<sub>" .. string.match(x, '%d+') .. "</sub>-" -- mu ([[bridging ligand]])
             elseif parameter == "m" then formula = formula .. "μ<sub>" .. x:match('%d+') .. "</sub>-" -- mu ([[bridging ligand]])
             end
             end
         elseif t == T_WATER then  
         elseif t == T_WATER then  
             if string.match(x, "^%*[%d.]") then  
             if x:match("^%*[%d.]") then  
                 formula = formula .. DotIt() .. string.match(x, "%f[%.%d]%d*%.?%d*%f[^%.%d%]]") .. "H<sub>2</sub>O";
                 formula = formula .. DotIt() .. x:match("%f[%.%d]%d*%.?%d*%f[^%.%d%]]") .. "H<sub>2</sub>O";
             else
             else
                 formula = formula .. DotIt() .. "H<sub>2</sub>O";
                 formula = formula .. DotIt() .. "H<sub>2</sub>O";
             end   
             end   
         elseif t == T_UNDERSCORE  then formula = formula .. su("", string.sub(x,3,-2)) -- x contains _{string}
         elseif t == T_UNDERSCORE  then formula = formula .. su("", x:sub(3,-2)) -- x contains _{string}
         elseif t == T_CARET then formula = formula .. su(string.sub(x,3,-2), "") -- x contains ^{string}
         elseif t == T_CARET then formula = formula .. su(x:sub(3,-2), "") -- x contains ^{string}
         elseif t == T_ARROW_R then formula = formula .. " → "
         elseif t == T_ARROW_R then formula = formula .. " → "
         elseif t == T_ARROW_EQ then formula = formula .. " ⇌ "
         elseif t == T_ARROW_EQ then formula = formula .. " ⇌ "
         elseif t == T_NOCHANGE  then formula = formula .. x;  -- The rest - everything which isn't captured by the regular expresions. E.g. wikilinks and pipes
         elseif t == T_NOCHANGE  then formula = formula .. x;  -- The rest - everything which isn't captured by the regular expresions.
      
      
         else error('unreachable - ???') end -- in fact, unreachable
         else error('unreachable - ???') end -- in fact, unreachable
Line 267: Line 325:
     if args[2] or args[3] or args[4] then
     if args[2] or args[3] or args[4] then
         formula = formula .. require('Module:If preview')._warning{
         formula = formula .. require('Module:If preview')._warning{
            '&#123;&#123;chem2&#125;&#125; was called with multiple positional arguments. It should have just one, e.g. &#123;&#123;chem2&#124;H2O&#125;&#125;.'
'&#123;&#123;chem2&#125;&#125; was called with multiple positional arguments. It should have just one, e.g. &#123;&#123;chem2&#124;H2O&#125;&#125;.'
         }
         }
     end
     end
Line 276: Line 334:
     local args = getArgs(frame)
     local args = getArgs(frame)
     return p._chem(args)
     return p._chem(args)
end
-- PRIVATE: builds the wikitext for the module's documentation. The output is
-- two wikitables of "symbol / link target" rows, the first listing every
-- entry of `am` and the second every entry of `groups`, each in the order
-- produced by TableTools.sortedPairs. The `frame` argument is not used.
-- Returns the assembled wikitext as a single string.
function p._autodoc(frame)
    -- Required here rather than at file level so that ordinary article
    -- renders do not pay for loading TableTools.
    local sortedPairs = require('Module:TableTools').sortedPairs
    local out = {}

    local function emit(text)
        out[#out + 1] = text
    end

    -- Appends one wikitable row per entry of `map`.
    local function emitRows(map)
        for sym, page in sortedPairs(map) do
            emit(string.format('|-\n| %s || [[%s]]\n', sym, page))
        end
    end

    emit('===Elements and element-style symbols===\nThese may be automatically linked or used as if they were redirects.\n')
    emit('{| class="wikitable"\n! Symbol !! Link target\n')
    emitRows(am)

    emit('|}\n===Groups===\nThese must be linked manually; they work as if they were redirects.\n')
    emit('{| class="wikitable"\n! Symbol !! Link target\n')
    emitRows(groups)

    emit('|}')
    return table.concat(out)
end
end


return p
return p