Module:Lang/data: Difference between revisions

+hno, pnb, skr as ISO 15919;
m 1 revision imported: template update
 
(3 intermediate revisions by 3 users not shown)
Line 22: Line 22:
local function key_to_lower (module, src_type)
local function key_to_lower (module, src_type)
local out = {};
local out = {};
local source = (('var_sup' == src_type) and require (module)) or mw.loadData (module); -- fetch data from this module; require() avoids metatable trap for variant data
local source_t = (('var_sup' == src_type) and require (module)) or mw.loadData (module); -- fetch data from this module; require() avoids metatable trap for variant data
 
if 'var_sup' == src_type then
if 'var_sup' == src_type then
for k, v in pairs (source) do
for k, v in pairs (source_t) do
out[k:lower()] = v; -- for variant and suppressed everything is needed
out[k:lower()] = v; -- for variant and suppressed everything is needed
end
end


elseif 'lang' == src_type and source.active then -- for ~/iana_languages (active)
elseif 'lang' == src_type and source_t.active then -- for ~/iana_languages (active)
for k, v in pairs (source.active) do
for k, v in pairs (source_t.active) do
out[k:lower()] = v[1]; -- ignore multiple names; take first name only
out[k:lower()] = v[1]; -- ignore multiple names; take first name only
end
end


elseif 'lang_dep' == src_type and source.deprecated then -- for ~/iana_languages (deprecated)
elseif 'lang_dep' == src_type and source_t.deprecated then -- for ~/iana_languages (deprecated)
for k, v in pairs (source.deprecated) do
for k, v in pairs (source_t.deprecated) do
out[k:lower()] = v[1]; -- ignore multiple names; take first name only
out[k:lower()] = v[1]; -- ignore multiple names; take first name only
end
end


else -- here for all other sources
else -- here for all other sources
for k, v in pairs (source) do
for k, v in pairs (source_t) do
out[k:lower()] = v[1]; -- ignore multiple names; take first name only
out[k:lower()] = v[1]; -- ignore multiple names; take first name only
end
end
Line 47: Line 48:


local lang_name_table_t = {
local lang_name_table_t = {
lang = key_to_lower ('Module:Language/data/iana languages', 'lang'),
lang = key_to_lower ('Module:Lang/data/iana languages', 'lang'),
lang_dep = key_to_lower ('Module:Language/data/iana languages', 'lang_dep'),
lang_dep = key_to_lower ('Module:Lang/data/iana languages', 'lang_dep'),
script = key_to_lower ('Module:Language/data/iana scripts'), -- script keys are capitalized; set to lower
script = key_to_lower ('Module:Lang/data/iana scripts'), -- script keys are capitalized; set to lower
region = key_to_lower ('Module:Language/data/iana regions'), -- region keys are uppercase; set to lower
region = key_to_lower ('Module:Lang/data/iana regions'), -- region keys are uppercase; set to lower
variant = key_to_lower ('Module:Language/data/iana variants', 'var_sup'),
variant = key_to_lower ('Module:Lang/data/iana variants', 'var_sup'),
suppressed = key_to_lower ('Module:Language/data/iana suppressed scripts', 'var_sup'), -- script keys are capitalized; set to lower
suppressed = key_to_lower ('Module:Lang/data/iana suppressed scripts', 'var_sup'), -- script keys are capitalized; set to lower
}
}


Line 118: Line 119:
------------------------------< I S O _ 6 3 9 - 1 >------------------------------------------------------------
------------------------------< I S O _ 6 3 9 - 1 >------------------------------------------------------------


["ab"] = "Abkhaz",                                                          -- to match en.wiki article name
["ca-valencia"] = "Valencian",
["ca-valencia"] = "Valencian",
["cu"] = "Church Slavonic", -- 2nd IANA name;
["cu"] = "Church Slavonic", -- 2nd IANA name;
Line 131: Line 133:
["en-us"] = "American English",
["en-us"] = "American English",
["en-za"] = "South African English",
["en-za"] = "South African English",
["fr-ca"] = "Quebec French",
["fr-gallo"] = "Gallo",
["fy"] = "West Frisian", -- Western Frisian
["fy"] = "West Frisian", -- Western Frisian
["mo"] = "Moldovan", -- Moldavian (deprecated code); to match en.wiki article title
["mo"] = "Moldovan", -- Moldavian (deprecated code); to match en.wiki article title
["nl-be"] = "Flemish", -- match MediaWiki
["nl-be"] = "Flemish", -- match MediaWiki
["oc-gascon"] = "Gascon",
["oc-provenc"] = "Provençal",
["oc-provenc"] = "Provençal",
["ps"] = "Pashto", -- Pushto
["ps"] = "Pashto", -- Pushto
["pt-br"] = "Brazilian Portuguese", -- match MediaWiki
["pt-br"] = "Brazilian Portuguese", -- match MediaWiki
["ro-md"] = "Moldovan", -- 'not deprecated' form
["ro-cyrl-md"] = "Moldovan", -- 'not deprecated' form
["tw-asante"] = "Asante Twi",
["tw-asante"] = "Asante Twi",
["ug"] = "Uyghur", -- 2nd IANA name; to match en.wiki article name


-- these ISO 639-1 language-name overrides imported from Module:Language/data/wp_languages
-- these ISO 639-1 language-name overrides imported from Module:Language/data/wp_languages (since deleted)
--<begin do-not-edit except to comment out>--
--<begin do-not-edit except to comment out>--
["av"] = "Avar", -- Avaric
["av"] = "Avar", -- Avaric
Line 177: Line 185:
["arc"] = "Imperial Aramaic (700-300 BCE)", -- Official Aramaic (700-300 BCE), Imperial Aramaic (700-300 BCE); to match en.wiki article title uses ISO639-2 'preferred' name
["arc"] = "Imperial Aramaic (700-300 BCE)", -- Official Aramaic (700-300 BCE), Imperial Aramaic (700-300 BCE); to match en.wiki article title uses ISO639-2 'preferred' name
["art"] = "constructed", -- to match en.wiki article; lowercase for category name
["art"] = "constructed", -- to match en.wiki article; lowercase for category name
["ast-es"] = "Leonese", -- ast in IANA is Asturian; Leonese is a dialect
["bea"] = "Dane-zaa", -- Beaver; to match en.wiki article title
["bha"] = "Bhariati", -- Bharia; to match en.wiki article title
["bhd"] = "Bhadarwahi", -- Bhadrawahi; to match en.wiki article title
["bhd"] = "Bhadarwahi", -- Bhadrawahi; to match en.wiki article title
["bla"] = "Blackfoot", -- Siksika; to match en.wiki article title
["bla"] = "Blackfoot", -- Siksika; to match en.wiki article title
["blc"] = "Nuxalk", -- Bella Coola; to match en.wiki article title
["bua"] = "Buryat", -- Buriat; this is a macro language; these four use wp preferred transliteration;
["bua"] = "Buryat", -- Buriat; this is a macro language; these four use wp preferred transliteration;
["bxm"] = "Mongolian Buryat", -- Mongolia Buriat; these three all redirect to Buryat
["bxm"] = "Mongolian Buryat", -- Mongolia Buriat; these three all redirect to Buryat
Line 184: Line 196:
["bxu"] = "Chinese Buryat", -- China Buriat;
["bxu"] = "Chinese Buryat", -- China Buriat;
["byr"] = "Yipma", -- Baruya, Yipma
["byr"] = "Yipma", -- Baruya, Yipma
["clm"] = "Klallam", -- Clallam; to match en.wiki article title
["egy"] = "Ancient Egyptian", -- Egyptian (Ancient); distinguish from contemporary arz: Egyptian Arabic  
["egy"] = "Ancient Egyptian", -- Egyptian (Ancient); distinguish from contemporary arz: Egyptian Arabic  
["ems"] = "Alutiiq", -- Pacific Gulf Yupik; to match en.wiki article title
["ems"] = "Alutiiq", -- Pacific Gulf Yupik; to match en.wiki article title
Line 191: Line 204:
["gsw-fr"] = "Alsatian", -- match MediaWiki
["gsw-fr"] = "Alsatian", -- match MediaWiki
["haa"] = "Hän", -- Han; to match en.wiki article title
["haa"] = "Hän", -- Han; to match en.wiki article title
["hei"] = "Heiltsuk–Oowekyala", -- Heiltsuk; to match en.wiki article title
["hmx"] = "Hmong–Mien languages", -- to match en.wiki article title (endash)
["hmx"] = "Hmong–Mien languages", -- to match en.wiki article title (endash)
["ilo"] = "Ilocano", -- Iloko; to match en.wiki article title
["ilo"] = "Ilocano", -- Iloko; to match en.wiki article title
["jam"] = "Jamaican Patois", -- Jamaican Creole English
["jam"] = "Jamaican Patois", -- Jamaican Creole English
["lij-mc"] = "Monégasque", -- Ligurian as spoken in Monaco; this one for proper tool tip; also in <article_name> table
["luo"] = "Dholuo", -- IANA (primary) /ISO 639-3: Luo (Kenya and Tanzania); IANA (secondary): Dholuo
["luo"] = "Dholuo", -- IANA (primary) /ISO 639-3: Luo (Kenya and Tanzania); IANA (secondary): Dholuo
["mhr"] = "Meadow Mari", -- Eastern Mari
["mhr"] = "Meadow Mari", -- Eastern Mari
Line 216: Line 231:
["pms"] = "Piedmontese", -- Piemontese; to match en.wiki article title
["pms"] = "Piedmontese", -- Piemontese; to match en.wiki article title
["pnb"] = "Punjabi (Western)", -- Western Panjabi; dab added to override import from ~/wp languages and distinguish pnb from pa in reverse look up tag_from_name()
["pnb"] = "Punjabi (Western)", -- Western Panjabi; dab added to override import from ~/wp languages and distinguish pnb from pa in reverse look up tag_from_name()
['qwm'] = "Cuman", -- Kuman (Russia); to match en.wiki article name
["rop"] = "Australian Kriol", -- Kriol; en.wiki article is a dab; point to correct en.wiki article
["rop"] = "Australian Kriol", -- Kriol; en.wiki article is a dab; point to correct en.wiki article
["sco-ulster"] = "Ulster Scots",
["sdo"] = "Bukar–Sadong", -- Bukar-Sadung Bidayuh; to match en.wiki article title
["sdo"] = "Bukar–Sadong", -- Bukar-Sadung Bidayuh; to match en.wiki article title
["smp"] = "Samaritan Hebrew", -- to match en.wiki article title
["stq"] = "Saterland Frisian", -- Saterfriesisch
["stq"] = "Saterland Frisian", -- Saterfriesisch
["und"] = "undetermined", -- capitalization to match existing category
["und"] = "undetermined", -- capitalization to match existing category
Line 226: Line 244:
["zxx"] = "no linguistic content", -- capitalization
["zxx"] = "no linguistic content", -- capitalization


-- these ISO 639-2, -3 language-name overrides imported from Module:Language/data/wp_languages
-- these ISO 639-2, -3 language-name overrides imported from Module:Language/data/wp_languages (since deleted)
--<begin do-not-edit except to comment out>--
--<begin do-not-edit except to comment out>--
["ace"] = "Acehnese", -- Achinese
["ace"] = "Acehnese", -- Achinese
Line 258: Line 276:
["kpo"] = "Kposo", -- Ikposo
["kpo"] = "Kposo", -- Ikposo
["krj"] = "Kinaray-a", -- Kinaray-A
["krj"] = "Kinaray-a", -- Kinaray-A
["ktz"] = "Juǀ'hoan", -- Juǀʼhoan
-- ["ktz"] = "Juǀ'hoan", -- Juǀʼhoan
["lez"] = "Lezgian", -- Lezghian
["lez"] = "Lezgian", -- Lezghian
["liv"] = "Livonian", -- Liv
["liv"] = "Livonian", -- Liv
Line 304: Line 322:
------------------------------< P R I V A T E _ U S E _ T A G S >----------------------------------------------
------------------------------< P R I V A T E _ U S E _ T A G S >----------------------------------------------


["akk-x-latbabyl"] = "Late Babylonian Akkadian",
["akk-x-midassyr"] = "Middle Assyrian Akkadian",
["akk-x-midbabyl"] = "Middle Babylonian Akkadian",
["akk-x-neoassyr"] = "Neo-Assyrian Akkadian",
["akk-x-neobabyl"] = "Neo-Babylonian Akkadian",
["akk-x-old"] = "Old Akkadian",
["akk-x-oldassyr"] = "Old Assyrian Akkadian",
["akk-x-oldbabyl"] = "Old Babylonian Akkadian",
["alg-x-proto"] = "Proto-Algonquian", -- alg in IANA is Algonquian languages
["alg-x-proto"] = "Proto-Algonquian", -- alg in IANA is Algonquian languages
["cel-x-proto"] = "Proto-Celtic", -- cel in IANA is Celtic languages
["ca-x-old"] = "Old Catalan",
["cel-x-combrit"] = "Common Brittonic", -- cel in IANA is Celtic languages
["cel-x-proto"] = "Proto-Celtic",
["egy-x-demotic"] = "Demotic Egyptian",
["egy-x-late"] = "Late Egyptian",
["egy-x-middle"] = "Middle Egyptian",
["egy-x-old"] = "Old Egyptian",
["gem-x-proto"] = "Proto-Germanic", -- gem in IANA is Germanic languages
["gem-x-proto"] = "Proto-Germanic", -- gem in IANA is Germanic languages
["gmw-x-ecg"] = "East Central German",
["gmw-x-ecg"] = "East Central German",
Line 321: Line 353:
["grk-x-proto"] = "Proto-Greek", -- grk in IANA is Greek languages
["grk-x-proto"] = "Proto-Greek", -- grk in IANA is Greek languages
["iir-x-proto"] = "Proto-Indo-Iranian", -- iir in IANA is Indo-Iranian Languages
["iir-x-proto"] = "Proto-Indo-Iranian", -- iir in IANA is Indo-Iranian Languages
["inc-x-mitanni"] = "Mitanni-Aryan", -- inc in IANA is Indic languages
["inc-x-proto"] = "Proto-Indo-Aryan",
["ine-x-anatolia"] = "Anatolian languages",
["ine-x-proto"] = "Proto-Indo-European",
["ine-x-proto"] = "Proto-Indo-European",
["ira-x-proto"] = "Proto-Iranian", -- ira in IANA is Iranian languages
["ira-x-proto"] = "Proto-Iranian", -- ira in IANA is Iranian languages
Line 326: Line 361:
["ksh-x-colog"] = "Colognian", -- en.wiki article is Colognian; ksh (Kölsch) redirects there
["ksh-x-colog"] = "Colognian", -- en.wiki article is Colognian; ksh (Kölsch) redirects there
["la-x-medieval"] = "Medieval Latin",
["la-x-medieval"] = "Medieval Latin",
["la-x-new"] = "New Latin",
["lmo-x-berg"] = "Bergamasque", -- lmo in IANA is Lombard; Bergamasque is a dialect
["lmo-x-cremish"] = "Cremish", -- lmo in IANA is Lombard; Cremish is a dialect
["lmo-x-milanese"] = "Milanese", -- lmo in IANA is Lombard; Milanese is a dialect
["mis-x-ripuar"] = "Ripuarian", -- replaces improper use of ksh in wp_languages
["mis-x-ripuar"] = "Ripuarian", -- replaces improper use of ksh in wp_languages
["prg-x-old"] = "Old Prussian",
["sem-x-ammonite"] = "Ammonite",
["sem-x-aramaic"] = "Aramaic",
["sem-x-canaan"] = "Canaanite languages",
["sem-x-dumaitic"] = "Dumaitic",
["sem-x-egurage"] = "Eastern Gurage",
["sem-x-hatran"] = "Hatran Aramaic",
["sem-x-oldsoara"] = "Old South Arabian",
["sem-x-palmyren"] = "Palmyrene Aramaic",
["sem-x-proto"] = "Proto-Semitic",
["sem-x-proto"] = "Proto-Semitic",
["sem-x-taymanit"] = "Taymanitic",
["sla-x-proto"] = "Proto-Slavic", -- sla in IANA is Slavic languages
["sla-x-proto"] = "Proto-Slavic", -- sla in IANA is Slavic languages
["yuf-x-hav"] = "Havasupai", -- IANA name for these three is Havasupai-Walapai-Yavapai
["yuf-x-hav"] = "Havasupai", -- IANA name for these three is Havasupai-Walapai-Yavapai
["yuf-x-wal"] = "Walapai",
["yuf-x-wal"] = "Walapai",
["yuf-x-yav"] = "Yavapai",
["yuf-x-yav"] = "Yavapai",
["xsc-x-pontic"] = "Pontic Scythian", -- xsc in IANA is Scythian
["xsc-x-saka"] = "Saka",
["xsc-x-sarmat"] = "Sarmatian",
}
}


Line 337: Line 389:
--[[--------------------------< A R T I C L E _ L I N K >------------------------------------------------------
--[[--------------------------< A R T I C L E _ L I N K >------------------------------------------------------


for those rare occasions when article titles don't fit with the normal '<language name>-language', this table
for those rare occasions when article titles don't fit with the normal '<language name> language', this table
maps language code to article title. Use of this table should be avoided and the use of redirects preferred as
maps language code to article title. Use of this table should be avoided and the use of redirects preferred as
that is the long-standing method of handling article names that don't fit with the normal pattern
that is the long-standing method of handling article names that don't fit with the normal pattern
Line 344: Line 396:


local article_name = {
local article_name = {
["lij"] = "Ligurian (Romance language)", -- Ligurian; see Template_talk:Lang#Ligurian_dab
['kue'] = "Kuman language (New Guinea)", -- Kuman (Papua New Guinea); to avoid Kuman dab page
["lij-mc"] = "Monégasque dialect", -- Ligurian as spoken in Monaco
['mbo'] = "Mbo language (Cameroon)", -- Mbo (Cameroon)
['mnh'] = "Mono language (Congo)", -- Mono (Democratic Republic of Congo); see Template_talk:Lang#Mono_languages
['mnh'] = "Mono language (Congo)", -- Mono (Democratic Republic of Congo); see Template_talk:Lang#Mono_languages
['mnr'] = "Mono language (California)", -- Mono (USA)
['mnr'] = "Mono language (California)", -- Mono (USA)
['mru'] = "Mono language (Cameroon)", -- Mono (Cameroon)
['mru'] = "Mono language (Cameroon)", -- Mono (Cameroon)
["xlg"] = "Ligurian (ancient language)", -- see Template_talk:Lang#Ligurian_dab
["snq"] = "Sangu language (Gabon)", -- Sangu (Gabon)
["toi"] = "Tonga language (Zambia and Zimbabwe)",                          -- Tonga (Zambia and Zimbabwe); to avoid Tonga language dab page
["vwa"] = "Awa language (China)", -- Awa (China); to avoid Awa dab page
["xlg"] = "Ligurian language (ancient)", -- see Template_talk:Lang#Ligurian_dab
["zmw"] = "Mbo language (Congo)", -- Mbo (Democratic Republic of Congo)
}
}


Line 468: Line 526:
['zh'] = 'ISO 7098 Chinese',
['zh'] = 'ISO 7098 Chinese',
['chi'] = 'ISO 7098 Chinese',
['chi'] = 'ISO 7098 Chinese',
['pny'] = 'ISO 7098 Chinese',
['cmn'] = 'ISO 7098 Chinese',
['zho'] = 'ISO 7098 Chinese',
['zho'] = 'ISO 7098 Chinese',
-- ['han'] = 'ISO 7098 Chinese', -- unicode alias of Hani? doesn't belong here? should be Hani?
-- ['han'] = 'ISO 7098 Chinese', -- unicode alias of Hani? doesn't belong here? should be Hani?
Line 591: Line 649:
['default'] = 'Hans Wehr transliteration',
['default'] = 'Hans Wehr transliteration',
},
},
['yaleko'] = {
['default'] = 'Yale romanization of Korean',
}
};
--[[--------------------------< E N G _ V A R >----------------------------------------------------------------
Used at en.wiki so that the spelling of 'romanized' (US, default) can be changed to 'romanised' to match the engvar
specified by a {{Use xxx English}} template.
This is accomplished by setting |engvar=gb; the parameter can, and should, be omitted in articles that use American English; no
need for the clutter.
]]
local engvar_sel_t = {															-- selects UK or US spelling; values are the key names used in the engvar_t table
	-- keys match IANA region codes (except that they are lower case here)
	au = 'gb_t',
	ca = 'us_t',
	gb = 'gb_t',
	ie = 'gb_t',
	['in'] = 'gb_t',															-- 'in' is a Lua reserved word so this key must stay in bracket-and-quote form
	nz = 'gb_t',
	us = 'us_t',																-- default engvar
	za = 'gb_t',
};
};


local engvar_t = {																-- forms of 'romanization', one set per spelling variety
	gb_t = {																	-- '-is-' spellings
		romanisz_lc = 'romanisation',											-- noun, lower case
		romanisz_uc = 'Romanisation',											-- noun, initial capital
		romanisz_pt = 'romanised',												-- past tense
	},
	us_t = {																	-- '-iz-' spellings; default engvar
		romanisz_lc = 'romanization',											-- noun, lower case
		romanisz_uc = 'Romanization',											-- noun, initial capital
		romanisz_pt = 'romanized',												-- past tense
	},
}
--[[--------------------------< E X P O R T S >----------------------------------------------------------------
]]


return
return
Line 600: Line 699:
article_name = article_name,
article_name = article_name,
engvar_t = engvar_t,
engvar_sel_t = engvar_sel_t,
lang_name_table = lang_name_table_t,
lang_name_table = lang_name_table_t,
override = override,
override = override,