Anonymous user
Module:Citation/CS1/Date validation: Difference between revisions
sync from sandbox;
Richardpruen (talk | contribs) m (1 revision imported: Template for journal ) |
(sync from sandbox;) |
||
Line 1: | Line 1: | ||
--[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- | --[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- | ||
]] | ]] | ||
Line 55: | Line 54: | ||
return false; -- accessdate out of range | return false; -- accessdate out of range | ||
end | end | ||
end | end | ||
Line 195: | Line 175: | ||
Function gets current year from the server and compares it to year from a citation parameter. Years more than one | Function gets current year from the server and compares it to year from a citation parameter. Years more than one | ||
year in the future are not acceptable. | year in the future are not acceptable. | ||
Special case for |pmc-embargo-date=: years more than two years in the future are not acceptable | |||
]] | ]] | ||
local function is_valid_year (year) | local function is_valid_year (year, param) | ||
if not is_set(year_limit) then | if not is_set (year_limit) then | ||
year_limit = tonumber(os.date("%Y"))+1; -- global variable so we only have to fetch it once | year_limit = tonumber(os.date("%Y"))+1; -- global variable so we only have to fetch it once | ||
end | end | ||
year = tonumber (year) or lang_object:parseFormattedNumber (year); -- convert to | year = tonumber (year) or lang_object:parseFormattedNumber (year); -- convert to number for the comparison; | ||
if 'pmc-embargo-date' == param then -- special case for |pmc-embargo-date= | |||
return year and (year <= tonumber(os.date("%Y"))+2) or false; -- years more than two years in the future are not accepted | |||
end | |||
return year and (year <= year_limit) or false; | return year and (year <= year_limit) or false; | ||
end | end | ||
Line 219: | Line 205: | ||
]] | ]] | ||
local function is_valid_date (year, month, day) | local function is_valid_date (year, month, day, param) | ||
local days_in_month = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; | local days_in_month = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; | ||
local month_length; | local month_length; | ||
if not is_valid_year(year) then | if not is_valid_year (year, param) then -- no farther into the future than next year except |pmc-embargo-date= no more than two years in the future | ||
return false; | return false; | ||
end | end | ||
month = tonumber(month); -- required for YYYY-MM-DD dates | month = tonumber (month); -- required for YYYY-MM-DD dates | ||
if (2 == month) then -- if February | if (2 == month) then -- if February | ||
Line 253: | Line 239: | ||
Months in a range are expected to have the same style: Jan–Mar or October–December but not February–Mar or Jul–August. | Months in a range are expected to have the same style: Jan–Mar or October–December but not February–Mar or Jul–August. | ||
This function looks in cfg.date_names{} to see if both month names are listed in the long subtable or both are | |||
listed in the short subtable. When both have the same style (both are listed in the same table), returns true; otherwise returns false | |||
]] | ]] | ||
local function is_valid_month_range_style (month1, month2) | local function is_valid_month_range_style (month1, month2) | ||
if (cfg.date_names.en.long[month1] and cfg.date_names.en.long[month2]) or -- are both English names listed in the long subtable? | |||
(cfg.date_names.en.short[month1] and cfg.date_names.en.short[month2]) or -- are both English names listed in the short subtable? | |||
(cfg.date_names['local'].long[month1] and cfg.date_names['local'].long[month2]) or -- are both local names listed in the long subtable? | |||
(cfg.date_names['local'].short[month1] and cfg.date_names['local'].short[month2]) then -- are both local names listed in the short subtable? | |||
return true; | |||
end | end | ||
return false; -- names are mixed | |||
end | end | ||
Line 453: | Line 434: | ||
['y-y'] = {'^(%d%d%d%d?)[%-–]((%d%d%d%d?)%a?)$'}, -- year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999 | ['y-y'] = {'^(%d%d%d%d?)[%-–]((%d%d%d%d?)%a?)$'}, -- year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999 | ||
['y4-y2'] = {'^((%d%d)%d%d)[%-–]((%d%d)%a?)$'}, -- year range: YYYY–YY; separated by unspaced endash | ['y4-y2'] = {'^((%d%d)%d%d)[%-–]((%d%d)%a?)$'}, -- year range: YYYY–YY; separated by unspaced endash | ||
['y'] = {'^((%d%d%d%d?)%a?)$'}, -- year; here accept either YYY or YYYY | ['y'] = {'^((%d%d%d%d?)%a?)$'}, -- year; here accept either YYY or YYYY | ||
} | } | ||
--[[--------------------------< I S _ V A L I D _ E M B A R G O _ D A T E >------------------------------------ | |||
returns true and date value if that value has proper dmy, mdy, ymd format. | |||
returns false and 9999 (embargoed forever) when date value is not proper format; assumes that when |pmc-embargo-date= is | |||
set, the editor intended to embargo a PMC but |pmc-embargo-date= does not hold a single date. | |||
]] | |||
local function is_valid_embargo_date (v) | |||
if v:match (patterns['ymd'][1]) or -- ymd | |||
v:match (patterns['Mdy'][1]) or -- mdy | |||
v:match (patterns['dMy'][1]) then -- dmy | |||
return true, v; | |||
end | |||
return false, '9999'; -- if here not good date so return false and set embargo date to long time in future | |||
end | |||
Line 495: | Line 494: | ||
anchor_year = year; | anchor_year = year; | ||
elseif mw.ustring.match(date_string, patterns['Mdy'][1]) then -- month-initial: month day, year | elseif mw.ustring.match(date_string, patterns['Mdy'][1]) then -- month-initial: month day, year | ||
month, day, anchor_year, year = mw.ustring.match(date_string, patterns['Mdy'][1]); | month, day, anchor_year, year = mw.ustring.match(date_string, patterns['Mdy'][1]); | ||
Line 575: | Line 569: | ||
elseif mw.ustring.match(date_string, patterns['Sy-y'][1]) then -- special case Winter/Summer year-year; year separated with unspaced endash | elseif mw.ustring.match(date_string, patterns['Sy-y'][1]) then -- special case Winter/Summer year-year; year separated with unspaced endash | ||
month, year, anchor_year, year2 = mw.ustring.match(date_string, patterns['Sy-y'][1]); | month, year, anchor_year, year2 = mw.ustring.match(date_string, patterns['Sy-y'][1]); | ||
if 'Winter' ~= | month = get_season_number (month, param); -- <month> can only be winter or summer; also for metadata | ||
anchor_year = year .. '–' .. anchor_year; | if (month ~= cfg.date_names['en'].season['Winter']) and (month ~= cfg.date_names['en'].season['Summer']) then | ||
return false; -- not Summer or Winter; abandon | |||
end | |||
anchor_year = year .. '–' .. anchor_year; -- assemble anchor_year from both years | |||
if 1 ~= tonumber(year2) - tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later | if 1 ~= tonumber(year2) - tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later | ||
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | ||
elseif mw.ustring.match(date_string, patterns['My-My'][1]) then -- month/season year - month/season year; separated by spaced endash | elseif mw.ustring.match(date_string, patterns['My-My'][1]) then -- month/season year - month/season year; separated by spaced endash | ||
Line 627: | Line 623: | ||
if in_array (param, {'date', 'publication-date', 'year'}) then | if in_array (param, {'date', 'publication-date', 'year'}) then | ||
add_prop_cat (' | add_prop_cat ('year-range-abbreviated'); | ||
end | end | ||
Line 658: | Line 654: | ||
local result=true; -- check whole dates for validity; assume true because not all dates will go through this test | local result=true; -- check whole dates for validity; assume true because not all dates will go through this test | ||
if 0 ~= year and 0 ~= month and 0 ~= day and 0 == year2 and 0 == month2 and 0 == day2 then -- YMD (simple whole date) | if 0 ~= year and 0 ~= month and 0 ~= day and 0 == year2 and 0 == month2 and 0 == day2 then -- YMD (simple whole date) | ||
result = is_valid_date(year, month, day); | result = is_valid_date (year, month, day, param); -- <param> for |pmc-embargo-date= | ||
elseif 0 ~= year and 0 ~= month and 0 ~= day and 0 == year2 and 0 == month2 and 0 ~= day2 then -- YMD-d (day range) | elseif 0 ~= year and 0 ~= month and 0 ~= day and 0 == year2 and 0 == month2 and 0 ~= day2 then -- YMD-d (day range) | ||
result = is_valid_date(year, month, day); | result = is_valid_date (year, month, day); | ||
result = result and is_valid_date(year, month, day2); | result = result and is_valid_date (year, month, day2); | ||
elseif 0 ~= year and 0 ~= month and 0 ~= day and 0 == year2 and 0 ~= month2 and 0 ~= day2 then -- YMD-md (day month range) | elseif 0 ~= year and 0 ~= month and 0 ~= day and 0 == year2 and 0 ~= month2 and 0 ~= day2 then -- YMD-md (day month range) | ||
result = is_valid_date(year, month, day); | result = is_valid_date (year, month, day); | ||
result = result and is_valid_date(year, month2, day2); | result = result and is_valid_date (year, month2, day2); | ||
elseif 0 ~= year and 0 ~= month and 0 ~= day and 0 ~= year2 and 0 ~= month2 and 0 ~= day2 then -- YMD-ymd (day month year range) | elseif 0 ~= year and 0 ~= month and 0 ~= day and 0 ~= year2 and 0 ~= month2 and 0 ~= day2 then -- YMD-ymd (day month year range) | ||
Line 723: | Line 719: | ||
good_date, anchor_year, COinS_date = true, v.val:match("((%d+)%a?)"); | good_date, anchor_year, COinS_date = true, v.val:match("((%d+)%a?)"); | ||
end | end | ||
elseif 'pmc-embargo-date' == k then | elseif 'pmc-embargo-date' == k then -- if the parameter is |pmc-embargo-date= | ||
good_date = check_date (v.val, k); -- go test the date | good_date = check_date (v.val, k); -- go test the date | ||
if true == good_date then -- if the date is a valid date | if true == good_date then -- if the date is a valid date | ||
good_date, embargo_date = is_valid_embargo_date (v.val); -- is |pmc-embargo-date= date a single dmy, mdy, or ymd formatted date? yes: returns embargo; no: returns 9999 | good_date, embargo_date = is_valid_embargo_date (v.val); -- is |pmc-embargo-date= date a single dmy, mdy, or ymd formatted date? yes: returns embargo date; no: returns 9999 | ||
end | end | ||
else -- any other date-holding parameter | else -- any other date-holding parameter | ||
Line 747: | Line 743: | ||
2 - year value matches the year value in date when date is in the form YYYY-MM-DD and year is disambiguated (|year=YYYYx) | 2 - year value matches the year value in date when date is in the form YYYY-MM-DD and year is disambiguated (|year=YYYYx) | ||
the | the numeric value in <result> determines the 'output' if any from this function: | ||
0 – adds error message to error_list sequence table | 0 – adds error message to error_list sequence table | ||
1 – adds maint cat | 1 – adds maint cat | ||
Line 902: | Line 898: | ||
end | end | ||
-- yMd is not supported at en.wiki; | -- yMd is not supported at en.wiki; when yMd is supported at your wiki, uncomment the next line | ||
-- if 'yMd' == format_param and in_array (pattern_idx, {'yMd', 'Md-dy', 'd-dMy', 'dM-dMy', 'Md-Mdy', 'dMy-dMy', 'Mdy-Mdy'}) then -- these formats not convertable; yMd not supported at en.wiki | -- if 'yMd' == format_param and in_array (pattern_idx, {'yMd', 'Md-dy', 'd-dMy', 'dM-dMy', 'Md-Mdy', 'dMy-dMy', 'Mdy-Mdy'}) then -- these formats not convertable; yMd not supported at en.wiki | ||
if 'yMd' == format_param then -- yMd not supported at en.wiki; when yMd is supported at your wiki, remove or comment-out this line | |||
if 'yMd' == format_param then -- yMd not supported at en.wiki | |||
return; -- not a reformattable date | return; -- not a reformattable date | ||
end | end | ||
Line 922: | Line 917: | ||
}; | }; | ||
if t.a then -- if this date has an anchor year capture | if t.a then -- if this date has an anchor year capture (all convertable date formats except ymd) | ||
t.y = t.a; | if t.y2 then -- for year range date formats | ||
t.y2 = t.a; -- use the anchor year capture when reassembling the date | |||
else -- here for single date formats (except ymd) | |||
t.y = t.a; -- use the anchor year capture when reassembling the date | |||
end | |||
end | end | ||
if tonumber(t.m) then -- if raw month is a number (converting from ymd) | if tonumber(t.m) then -- if raw month is a number (converting from ymd) | ||
if 's' == mon_len then -- if we are to use abbreviated month names | if 's' == mon_len then -- if we are to use abbreviated month names | ||
t.m = cfg.date_names[' | t.m = cfg.date_names['inv_local_short'][tonumber(t.m)]; -- convert it to a month name | ||
else | else | ||
t.m = cfg.date_names[' | t.m = cfg.date_names['inv_local_long'][tonumber(t.m)]; -- convert it to a month name | ||
end | end | ||
t.d = t.d:gsub ('0(%d)', '%1'); -- strip leading '0' from day if present | t.d = t.d:gsub ('0(%d)', '%1'); -- strip leading '0' from day if present | ||
elseif 'ymd' == format_param then -- when converting to ymd | elseif 'ymd' == format_param then -- when converting to ymd | ||
t.y = t.y:gsub ('%a', ''); -- strip CITREF disambiguator if present; anchor year already known so process can proceed | t.y = t.y:gsub ('%a', ''); -- strip CITREF disambiguator if present; anchor year already known so process can proceed; TODO: maint message? | ||
if 1582 > tonumber (t.y) then -- ymd format dates not allowed before 1582 | if 1582 > tonumber (t.y) then -- ymd format dates not allowed before 1582 | ||
return; | return; | ||
Line 945: | Line 944: | ||
t[mon] = get_month_number (t[mon]); -- get the month number for this month (is length agnostic) | t[mon] = get_month_number (t[mon]); -- get the month number for this month (is length agnostic) | ||
if 0 == t[mon] then return; end -- seasons and named dates can't be converted | if 0 == t[mon] then return; end -- seasons and named dates can't be converted | ||
t[mon] = (('s' == mon_len) and cfg.date_names[' | t[mon] = (('s' == mon_len) and cfg.date_names['inv_local_short'][t[mon]]) or cfg.date_names['inv_local_long'][t[mon]]; -- fetch month name according to length | ||
end | end | ||
end | end | ||
Line 1,032: | Line 1,031: | ||
end -- if | end -- if | ||
end -- for | end -- for | ||
return result; | return result; -- declare boolean result and done | ||
end | end | ||
Line 1,059: | Line 1,058: | ||
end | end | ||
return result; -- so we know if any hyphens were replaced | return result; -- so we know if any hyphens were replaced | ||
end | end | ||
Line 1,091: | Line 1,063: | ||
--[[-------------------------< D A T E _ N A M E _ X L A T E >------------------------------------------------ | --[[-------------------------< D A T E _ N A M E _ X L A T E >------------------------------------------------ | ||
Attempts to translate English | Attempts to translate English date names to local-language date names using names supplied by MediaWiki's | ||
date parser function. This is simple name-for-name replacement and may not work for all languages. | date parser function. This is simple name-for-name replacement and may not work for all languages. | ||
Line 1,105: | Line 1,077: | ||
local date; | local date; | ||
local sources_t = { | |||
{cfg.date_names.en.long, cfg.date_names.inv_local_long}, -- for translating long English month names to long local month names | |||
{cfg.date_names.en.short, cfg.date_names.inv_local_short}, -- short month names | |||
{cfg.date_names.en.quarter, cfg.date_names.inv_local_quarter}, -- quarter date names | |||
{cfg.date_names.en.season, cfg.date_names.inv_local_season}, -- season date names | |||
{cfg.date_names.en.named, cfg.date_names.inv_local_named}, -- named dates | |||
} | |||
local function is_xlateable (month) -- local function to get local date name that replaces existing English-language date name | |||
for _, date_names_t in ipairs (sources_t) do -- for each sequence table in sources_t | |||
if date_names_t[1][month] then -- if date name is English month (long or short), quarter, season or named and | |||
if date_names_t[2][date_names_t[1][month]] then -- if there is a matching local date name | |||
return date_names_t[2][date_names_t[1][month]]; -- return the local date name | |||
end | |||
end | |||
end | |||
end | |||
for param_name, param_val in pairs(date_parameters_list) do -- for each date-holding parameter in the list | for param_name, param_val in pairs(date_parameters_list) do -- for each date-holding parameter in the list | ||
if is_set(param_val.val) then -- if the parameter has a value | if is_set(param_val.val) then -- if the parameter has a value | ||
date = param_val.val; | date = param_val.val; | ||
for month in mw.ustring.gmatch (date, '%a+') do | for month in mw.ustring.gmatch (date, '[%a ]+') do -- iterate through all date names in the date (single date or date range) | ||
if cfg.date_names.en.long[month] then | month = mw.text.trim (month); -- this because quarterly dates contain whitespace | ||
mode = 'F'; -- English name is long so use long local name | xlate = is_xlateable (month); -- get translation of <month>; returns translation or nil | ||
mode = 'M'; -- English name is short so use short local name | -- if cfg.date_names.en.long[month] then -- long month dates | ||
else | -- if cfg.date_names.inv_local_long[cfg.date_names.en.long[month]] then | ||
-- xlate = cfg.date_names.inv_local_long[cfg.date_names.en.long[month]]; | |||
-- end | |||
---- mode = 'F'; -- English name is long so use long local name | |||
if mode then -- might be a season | -- elseif cfg.date_names.en.short[month] then -- short month dates | ||
-- if cfg.date_names.inv_local_short[cfg.date_names.en.short[month]] then | |||
-- xlate = cfg.date_names.inv_local_short[cfg.date_names.en.short[month]]; | |||
-- end | |||
---- mode = 'M'; -- English name is short so use short local name | |||
-- elseif cfg.date_names.en.quarter[month] then -- quarter dates | |||
-- if cfg.date_names.inv_local_quarter[cfg.date_names.en.quarter[month]] then | |||
-- xlate = cfg.date_names.inv_local_quarter[cfg.date_names.en.quarter[month]]; | |||
-- end | |||
-- elseif cfg.date_names.en.season[month] then -- season dates | |||
-- if cfg.date_names.inv_local_season[cfg.date_names.en.season[month]] then | |||
-- xlate = cfg.date_names.inv_local_season[cfg.date_names.en.season[month]]; | |||
-- end | |||
-- elseif cfg.date_names.en.named[month] then -- named dates | |||
-- if cfg.date_names.inv_local_named[cfg.date_names.en.named[month]] then | |||
-- xlate = cfg.date_names.inv_local_named[cfg.date_names.en.named[month]]; | |||
-- end | |||
-- else | |||
-- xlate=nil; -- not an English month name; could be local language month name | |||
---- mode = nil; -- not an English month name; could be local language month name or an English season name | |||
-- end | |||
if xlate then | |||
-- if mode then -- might be a season | |||
-- xlate = lang_object:formatDate(mode, '1' .. month); -- translate the month name to this local language | |||
date = mw.ustring.gsub (date, month, xlate); -- replace the English with the translation | date = mw.ustring.gsub (date, month, xlate); -- replace the English with the translation | ||
date_parameters_list[param_name].val = date; -- save the translated date | date_parameters_list[param_name].val = date; -- save the translated date | ||
Line 1,164: | Line 1,177: | ||
date_hyphen_to_dash = date_hyphen_to_dash, | date_hyphen_to_dash = date_hyphen_to_dash, | ||
date_name_xlate = date_name_xlate, | date_name_xlate = date_name_xlate, | ||
set_selected_modules = set_selected_modules | set_selected_modules = set_selected_modules | ||
} | } |