Module:Citation/CS1/Date validation: Difference between revisions
Richardpruen (talk | contribs) m 1 revision imported: Template for journal |
Richardpruen (talk | contribs) m 1 revision imported: template update |
||
(4 intermediate revisions by 3 users not shown) | |||
Line 1: | Line 1: | ||
--[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- | --[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- | ||
]] | ]] | ||
Line 55: | Line 54: | ||
return false; -- accessdate out of range | return false; -- accessdate out of range | ||
end | end | ||
end | end | ||
Line 195: | Line 175: | ||
Function gets current year from the server and compares it to year from a citation parameter. Years more than one | Function gets current year from the server and compares it to year from a citation parameter. Years more than one | ||
year in the future are not acceptable. | year in the future are not acceptable. | ||
Special case for |pmc-embargo-date=: years more than two years in the future are not acceptable | |||
]] | ]] | ||
local function is_valid_year (year) | local function is_valid_year (year, param) | ||
if not is_set(year_limit) then | if not is_set (year_limit) then | ||
year_limit = tonumber(os.date("%Y"))+1; -- global variable so we only have to fetch it once | year_limit = tonumber(os.date("%Y"))+1; -- global variable so we only have to fetch it once | ||
end | end | ||
year = tonumber (year) or lang_object:parseFormattedNumber (year); -- convert to | year = tonumber (year) or lang_object:parseFormattedNumber (year); -- convert to number for the comparison | ||
if year and (100 > year) then -- years less than 100 not supported | |||
return false; | |||
end | |||
if 'pmc-embargo-date' == param then -- special case for |pmc-embargo-date= | |||
return year and (year <= tonumber(os.date("%Y"))+2) or false; -- years more than two years in the future are not accepted | |||
end | |||
return year and (year <= year_limit) or false; | return year and (year <= year_limit) or false; | ||
end | end | ||
Line 219: | Line 208: | ||
]] | ]] | ||
local function is_valid_date (year, month, day) | local function is_valid_date (year, month, day, param) | ||
local days_in_month = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; | local days_in_month = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; | ||
local month_length; | local month_length; | ||
if not is_valid_year(year) then | if not is_valid_year (year, param) then -- no farther into the future than next year except |pmc-embargo-date= no more than two years in the future | ||
return false; | return false; | ||
end | end | ||
month = tonumber(month); -- required for YYYY-MM-DD dates | month = tonumber (month); -- required for YYYY-MM-DD dates | ||
if (2 == month) then -- if February | if (2 == month) then -- if February | ||
Line 253: | Line 242: | ||
Months in a range are expected to have the same style: Jan–Mar or October–December but not February–Mar or Jul–August. | Months in a range are expected to have the same style: Jan–Mar or October–December but not February–Mar or Jul–August. | ||
This function looks in cfg.date_names{} to see if both month names are listed in the long subtable or both are | |||
listed in the short subtable. When both have the same style (both are listed in the same table), returns true; false else | |||
]] | ]] | ||
local function is_valid_month_range_style (month1, month2) | local function is_valid_month_range_style (month1, month2) | ||
if (cfg.date_names.en.long[month1] and cfg.date_names.en.long[month2]) or -- are both English names listed in the long subtable? | |||
(cfg.date_names.en.short[month1] and cfg.date_names.en.short[month2]) or -- are both English names listed in the short subtable? | |||
(cfg.date_names['local'].long[month1] and cfg.date_names['local'].long[month2]) or -- are both local names listed in the long subtable? | |||
(cfg.date_names['local'].short[month1] and cfg.date_names['local'].short[month2]) then -- are both local names listed in the short subtable? | |||
return true; | |||
end | end | ||
return false; -- names are mixed | |||
end | end | ||
Line 340: | Line 324: | ||
local date; -- one date or first date in a range | local date; -- one date or first date in a range | ||
local date2 = ''; -- end of range date | local date2 = ''; -- end of range date | ||
input.year = tonumber (input.year) or lang_object:parseFormattedNumber (input.year); -- language-aware tonumber() | |||
input.year2 = tonumber (input.year2) or lang_object:parseFormattedNumber (input.year2); -- COinS dates are pseudo-ISO 8601 so convert to Arabic numerals | |||
if ((1582 == input.year) and (10 > tonumber(input.month))) or (1582 > input.year) then -- if a Julian calendar date | |||
tCOinS_date.rftdate = tostring (input.year); -- &rft.date gets year only | |||
return; -- done | |||
end | end | ||
-- | -- here for all forms of Gregorian dates | ||
if 20 < tonumber (input.month) then -- if season, quarter, or proper-name date | |||
if | date = input.year; -- &rft.date gets year only | ||
date = input.year; | |||
if 0 ~= input.year2 and input.year ~= input.year2 then -- if a range, only the second year portion when not the same as range start year | if 0 ~= input.year2 and input.year ~= input.year2 then -- if a range, only the second year portion when not the same as range start year | ||
date = string.format ('%.4d/%.4d', | date = string.format ('%.4d/%.4d', input.year, input.year2) -- assemble the date range | ||
end | end | ||
local season = {[24] = 'winter', [21] = 'spring', [22] = 'summer', [23] = 'fall', [33] = '1', [34] = '2', [35] = '3', [36] = '4', [98] = 'Easter', [99] = 'Christmas'}; -- seasons lowercase, no autumn; proper-names use title case | |||
if 0 == input.month2 then -- single season, quarter, or proper-name date | |||
if 40 < tonumber(input.month) then | |||
tCOinS_date.rftchron = season[input.month]; -- proper-name date; used in journal metadata only | |||
elseif 30 < tonumber(input.month) then | |||
tCOinS_date.rftquarter = season[input.month]; -- quarter date; used in journal metadata only | |||
else | |||
tCOinS_date.rftssn = season[input.month]; -- season date; used in journal metadata only | |||
end | |||
else -- season ranges are lumped into &rft.chron; &rft.ssn and &rft.quarter are left blank | |||
if input.year ~= input.year2 then -- season year – season year range or season year–year | |||
if 0 ~= input.month2 then | |||
tCOinS_date.rftchron = string.format ('%s %s – %s %s', season[input.month], input.year, season[input.month2], input.year2); -- used in journal metadata only | |||
end | end | ||
else -- season–season year range | |||
tCOinS_date.rftchron = season[input.month] .. '–' .. season[input.month2]; -- season–season year range; used in journal metadata only | |||
end | end | ||
end | end | ||
tCOinS_date.rftdate = date; | |||
tCOinS_date.rftdate = tostring (date); | |||
return; -- done | return; -- done | ||
end | end | ||
-- here for gregorian calendar dates | |||
if 0 ~= input.day then | if 0 ~= input.day then | ||
date = string.format ('%s-%.2d-%.2d', input.year, tonumber(input.month), tonumber(input.day)); -- whole date | date = string.format ('%s-%.2d-%.2d', input.year, tonumber(input.month), tonumber(input.day)); -- whole date | ||
Line 406: | Line 384: | ||
--[[--------------------------< P A T T E R N S > | --[[--------------------------< P A T T E R N S _ T >---------------------------------------------------------- | ||
this is the list of patterns for date formats that this module recognizes. Approximately the first half of these | this is the list of patterns for date formats that this module recognizes. Approximately the first half of these | ||
Line 418: | Line 396: | ||
]] | ]] | ||
local | local patterns_t = { | ||
-- year-initial numerical year-month-day | -- year-initial numerical year-month-day | ||
['ymd'] = {'^(%d%d%d%d)%-(%d%d)%-(%d%d)$', 'y', 'm', 'd'}, | ['ymd'] = {'^(%d%d%d%d)%-(%d%d)%-(%d%d)$', 'y', 'm', 'd'}, | ||
Line 453: | Line 431: | ||
['y-y'] = {'^(%d%d%d%d?)[%-–]((%d%d%d%d?)%a?)$'}, -- year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999 | ['y-y'] = {'^(%d%d%d%d?)[%-–]((%d%d%d%d?)%a?)$'}, -- year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999 | ||
['y4-y2'] = {'^((%d%d)%d%d)[%-–]((%d%d)%a?)$'}, -- year range: YYYY–YY; separated by unspaced endash | ['y4-y2'] = {'^((%d%d)%d%d)[%-–]((%d%d)%a?)$'}, -- year range: YYYY–YY; separated by unspaced endash | ||
['y'] = {'^((%d%d%d%d?)%a?)$'}, -- year; here accept either YYY or YYYY | ['y'] = {'^((%d%d%d%d?)%a?)$'}, -- year; here accept either YYY or YYYY | ||
} | } | ||
--[[--------------------------< I S _ V A L I D _ E M B A R G O _ D A T E >------------------------------------ | |||
returns true and date value if that value has proper dmy, mdy, ymd format. | |||
returns false and 9999 (embargoed forever) when date value is not proper format; assumes that when |pmc-embargo-date= is | |||
set, the editor intended to embargo a PMC but |pmc-embargo-date= does not hold a single date. | |||
]] | |||
local function is_valid_embargo_date (v) | |||
if v:match (patterns_t['ymd'][1]) or -- ymd | |||
v:match (patterns_t['Mdy'][1]) or -- mdy | |||
v:match (patterns_t['dMy'][1]) then -- dmy | |||
return true, v; | |||
end | |||
return false, '9999'; -- if here not good date so return false and set embargo date to long time in future | |||
end | |||
Line 490: | Line 486: | ||
local coins_date; | local coins_date; | ||
if date_string:match ( | if date_string:match (patterns_t['ymd'][1]) then -- year-initial numerical year month day format | ||
year, month, day = date_string:match ( | year, month, day = date_string:match (patterns_t['ymd'][1]); | ||
if 12 < tonumber(month) or 1 > tonumber(month) or 1582 > tonumber(year) or 0 == tonumber(day) then return false; end -- month or day number not valid or not Gregorian calendar | if 12 < tonumber(month) or 1 > tonumber(month) or 1582 > tonumber(year) or 0 == tonumber(day) then return false; end -- month or day number not valid or not Gregorian calendar | ||
anchor_year = year; | anchor_year = year; | ||
elseif mw.ustring.match(date_string, patterns_t['Mdy'][1]) then -- month-initial: month day, year | |||
month, day, anchor_year, year = mw.ustring.match(date_string, patterns_t['Mdy'][1]); | |||
elseif mw.ustring.match(date_string, | |||
month, day, anchor_year, year = mw.ustring.match(date_string, | |||
month = get_month_number (month); | month = get_month_number (month); | ||
if 0 == month then return false; end -- return false if month text isn't one of the twelve months | if 0 == month then return false; end -- return false if month text isn't one of the twelve months | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns_t['Md-dy'][1]) then -- month-initial day range: month day–day, year; days are separated by endash | ||
month, day, day2, anchor_year, year = mw.ustring.match(date_string, | month, day, day2, anchor_year, year = mw.ustring.match(date_string, patterns_t['Md-dy'][1]); | ||
if tonumber(day) >= tonumber(day2) then return false; end -- date range order is left to right: earlier to later; dates may not be the same; | if tonumber(day) >= tonumber(day2) then return false; end -- date range order is left to right: earlier to later; dates may not be the same; | ||
month = get_month_number (month); | month = get_month_number (month); | ||
Line 513: | Line 504: | ||
year2 = year; | year2 = year; | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns_t['dMy'][1]) then -- day-initial: day month year | ||
day, month, anchor_year, year = mw.ustring.match(date_string, | day, month, anchor_year, year = mw.ustring.match(date_string, patterns_t['dMy'][1]); | ||
month = get_month_number (month); | month = get_month_number (month); | ||
if 0 == month then return false; end -- return false if month text isn't one of the twelve months | if 0 == month then return false; end -- return false if month text isn't one of the twelve months | ||
--[[ NOT supported at en.wiki | --[[ NOT supported at en.wiki | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns_t['yMd'][1]) then -- year-initial: year month day; day: one or two digits, leading zero allowed | ||
anchor_year, year, month, day = mw.ustring.match(date_string, | anchor_year, year, month, day = mw.ustring.match(date_string, patterns_t['yMd'][1]); | ||
month = get_month_number (month); | month = get_month_number (month); | ||
if 0 == month then return false; end -- return false if month text isn't one of the twelve months | if 0 == month then return false; end -- return false if month text isn't one of the twelve months | ||
-- end NOT supported at en.wiki ]] | -- end NOT supported at en.wiki ]] | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns_t['d-dMy'][1]) then -- day-range-initial: day–day month year; days are separated by endash | ||
day, day2, month, anchor_year, year = mw.ustring.match(date_string, | day, day2, month, anchor_year, year = mw.ustring.match(date_string, patterns_t['d-dMy'][1]); | ||
if tonumber(day) >= tonumber(day2) then return false; end -- date range order is left to right: earlier to later; dates may not be the same; | if tonumber(day) >= tonumber(day2) then return false; end -- date range order is left to right: earlier to later; dates may not be the same; | ||
month = get_month_number (month); | month = get_month_number (month); | ||
Line 533: | Line 524: | ||
year2 = year; | year2 = year; | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns_t['dM-dMy'][1]) then -- day initial month-day-range: day month - day month year; uses spaced endash | ||
day, month, day2, month2, anchor_year, year = mw.ustring.match(date_string, | day, month, day2, month2, anchor_year, year = mw.ustring.match(date_string, patterns_t['dM-dMy'][1]); | ||
if (not is_valid_month_season_range(month, month2)) or not is_valid_year(year) then return false; end -- date range order is left to right: earlier to later; | if (not is_valid_month_season_range(month, month2)) or not is_valid_year(year) then return false; end -- date range order is left to right: earlier to later; | ||
month = get_month_number (month); -- for metadata | month = get_month_number (month); -- for metadata | ||
Line 540: | Line 531: | ||
year2 = year; | year2 = year; | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns_t['Md-Mdy'][1]) then -- month initial month-day-range: month day – month day, year; uses spaced endash | ||
month, day, month2, day2, anchor_year, year = mw.ustring.match(date_string, | month, day, month2, day2, anchor_year, year = mw.ustring.match(date_string, patterns_t['Md-Mdy'][1]); | ||
if (not is_valid_month_season_range(month, month2, param)) or not is_valid_year(year) then return false; end | if (not is_valid_month_season_range(month, month2, param)) or not is_valid_year(year) then return false; end | ||
month = get_month_number (month); -- for metadata | month = get_month_number (month); -- for metadata | ||
Line 547: | Line 538: | ||
year2 = year; | year2 = year; | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns_t['dMy-dMy'][1]) then -- day initial month-day-year-range: day month year - day month year; uses spaced endash | ||
day, month, year, day2, month2, anchor_year, year2 = mw.ustring.match(date_string, | day, month, year, day2, month2, anchor_year, year2 = mw.ustring.match(date_string, patterns_t['dMy-dMy'][1]); | ||
if tonumber(year2) <= tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later | if tonumber(year2) <= tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later | ||
if not is_valid_year(year2) or not is_valid_month_range_style(month, month2) then return false; end -- year2 no more than one year in the future; months same style | if not is_valid_year(year2) or not is_valid_month_range_style(month, month2) then return false; end -- year2 no more than one year in the future; months same style | ||
Line 555: | Line 546: | ||
if 0 == month or 0 == month2 then return false; end -- both must be valid | if 0 == month or 0 == month2 then return false; end -- both must be valid | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns_t['Mdy-Mdy'][1]) then -- month initial month-day-year-range: month day, year – month day, year; uses spaced endash | ||
month, day, year, month2, day2, anchor_year, year2 = mw.ustring.match(date_string, | month, day, year, month2, day2, anchor_year, year2 = mw.ustring.match(date_string, patterns_t['Mdy-Mdy'][1]); | ||
if tonumber(year2) <= tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later | if tonumber(year2) <= tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later | ||
if not is_valid_year(year2) or not is_valid_month_range_style(month, month2) then return false; end -- year2 no more than one year in the future; months same style | if not is_valid_year(year2) or not is_valid_month_range_style(month, month2) then return false; end -- year2 no more than one year in the future; months same style | ||
Line 563: | Line 554: | ||
if 0 == month or 0 == month2 then return false; end -- both must be valid | if 0 == month or 0 == month2 then return false; end -- both must be valid | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns_t['Sy4-y2'][1]) then -- special case Winter/Summer year-year (YYYY-YY); year separated with unspaced endash | ||
local century; | local century; | ||
month, year, century, anchor_year, year2 = mw.ustring.match(date_string, | month, year, century, anchor_year, year2 = mw.ustring.match(date_string, patterns_t['Sy4-y2'][1]); | ||
if 'Winter' ~= month and 'Summer' ~= month then return false end; -- 'month' can only be Winter or Summer | if 'Winter' ~= month and 'Summer' ~= month then return false end; -- 'month' can only be Winter or Summer | ||
anchor_year = year .. '–' .. anchor_year; -- assemble anchor_year from both years | anchor_year = year .. '–' .. anchor_year; -- assemble anchor_year from both years | ||
Line 573: | Line 564: | ||
month = get_season_number(month, param); | month = get_season_number(month, param); | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns_t['Sy-y'][1]) then -- special case Winter/Summer year-year; year separated with unspaced endash | ||
month, year, anchor_year, year2 = mw.ustring.match(date_string, | month, year, anchor_year, year2 = mw.ustring.match(date_string, patterns_t['Sy-y'][1]); | ||
if 'Winter' ~= | month = get_season_number (month, param); -- <month> can only be winter or summer; also for metadata | ||
anchor_year = year .. '–' .. anchor_year; | if (month ~= cfg.date_names['en'].season['Winter']) and (month ~= cfg.date_names['en'].season['Summer']) then | ||
return false; -- not Summer or Winter; abandon | |||
end | |||
anchor_year = year .. '–' .. anchor_year; -- assemble anchor_year from both years | |||
if 1 ~= tonumber(year2) - tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later | if 1 ~= tonumber(year2) - tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later | ||
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns_t['My-My'][1]) then -- month/season year - month/season year; separated by spaced endash | ||
month, year, month2, anchor_year, year2 = mw.ustring.match(date_string, | month, year, month2, anchor_year, year2 = mw.ustring.match(date_string, patterns_t['My-My'][1]); | ||
anchor_year = year .. '–' .. anchor_year; -- assemble anchor_year from both years | anchor_year = year .. '–' .. anchor_year; -- assemble anchor_year from both years | ||
if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same | if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same | ||
Line 596: | Line 589: | ||
end | end | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns_t['M-My'][1]) then -- month/season range year; months separated by endash | ||
month, month2, anchor_year, year = mw.ustring.match(date_string, | month, month2, anchor_year, year = mw.ustring.match(date_string, patterns_t['M-My'][1]); | ||
if (not is_valid_month_season_range(month, month2, param)) or (not is_valid_year(year)) then return false; end | if (not is_valid_month_season_range(month, month2, param)) or (not is_valid_year(year)) then return false; end | ||
if 0 ~= get_month_number(month) then -- determined to be a valid range so just check this one to know if month or season | if 0 ~= get_month_number(month) then -- determined to be a valid range so just check this one to know if month or season | ||
Line 609: | Line 602: | ||
year2 = year; | year2 = year; | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns_t['My'][1]) then -- month/season/quarter/proper-name year | ||
month, anchor_year, year = mw.ustring.match(date_string, | month, anchor_year, year = mw.ustring.match(date_string, patterns_t['My'][1]); | ||
if not is_valid_year(year) then return false; end | if not is_valid_year(year) then return false; end | ||
month = get_element_number(month, param); -- get month season quarter proper-name number or nil | month = get_element_number(month, param); -- get month season quarter proper-name number or nil | ||
if not month then return false; end -- not valid whatever it is | if not month then return false; end -- not valid whatever it is | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns_t['y-y'][1]) then -- Year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999 | ||
year, anchor_year, year2 = mw.ustring.match(date_string, | year, anchor_year, year2 = mw.ustring.match(date_string, patterns_t['y-y'][1]); | ||
anchor_year = year .. '–' .. anchor_year; -- assemble anchor year from both years | anchor_year = year .. '–' .. anchor_year; -- assemble anchor year from both years | ||
if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same | if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same | ||
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns_t['y4-y2'][1]) then -- Year range: YYYY–YY; separated by unspaced endash | ||
local century; | local century; | ||
year, century, anchor_year, year2 = mw.ustring.match(date_string, | year, century, anchor_year, year2 = mw.ustring.match(date_string, patterns_t['y4-y2'][1]); | ||
anchor_year = year .. '–' .. anchor_year; -- assemble anchor year from both years | anchor_year = year .. '–' .. anchor_year; -- assemble anchor year from both years | ||
if 13 > tonumber(year2) then return false; end -- don't allow 2003-05 which might be May 2003 | if 13 > tonumber(year2) then return false; end -- don't allow 2003-05 which might be May 2003 | ||
year2 = century .. year2; | year2 = century .. year2; -- add the century to year2 for comparisons | ||
if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same | if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same | ||
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | ||
elseif mw.ustring.match(date_string, | if in_array (param, {'date', 'publication-date', 'year'}) then -- here when 'valid' abbreviated year range; if one of these parameters | ||
anchor_year, year = mw.ustring.match(date_string, | add_prop_cat ('year-range-abbreviated'); -- add properties cat | ||
end | |||
elseif mw.ustring.match(date_string, patterns_t['y'][1]) then -- year; here accept either YYY or YYYY | |||
anchor_year, year = mw.ustring.match(date_string, patterns_t['y'][1]); | |||
if false == is_valid_year(year) then | if false == is_valid_year(year) then | ||
return false; | return false; | ||
Line 645: | Line 638: | ||
end | end | ||
if 'access-date' == param then -- test | if param ~= 'date' then -- CITEREF disambiguation only allowed in |date=; |year= & |publication-date= promote to date | ||
if anchor_year:match ('%l$') then | |||
return false; | |||
end | |||
end | |||
if 'access-date' == param then -- test access-date here because we have numerical date parts | |||
if 0 ~= year and 0 ~= month and 0 ~= day and -- all parts of a single date required | if 0 ~= year and 0 ~= month and 0 ~= day and -- all parts of a single date required | ||
0 == year2 and 0 == month2 and 0 == day2 then -- none of these; | 0 == year2 and 0 == month2 and 0 == day2 then -- none of these; access-date must not be a range | ||
if not is_valid_accessdate(year .. '-' .. month .. '-' .. day) then | if not is_valid_accessdate(year .. '-' .. month .. '-' .. day) then | ||
return false; -- return false when | return false; -- return false when access-date out of bounds | ||
end | end | ||
else | else | ||
return false; -- return false when | return false; -- return false when access-date is a range of two dates | ||
end | |||
end | |||
if 'archive-date' == param then -- test archive-date here because we have numerical date parts | |||
if not (0 ~= year and 0 ~= month and 0 ~= day and -- all parts of a single date required | |||
0 == year2 and 0 == month2 and 0 == day2) then -- none of these; archive-date must not be a range | |||
return false; -- return false when archive-date is a range of two dates | |||
end | end | ||
end | end | ||
Line 658: | Line 664: | ||
local result=true; -- check whole dates for validity; assume true because not all dates will go through this test | local result=true; -- check whole dates for validity; assume true because not all dates will go through this test | ||
if 0 ~= year and 0 ~= month and 0 ~= day and 0 == year2 and 0 == month2 and 0 == day2 then -- YMD (simple whole date) | if 0 ~= year and 0 ~= month and 0 ~= day and 0 == year2 and 0 == month2 and 0 == day2 then -- YMD (simple whole date) | ||
result = is_valid_date(year, month, day); | result = is_valid_date (year, month, day, param); -- <param> for |pmc-embargo-date= | ||
elseif 0 ~= year and 0 ~= month and 0 ~= day and 0 == year2 and 0 == month2 and 0 ~= day2 then -- YMD-d (day range) | elseif 0 ~= year and 0 ~= month and 0 ~= day and 0 == year2 and 0 == month2 and 0 ~= day2 then -- YMD-d (day range) | ||
result = is_valid_date(year, month, day); | result = is_valid_date (year, month, day); | ||
result = result and is_valid_date(year, month, day2); | result = result and is_valid_date (year, month, day2); | ||
elseif 0 ~= year and 0 ~= month and 0 ~= day and 0 == year2 and 0 ~= month2 and 0 ~= day2 then -- YMD-md (day month range) | elseif 0 ~= year and 0 ~= month and 0 ~= day and 0 == year2 and 0 ~= month2 and 0 ~= day2 then -- YMD-md (day month range) | ||
result = is_valid_date(year, month, day); | result = is_valid_date (year, month, day); | ||
result = result and is_valid_date(year, month2, day2); | result = result and is_valid_date (year, month2, day2); | ||
elseif 0 ~= year and 0 ~= month and 0 ~= day and 0 ~= year2 and 0 ~= month2 and 0 ~= day2 then -- YMD-ymd (day month year range) | elseif 0 ~= year and 0 ~= month and 0 ~= day and 0 ~= year2 and 0 ~= month2 and 0 ~= day2 then -- YMD-ymd (day month year range) | ||
Line 723: | Line 729: | ||
good_date, anchor_year, COinS_date = true, v.val:match("((%d+)%a?)"); | good_date, anchor_year, COinS_date = true, v.val:match("((%d+)%a?)"); | ||
end | end | ||
elseif 'pmc-embargo-date' == k then | elseif 'pmc-embargo-date' == k then -- if the parameter is |pmc-embargo-date= | ||
good_date = check_date (v.val, k); -- go test the date | good_date = check_date (v.val, k); -- go test the date | ||
if true == good_date then -- if the date is a valid date | if true == good_date then -- if the date is a valid date | ||
good_date, embargo_date = is_valid_embargo_date (v.val); -- is |pmc-embargo-date= date a single dmy, mdy, or ymd formatted date? yes: returns embargo; no: returns 9999 | good_date, embargo_date = is_valid_embargo_date (v.val); -- is |pmc-embargo-date= date a single dmy, mdy, or ymd formatted date? yes: returns embargo date; no: returns 9999 | ||
end | end | ||
else -- any other date-holding parameter | else -- any other date-holding parameter | ||
Line 737: | Line 743: | ||
end | end | ||
return anchor_year, embargo_date; -- and done | return anchor_year, embargo_date; -- and done | ||
end | |||
--[[--------------------------< Y E A R _ C H E C K >---------------------------------------------------------- | |||
Temporary function to test |year= for acceptable values: | |||
YYY, YYYY, year-only ranges, their circa forms, with or without CITEREF disambiguators. | |||
When |year= holds some form of date that is not one of these year-only dates, emit a maintenance message. | |||
This function is necessary because many non-cs1|2 templates have a |year= parameter so cirrus searches are more-or- | |||
less useless | |||
]] | |||
local function year_check (year) | |||
year = year:gsub ('c%. *', ''); -- remove circa annotation (if present) before testing <year> | |||
for _, index in ipairs ({'y-y', 'y4-y2', 'y'}) do -- spin through these indexes into patterns_t | |||
if mw.ustring.match (year, patterns_t[index][1]) then | |||
return; -- if a match then |year= holds a valid 'year' | |||
end | |||
end | |||
set_message ('maint_year'); -- if here, |year= value is not an accepted value; add a maint cat | |||
end | end | ||
Line 747: | Line 778: | ||
2 - year value matches the year value in date when date is in the form YYYY-MM-DD and year is disambiguated (|year=YYYYx) | 2 - year value matches the year value in date when date is in the form YYYY-MM-DD and year is disambiguated (|year=YYYYx) | ||
the | the numeric value in <result> determines the 'output' if any from this function: | ||
0 – adds error message to error_list sequence table | 0 – adds error message to error_list sequence table | ||
1 – adds maint cat | 1 – adds maint cat | ||
Line 810: | Line 841: | ||
format string used by string.format() | format string used by string.format() | ||
identifier letters ('d', 'm', 'y', 'd2', 'm2', 'y2') that serve as indexes into a table t{} that holds captures | identifier letters ('d', 'm', 'y', 'd2', 'm2', 'y2') that serve as indexes into a table t{} that holds captures | ||
from mw.ustring.match() for the various date parts specified by | from mw.ustring.match() for the various date parts specified by patterns_t[pattern_idx][1] | ||
Items in | Items in patterns_t{} have the general form: | ||
['ymd'] = {'^(%d%d%d%d)%-(%d%d)%-(%d%d)$', 'y', 'm', 'd'}, where: | ['ymd'] = {'^(%d%d%d%d)%-(%d%d)%-(%d%d)$', 'y', 'm', 'd'}, where: | ||
['ymd'] is pattern_idx | ['ymd'] is pattern_idx | ||
patterns_t['ymd'][1] is the match pattern with captures for mw.ustring.match() | |||
patterns_t['ymd'][2] is an indicator letter identifying the content of the first capture | |||
patterns_t['ymd'][3] ... the second capture etc. | |||
when a pattern matches a date, the captures are loaded into table t{} in capture order using the identifier | when a pattern matches a date, the captures are loaded into table t{} in capture order using the identifier | ||
Line 826: | Line 857: | ||
format_param set to the desired format. This function loads table t{} as described and then calls string.format() | format_param set to the desired format. This function loads table t{} as described and then calls string.format() | ||
with the format string specified by re_format[pattern_idx][format_param][1] using values taken from t{} according | with the format string specified by re_format[pattern_idx][format_param][1] using values taken from t{} according | ||
to the capture identifier letters specified by | to the capture identifier letters specified by patterns_t[pattern_idx][format_param][n] where n is 2.. | ||
]] | ]] | ||
Line 891: | Line 922: | ||
local function reformatter (date, pattern_idx, format_param, mon_len) | local function reformatter (date, pattern_idx, format_param, mon_len) | ||
if not in_array (pattern_idx, {'ymd', 'Mdy', 'Md-dy', 'dMy', 'yMd', 'd-dMy', 'dM-dMy', 'Md-Mdy', 'dMy-dMy', 'Mdy-Mdy', 'My-My', 'M-My', 'My'}) then | if not in_array (pattern_idx, {'ymd', 'Mdy', 'Md-dy', 'dMy', 'yMd', 'd-dMy', 'dM-dMy', 'Md-Mdy', 'dMy-dMy', 'Mdy-Mdy', 'My-My', 'M-My', 'My'}) then | ||
return; -- not in this set of date format | return; -- not in this set of date format patterns_t then not a reformattable date | ||
end | end | ||
Line 902: | Line 933: | ||
end | end | ||
-- yMd is not supported at en.wiki; | -- yMd is not supported at en.wiki; when yMd is supported at your wiki, uncomment the next line | ||
-- if 'yMd' == format_param and in_array (pattern_idx, {'yMd', 'Md-dy', 'd-dMy', 'dM-dMy', 'Md-Mdy', 'dMy-dMy', 'Mdy-Mdy'}) then -- these formats not convertable; yMd not supported at en.wiki | -- if 'yMd' == format_param and in_array (pattern_idx, {'yMd', 'Md-dy', 'd-dMy', 'dM-dMy', 'Md-Mdy', 'dMy-dMy', 'Mdy-Mdy'}) then -- these formats not convertable; yMd not supported at en.wiki | ||
if 'yMd' == format_param then -- yMd not supported at en.wiki; when yMd is supported at your wiki, remove or comment-out this line | |||
if 'yMd' == format_param then -- yMd not supported at en.wiki | |||
return; -- not a reformattable date | return; -- not a reformattable date | ||
end | end | ||
local c1, c2, c3, c4, c5, c6, c7; -- these hold the captures specified in | local c1, c2, c3, c4, c5, c6, c7; -- these hold the captures specified in patterns_t[pattern_idx][1] | ||
c1, c2, c3, c4, c5, c6, c7 = mw.ustring.match (date, | c1, c2, c3, c4, c5, c6, c7 = mw.ustring.match (date, patterns_t[pattern_idx][1]); -- get the captures | ||
local t = { -- table that holds k/v pairs of date parts from the captures and | local t = { -- table that holds k/v pairs of date parts from the captures and patterns_t[pattern_idx][2..] | ||
[ | [patterns_t[pattern_idx][2]] = c1; -- at minimum there is always one capture with a matching indicator letter | ||
[ | [patterns_t[pattern_idx][3] or 'x'] = c2; -- patterns_t can have a variable number of captures; each capture requires an indicator letter; | ||
[ | [patterns_t[pattern_idx][4] or 'x'] = c3; -- where there is no capture, there is no indicator letter so n in patterns_t[pattern_idx][n] will be nil; | ||
[ | [patterns_t[pattern_idx][5] or 'x'] = c4; -- the 'x' here spoofs an indicator letter to prevent 'table index is nil' error | ||
[ | [patterns_t[pattern_idx][6] or 'x'] = c5; | ||
[ | [patterns_t[pattern_idx][7] or 'x'] = c6; | ||
[ | [patterns_t[pattern_idx][8] or 'x'] = c7; | ||
}; | }; | ||
if t.a then -- if this date has an anchor year capture | if t.a then -- if this date has an anchor year capture (all convertable date formats except ymd) | ||
t.y = t.a; | if t.y2 then -- for year range date formats | ||
t.y2 = t.a; -- use the anchor year capture when reassembling the date | |||
else -- here for single date formats (except ymd) | |||
t.y = t.a; -- use the anchor year capture when reassembling the date | |||
end | |||
end | end | ||
if tonumber(t.m) then -- if raw month is a number (converting from ymd) | if tonumber(t.m) then -- if raw month is a number (converting from ymd) | ||
if 's' == mon_len then -- if we are to use abbreviated month names | if 's' == mon_len then -- if we are to use abbreviated month names | ||
t.m = cfg.date_names[' | t.m = cfg.date_names['inv_local_short'][tonumber(t.m)]; -- convert it to a month name | ||
else | else | ||
t.m = cfg.date_names[' | t.m = cfg.date_names['inv_local_long'][tonumber(t.m)]; -- convert it to a month name | ||
end | end | ||
t.d = t.d:gsub ('0(%d)', '%1'); -- strip leading '0' from day if present | t.d = t.d:gsub ('0(%d)', '%1'); -- strip leading '0' from day if present | ||
elseif 'ymd' == format_param then -- when converting to ymd | elseif 'ymd' == format_param then -- when converting to ymd | ||
t.y = t.y:gsub ('%a', ''); -- strip CITREF disambiguator if present; anchor year already known so process can proceed | t.y = t.y:gsub ('%a', ''); -- strip CITREF disambiguator if present; anchor year already known so process can proceed; TODO: maint message? | ||
if 1582 > tonumber (t.y) then -- ymd format dates not allowed before 1582 | if 1582 > tonumber (t.y) then -- ymd format dates not allowed before 1582 | ||
return; | return; | ||
Line 945: | Line 979: | ||
t[mon] = get_month_number (t[mon]); -- get the month number for this month (is length agnostic) | t[mon] = get_month_number (t[mon]); -- get the month number for this month (is length agnostic) | ||
if 0 == t[mon] then return; end -- seasons and named dates can't be converted | if 0 == t[mon] then return; end -- seasons and named dates can't be converted | ||
t[mon] = (('s' == mon_len) and cfg.date_names[' | t[mon] = (('s' == mon_len) and cfg.date_names['inv_local_short'][t[mon]]) or cfg.date_names['inv_local_long'][t[mon]]; -- fetch month name according to length | ||
end | end | ||
end | end | ||
Line 1,015: | Line 1,049: | ||
if is_set (param_val.val) then -- if the parameter has a value | if is_set (param_val.val) then -- if the parameter has a value | ||
if not (not all and in_array (param_name, {'access-date', 'archive-date'})) then -- skip access- or archive-date unless format is xxx-all; yeah, ugly; TODO: find a better way | if not (not all and in_array (param_name, {'access-date', 'archive-date'})) then -- skip access- or archive-date unless format is xxx-all; yeah, ugly; TODO: find a better way | ||
for pattern_idx, pattern in pairs ( | for pattern_idx, pattern in pairs (patterns_t) do | ||
if mw.ustring.match (param_val.val, pattern[1]) then | if mw.ustring.match (param_val.val, pattern[1]) then | ||
if all and in_array (param_name, {'access-date', 'archive-date'}) then -- if this date is an access- or archive-date | if all and in_array (param_name, {'access-date', 'archive-date'}) then -- if this date is an access- or archive-date | ||
Line 1,026: | Line 1,060: | ||
date_parameters_list[param_name].val = new_date; -- update date in date list | date_parameters_list[param_name].val = new_date; -- update date in date list | ||
result = true; -- and announce that changes have been made | result = true; -- and announce that changes have been made | ||
break; | |||
end | end | ||
end -- if | end -- if | ||
Line 1,032: | Line 1,067: | ||
end -- if | end -- if | ||
end -- for | end -- for | ||
return result; | return result; -- declare boolean result and done | ||
end | end | ||
Line 1,050: | Line 1,085: | ||
for param_name, param_val in pairs(date_parameters_list) do -- for each date-holding parameter in the list | for param_name, param_val in pairs(date_parameters_list) do -- for each date-holding parameter in the list | ||
if is_set (param_val.val) and | if is_set (param_val.val) and | ||
not mw.ustring.match (param_val.val, | not mw.ustring.match (param_val.val, patterns_t.ymd[1]) then -- for those that are not ymd dates (ustring because here digits may not be Western) | ||
param_val.val, n = param_val.val:gsub ('%-', '–'); -- replace any hyphen with ndash | param_val.val, n = param_val.val:gsub ('%-', '–'); -- replace any hyphen with ndash | ||
if 0 ~= n then | if 0 ~= n then | ||
Line 1,059: | Line 1,094: | ||
end | end | ||
return result; -- so we know if any hyphens were replaced | return result; -- so we know if any hyphens were replaced | ||
end | end | ||
Line 1,091: | Line 1,099: | ||
--[[-------------------------< D A T E _ N A M E _ X L A T E >------------------------------------------------ | --[[-------------------------< D A T E _ N A M E _ X L A T E >------------------------------------------------ | ||
Attempts to translate English | Attempts to translate English date names to local-language date names using names supplied by MediaWiki's | ||
date parser function. This is simple name-for-name replacement and may not work for all languages. | date parser function. This is simple name-for-name replacement and may not work for all languages. | ||
Line 1,105: | Line 1,113: | ||
local date; | local date; | ||
local sources_t = { | |||
{cfg.date_names.en.long, cfg.date_names.inv_local_long}, -- for translating long English month names to long local month names | |||
{cfg.date_names.en.short, cfg.date_names.inv_local_short}, -- short month names | |||
{cfg.date_names.en.quarter, cfg.date_names.inv_local_quarter}, -- quarter date names | |||
{cfg.date_names.en.season, cfg.date_names.inv_local_season}, -- season date nam | |||
{cfg.date_names.en.named, cfg.date_names.inv_local_named}, -- named dates | |||
} | |||
local function is_xlateable (month) -- local function to get local date name that replaces existing English-language date name | |||
for _, date_names_t in ipairs (sources_t) do -- for each sequence table in date_names_t | |||
if date_names_t[1][month] then -- if date name is English month (long or short), quarter, season or named and | |||
if date_names_t[2][date_names_t[1][month]] then -- if there is a matching local date name | |||
return date_names_t[2][date_names_t[1][month]]; -- return the local date name | |||
end | |||
end | |||
end | |||
end | |||
for param_name, param_val in pairs(date_parameters_list) do -- for each date-holding parameter in the list | for param_name, param_val in pairs(date_parameters_list) do -- for each date-holding parameter in the list | ||
if is_set(param_val.val) then -- if the parameter has a value | if is_set(param_val.val) then -- if the parameter has a value | ||
date = param_val.val; | date = param_val.val; | ||
for month in mw.ustring.gmatch (date, '%a+') do | for month in mw.ustring.gmatch (date, '[%a ]+') do -- iterate through all date names in the date (single date or date range) | ||
month = mw.text.trim (month); -- this because quarterly dates contain whitespace | |||
xlate = is_xlateable (month); -- get translate <month>; returns translation or nil | |||
if xlate then | |||
if | |||
date = mw.ustring.gsub (date, month, xlate); -- replace the English with the translation | date = mw.ustring.gsub (date, month, xlate); -- replace the English with the translation | ||
date_parameters_list[param_name].val = date; -- save the translated date | date_parameters_list[param_name].val = date; -- save the translated date | ||
Line 1,152: | Line 1,172: | ||
cfg = cfg_table_ptr; -- import tables from selected Module:Citation/CS1/Configuration | cfg = cfg_table_ptr; -- import tables from selected Module:Citation/CS1/Configuration | ||
end | |||
--[[--------------------------< A R C H I V E _ D A T E _ C H E C K >------------------------------------------

Compares the value in |archive-date= with the timestamp in a Wayback Machine url.  When the two do not match,
emits an error message that suggests a date taken from the |archive-url= timestamp.

This function is never called when any date in a cs1|2 template has errors.

The suggested |archive-date= value is rendered in the format specified by <df>.  <df> is either |df= or
cfg.global_df, in that order.  If <df> is nil, the suggested date takes its format from |archive-date=.

Caveat: when |df=dmy or |df=mdy, the reformatter leaves |access-date= and |archive-date= formats as they are.
The suggested date is passed to the formatter as YYYY-MM-DD so when |df=dmy or |df=mdy, the format is not changed.

Parameters:
	<archive_date> – value of |archive-date=
	<archive_url_timestamp> – timestamp digits taken from |archive-url=
	<df> – date format keyword or nil

Returns nothing.

]]

local function archive_date_check (archive_date, archive_url_timestamp, df)
	local fallback_format = 'dmy-y';											-- format used when <df> is nil; default to ymd; 'dmy' used here to spoof reformat_dates()
	local format_probes = {{'dMy', 'dmy-all'}, {'Mdy', 'mdy-all'}};				-- {patterns_t{} index, |df= keyword} pairs

	for i = 1, #format_probes do												-- is |archive-date= format dmy or mdy?
		local pattern_key, df_keyword = format_probes[i][1], format_probes[i][2];
		if archive_date:match (patterns_t[pattern_key][1]) then					-- does the pattern match?
			fallback_format = cfg.keywords_xlate[df_keyword];					-- get appropriate |df= supported keyword from the i18n translator table
			break;
		end
	end

	local dates_t = {															-- setup to call reformat_dates(); never called when errors so <name> unset as not needed
		['archive-date'] = {val=archive_date, name=''},
	};
	reformat_dates (dates_t, 'dmy-y');											-- reformat |archive-date= to ymd; 'dmy' used here to spoof reformat_dates()

	local timestamp_date = archive_url_timestamp:gsub ('(%d%d%d%d)(%d%d)(%d%d)%d*', '%1-%2-%3');	-- make ymd format date from timestamp; substitution count is discarded

	if dates_t['archive-date'].val == timestamp_date then						-- the two dates are the same
		return;																	-- nothing to report
	end

	dates_t['archive-date'] = {val=timestamp_date, name=''};					-- setup to call reformat_dates() with the timestamp date
	reformat_dates (dates_t, df or fallback_format);							-- reformat timestamp to format specified by <df> or format used in |archive-date=
	set_message ('err_archive_date_url_ts_mismatch', dates_t['archive-date'].val);	-- emit an error message holding the suggested date
end
Line 1,159: | Line 1,221: | ||
local exported_functions_t = {													-- functions made available to the modules that require this one
	archive_date_check = archive_date_check,
	date_hyphen_to_dash = date_hyphen_to_dash,
	date_name_xlate = date_name_xlate,
	dates = dates,
	reformat_dates = reformat_dates,
	set_selected_modules = set_selected_modules,
	year_check = year_check,
	year_date_check = year_date_check,
};

return exported_functions_t;													-- return exported functions