Module:Citation/CS1/Date validation/sandbox: Difference between revisions

m 1 revision imported
imported>Trappist the monk
No edit summary
 
(2 intermediate revisions by 2 users not shown)
Line 1: Line 1:
--[[
--[[
History of changes since last sync: 2021-04-10
History of changes since last sync: 2023-01-14
 
2021-04-30: extend allowed dates in |pmc-embargo-date= validation to two years; Help_talk:Citation_Style_1#|pmc-embargo-date=_validation
2021-05-01: revise month-name validation; see Help_talk:Citation_Style_1#i18n_tweaks
2021-05-02: add support to allow editors to see citations that emit properties cats; see Help_talk:Citation_Style_1#highlighting_cs1|2_citations_that_emit_properties_categories


2023-02-20: break out of for-loop early when date has been formatted; see Help_talk:Citation_Style_1#date_reformatting_tweak
2023-02-21: detect |archive-date= / |archive-url= timestamp mismatch; see Help_talk:Citation_Style_1#Error_or_Maint_message_if_archive_date_doesn't_match_url
2023-03-15: i18n tonumber() fix; see Help_talk:Citation_Style_1#i18n_date_handling
]]
]]


Line 196: Line 195:


year = tonumber (year) or lang_object:parseFormattedNumber (year); -- convert to number for the comparison;
year = tonumber (year) or lang_object:parseFormattedNumber (year); -- convert to number for the comparison;
 
if 'pmc-embargo-date' == param then -- special case for |pmc-embargo-date=
if 'pmc-embargo-date' == param then -- special case for |pmc-embargo-date=
return year and (year <= tonumber(os.date("%Y"))+2) or false; -- years more than two years in the future are not accepted
return year and (year <= tonumber(os.date("%Y"))+2) or false; -- years more than two years in the future are not accepted
Line 331: Line 330:
local date; -- one date or first date in a range
local date; -- one date or first date in a range
local date2 = ''; -- end of range date
local date2 = ''; -- end of range date
input.year = tonumber (input.year) or lang_object:parseFormattedNumber (input.year); -- language-aware tonumber()
input.year2 = tonumber (input.year2) or lang_object:parseFormattedNumber (input.year2); -- COinS dates are pseudo-ISO 8601 so convert to Arabic numerals
-- start temporary Julian / Gregorian calendar uncertainty detection
-- start temporary Julian / Gregorian calendar uncertainty detection
local year = tonumber(input.year); -- this temporary code to determine the extent of sources dated to the Julian/Gregorian
local year = input.year; -- this temporary code to determine the extent of sources dated to the Julian/Gregorian
local month = tonumber(input.month); -- interstice 1 October 1582 – 1 January 1926
local month = tonumber(input.month); -- interstice 1 October 1582 – 1 January 1926
local day = tonumber (input.day);
local day = tonumber (input.day);
Line 343: Line 345:
-- end temporary Julian / Gregorian calendar uncertainty detection
-- end temporary Julian / Gregorian calendar uncertainty detection
if 1582 > tonumber(input.year) or 20 < tonumber(input.month) then -- Julian calendar or season so &rft.date gets year only
if 1582 > input.year or 20 < tonumber(input.month) then -- Julian calendar or season so &rft.date gets year only
date = input.year;
date = input.year;
if 0 ~= input.year2 and input.year ~= input.year2 then -- if a range, only the second year portion when not the same as range start year
if 0 ~= input.year2 and input.year ~= input.year2 then -- if a range, only the second year portion when not the same as range start year
date = string.format ('%.4d/%.4d', tonumber(input.year), tonumber(input.year2)) -- assemble the date range
date = string.format ('%.4d/%.4d', input.year, input.year2) -- assemble the date range
end
end
if 20 < tonumber(input.month) then -- if season or proper-name date
if 20 < tonumber(input.month) then -- if season or proper-name date
Line 370: Line 372:
end
end
end
end
tCOinS_date.rftdate = date;
tCOinS_date.rftdate = tostring(date);
return; -- done
return; -- done
end
end
Line 579: Line 581:
elseif mw.ustring.match(date_string, patterns['Sy-y'][1]) then -- special case Winter/Summer year-year; year separated with unspaced endash
elseif mw.ustring.match(date_string, patterns['Sy-y'][1]) then -- special case Winter/Summer year-year; year separated with unspaced endash
month, year, anchor_year, year2 = mw.ustring.match(date_string, patterns['Sy-y'][1]);
month, year, anchor_year, year2 = mw.ustring.match(date_string, patterns['Sy-y'][1]);
if 'Winter' ~= month and 'Summer' ~= month then return false end; -- 'month' can only be Winter or Summer
month = get_season_number (month, param); -- <month> can only be winter or summer; also for metadata
anchor_year = year .. '–' .. anchor_year; -- assemble anchor_year from both years
if (month ~= cfg.date_names['en'].season['Winter']) and (month ~= cfg.date_names['en'].season['Summer']) then
return false; -- not Summer or Winter; abandon
end
anchor_year = year .. '–' .. anchor_year; -- assemble anchor_year from both years
if 1 ~= tonumber(year2) - tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later
if 1 ~= tonumber(year2) - tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year
month = get_season_number (month, param); -- for metadata


elseif mw.ustring.match(date_string, patterns['My-My'][1]) then -- month/season year - month/season year; separated by spaced endash
elseif mw.ustring.match(date_string, patterns['My-My'][1]) then -- month/season year - month/season year; separated by spaced endash
Line 635: Line 639:


if 13 > tonumber(year2) then return false; end -- don't allow 2003-05 which might be May 2003
if 13 > tonumber(year2) then return false; end -- don't allow 2003-05 which might be May 2003
year2 = century .. year2; -- add the century to year2 for comparisons
year2 = century .. year2; -- add the century to year2 for comparisons
if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same
if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year
Line 649: Line 653:
end
end


if 'access-date' == param then -- test accessdate here because we have numerical date parts
if param ~= 'date' then -- CITEREF disambiguation only allowed in |date=; |year= & |publication-date= promote to date
if anchor_year:match ('%l$') then
return false;
end
end
 
if 'access-date' == param then -- test access-date here because we have numerical date parts
if 0 ~= year and 0 ~= month and 0 ~= day and -- all parts of a single date required
if 0 ~= year and 0 ~= month and 0 ~= day and -- all parts of a single date required
0 == year2 and 0 == month2 and 0 == day2 then -- none of these; accessdate must not be a range
0 == year2 and 0 == month2 and 0 == day2 then -- none of these; access-date must not be a range
if not is_valid_accessdate(year .. '-' .. month .. '-' .. day) then
if not is_valid_accessdate(year .. '-' .. month .. '-' .. day) then
return false; -- return false when accessdate out of bounds
return false; -- return false when access-date out of bounds
end
end
else
else
return false; -- return false when accessdate is a range of two dates
return false; -- return false when access-date is a range of two dates
end
end
end
end


if 'archive-date' == param then -- test archive-date here because we have numerical date parts
if not (0 ~= year and 0 ~= month and 0 ~= day and -- all parts of a single date required
0 == year2 and 0 == month2 and 0 == day2) then -- none of these; archive-date must not be a range
return false; -- return false when archive-date is a range of two dates
end
end
local result=true; -- check whole dates for validity; assume true because not all dates will go through this test
local result=true; -- check whole dates for validity; assume true because not all dates will go through this test
if 0 ~= year and 0 ~= month and 0 ~= day and 0 == year2 and 0 == month2 and 0 == day2 then -- YMD (simple whole date)
if 0 ~= year and 0 ~= month and 0 ~= day and 0 == year2 and 0 == month2 and 0 == day2 then -- YMD (simple whole date)
Line 906: Line 923:
end
end


-- yMd is not supported at en.wiki; if yMd is supported at your wiki, uncomment the next line
-- yMd is not supported at en.wiki; when yMd is supported at your wiki, uncomment the next line
-- if 'yMd' == format_param and in_array (pattern_idx, {'yMd', 'Md-dy', 'd-dMy', 'dM-dMy', 'Md-Mdy', 'dMy-dMy', 'Mdy-Mdy'}) then -- these formats not convertable; yMd not supported at en.wiki
-- if 'yMd' == format_param and in_array (pattern_idx, {'yMd', 'Md-dy', 'd-dMy', 'dM-dMy', 'Md-Mdy', 'dMy-dMy', 'Mdy-Mdy'}) then -- these formats not convertable; yMd not supported at en.wiki
-- if yMd is supported at your wiki, remove or comment-out the next line
if 'yMd' == format_param then -- yMd not supported at en.wiki; when yMd is supported at your wiki, remove or comment-out this line
if 'yMd' == format_param then -- yMd not supported at en.wiki
return; -- not a reformattable date
return; -- not a reformattable date
end
end
Line 926: Line 942:
};
};


if t.a then -- if this date has an anchor year capture
if t.a then -- if this date has an anchor year capture (all convertable date formats except ymd)
t.y = t.a; -- use the anchor year capture when reassembling the date
if t.y2 then -- for year range date formats
t.y2 = t.a; -- use the anchor year capture when reassembling the date
else -- here for single date formats (except ymd)
t.y = t.a; -- use the anchor year capture when reassembling the date
end
end
end


if tonumber(t.m) then -- if raw month is a number (converting from ymd)
if tonumber(t.m) then -- if raw month is a number (converting from ymd)
if 's' == mon_len then -- if we are to use abbreviated month names
if 's' == mon_len then -- if we are to use abbreviated month names
t.m = cfg.date_names['inv_local_s'][tonumber(t.m)]; -- convert it to a month name
t.m = cfg.date_names['inv_local_short'][tonumber(t.m)]; -- convert it to a month name
else
else
t.m = cfg.date_names['inv_local_l'][tonumber(t.m)]; -- convert it to a month name
t.m = cfg.date_names['inv_local_long'][tonumber(t.m)]; -- convert it to a month name
end
end
t.d = t.d:gsub ('0(%d)', '%1'); -- strip leading '0' from day if present
t.d = t.d:gsub ('0(%d)', '%1'); -- strip leading '0' from day if present
Line 949: Line 969:
t[mon] = get_month_number (t[mon]); -- get the month number for this month (is length agnostic)
t[mon] = get_month_number (t[mon]); -- get the month number for this month (is length agnostic)
if 0 == t[mon] then return; end -- seasons and named dates can't be converted
if 0 == t[mon] then return; end -- seasons and named dates can't be converted
t[mon] = (('s' == mon_len) and cfg.date_names['inv_local_s'][t[mon]]) or cfg.date_names['inv_local_l'][t[mon]]; -- fetch month name according to length
t[mon] = (('s' == mon_len) and cfg.date_names['inv_local_short'][t[mon]]) or cfg.date_names['inv_local_long'][t[mon]]; -- fetch month name according to length
end
end
end
end
Line 1,030: Line 1,050:
date_parameters_list[param_name].val = new_date; -- update date in date list
date_parameters_list[param_name].val = new_date; -- update date in date list
result = true; -- and announce that changes have been made
result = true; -- and announce that changes have been made
break;
end
end
end -- if
end -- if
Line 1,036: Line 1,057:
end -- if
end -- if
end -- for
end -- for
return result; -- declare boolean result and done
return result; -- declare boolean result and done
end
end


Line 1,068: Line 1,089:
--[[-------------------------< D A T E _ N A M E _ X L A T E >------------------------------------------------
--[[-------------------------< D A T E _ N A M E _ X L A T E >------------------------------------------------


Attempts to translate English month names to local-language month names using names supplied by MediaWiki's
Attempts to translate English date names to local-language date names using names supplied by MediaWiki's
date parser function.  This is simple name-for-name replacement and may not work for all languages.
date parser function.  This is simple name-for-name replacement and may not work for all languages.


Line 1,082: Line 1,103:
local date;
local date;
-- Sequence of {English-name table, local-name table} pairs searched in order by is_xlateable().
-- In each pair, [1] is indexed by an English date name; the value found there is then used
-- as the index into [2] to fetch the matching local-language date name.
local sources_t = {
{cfg.date_names.en.long, cfg.date_names.inv_local_long}, -- for translating long English month names to long local month names
{cfg.date_names.en.short, cfg.date_names.inv_local_short}, -- short month names
{cfg.date_names.en.quarter, cfg.date_names.inv_local_quarter}, -- quarter date names
{cfg.date_names.en.season, cfg.date_names.inv_local_season}, -- season date names
{cfg.date_names.en.named, cfg.date_names.inv_local_named}, -- named dates
}
local function is_xlateable (month)												-- local function; looks up the local-language replacement for an English date name
	-- <month> is a candidate English date name (month long or short, quarter, season, or named date).
	-- Returns the local-language name from the first pair in <sources_t> that knows <month> and
	-- has a local name for it; returns nothing (nil) when no pair provides a translation.
	for _, pair_t in ipairs (sources_t) do										-- each entry is {English-name table, local-name table}
		local english_t = pair_t[1];
		local local_t = pair_t[2];
		local index = english_t[month];											-- value that the English table holds for this name, if any
		if index and local_t[index] then										-- known English name that also has a local counterpart
			return local_t[index];												-- the local date name
		end
	end
end
for param_name, param_val in pairs(date_parameters_list) do -- for each date-holding parameter in the list
for param_name, param_val in pairs(date_parameters_list) do -- for each date-holding parameter in the list
if is_set(param_val.val) then -- if the parameter has a value
if is_set(param_val.val) then -- if the parameter has a value
date = param_val.val;
date = param_val.val;
for month in mw.ustring.gmatch (date, '%a+') do -- iterate through all dates in the date (single date or date range)
for month in mw.ustring.gmatch (date, '[%a ]+') do -- iterate through all date names in the date (single date or date range)
if cfg.date_names.en.long[month] then
month = mw.text.trim (month); -- this because quarterly dates contain whitespace
mode = 'F'; -- English name is long so use long local name
xlate = is_xlateable (month); -- get translate <month>; returns translation or nil
elseif cfg.date_names.en.short[month] then
mode = 'M'; -- English name is short so use short local name
if xlate then
else
mode = nil; -- not an English month name; could be local language month name or an English season name
end
if mode then -- might be a season
xlate = lang_object:formatDate(mode, '1' .. month); -- translate the month name to this local language
date = mw.ustring.gsub (date, month, xlate); -- replace the English with the translation
date = mw.ustring.gsub (date, month, xlate); -- replace the English with the translation
date_parameters_list[param_name].val = date; -- save the translated date
date_parameters_list[param_name].val = date; -- save the translated date
Line 1,129: Line 1,162:


cfg = cfg_table_ptr; -- import tables from selected Module:Citation/CS1/Configuration
cfg = cfg_table_ptr; -- import tables from selected Module:Citation/CS1/Configuration
end
--[[--------------------------< A R C H I V E _ D A T E _ C H E C K >------------------------------------------

Checks that the value assigned to |archive-date= agrees with the timestamp taken from a Wayback machine url.

<archive_date> is rendered as YYYYMMDD by lang_object:formatDate().  That call is made through pcall() so that an
error raised while formatting is caught here; when that happens no comparison is made and no message is set.

<archive_url_timestamp> is searched, as plain text, for the YYYYMMDD string.  When the string is not found,
set_message() is called with 'err_archive_date_url_ts_mismatch'.

Returns nothing.

]]

local function archive_date_check (archive_date, archive_url_timestamp)
	local ok, date_stamp = pcall (lang_object.formatDate, lang_object, 'Ymd', archive_date);	-- protected lang_object:formatDate ('Ymd', archive_date)
	if not ok then
		return;																	-- date could not be formatted; nothing to compare
	end

	if nil == archive_url_timestamp:find (date_stamp, 1, true) then				-- plain-text search from position 1; nil when YYYYMMDD is absent
		set_message ('err_archive_date_url_ts_mismatch');						-- emit an error message
	end
end
end


Line 1,136: Line 1,188:


return { -- return exported functions
return { -- return exported functions
archive_date_check = archive_date_check,
date_hyphen_to_dash = date_hyphen_to_dash,
date_name_xlate = date_name_xlate,
dates = dates,
dates = dates,
reformat_dates = reformat_dates,
set_selected_modules = set_selected_modules,
year_date_check = year_date_check,
year_date_check = year_date_check,
reformat_dates = reformat_dates,
date_hyphen_to_dash = date_hyphen_to_dash,
date_name_xlate = date_name_xlate,
set_selected_modules = set_selected_modules
}
}