Module:Citation/CS1/COinS: Difference between revisions
|  sync from sandbox; | Richardpruen (talk | contribs) m 1 revision imported: template update | ||
| (7 intermediate revisions by 4 users not shown) | |||
| Line 1: | Line 1: | ||
| --[[--------------------------< F O R W A R D   D E C L A R A T I O N S >-------------------------------------- | --[[--------------------------< F O R W A R D   D E C L A R A T I O N S >-------------------------------------- | ||
| ]] | ]] | ||
| local is_set, in_array, remove_wiki_link, strip_apostrophe_markup;	 | local has_accept_as_written, is_set, in_array, remove_wiki_link, strip_apostrophe_markup;	-- functions in Module:Citation/CS1/Utilities | ||
| local cfg;																		-- table of configuration tables that are defined in Module:Citation/CS1/Configuration | local cfg;																		-- table of configuration tables that are defined in Module:Citation/CS1/Configuration | ||
| Line 18: | Line 17: | ||
| local function make_coins_title (title, script) | local function make_coins_title (title, script) | ||
| 	title = has_accept_as_written (title); | |||
| 	if is_set (title) then | 	if is_set (title) then | ||
| 		title = strip_apostrophe_markup (title);								-- strip any apostrophe markup | 		title = strip_apostrophe_markup (title);								-- strip any apostrophe markup | ||
| 	else | 	else | ||
| 		title='';																-- if not set, make sure title is an empty string | 		title = '';																-- if not set, make sure title is an empty string | ||
| 	end | 	end | ||
| 	if is_set (script) then | 	if is_set (script) then | ||
| Line 27: | Line 27: | ||
| 		script = strip_apostrophe_markup (script);								-- strip any apostrophe markup | 		script = strip_apostrophe_markup (script);								-- strip any apostrophe markup | ||
| 	else | 	else | ||
| 		script='';	 | 		script = '';															-- if not set, make sure script is an empty string | ||
| 	end | 	end | ||
| 	if is_set (title) and is_set (script) then | 	if is_set (title) and is_set (script) then | ||
| Line 38: | Line 38: | ||
| --[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >---------------------------------- | --[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >---------------------------------- | ||
| Returns a string where all of  | Returns a string where all of Lua's magic characters have been escaped.  This is important because functions like | ||
| string.gsub() treat their pattern and replace strings as patterns, not literal strings. | string.gsub() treat their pattern and replace strings as patterns, not literal strings. | ||
| ]] | ]] | ||
| Line 44: | Line 44: | ||
| local function escape_lua_magic_chars (argument) | local function escape_lua_magic_chars (argument) | ||
| 	argument = argument:gsub("%%", "%%%%");										-- replace % with %% | 	argument = argument:gsub("%%", "%%%%");										-- replace % with %% | ||
| 	argument = argument:gsub("([%^%$%(%)%.%[%]%*%+%-%?])", "%%%1");				-- replace all other  | 	argument = argument:gsub("([%^%$%(%)%.%[%]%*%+%-%?])", "%%%1");				-- replace all other Lua magic pattern characters | ||
| 	return argument; | 	return argument; | ||
| end | end | ||
| Line 60: | Line 60: | ||
| 	while true do | 	while true do | ||
| 		pattern = pages:match("%[(%w*:?//[^ ]+%s+)[%w%d].*%]");					-- pattern is the opening bracket, the  | 		pattern = pages:match("%[(%w*:?//[^ ]+%s+)[%w%d].*%]");					-- pattern is the opening bracket, the URL and following space(s): "[url " | ||
| 		if nil == pattern then break; end										-- no more  | 		if nil == pattern then break; end										-- no more URLs | ||
| 		pattern = escape_lua_magic_chars (pattern);								-- pattern is not a literal string; escape  | 		pattern = escape_lua_magic_chars (pattern);								-- pattern is not a literal string; escape Lua's magic pattern characters | ||
| 		pages = pages:gsub(pattern, "");										-- remove as many instances of pattern as possible | 		pages = pages:gsub(pattern, "");										-- remove as many instances of pattern as possible | ||
| 	end | 	end | ||
| 	pages = pages:gsub("[%[%]]", "");											-- remove the brackets | 	pages = pages:gsub("[%[%]]", "");											-- remove the brackets | ||
| 	pages = pages:gsub("–", "-" );	 | 	pages = pages:gsub("–", "-" );												-- replace endashes with hyphens | ||
| 	pages = pages:gsub("&%w+;", "-" );	 | 	pages = pages:gsub("&%w+;", "-" );											-- and replace HTML entities (&ndash; etc.) with hyphens; do we need to replace numerical entities like &#32; and the like? | ||
| 	return pages; | 	return pages; | ||
| end | end | ||
| Line 80: | Line 80: | ||
| 	MathML with SVG or PNG fallback | 	MathML with SVG or PNG fallback | ||
| All three are heavy with  | All three are heavy with HTML and CSS which doesn't belong in the metadata. | ||
| Without this function, the metadata saved in the raw wikitext contained the rendering determined by the settings | Without this function, the metadata saved in the raw wikitext contained the rendering determined by the settings | ||
| Line 87: | Line 87: | ||
| This function gets the rendered form of an equation according to the editor's preference before the page is saved.  It | This function gets the rendered form of an equation according to the editor's preference before the page is saved.  It | ||
| then searches the rendering for the text equivalent of the rendered equation and replaces the rendering with that so | then searches the rendering for the text equivalent of the rendered equation and replaces the rendering with that so | ||
| that the page is saved without extraneous  | that the page is saved without extraneous HTML/CSS markup and with a reasonably readable text form of the equation. | ||
| When a replacement is made, this function returns true and the value with replacement; otherwise false and the  | When a replacement is made, this function returns true and the value with replacement; otherwise false and the initial | ||
| value.  To replace multiple equations it is  | value.  To replace multiple equations it is necessary to call this function from within a loop. | ||
| ]=] | ]=] | ||
| Line 120: | Line 120: | ||
| --[[--------------------------< C O I N S _ C L E A N U P >---------------------------------------------------- | --[[--------------------------< C O I N S _ C L E A N U P >---------------------------------------------------- | ||
| Cleanup parameter values for the metadata by removing or replacing invisible characters and certain  | Cleanup parameter values for the metadata by removing or replacing invisible characters and certain HTML entities. | ||
| 2015-12-10: there is a bug in mw.text.unstripNoWiki ().  It replaces math stripmarkers with the appropriate content | 2015-12-10: there is a bug in mw.text.unstripNoWiki ().  It replaces math stripmarkers with the appropriate content | ||
| Line 137: | Line 137: | ||
| 	end | 	end | ||
| 	value = value:gsub (cfg.stripmarkers['math'], "MATH RENDER ERROR");	 | 	value = value:gsub (cfg.stripmarkers['math'], "MATH RENDER ERROR");			-- one or more couldn't be replaced; insert vague error message | ||
| 	value = mw.text.unstripNoWiki (value);										-- replace nowiki stripmarkers with their content | 	value = mw.text.unstripNoWiki (value);										-- replace nowiki stripmarkers with their content | ||
| Line 143: | Line 143: | ||
| 	value = value:gsub ('&nbsp;', ' ');											-- replace &nbsp; entity with plain space | 	value = value:gsub ('&nbsp;', ' ');											-- replace &nbsp; entity with plain space | ||
| 	value = value:gsub ('\226\128\138', ' ');									-- replace hair space with plain space | 	value = value:gsub ('\226\128\138', ' ');									-- replace hair space with plain space | ||
| 	if not mw.ustring.find (value, cfg.indic_script) then						-- don't remove zero width joiner characters from indic script | 	if not mw.ustring.find (value, cfg.indic_script) then						-- don't remove zero-width joiner characters from indic script | ||
| 		value = value:gsub ('&zwj;', '');	 | 		value = value:gsub ('&zwj;', '');										-- remove &zwj; entities | ||
| 		value = mw.ustring.gsub (value, '[\226\128\141\226\128\139\194\173]', '');	-- remove zero-width joiner, zero-width space, soft hyphen | 		value = mw.ustring.gsub (value, '[\226\128\141\226\128\139\194\173]', '');	-- remove zero-width joiner, zero-width space, soft hyphen | ||
| 	end | 	end | ||
| 	value = value:gsub ('[\009\010\013]', ' ');	 | 	value = value:gsub ('[\009\010\013 ]+', ' ');								-- replace horizontal tab, line feed, carriage return with plain space | ||
| 	return value; | 	return value; | ||
| end | end | ||
| Line 180: | Line 180: | ||
| 	}); | 	}); | ||
| 	if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'ssrn', 'journal', 'news', 'magazine'}) or (in_array (class, {'conference', 'interview', 'map', 'press release', 'web'}) and is_set(data.Periodical)) or   | 	if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'medrxiv', 'ssrn', 'journal', 'news', 'magazine'}) or | ||
| 		(in_array (class, {'conference', 'interview', 'map', 'press release', 'web'}) and is_set(data.Periodical)) or | |||
| 		('citation' == class and is_set(data.Periodical) and not is_set (data.Encyclopedia)) then | 		('citation' == class and is_set(data.Periodical) and not is_set (data.Encyclopedia)) then | ||
| 			OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:journal";			-- journal metadata identifier | 			OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:journal";			-- journal metadata identifier | ||
| 			if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'ssrn'}) then	-- set genre according to the type of citation template we are rendering | 			if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'medrxiv', 'ssrn'}) then	-- set genre according to the type of citation template we are rendering | ||
| 				OCinSoutput["rft.genre"] = "preprint";							-- cite arxiv, cite biorxiv, cite citeseerx, cite ssrn | 				OCinSoutput["rft.genre"] = "preprint";							-- cite arxiv, cite biorxiv, cite citeseerx, cite medrxiv, cite ssrn | ||
| 			elseif 'conference' == class then | 			elseif 'conference' == class then | ||
| 				OCinSoutput["rft.genre"] = "conference";						-- cite conference (when Periodical set) | 				OCinSoutput["rft.genre"] = "conference";						-- cite conference (when Periodical set) | ||
| Line 197: | Line 198: | ||
| 																				-- these used only for periodicals | 																				-- these used only for periodicals | ||
| 			OCinSoutput["rft.ssn"] = data.Season;								-- keywords: winter, spring, summer, fall | 			OCinSoutput["rft.ssn"] = data.Season;								-- keywords: winter, spring, summer, fall | ||
| 			OCinSoutput["rft.quarter"] = data.Quarter;							-- single digits 1->first quarter, etc. | |||
| 			OCinSoutput["rft.chron"] = data.Chron;								-- free-form date components | 			OCinSoutput["rft.chron"] = data.Chron;								-- free-form date components | ||
| 			OCinSoutput["rft.volume"] = data.Volume;							-- does not apply to books | 			OCinSoutput["rft.volume"] = data.Volume;							-- does not apply to books | ||
| 			OCinSoutput["rft.issue"] = data.Issue; | 			OCinSoutput["rft.issue"] = data.Issue; | ||
| 			OCinSoutput['rft.artnum'] = data.ArticleNumber;						-- {{cite journal}} only | |||
| 			OCinSoutput["rft.pages"] = data.Pages;								-- also used in book metadata | 			OCinSoutput["rft.pages"] = data.Pages;								-- also used in book metadata | ||
| Line 220: | Line 223: | ||
| 				end | 				end | ||
| 			end | 			end | ||
| 		else	--{'audio-visual', 'AV-media-notes', 'DVD-notes', 'episode', 'interview', 'mailinglist', 'map', 'newsgroup', 'podcast', 'press release', 'serial', 'sign', 'speech', 'web'} | 		else	-- {'audio-visual', 'AV-media-notes', 'DVD-notes', 'episode', 'interview', 'mailinglist', 'map', 'newsgroup', 'podcast', 'press release', 'serial', 'sign', 'speech', 'web'} | ||
| 			OCinSoutput["rft.genre"] = "unknown"; | 			OCinSoutput["rft.genre"] = "unknown"; | ||
| 		end | 		end | ||
| Line 236: | Line 239: | ||
| 		OCinSoutput['rft.inst'] = data.PublisherName;							-- book and dissertation | 		OCinSoutput['rft.inst'] = data.PublisherName;							-- book and dissertation | ||
| 	end | 	end | ||
| 	-- NB. Not currently supported are "info:ofi/fmt:kev:mtx:patent", "info:ofi/fmt:kev:mtx:dc", "info:ofi/fmt:kev:mtx:sch_svc", "info:ofi/fmt:kev:mtx:ctx" | |||
| 																				-- and now common parameters (as much as possible) | 																				-- and now common parameters (as much as possible) | ||
| 	OCinSoutput["rft.date"] = data.Date;										-- book, journal, dissertation | 	OCinSoutput["rft.date"] = data.Date;										-- book, journal, dissertation | ||
| 	for k, v in pairs( data.ID_list ) do										-- what to do about these? For now assume that they are common to all? | 	for k, v in pairs( data.ID_list ) do										-- what to do about these? For now assume that they are common to all? | ||
| 		if k == 'ISBN' then v = v:gsub( "[^-0-9X]", "" ); end | 		if k == 'ISBN' then v = v:gsub( "[^-0-9X]", "" ); end | ||
| Line 244: | Line 248: | ||
| 		if string.sub( id or "", 1, 4 ) == 'info' then							-- for ids that are in the info:registry | 		if string.sub( id or "", 1, 4 ) == 'info' then							-- for ids that are in the info:registry | ||
| 			OCinSoutput["rft_id"] = table.concat{ id, "/", v }; | 			OCinSoutput["rft_id"] = table.concat{ id, "/", v }; | ||
| 		elseif string.sub (id or "", 1, 3 ) == 'rft' then						-- for isbn, issn, eissn, etc that have defined COinS keywords | 		elseif string.sub (id or "", 1, 3 ) == 'rft' then						-- for isbn, issn, eissn, etc. that have defined COinS keywords | ||
| 			OCinSoutput[ id ] = v; | 			OCinSoutput[ id ] = v; | ||
| 		elseif id then															-- when cfg.id_handlers[k].COinS is not nil | 		elseif 'url' == id then													-- for urls that are assembled in ~/Identifiers; |asin= and |ol= | ||
| 			OCinSoutput["rft_id"] = table.concat{ cfg.id_handlers[k].prefix, v };	-- others; provide a  | 			OCinSoutput["rft_id"] = table.concat ({data.ID_list[k], "#id-name=", cfg.id_handlers[k].label}); | ||
| 		elseif id then															-- when cfg.id_handlers[k].COinS is not nil so urls created here | |||
| 			OCinSoutput["rft_id"] = table.concat{ cfg.id_handlers[k].prefix, v, cfg.id_handlers[k].suffix or '', "#id-name=", cfg.id_handlers[k].label };	-- others; provide a URL and indicate identifier name as #fragment (human-readable, but transparent to browsers) | |||
| 		end | 		end | ||
| 	end | 	end | ||
| Line 253: | Line 259: | ||
| 	local last, first; | 	local last, first; | ||
| 	for k, v in ipairs( data.Authors ) do | 	for k, v in ipairs( data.Authors ) do | ||
| 		last, first = coins_cleanup (v.last), coins_cleanup (v.first or '');	-- replace any nowiki  | 		last, first = coins_cleanup (v.last), coins_cleanup (v.first or '');	-- replace any nowiki stripmarkers, non-printing or invisible characters | ||
| 		if k == 1 then															-- for the first author name only | 		if k == 1 then															-- for the first author name only | ||
| 			if is_set(last)  | 			if is_set(last) and is_set(first) then								-- set these COinS values if |first= and |last= specify the first author name | ||
| 				OCinSoutput["rft.aulast"] = last;								-- book, journal, dissertation | 				OCinSoutput["rft.aulast"] = last;								-- book, journal, dissertation | ||
| 				OCinSoutput["rft.aufirst"] = first;								-- book, journal, dissertation | 				OCinSoutput["rft.aufirst"] = first;								-- book, journal, dissertation | ||
| Line 267: | Line 273: | ||
| 				OCinSoutput["rft.au"] = last;									-- book, journal, dissertation | 				OCinSoutput["rft.au"] = last;									-- book, journal, dissertation | ||
| 			end | 			end | ||
| 			-- TODO: At present we do not report "et al.". Add anything special if this condition applies? | |||
| 		end | 		end | ||
| 	end | 	end | ||
| Line 272: | Line 279: | ||
| 	OCinSoutput.rft_id = data.URL; | 	OCinSoutput.rft_id = data.URL; | ||
| 	OCinSoutput.rfr_id = table.concat{ "info:sid/", mw.site.server:match( "[^/]*$" ), ":", data.RawPage }; | 	OCinSoutput.rfr_id = table.concat{ "info:sid/", mw.site.server:match( "[^/]*$" ), ":", data.RawPage }; | ||
| 	-- TODO: Add optional extra info: | |||
| 	-- rfr_dat=#REVISION<version> (referrer private data) | |||
| 	-- ctx_id=<data.RawPage>#<ref> (identifier for the context object) | |||
| 	-- ctx_tim=<ts> (timestamp in format yyyy-mm-ddThh:mm:ssTZD or yyyy-mm-dd) | |||
| 	-- ctx_enc=info:ofi/enc:UTF-8 (character encoding) | |||
| 	OCinSoutput = setmetatable( OCinSoutput, nil ); | 	OCinSoutput = setmetatable( OCinSoutput, nil ); | ||
| 	-- sort with version string always first, and combine. | 	-- sort with version string always first, and combine. | ||
| 	--table.sort( OCinSoutput ); | 	-- table.sort( OCinSoutput ); | ||
| 	table.insert( OCinSoutput, 1, "ctx_ver=" .. ctx_ver );  | 	table.insert( OCinSoutput, 1, "ctx_ver=" .. ctx_ver ); -- such as "Z39.88-2004" | ||
| 	return table.concat(OCinSoutput, "&"); | 	return table.concat(OCinSoutput, "&"); | ||
| end | end | ||
| Line 290: | Line 304: | ||
| 	cfg = cfg_table_ptr; | 	cfg = cfg_table_ptr; | ||
| 	has_accept_as_written = utilities_page_ptr.has_accept_as_written;			-- import functions from selected Module:Citation/CS1/Utilities module | |||
| 	is_set = utilities_page_ptr.is_set; | |||
| 	in_array = utilities_page_ptr.in_array; | 	in_array = utilities_page_ptr.in_array; | ||
| 	remove_wiki_link = utilities_page_ptr.remove_wiki_link; | 	remove_wiki_link = utilities_page_ptr.remove_wiki_link; | ||