Jump to content

User:SMcCandlish/TidyRefs.js

From Wikipedia, the free encyclopedia
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
// TidyRefs should generally not be run on its own, on an article,
// without making a more substantive change in the same edit
// per the human-editor part of WP:COSMETICBOT (unless fixing actual errors).

// User-configurable options. A user may define any of these before this
// script runs; a value that is already set is left untouched.
//
// `var` declarations are hoisted, so an option that was never set reads as
// `undefined` here (not `null`). The loose `== null` test matches both
// `undefined` and `null`; a strict `=== null` test would never apply the
// defaults.
var autosummary = (autosummary == null) ? true : autosummary; //generate a short summary
var showdiff = (showdiff == null) ? true : showdiff; //show diff after pressing button
var markminor = (markminor == null) ? false : markminor; //mark as minor

/**
 * Tidy `<ref>` markup (plus `<br>` / `<hr>` tags) in the edit box.
 *
 * Reads the wikitext from `document.editform.wpTextbox1`, runs it through a
 * fixed pipeline of regex replacements, and writes the result back. It then
 * optionally appends an edit summary, ticks "minor edit" and triggers the
 * diff, depending on the `autosummary`, `markminor` and `showdiff` globals.
 *
 * @param {string} orientation "horizontal" runs the attribute quoting and
 *     spacing cleanup; "vertical" is reserved and currently adds nothing.
 *     Any value still gets the layout-independent repairs and final cleanup.
 */
function tidyRefs(orientation) {
	var txt = document.editform.wpTextbox1;

	// Single working copy of the wikitext; every step reads and rewrites it,
	// so no step's result can be dropped on any code path.
	var text = txt.value;

	/**
	 * Replacer for the nested-quote passes below. String.prototype.replace
	 * calls it with (match, name1, value1, ..., nameN, valueN, offset, input).
	 * It rebuilds `<ref name1="value1" ... nameN="valueN"` with any
	 * double-quotes inside the LAST value turned into single-quotes; earlier
	 * values were already handled by the preceding pass.
	 */
	function requoteLastValue(match, ...rest) {
		// Drop the trailing offset and input-string arguments.
		var captures = rest.slice(0, -2);
		var last = captures.length - 1;
		captures[last] = captures[last].replace(/"/g, "'");

		var attributes = [];
		for (var i = 0; i < captures.length; i += 2) {
			attributes.push(captures[i] + '="' + captures[i + 1] + '"');
		}
		return '<ref ' + attributes.join(' ');
	}

	// Steps we need to do regardless of horizontal or vertical layout:

	// Repair outright broken `<ref>` tags. These also match the already
	// correct forms because they are case-insensitive and so normalize
	// cases such as `<REF>`.
	text = text.replace(/<\s*ref\s*>/gmi, '<ref>');
	text = text.replace(/<\s*ref\s+/gmi, '<ref ');
	text = text.replace(/<\s*\/\s*ref\s*>/gmi, '</ref>');

	if (orientation === "horizontal") {

		// Do quotation and internal spacing cleanup on `<ref ... name= ...>` constructions:
		var regexName = /<ref\s+((?:group|follow|extends)\s*=(?:(?!name\s*=)[\s\S])*)?name\s*=\s*(?:"\s*([^"](?:(?!\s*\/>|\s*"\s*>|\s+(?:group|follow|extends)\s*=).)*?)\s*"|'\s*([^'"](?:(?!\s*\/>|\s*'>|\s+(?:group|follow|extends)\s*=).)*?)\s*'|((?:[^">\/\s*])(?:(?!\s*\/>|\s*>|\s+(?:group|follow|extends)\s*=).)*))(?:\s+((?:group|follow|extends)\s*=\s*(?:"[^"](?:(?!\s*\/>|\s*"\s*>).)*?\s*"|(?:'[^'"](?:(?!\s*\/>|\s*'\s*>).)*?\s*')|(?:[^>\/\s*]|\s*(?!\/?>)|\/(?!>)))*))*\s*(?:(\/)>|>\s*)/gmi;

		// Because JS does not support the advanced conditional-replace `${#:...}` syntax
		// of `<ref $1name="$2$3$4"$5${6:+ $6}>`, we have to jump through some hoops and
		// deal with the ` />` spacing in a separate operation later.
		// $2/$3/$4 are the double-quoted, single-quoted and unquoted value
		// alternatives; only one of them is non-empty for a given match.
		text = text.replace(regexName, '<ref $1name="$2$3$4"$5$6>');

		// Do quotation and internal spacing cleanup on `<ref ... group= ...>` constructions:
		var regexGroup = /<ref\s+((?:name|follow|extends)\s*=(?:(?!group\s*=)[\s\S])*)?group\s*=\s*(?:"\s*([^"](?:(?!\s*\/>|\s*"\s*>|\s+(?:name|follow|extends)\s*=).)*?)\s*"|'\s*([^'"](?:(?!\s*\/>|\s*'>|\s+(?:name|follow|extends)\s*=).)*?)\s*'|((?:[^">\/\s*])(?:(?!\s*\/>|\s*>|\s+(?:name|follow|extends)\s*=).)*))(?:\s+((?:name|follow|extends)\s*=\s*(?:"[^"](?:(?!\s*\/>|\s*"\s*>).)*?\s*"|(?:'[^'"](?:(?!\s*\/>|\s*'\s*>).)*?\s*')|(?:[^>\/\s*]|\s*(?!\/?>)|\/(?!>)))*))*\s*(?:(\/)>|>\s*)/gmi;

		text = text.replace(regexGroup, '<ref $1group="$2$3$4"$5$6>');

		// Do quotation and internal spacing cleanup on `<ref ... follow= ...>` constructions:
		var regexFollow = /<ref\s+((?:group|name|extends)\s*=(?:(?!follow\s*=)[\s\S])*)?follow\s*=\s*(?:"\s*([^"](?:(?!\s*\/>|\s*"\s*>|\s+(?:group|name|extends)\s*=).)*?)\s*"|'\s*([^'"](?:(?!\s*\/>|\s*'>|\s+(?:group|name|extends)\s*=).)*?)\s*'|((?:[^">\/\s*])(?:(?!\s*\/>|\s*>|\s+(?:group|name|extends)\s*=).)*))(?:\s+((?:group|name|extends)\s*=\s*(?:"[^"](?:(?!\s*\/>|\s*"\s*>).)*?\s*"|(?:'[^'"](?:(?!\s*\/>|\s*'\s*>).)*?\s*')|(?:[^>\/\s*]|\s*(?!\/?>)|\/(?!>)))*))*\s*(?:(\/)>|>\s*)/gmi;

		text = text.replace(regexFollow, '<ref $1follow="$2$3$4"$5$6>');

		// Do quotation and internal spacing cleanup on `<ref ... extends= ...>` constructions:
		var regexExtends = /<ref\s+((?:group|follow|name)\s*=(?:(?!extends\s*=)[\s\S])*)?extends\s*=\s*(?:"\s*([^"](?:(?!\s*\/>|\s*"\s*>|\s+(?:group|follow|name)\s*=).)*?)\s*"|'\s*([^'"](?:(?!\s*\/>|\s*'>|\s+(?:group|follow|name)\s*=).)*?)\s*'|((?:[^">\/\s*])(?:(?!\s*\/>|\s*>|\s+(?:group|follow|name)\s*=).)*))(?:\s+((?:group|follow|name)\s*=\s*(?:"[^"](?:(?!\s*\/>|\s*"\s*>).)*?\s*"|(?:'[^'"](?:(?!\s*\/>|\s*'\s*>).)*?\s*')|(?:[^>\/\s*]|\s*(?!\/?>)|\/(?!>)))*))*\s*(?:(\/)>|>\s*)/gmi;

		text = text.replace(regexExtends, '<ref $1extends="$2$3$4"$5$6>');

		// Fix spacing; there's a more "elegant" way to do this stuff, by
		// breaking all the regex parts above into proper JS variables and
		// using those, then doing some string tests to decide when the first
		// attribute is followed by another and replace any whitespace before
		// the second with a single space; but it's much easier to just nuke
		// the whitespace above and re-insert a space here, since these strings
		// are likely to not have false-positives.
		text = text.replace(/"(name|group|follow|extends)=/g, '" $1=');

		// Now to fix invalid nested double-quotes, one attribute position at
		// a time. Each pass matches one more leading attribute than the last
		// and sanitizes only the final value it captured. This is tedious
		// and inefficient, but it is safer with regard to potential false
		// positives.

		// 1st attribute value after `<ref `:
		var regexRefValues = /<ref (name|group|follow|extends)="((?:(?!" ?\/>|">|" (?:name|group|follow|extends)).)*)"/g;
		text = text.replace(regexRefValues, requoteLastValue);

		// 2nd attribute value:
		var regexRefValues2 = /<ref (name|group|follow|extends)="((?:(?!" ?\/>|">|" (?:name|group|follow|extends)).)*)" (name|group|follow|extends)="((?:(?!" ?\/>|">|" (?:name|group|follow|extends)).)*)"/g;
		text = text.replace(regexRefValues2, requoteLastValue);

		// 3rd attribute value:
		var regexRefValues3 = /<ref (name|group|follow|extends)="((?:(?!" ?\/>|">|" (?:name|group|follow|extends)).)*)" (name|group|follow|extends)="((?:(?!" ?\/>|">|" (?:name|group|follow|extends)).)*)" (name|group|follow|extends)="((?:(?!" ?\/>|">|" (?:name|group|follow|extends)).)*)"/g;
		text = text.replace(regexRefValues3, requoteLastValue);

		// 4th attribute value:
		var regexRefValues4 = /<ref (name|group|follow|extends)="((?:(?!" ?\/>|">|" (?:name|group|follow|extends)).)*)" (name|group|follow|extends)="((?:(?!" ?\/>|">|" (?:name|group|follow|extends)).)*)" (name|group|follow|extends)="((?:(?!" ?\/>|">|" (?:name|group|follow|extends)).)*)" (name|group|follow|extends)="((?:(?!" ?\/>|">|" (?:name|group|follow|extends)).)*)"/g;
		text = text.replace(regexRefValues4, requoteLastValue);

		// Remove leading space in front of `</ref>`
		text = text.replace(/\s+<\/ref>/gmi, '</ref>');
	}
	else if (orientation === "vertical") {
		//nothing yet
	}

	//Things to do after either vertical or horizontal formatting:

	// Enforce a space before the closing `/>` characters (but not `>` alone).
	// We've already normalized to `"/>` so now we change it to `" />`.
	text = text.replace(/(")(\/>)/g, '$1 $2');

	// Bonus cleanup: fix `<br>`, `<br/>`, broken `</br>`, etc. to `<br />`
	// (this does not handle rare instances of `<br attribute="value" ... />`):
	text = text.replace(/<\s*\/?\s*br\s*\/?\s*>/gi, '<br />');

	// Bonus cleanup: fix `<hr>`, `<hr/>`, broken `</hr>`, etc. to `<hr />`
	// (this does not handle rare instances of `<hr attribute="value" ... />`):
	text = text.replace(/<\s*\/?\s*hr\s*\/?\s*>/gi, '<hr />');

	// Paste it back into the page:
	txt.value = text;

	if (autosummary) edit_summary();
	// Tick "minor" BEFORE triggering the diff: diff() clicks a form button,
	// and state changed after that click may not be part of what is submitted.
	if (markminor) document.editform.wpMinoredit.checked = true;
	if (showdiff) diff();

}

/**
 * Append a link crediting this script to the edit-summary field
 * (`document.editform.wpSummary`), unless the link is already present.
 *
 * A "; " separator is inserted only when the summary already ends in real
 * text. An empty summary, or one that ends with a section autocomment
 * terminator, gets the bare link instead of a stray leading ";".
 */
function edit_summary() {
	var sum = document.editform.wpSummary;
	// NOTE(review): the link target is spelled "tidyRefs.js" (lowercase t);
	// confirm that this title resolves to the script page.
	var link = "[[User:SMcCandlish/tidyRefs.js|tidyRefs]]";

	// Already credited (with or without a separator): nothing to do.
	if (sum.value.indexOf(link) !== -1) {
		return;
	}

	// True when the summary ends with something other than whitespace or
	// the `*` and `/` characters that close a section autocomment.
	var endsWithText = /[^\*\/\s][^\/\s]?\s*$/.test(sum.value);

	sum.value += (endsWithText ? "; " : "") + link;
}

/**
 * Programmatically click the edit form's `wpDiff` control
 * (presumably the "Show changes" button; confirm against the edit form).
 */
function diff() {
	var diffControl = document.editform.wpDiff;
	diffControl.click();
}

// Once the DOM is ready, and only when an edit form is present, add a
// "<Tidy>" link to the `p-tb` portlet that runs tidyRefs("horizontal").
$(function () {
    if (!document.forms.editform) {
        return;
    }
    // mw.util lives in the `mediawiki.util` ResourceLoader module, which is
    // not guaranteed to be loaded when a user script runs; wait for it
    // before calling addPortletLink.
    mw.loader.using('mediawiki.util').then(function () {
        mw.util.addPortletLink('p-tb', 'javascript:tidyRefs("horizontal")', '<Tidy>', 'ca-formatrefs', 'Format refs: tidy quotation marks and whitespace');
//        mw.util.addPortletLink('p-tb', 'javascript:tidyRefs("vertical")', '\<Tidy\> (vertically - FORTHCOMING)', 'ca-formatrefs-vertical', 'Format refs: vertically, tidy quotation marks and whitespace');
    });
});
/*</source>
 
[[Category:Wikipedia scripts]]
*/