User:SMcCandlish/TidyRefs.js
Appearance
Code that you insert on this page could contain malicious content capable of compromising your account. If you import a script from another page with "importScript", "mw.loader.load", "iusc", or "lusc", take note that this causes you to dynamically load a remote script, which could be changed by others. Editors are responsible for all edits and actions they perform, including by scripts. User scripts are not centrally supported and may malfunction or become inoperable due to software changes. A guide to help you find broken scripts is available. If you are unsure whether code you are adding to this page is safe, you can ask at the appropriate village pump. This code will be executed when previewing this page.
This user script seems to have a documentation page at User:SMcCandlish/TidyRefs.
// TidyRefs should generally not be run on its own, on an article,
// without making a more substantive change in the same edit
// per the human-editor part of WP:COSMETICBOT (unless fixing actual errors).
// User-configurable options. A value already assigned to one of these names
// before this point is kept; otherwise the default on that line is applied.
// The `var` declarations are hoisted, so an option that was never set reads as
// `undefined` here, not `null`. The loose `== null` test matches both, whereas
// a strict `=== null` test would never apply the defaults.
if (autosummary == null) var autosummary = true; //generate a short summary
if (showdiff == null) var showdiff = true; //show diff after pressing button
if (markminor == null) var markminor = false; //mark as minor
/**
 * Tidy `<ref>` tags (and `<br>` / `<hr>`) in the edit box, then run the
 * follow-up actions selected by the `autosummary`, `markminor` and `showdiff`
 * options.
 *
 * Reads `document.editform.wpTextbox1.value` and writes the cleaned text back
 * to it.
 *
 * @param {string} orientation - "horizontal" additionally runs the attribute
 *   quoting / spacing cleanup. "vertical" is reserved and currently adds
 *   nothing. The tag repairs at the start and the cleanup at the end run for
 *   every value.
 */
function tidyRefs(orientation) {
  const txt = document.editform.wpTextbox1;
  // A single working copy is threaded through every step, so the
  // orientation-independent repairs below reach the page even when the
  // horizontal branch is skipped.
  let text = txt.value;

  // Steps we need to do regardless of horizontal or vertical layout:
  // Repair outright broken `<ref>` tags:
  // Detects for the already correct `<ref>` because this is case-
  // insensitive and will correct cases of `<REF>`
  const regexBrokeRefClosed = /<\s*ref\s*>/gmi;
  text = text.replace(regexBrokeRefClosed, `<ref>`);
  const regexBrokeRefOpen = /<\s*ref\s+/gmi;
  text = text.replace(regexBrokeRefOpen, `<ref `);
  const regexBrokeRefEnd = /<\s*\/\s*ref\s*>/gmi;
  text = text.replace(regexBrokeRefEnd, `</ref>`);

  if (orientation === "horizontal") {
    // Do quotation and internal spacing cleanup on `<ref ... name= ...>` constructions:
    const regexName = /<ref\s+((?:group|follow|extends)\s*=(?:(?!name\s*=)[\s\S])*)?name\s*=\s*(?:"\s*([^"](?:(?!\s*\/>|\s*"\s*>|\s+(?:group|follow|extends)\s*=).)*?)\s*"|'\s*([^'"](?:(?!\s*\/>|\s*'>|\s+(?:group|follow|extends)\s*=).)*?)\s*'|((?:[^">\/\s*])(?:(?!\s*\/>|\s*>|\s+(?:group|follow|extends)\s*=).)*))(?:\s+((?:group|follow|extends)\s*=\s*(?:"[^"](?:(?!\s*\/>|\s*"\s*>).)*?\s*"|(?:'[^'"](?:(?!\s*\/>|\s*'\s*>).)*?\s*')|(?:[^>\/\s*]|\s*(?!\/?>)|\/(?!>)))*))*\s*(?:(\/)>|>\s*)/gmi;
    // Because JS does not support the advanced conditional-replace `${#:...}` syntax
    // of `<ref $1name="$2$3$4"$5${6:+ $6}>`, we have to jump through some hoops and
    // deal with the ` />` spacing in a separate operation later.
    // Only one of $2/$3/$4 (double-quoted, single-quoted, bare value) takes
    // part in a given match; the others expand to the empty string.
    text = text.replace(regexName, `<ref $1name="$2$3$4"$5$6>`);

    // Do quotation and internal spacing cleanup on `<ref ... group= ...>` constructions:
    const regexGroup = /<ref\s+((?:name|follow|extends)\s*=(?:(?!group\s*=)[\s\S])*)?group\s*=\s*(?:"\s*([^"](?:(?!\s*\/>|\s*"\s*>|\s+(?:name|follow|extends)\s*=).)*?)\s*"|'\s*([^'"](?:(?!\s*\/>|\s*'>|\s+(?:name|follow|extends)\s*=).)*?)\s*'|((?:[^">\/\s*])(?:(?!\s*\/>|\s*>|\s+(?:name|follow|extends)\s*=).)*))(?:\s+((?:name|follow|extends)\s*=\s*(?:"[^"](?:(?!\s*\/>|\s*"\s*>).)*?\s*"|(?:'[^'"](?:(?!\s*\/>|\s*'\s*>).)*?\s*')|(?:[^>\/\s*]|\s*(?!\/?>)|\/(?!>)))*))*\s*(?:(\/)>|>\s*)/gmi;
    text = text.replace(regexGroup, `<ref $1group="$2$3$4"$5$6>`);

    // Do quotation and internal spacing cleanup on `<ref ... follow= ...>` constructions:
    const regexFollow = /<ref\s+((?:group|name|extends)\s*=(?:(?!follow\s*=)[\s\S])*)?follow\s*=\s*(?:"\s*([^"](?:(?!\s*\/>|\s*"\s*>|\s+(?:group|name|extends)\s*=).)*?)\s*"|'\s*([^'"](?:(?!\s*\/>|\s*'>|\s+(?:group|name|extends)\s*=).)*?)\s*'|((?:[^">\/\s*])(?:(?!\s*\/>|\s*>|\s+(?:group|name|extends)\s*=).)*))(?:\s+((?:group|name|extends)\s*=\s*(?:"[^"](?:(?!\s*\/>|\s*"\s*>).)*?\s*"|(?:'[^'"](?:(?!\s*\/>|\s*'\s*>).)*?\s*')|(?:[^>\/\s*]|\s*(?!\/?>)|\/(?!>)))*))*\s*(?:(\/)>|>\s*)/gmi;
    text = text.replace(regexFollow, `<ref $1follow="$2$3$4"$5$6>`);

    // Do quotation and internal spacing cleanup on `<ref ... extends= ...>` constructions:
    const regexExtends = /<ref\s+((?:group|follow|name)\s*=(?:(?!extends\s*=)[\s\S])*)?extends\s*=\s*(?:"\s*([^"](?:(?!\s*\/>|\s*"\s*>|\s+(?:group|follow|name)\s*=).)*?)\s*"|'\s*([^'"](?:(?!\s*\/>|\s*'>|\s+(?:group|follow|name)\s*=).)*?)\s*'|((?:[^">\/\s*])(?:(?!\s*\/>|\s*>|\s+(?:group|follow|name)\s*=).)*))(?:\s+((?:group|follow|name)\s*=\s*(?:"[^"](?:(?!\s*\/>|\s*"\s*>).)*?\s*"|(?:'[^'"](?:(?!\s*\/>|\s*'\s*>).)*?\s*')|(?:[^>\/\s*]|\s*(?!\/?>)|\/(?!>)))*))*\s*(?:(\/)>|>\s*)/gmi;
    text = text.replace(regexExtends, `<ref $1extends="$2$3$4"$5$6>`);

    // Fix spacing; there's a more "elegant" way to do this stuff, by
    // breaking all the regex parts above into proper JS variables and
    // using those, then doing some string tests to decide when the first
    // attribute is followed by another and replace any whitespace before
    // the second with a single space; but it's much easier to just nuke
    // the whitespace above and re-insert a space here, since these strings
    // are likely to not have false-positives.
    const regexSpaceCleanup = /"(name|group|follow|extends)=/g;
    text = text.replace(regexSpaceCleanup, `" $1=`);

    // Now to fix invalid nested double-quotes.
    // Shared replacer for the four passes below: rebuilds
    // `<ref a="…" b="…"` from the captured (name, value) pairs and turns
    // double-quotes into single-quotes in the LAST value only (earlier
    // values were already handled by the preceding pass).
    const requoteLastValue = (match, ...groups) => {
      // String.prototype.replace passes: match, each capture, offset, input.
      const captures = groups.slice(0, -2);
      const lastValueIndex = captures.length - 1;
      const attributes = [];
      for (let i = 0; i < captures.length; i += 2) {
        const rawValue = captures[i + 1];
        const value = i + 1 === lastValueIndex ? rawValue.replace(/"/g, "'") : rawValue;
        attributes.push(`${captures[i]}="${value}"`);
      }
      return '<ref ' + attributes.join(' ');
    };

    // Regex to capture attribute values after `<ref `:
    const regexRefValues = /<ref (name|group|follow|extends)="((?:(?!" ?\/>|">|" (?:name|group|follow|extends)).)*)"/g;
    text = text.replace(regexRefValues, requoteLastValue);

    // Repeat essentially the same thing for more attributes; this is
    // tedious and inefficient, but it is safer with regard to potential
    // false positives.
    // Regex to capture 2nd attribute values:
    const regexRefValues2 = /<ref (name|group|follow|extends)="((?:(?!" ?\/>|">|" (?:name|group|follow|extends)).)*)" (name|group|follow|extends)="((?:(?!" ?\/>|">|" (?:name|group|follow|extends)).)*)"/g;
    text = text.replace(regexRefValues2, requoteLastValue);

    // Regex to capture 3rd attribute values:
    const regexRefValues3 = /<ref (name|group|follow|extends)="((?:(?!" ?\/>|">|" (?:name|group|follow|extends)).)*)" (name|group|follow|extends)="((?:(?!" ?\/>|">|" (?:name|group|follow|extends)).)*)" (name|group|follow|extends)="((?:(?!" ?\/>|">|" (?:name|group|follow|extends)).)*)"/g;
    text = text.replace(regexRefValues3, requoteLastValue);

    // Regex to capture 4th attribute values:
    const regexRefValues4 = /<ref (name|group|follow|extends)="((?:(?!" ?\/>|">|" (?:name|group|follow|extends)).)*)" (name|group|follow|extends)="((?:(?!" ?\/>|">|" (?:name|group|follow|extends)).)*)" (name|group|follow|extends)="((?:(?!" ?\/>|">|" (?:name|group|follow|extends)).)*)" (name|group|follow|extends)="((?:(?!" ?\/>|">|" (?:name|group|follow|extends)).)*)"/g;
    text = text.replace(regexRefValues4, requoteLastValue);

    // Remove leading space in front of `</ref>`
    const regexSpaceSlashRef = /\s+<\/ref>/gmi;
    text = text.replace(regexSpaceSlashRef, `</ref>`);
  } else if (orientation === "vertical") {
    //nothing yet
  }

  //Things to do after either vertical or horizontal formatting:
  // Enforce a space before the closing `/>` characters (but not `>` alone).
  // We've already normalized to `"/>` so now we change it to `" />`.
  const regexSlashTagEnd = /(")(\/>)/g;
  text = text.replace(regexSlashTagEnd, `$1 $2`);

  // Bonus cleanup: fix `<br>`, `<br/>`, broken `</br>`, etc. to `<br />`
  // (this does not handle rare instances of `<br attribute="value" ... />`):
  const regexBRmess = /<\s*\/?\s*br\s*\/?\s*>/gi;
  text = text.replace(regexBRmess, `<br />`);

  // Bonus cleanup: fix `<hr>`, `<hr/>`, broken `</hr>`, etc. to `<hr />`
  // (this does not handle rare instances of `<hr attribute="value" ... />`):
  const regexHRmess = /<\s*\/?\s*hr\s*\/?\s*>/gi;
  text = text.replace(regexHRmess, `<hr />`);

  // Paste it back into the page:
  txt.value = text;

  if (autosummary) edit_summary();
  // Tick "minor" BEFORE asking for the diff: diff() clicks the form's wpDiff
  // button, and a checkbox changed after that click would presumably not be
  // part of what the click submits. The checkbox is looked up defensively in
  // case the form does not offer one.
  const minorBox = document.editform.wpMinoredit;
  if (markminor && minorBox) minorBox.checked = true;
  if (showdiff) diff();
}
/**
 * Append the TidyRefs credit to the edit-summary field
 * (`document.editform.wpSummary`) unless that exact text is already there.
 *
 * The credit always starts with ";" so that it reads as a continuation of
 * whatever summary the editor writes in front of it.
 */
function edit_summary() {
  const sum = document.editform.wpSummary;
  // NOTE(review): the link target is spelled `tidyRefs.js` (lower-case "t");
  // confirm that this matches the title of the script page.
  const summary = "; [[User:SMcCandlish/tidyRefs.js|tidyRefs]]";
  // No separate separator step is needed: the separator is part of `summary`
  // itself, so the text is appended as-is, and only once.
  if (!sum.value.includes(summary)) {
    sum.value += summary;
  }
}
// Programmatically click the edit form's `wpDiff` control.
function diff() {
  const { wpDiff } = document.editform;
  wpDiff.click();
}
// Once the DOM is ready, add the "<Tidy>" link to the `p-tb` portlet, but only
// on pages that have an edit form.
$(function () {
  if (!document.forms.editform) {
    return;
  }
  // `mw.util` is provided by the `mediawiki.util` module; wait for it
  // explicitly instead of assuming it has already been loaded by the time
  // this script runs.
  mw.loader.using('mediawiki.util').then(function () {
    mw.util.addPortletLink('p-tb', 'javascript:tidyRefs("horizontal")', '\<Tidy\>', 'ca-formatrefs', 'Format refs: tidy quotation marks and whitespace');
    // mw.util.addPortletLink('p-tb', 'javascript:tidyRefs("vertical")', '\<Tidy\> (vertically - FORTHCOMING)', 'ca-formatrefs-vertical', 'Format refs: vertically, tidy quotation marks and whitespace');
  });
});
/*</source>
[[Category:Wikipedia scripts]]
*/