User:Qwerfjkl/scripts/linkrot.js
Appearance
< User:Qwerfjkl | scripts
Code that you insert on this page could contain malicious content capable of compromising your account. If you import a script from another page with "importScript", "mw.loader.load", "iusc", or "lusc", take note that this causes you to dynamically load a remote script, which could be changed by others. Editors are responsible for all edits and actions they perform, including by scripts. User scripts are not centrally supported and may malfunction or become inoperable due to software changes. A guide to help you find broken scripts is available. If you are unsure whether code you are adding to this page is safe, you can ask at the appropriate village pump. This code will be executed when previewing this page. |
Documentation for this user script can be added at User:Qwerfjkl/scripts/linkrot. |
// WARNING - this is not 100% accurate! Use editor discretion.
// This is a Bandersnatch ([[:w:de:Benutzer:Schnark/js/bandersnatch]]) edit function
// It is a fork of [[User:BrownHairedGirl/linkrot.js]], with only minor changes to fit the Bandersnatch format
// The search query I use is `-insource:/\<ref[^>]*?\>\s*https?:[^>< \|\[\]]+\s*\<\s*\/\s*ref/ -hastemplate:"Bare URL inline" hastemplate:"Cleanup bare URLs"`
// based on BHG's own regex
// Linkrot.js v2.0 -- with untagging, and ref counting. 9 March 2022
//
// Install with:
// <code><nowiki> {{subst:Iusc|User:BrownHairedGirl/linkrot.js}} </nowiki></code>
// or with
// <code><nowiki> importScript( 'User:BrownHairedGirl/linkrot.js' ); // Backlink: [[User:BrownHairedGirl/linkrot.js]] </nowiki></code>
// This script is hacked from [[User:DannyS712/Draft no cat.js]]
// If forking this script, please note our contributions / give us credit
// These variables are kept global to avoid passing them as parameters
// Matches the {{Cleanup bare URLs}} banner template and its redirect names,
// with an optional "Template:" prefix, optional parameters, and any trailing
// whitespace. It carries the `g` flag, so `exec`/`test` on it are stateful
// (they advance `lastIndex`).
const CleanupBareURLsTagMatcher = /\{\{ *([tT]emplate *: *)?([Cc]leanup[_ ]+bare[_ ]+URLs|[Bb]are[_ ]+|[Bb]are|[Bb]are[_ ]+link|[Bb]are[_ ]+linkname|[Bb]are[_ ]+links|[Bb]are[_ ]+references|[Bb]are[_ ]+refs|[Bb]are[_ ]+URL|[Bb]are[_ ]+[uU][rR][lL]s|[Bb]are-URLs|[Bb]arelinks|[Bb]areURL|[Bb]areURLs|[Cc]leanup[_ ]+bare-URLs|[Cc]leanup[_ ]+link[_ ]+rot|[Cc]leanup[_ ]+link-rot|[Cc]leanup-Bare[_ ]+URLs|[Cc]leanup-barelinks|[Cc]leanup-link[_ ]+rot|[Cc]leanup-link-rot|[Cc]leanup-linkrot|[Cc]UBURL|[Ll]ink[_ ]*rot|[Ll]INKROT|[Ll]R) *(\|[^\}]*)?\}\}\s*/g;
// Wikitext of the page being processed; assigned by the main code before any check runs.
var lkrArticleOriginalText = null;
// Name of the page being processed; only used to build dialog messages.
var lkrWgPageName = null;
// Edit summary used when the banner is added. The category name embeds the
// current "Month YYYY" stamp; monthyear_datestamp is a function declaration,
// so it is hoisted and callable here even though it is defined further down.
const add_linkrot_edit_summary = "Added {{[[Template:Cleanup bare URLs|Cleanup bare URLs]]}}, " +
    "using [[User:BrownHairedGirl/linkrot.js|a script]]. " +
    "For other recently-tagged pages with [[WP:Bare URLs|bare URLs]], " +
    "see [[:Category:Articles with bare URLs for citations from " + monthyear_datestamp() + "]]";
// Edit summary used when the banner is removed.
const remove_linkrot_edit_summary = "Removed {{[[Template:Cleanup bare URLs|Cleanup bare URLs]]}}, " +
    "using [[User:BrownHairedGirl/linkrot.js|a script]]. " +
    "This page currently has no [[WP:Bare URLs|Bare URLs]]";
// nested functions
/**
 * Build a "Month YYYY" stamp (e.g. "March 2022") from the current date,
 * using the local-time month and year of `new Date()`.
 *
 * @returns {string} English month name, a space, and the four-digit year.
 */
function monthyear_datestamp() {
    const monthNames = [
        "January", "February", "March", "April", "May", "June",
        "July", "August", "September", "October", "November", "December"
    ];
    const now = new Date();
    // getMonth() is zero-based, so it indexes monthNames directly
    return `${monthNames[now.getMonth()]} ${now.getFullYear()}`;
}
/**
 * Inspect the current page text and ask the user what to do about the
 * {{Cleanup Bare URLs}} banner.
 *
 * Counts, via lkrCountMatches: existing banner tags, refs consisting of a
 * bare URL, inline bare-URL tags, and bare-URL-PDF tags. Depending on those
 * counts it shows an alert() or a confirm() dialog.
 *
 * @returns {?string} "untag" when a banner exists, nothing bare is left and
 *   the user confirms removal; "addtag" when there is no banner, something
 *   bare was found and the user confirms tagging; null in every other case.
 */
function lkrPreCheck() {
    // Existing banner first, then the three kinds of bare-URL evidence
    const bannerCount = lkrCountMatches(CleanupBareURLsTagMatcher);
    const untaggedCount = lkrCountMatches(/<ref[^>]*?\>\s*\[?\s*https?:[^>< \|\[\]]+\s*\]?\s*<\s*\/\s*ref/gi);
    const inlineTagCount = lkrCountMatches(/\{\{ *([tT]emplate *: *)?([Bb]are[_ ]+URL[\- ]inline|[Ll]inkrot-inline|[Bb]are-inline|[Bb]are[_ ]+inline|[Bb]are[_ ]+url[_ ]+inline|[Bb]are-url[_ ]+inline|[Bb]are[_ ]+link[_ ]+inline|[Bb]are-link-inline|[Bb]are-url-inline|[Bb]are[_ ]+url) *(\|[^\}]*)?\}\}/g);
    const pdfTagCount = lkrCountMatches(/\{\{ *([tT]emplate *: *)?([Bb]are[_ ]+URL[\- ]PDF) *(\|[^\}]*)?\}\}/g);

    const taggedCount = inlineTagCount + pdfTagCount;
    const totalCount = untaggedCount + taggedCount;
    const pageLink = "[[" + lkrWgPageName + "]]";

    if (bannerCount > 0) {
        // Page already carries the banner
        if (totalCount !== 0) {
            // ...and still has bare URLs, so the banner is justified
            alert(`${pageLink} is already tagged with {{Cleanup Bare URLs}}`);
            return null;
        }
        // Banner present but nothing bare remains: offer to remove it
        const removeLabel = toUnicodeVariant("Remove", 'bold');
        const wantsRemoval = confirm(`${pageLink} has no bare URL refs.\n\n${removeLabel} {{Cleanup Bare URLs}}?`);
        return wantsRemoval ? "untag" : null;
    }

    // No banner on the page
    if (totalCount === 0) {
        alert(`${pageLink} has no bare URLs`);
        return null;
    }

    // Bare URLs found on an untagged page: offer to add the banner
    const addLabel = toUnicodeVariant("Add", 'bold');
    const wantsBanner = confirm(
        `${pageLink} has ${totalCount} bare URLs:\n* ${taggedCount} inline-tagged\n* ` +
        `${untaggedCount} untagged.` +
        `\n\n${addLabel} the banner tag {{Cleanup Bare URLs}}?`
    );
    return wantsBanner ? "addtag" : null;
}
/**
 * Count how many times a regular expression matches the current page text
 * (the global `lkrArticleOriginalText`).
 *
 * The count is taken with a private copy of the pattern that always has the
 * `g` flag, so the result does not depend on the caller's `lastIndex`, the
 * caller's regex object is left untouched, and patterns without `g` or
 * patterns that can match the empty string cannot cause an endless loop.
 *
 * @param {RegExp} myREgEx - pattern to count.
 * @returns {number} number of non-overlapping matches (0 if none).
 */
function lkrCountMatches(myREgEx) {
    const flags = myREgEx.flags.includes("g") ? myREgEx.flags : myREgEx.flags + "g";
    const counter = new RegExp(myREgEx.source, flags);
    // String() mirrors the coercion RegExp.prototype.exec applied to its argument
    const found = String(lkrArticleOriginalText).match(counter);
    return found === null ? 0 : found.length;
}
/**
* (c) David Konrad 2018
* MIT License
*
* Javascript function to convert plain text to unicode variants
*
* Loosely based on the nodejs monotext CLI utility https://github.com/cpsdqs/monotext
* (c) cpsdqs 2016
*
* For more inspiration see http://unicode.org/charts/
*
*/
/*
* supported unicode variants
*
* m: monospace
* b: bold
* i: italic
* c: script
* g: gothic / fraktur
* d: double-struck
* s: sans-serif
* o: circled text
* p: parenthesized latin letters
* w: fullwidth
*/
/**
 * Re-spell the ASCII letters and digits of a string in a Unicode "styled"
 * alphabet (bold, italic, monospace, ...).
 *
 * @param {string} str - text to convert. Characters other than A-Z, a-z and
 *   0-9 are copied through unchanged, except for the per-variant special
 *   cases listed in `special`.
 * @param {string} variant - long name ('bold', 'italic sans', ...) or short
 *   key ('b', 'is', ...). Anything unrecognised falls back to monospace.
 * @param {string} [flags] - comma-separated list; 'underline' and 'strike'
 *   append the matching combining mark after every output character.
 * @returns {string} the converted text.
 */
function toUnicodeVariant(str, variant, flags) {
    // Per variant: [code point standing for 'A', code point standing for '0'].
    // A digit base of 0x00030 leaves digits as plain ASCII.
    const offsets = {
        m: [0x1d670, 0x1d7f6],
        b: [0x1d400, 0x1d7ce],
        i: [0x1d434, 0x00030],
        bi: [0x1d468, 0x00030],
        c: [0x1d49c, 0x00030],
        bc: [0x1d4d0, 0x00030],
        g: [0x1d504, 0x00030],
        d: [0x1d538, 0x1d7d8],
        bg: [0x1d56c, 0x00030],
        s: [0x1d5a0, 0x1d7e2],
        bs: [0x1d5d4, 0x1d7ec],
        is: [0x1d608, 0x00030],
        bis: [0x1d63c, 0x00030],
        o: [0x24B6, 0x2460],
        p: [0x249C, 0x2474],
        w: [0xff21, 0xff10],
        u: [0x2090, 0xff10]
    };
    // Long variant names mapped to the short keys used above
    const variantOffsets = {
        'monospace': 'm',
        'bold': 'b',
        'italic': 'i',
        'bold italic': 'bi',
        'script': 'c',
        'bold script': 'bc',
        'gothic': 'g',
        'gothic bold': 'bg',
        'doublestruck': 'd',
        'sans': 's',
        'bold sans': 'bs',
        'italic sans': 'is',
        'bold italic sans': 'bis',
        'parenthesis': 'p',
        'circled': 'o',
        'fullwidth': 'w'
    };
    // Special characters (absolute code points). These cover positions where
    // base + index would be wrong: reserved holes in the Mathematical
    // Alphanumeric Symbols block (the glyph lives in Letterlike Symbols
    // instead) and alphabets whose lower case or digits are not contiguous.
    const special = {
        m: {
            ' ': 0x2000,
            '-': 0x2013
        },
        i: {
            'h': 0x210e
        },
        c: {
            'B': 0x212c,
            'E': 0x2130,
            'F': 0x2131,
            'H': 0x210b,
            'I': 0x2110,
            'L': 0x2112,
            'M': 0x2133,
            'R': 0x211b,
            'e': 0x212f,
            'g': 0x210a,
            'o': 0x2134
        },
        g: {
            'C': 0x212d,
            'H': 0x210c,
            'I': 0x2111,
            'R': 0x211c,
            'Z': 0x2128
        },
        d: {
            'C': 0x2102,
            'H': 0x210d,
            'N': 0x2115,
            'P': 0x2119,
            'Q': 0x211a,
            'R': 0x211d,
            'Z': 0x2124
        },
        o: {
            '0': 0x24EA,
            '1': 0x2460,
            '2': 0x2461,
            '3': 0x2462,
            '4': 0x2463,
            '5': 0x2464,
            '6': 0x2465,
            '7': 0x2466,
            '8': 0x2467,
            '9': 0x2468
        },
        p: {},
        w: {}
    };
    // Lower-case a-z for the parenthesized and fullwidth alphabets
    for (let code = 97; code <= 122; code++) {
        const letter = String.fromCharCode(code);
        special.p[letter] = 0x249C + (code - 97);
        special.w[letter] = 0xff41 + (code - 97);
    }

    const chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz';
    const numbers = '0123456789';

    // Own-property test so names inherited from Object.prototype
    // (e.g. 'constructor') are never mistaken for table entries
    const has = function(table, key) {
        return Object.prototype.hasOwnProperty.call(table, key);
    };
    const getType = function(name) {
        if (has(variantOffsets, name)) return variantOffsets[name];
        if (has(offsets, name)) return name;
        return 'm'; // monospace as default
    };
    const getFlag = function(flag, list) {
        if (!list) return false;
        return list.split(',').indexOf(flag) > -1;
    };

    const type = getType(variant);
    const underline = getFlag('underline', flags);
    const strike = getFlag('strike', flags);
    const specials = has(special, type) ? special[type] : null;

    let result = '';
    // for...of walks code points, so astral characters are not split
    for (const ch of str) {
        let index;
        if (specials && has(specials, ch)) {
            result += String.fromCodePoint(specials[ch]);
        } else if ((index = chars.indexOf(ch)) > -1) {
            result += String.fromCodePoint(index + offsets[type][0]);
        } else if ((index = numbers.indexOf(ch)) > -1) {
            result += String.fromCodePoint(index + offsets[type][1]);
        } else {
            result += ch;
        }
        if (underline) result += '\u0332'; // add combining underline
        if (strike) result += '\u0336'; // add combining strike
    }
    return result;
}
// main code
lkrWgPageName = title;
lkrArticleOriginalText = oldText;
var lkrArticleNewText = null;
var lkr_edit_summary = null;
var myActionString = lkrPreCheck();
if ((myActionString) == null || (myActionString == "")) {
return; // no action
} else if (myActionString == "addtag") {
lkrArticleNewText = "{{Cleanup bare URLs|date=" + monthyear_datestamp() + "}}\n" + lkrArticleOriginalText;
lkr_edit_summary = add_linkrot_edit_summary;
} else if (myActionString == "untag") {
lkrArticleNewText = lkrArticleOriginalText.replaceAll(CleanupBareURLsTagMatcher, '');
// check that removal worked
if (lkrArticleNewText == lkrArticleOriginalText) {
//alert("ERROR\n\nTag removal failed");
return;
}
lkr_edit_summary = remove_linkrot_edit_summary;
}
// sanity check
if (!(lkr_edit_summary && lkrArticleNewText)) {
//alert("ERROR!\n\naction failed: " + myActionString);
return;
}
return {
text: lkrArticleNewText,
summary: lkr_edit_summary
};