Jump to content

User:Ingenuity/ReferenceFixer.js

From Wikipedia, the free encyclopedia
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
// <nowiki>

// Citation data recorded when the user saves or removes a reference,
// keyed by the random id assigned to that reference's dialog entry.
const ReferenceFixerData = {};

// Template names that are searched for (case-insensitively) in the page
// wikitext as "bare URL" cleanup templates.
const BareURLTemplates = [
	// "bare ..." spellings
	"bare", "bare links", "barelinks", "bare references", "bare refs",
	"bare urls", "bareurls", "bare-urls",
	// "link rot" spellings
	"cleanup link rot", "cleanup link-rot", "cleanup-link-rot", "cleanup-linkrot",
	"link rot", "linkrot",
	// remaining aliases
	"cleanup bare urls", "lr"
];

// Occurrences of the templates above that were found on the current page.
let PresentBareURLTemplates = [];

/**
 * Entry point of the tool: scans the current page's wikitext for bare-URL
 * references and bare-URL cleanup templates, then opens the dialog.
 *
 * Side effects: refills the module-level PresentBareURLTemplates list with
 * the template occurrences (as strings) found on the page.
 *
 * @returns {Promise<void>}
 */
async function ReferenceFixer() {
	const content = await GetPageWikitext();

	// The name capture excludes quotes and ">" so that it cannot run past the
	// end of an earlier self-closed <ref name="x" /> and swallow the text
	// between that tag and the bare reference.
	const bareReferences = [...content.matchAll(/<ref(?: name="?([^">]+?)"?)?>((?:\s+)?http.+?(?:\s+)?)<\/ref>/g)];

	const cleaned = bareReferences.map(([, name, raw]) => ({
		raw,
		name: name || null,
		// \S+ rather than [^ ]+: the reference body may start with a newline
		// or tab, which must not become part of the URL.
		url: raw.match(/\S+/)[0]
	}));

	// Start from a clean slate so re-opening the dialog does not accumulate
	// duplicates from the previous run.
	PresentBareURLTemplates.length = 0;

	for (const templateName of BareURLTemplates) {
		const pattern = new RegExp(`{{${templateName}\\|(?:.+?)}}`, "ig");

		for (const match of content.matchAll(pattern)) {
			// Store the matched text itself; it is later used as a search string.
			PresentBareURLTemplates.push(match[0]);
		}
	}

	const lowered = content.toLowerCase();
	const hasDateTemplate = lowered.includes("use dmy dates") || lowered.includes("use mdy dates");

	await ReferenceFixerInterface(cleaned, hasDateTemplate);
}

/**
 * Fetches the wikitext of the page currently being viewed.
 *
 * @returns {Promise<string>} content of the main slot of the first revision
 *   returned by the API query
 */
async function GetPageWikitext() {
	// Request the revision content of the current page from the action API.
	const request = {
		action: 'query',
		prop: 'revisions',
		rvprop: 'content',
		titles: mw.config.get('wgPageName'),
		formatversion: 2,
		rvslots: '*'
	};

	const response = await new mw.Api().get(request);
	const firstPage = response.query.pages[0];
	const firstRevision = firstPage.revisions[0];

	return firstRevision.slots.main.content;
}

/**
 * Builds the ReferenceFixer dialog and its stylesheet, starts loading one
 * entry per bare reference, and attaches both elements to the document.
 *
 * @param {Array<{raw: string, url: string, name: (string|null)}>} cleaned
 *   bare references found on the page
 * @param {boolean} hasDateTemplate whether the page already has a
 *   "use dmy/mdy dates" template; when true the date-format prompt is hidden
 * @returns {Promise<void>}
 */
async function ReferenceFixerInterface(cleaned, hasDateTemplate) {
	// Remove a dialog left over from a previous invocation: two dialogs would
	// share element ids and confuse the save logic.
	document.getElementById("ReferenceFixer")?.remove();
	document.getElementById("ReferenceFixerStylesheet")?.remove();

	const container = document.createElement("div");
	const stylesheet = document.createElement("style");

	container.id = "ReferenceFixer";
	stylesheet.id = "ReferenceFixerStylesheet";

	stylesheet.innerHTML = `
		#ReferenceFixer {
			position: fixed;
			top: calc(50% - 250px);
			left: calc(50% - 400px);
			width: 800px;
			height: 500px;
			background-color: #fafafa;
			border: 1px solid #ccc;
			border-radius: 5px;
			overflow-y: auto;
			padding-bottom: 50px;
			box-sizing: border-box;
		}

		#ReferenceFixerHeader, #ReferenceFixerFooter {
			display: flex;
			justify-content: space-between;
			align-items: center;
			padding: 10px;
			background-color: #eee;
			user-select: none;
			position: sticky;
			top: 0;
			z-index: 1;
		}

		#ReferenceFixerFooter {
			position: sticky;
			height: 50px;
			box-sizing: border-box;
			width: 100%;
			top: 100%;
		}

		.ReferenceFixerItem, .ReferenceFixerMeta {
			padding: 10px 20px;
			border-bottom: 1px solid #ddd;
			position: relative;
			top: -50px;
		}

		.ReferenceFixerSection {
			padding-top: 7px;
		}

		.ReferenceFixerSectionTitle {
			font-size: 0.9em;
			display: block;
		}

		.ReferenceFixerURL {
			white-space: nowrap;
			overflow-x: hidden;
		}

		.ReferenceFixerStatus {
			user-select: none;
			cursor: pointer;
		}

		#ReferenceFixer input[type=number] {
			width: 75px;
		}
	`;

	container.innerHTML = `
		<div id="ReferenceFixerHeader">
			<div>
				<span>Reference Fixer</span>
				<span id="ReferenceFixerLoading">(loading <span id="ReferenceFixerLoaded">0</span> of ${cleaned.length})</span>
			</div>
			<div>
				<span onclick="ReferenceFixerSettings()" style="cursor: pointer;">[settings]</span>
				<span onclick="CloseReferenceFixer()" style="cursor: pointer;">[close]</span>
			</div>
		</div>
		<div id="ReferenceFixerFooter">
			<div>
				<input type="checkbox" id="ReferenceFixerAddArchives" checked>
				<label for="ReferenceFixerAddArchives" style="font-size: 0.8em;">Add archive URLs where possible</label>
				<div style="margin-left: 10px; display: ${PresentBareURLTemplates.length ? "inline" : "none"};">
					<input type="checkbox" id="ReferenceFixerRemoveTemplates" checked>
					<label for="ReferenceFixerRemoveTemplates" style="font-size: 0.8em;">Remove bare URL templates</label>
				</div>
			</div>
			<div>
				<button onclick="ReferenceFixerSave()" id="ReferenceFixerSaveButton">Save</button>
			</div>
		</div>
		<div class="ReferenceFixerMeta" style="display: ${hasDateTemplate ? "none" : "block"};">
			<span>This article does not have a standardized date format set. Would you like to add one?</span>
			<select id="ReferenceFixerDateTemplate">
				<option value="none">No, do not add a date format</option>
				<option value="dmy">{{Use dmy dates}}</option>
				<option value="mdy">{{Use mdy dates}}</option>
			</select>
		</div>
	`;

	// Look the progress elements up inside the container (not the document)
	// so updates keep working even if the dialog is closed while items load.
	const loadedCounter = container.querySelector("#ReferenceFixerLoaded");
	const loadingIndicator = container.querySelector("#ReferenceFixerLoading");
	let totalLoaded = 0;

	const markLoaded = () => {
		totalLoaded += 1;
		loadedCounter.textContent = String(totalLoaded);
		if (totalLoaded === cleaned.length) {
			loadingIndicator.style.display = "none";
		}
	};

	for (const { raw, url, name } of cleaned) {
		// Items load concurrently; a failed item is logged and still counted
		// so the progress indicator cannot get stuck.
		ReferenceFixerItem(url, raw, name, container)
			.catch((error) => {
				console.error("ReferenceFixer: could not load reference", url, error);
			})
			.then(markLoaded);
	}

	if (cleaned.length === 0) {
		// Nothing will ever finish loading, so hide "(loading 0 of 0)".
		loadingIndicator.style.display = "none";
		container.insertAdjacentHTML("beforeend", `
			<div style="padding: 20px;">No bare references found.</div>
		`);
	}

	document.body.appendChild(container);
	document.head.appendChild(stylesheet);
}

/**
 * Creates the dialog entry for one bare reference, wires up its
 * Save / Ignore / Remove buttons and fills in the archive fields once the
 * archive lookup finishes.
 *
 * The entry is appended to the container synchronously, before the lookup
 * is awaited.
 *
 * @param {string} url URL extracted from the reference
 * @param {string} raw raw text found between the <ref> tags
 * @param {(string|null)} name value of the reference's name attribute, if any
 * @param {HTMLElement} container dialog element that receives the entry
 * @returns {Promise<void>} settles once the archive lookup has been handled
 */
async function ReferenceFixerItem(url, raw, name, container) {
	const itemId = Math.random().toString(36).substring(2, 15) + Math.random().toString(36).substring(2, 15);

	const item = document.createElement("div");
	item.classList.add("ReferenceFixerItem");
	item.id = itemId;

	// The URL comes from page wikitext, so it is never interpolated into this
	// markup; the link targets and text are assigned through the DOM below.
	item.innerHTML = `
		<div class="ReferenceFixerURL">
			<span class="ReferenceFixerStatus" style="font-weight: bold;"></span>
			<a target="_blank" class="ReferenceFixerLink"></a>
		</div>
		<div class="ReferenceFixerOptions">
			<div>
				<span class="ReferenceFixerSectionTitle">Reference type:</span>
				<select class="type">
					<option value="web">{{cite web}}</option>
					<option value="book">{{cite book}}</option>
					<option value="newspaper">{{cite news}}</option>
					<option value="journal">{{cite journal}}</option>
					<option value="magazine">{{cite magazine}}</option>
					<option value="encyclopedia">{{cite encyclopedia}}</option>
					<option value="thesis">{{cite thesis}}</option>
					<option value="tweet">{{cite tweet}}</option>
				</select>
			</div>
			<div class="ReferenceFixerSection">
				<span class="ReferenceFixerSectionTitle">Author name:</span>
				<input type="text" placeholder="First" class="first1">
				<input type="text" placeholder="Last" class="last1">
			</div>
			<div class="ReferenceFixerSection">
				<span class="ReferenceFixerSectionTitle">Title:</span>
				<input type="text" placeholder="Title" class="title">
				<input type="text" placeholder="Website title" class="name">
			</div>
			<div class="ReferenceFixerSection">
				<span class="ReferenceFixerSectionTitle">Date:</span>
				<input type="number" placeholder="Day" class="day">
				<input type="number" placeholder="Month" class="month">
				<input type="number" placeholder="Year" class="year">
			</div>
			<div class="ReferenceFixerSection">
				<span class="ReferenceFixerSectionTitle">Archive:
					<a target="_blank" class="ReferenceFixerArchiveSearch">(search)</a>&nbsp;
					<span class="archiveloading">Loading archive...</span>
				</span>
				<input type="text" placeholder="URL" class="archiveurl">
				<input type="number" placeholder="Day" class="archiveday">
				<input type="number" placeholder="Month" class="archivemonth">
				<input type="number" placeholder="Year" class="archiveyear">
			</div>
			<div class="ReferenceFixerSection">
				<span class="ReferenceFixerSectionTitle">URL status:</span>
				<select class="urlstatus">
					<option value="live">Live</option>
					<option value="dead">Dead</option>
				</select>
			</div>
			<div class="ReferenceFixerSection">
				<button class="ReferenceFixerSave">Save</button>
				<button class="ReferenceFixerIgnore">Ignore</button>
				<button class="ReferenceFixerRemove">Remove</button>
			</div>
		</div>
	`;

	const link = item.querySelector(".ReferenceFixerLink");
	link.href = url;
	link.textContent = url;
	item.querySelector(".ReferenceFixerArchiveSearch").href = `https://web.archive.org/web/*/${url}`;

	container.appendChild(item);

	const options = item.querySelector(".ReferenceFixerOptions");
	const status = item.querySelector(".ReferenceFixerStatus");

	// Collapses the entry and shows the chosen outcome next to the URL.
	const collapseWithStatus = (label, color) => {
		options.style.display = "none";
		status.innerHTML = label;
		status.style.color = color;
	};

	// Records what the save step needs in order to rewrite this reference.
	const recordItemData = () => {
		ReferenceFixerData[itemId] = {
			url,
			raw,
			text: GenerateCitationWikitext(item),
			name
		};
	};

	item.querySelector(".ReferenceFixerSave").addEventListener("click", () => {
		collapseWithStatus("[SAVED]", "green");
		recordItemData();
	});

	item.querySelector(".ReferenceFixerIgnore").addEventListener("click", () => {
		collapseWithStatus("[IGNORED]", "grey");
	});

	item.querySelector(".ReferenceFixerRemove").addEventListener("click", () => {
		collapseWithStatus("[REMOVED]", "red");
		recordItemData();
	});

	// Clicking the status label re-opens the entry for editing.
	status.addEventListener("click", () => {
		options.style.display = "block";
		status.innerHTML = "";
	});

	const archiveStatus = item.querySelector(".archiveloading");
	let archive;

	try {
		archive = await ReferenceFixerGetArchive(url);
	} catch (error) {
		// Do not leave "Loading archive..." on screen when the lookup fails.
		console.error("ReferenceFixer: archive lookup failed for", url, error);
		archiveStatus.textContent = "Archive lookup failed";
		return;
	}

	if (archive && archive.url) {
		item.querySelector(".archiveurl").value = archive.url;
		item.querySelector(".archiveday").value = archive.day;
		item.querySelector(".archivemonth").value = archive.month;
		item.querySelector(".archiveyear").value = archive.year;

		// Built through the DOM so the remote URL is never parsed as HTML.
		const viewLink = document.createElement("a");
		viewLink.href = archive.url;
		viewLink.target = "_blank";
		viewLink.textContent = "(view archive)";
		archiveStatus.replaceChildren(viewLink);
	} else {
		archiveStatus.textContent = "No archive found";
	}
}

/**
 * Removes the ReferenceFixer dialog and its stylesheet from the document.
 * Safe to call when either element is already gone.
 */
function CloseReferenceFixer() {
	document.getElementById("ReferenceFixer")?.remove();
	document.getElementById("ReferenceFixerStylesheet")?.remove();
}

/**
 * Looks up the closest archived snapshot of a URL through the archive.org
 * "wayback/available" endpoint.
 *
 * Never rejects: a network failure, a non-OK response or an unexpected
 * payload is logged and reported as "no archive".
 *
 * @param {string} url URL to look up
 * @returns {Promise<{url: string, day: string, month: string, year: string}>}
 *   snapshot URL plus the date parts taken from its timestamp; every field
 *   is an empty string when no snapshot is available
 */
async function ReferenceFixerGetArchive(url) {
	const noArchive = () => ({ url: "", day: "", month: "", year: "" });

	try {
		// The target URL must be encoded, otherwise its own "&" and "#"
		// characters cut the query parameter short.
		const response = await fetch("https://archive.org/wayback/available?url=" + encodeURIComponent(url));

		if (!response.ok) {
			return noArchive();
		}

		const json = await response.json();
		const closest = json?.archived_snapshots?.closest;

		if (!closest || !closest.url) {
			return noArchive();
		}

		// The first eight digits of the timestamp are read as YYYYMMDD.
		const dateParts = String(closest.timestamp ?? "").match(/(\d{4})(\d{2})(\d{2})/);

		if (!dateParts) {
			return noArchive();
		}

		const [, year, month, day] = dateParts;
		return { url: closest.url, day, month, year };
	} catch (error) {
		console.warn("ReferenceFixer: archive lookup failed for", url, error);
		return noArchive();
	}
}

/**
 * Handler for the "[settings]" link in the dialog header.
 * Currently a placeholder: it has no body and does nothing.
 */
function ReferenceFixerSettings() {

}

/**
 * Builds the {{cite ...}} wikitext for one dialog entry from its form fields.
 *
 * Parameters are emitted in this order: url, access-date (today), first1,
 * last1, title, website, date, archive-url, archive-date, url-status.
 * Empty fields are left out.
 *
 * @param {Element} item dialog entry containing the form controls
 * @returns {string} citation template wikitext
 */
function GenerateCitationWikitext(item) {
	const valueOf = (selector) => item.querySelector(selector).value;
	// Free-text values: a literal "|" would be parsed as a parameter
	// separator inside the template, so it is written as {{!}}.
	const textOf = (selector) => valueOf(selector).trim().replaceAll("|", "{{!}}");

	const type = valueOf(".type");
	const first1 = textOf(".first1");
	const last1 = textOf(".last1");
	const title = textOf(".title");
	const name = textOf(".name");
	const day = valueOf(".day");
	const month = valueOf(".month");
	const year = valueOf(".year");
	const archiveurl = valueOf(".archiveurl").trim();
	const archiveday = valueOf(".archiveday");
	const archivemonth = valueOf(".archivemonth");
	const archiveyear = valueOf(".archiveyear");
	const url = item.querySelector(".ReferenceFixerLink").href;
	const urlstatus = valueOf(".urlstatus");

	// Take a single snapshot of "now" so the three date parts cannot
	// disagree if the call happens to straddle midnight.
	const today = new Date();
	let parameters = `|url=${url} |access-date=${GetDate(today.getFullYear(), today.getMonth() + 1, today.getDate())}`;

	// A last name may stand alone (single-name or corporate authors);
	// a first name is only emitted together with a last name.
	if (last1) {
		if (first1) {
			parameters += ` |first1=${first1}`;
		}
		parameters += ` |last1=${last1}`;
	}

	if (title) {
		parameters += ` |title=${title}`;
	}

	if (name) {
		parameters += ` |website=${name}`;
	}

	if (year && month) {
		parameters += ` |date=${GetDate(year, month, day)}`;
	}

	if (archiveurl && archiveyear && archivemonth) {
		parameters += ` |archive-url=${archiveurl} |archive-date=${GetDate(archiveyear, archivemonth, archiveday)}`;

		// url-status is only written together with an archive URL. Emitting
		// it unconditionally also produced a duplicate parameter whenever
		// archive data was appended to the citation afterwards.
		if (urlstatus) {
			parameters += ` |url-status=${urlstatus}`;
		}
	}

	return `{{cite ${type} ${parameters}}}`;
}

/**
 * Formats a calendar date as YYYY-MM-DD.
 *
 * @param {(number|string)} year full year
 * @param {(number|string)} month month number, 1-12
 * @param {(number|string)} [day] day of the month; the 1st is used when
 *   missing, empty or zero
 * @returns {string} the formatted date, or "" when year or month is missing
 */
function GetDate(year, month, day) {
	if (!year || !month) {
		return "";
	}

	// Set year, month and day in one call on a fixed base date. Setting them
	// one at a time on today's date overflows into the next month whenever
	// today's day-of-month does not exist in the target month (for example
	// asking for a February date on the 31st).
	const date = new Date(2000, 0, 1);
	date.setFullYear(Number(year), Number(month) - 1, Number(day) || 1);

	return `${date.getFullYear()}-${padNumber(date.getMonth() + 1, 2)}-${padNumber(date.getDate(), 2)}`;
}

/**
 * Left-pads the string form of a value with zeros.
 *
 * @param {(number|string)} number value to pad
 * @param {number} length minimum length of the result
 * @returns {string} the zero-padded text
 */
function padNumber(number, length) {
	const digits = number.toString();
	return digits.padStart(length, "0");
}

/**
 * Applies the choices made in the dialog to a freshly fetched copy of the
 * page wikitext, saves the page, then closes the dialog and reloads.
 *
 * Steps: rewrite saved references, delete removed ones, optionally add a
 * date-format template, optionally strip bare-URL templates, optionally add
 * archive links to existing citations, tidy punctuation around references
 * and unwrap a single-entry {{multiple issues}}.
 *
 * If any step fails the Save button is re-enabled and the error is rethrown.
 *
 * @returns {Promise<void>}
 */
async function ReferenceFixerSave() {
	const saveButton = document.getElementById("ReferenceFixerSaveButton");

	// Disable the button before the first await so a double click cannot
	// start a second save.
	saveButton.innerHTML = "Preparing...";
	saveButton.disabled = true;

	try {
		const items = [...document.querySelectorAll(".ReferenceFixerItem")];
		const removeTemplates = document.getElementById("ReferenceFixerRemoveTemplates").checked;
		const dateTemplate = document.getElementById("ReferenceFixerDateTemplate").value;
		const addArchives = document.getElementById("ReferenceFixerAddArchives").checked;
		const summaryFragments = [];
		let fixed = 0;
		let removed = 0;
		let archived = 0;
		let content = await GetPageWikitext();

		for (const item of items) {
			const status = item.querySelector(".ReferenceFixerStatus").innerHTML;
			const itemData = ReferenceFixerData[item.id];

			if (!itemData || (status !== "[SAVED]" && status !== "[REMOVED]")) {
				continue;
			}

			// The attribute part may not contain "<" or ">", so a match can
			// never begin at an earlier <ref ... /> tag and swallow the text
			// that lies between it and this reference.
			const refPattern = new RegExp(`<ref(\\s[^<>]*)?>${EscapeRegExp(itemData.raw)}</ref>`, "gi");

			if (status === "[SAVED]") {
				// Replacer function: "$" sequences inside the citation (common
				// in URLs) must be inserted literally.
				content = content.replace(refPattern, (match, attributes = "") => `<ref${attributes}>${itemData.text}</ref>`);
				fixed++;
			} else {
				content = content.replace(refPattern, "");
				if (itemData.name) {
					// Also drop re-uses of the removed reference.
					const reusePattern = new RegExp(`<ref name="?${EscapeRegExp(itemData.name)}"? ?/>`, "g");
					content = content.replaceAll(reusePattern, "");
				}
				removed++;
			}
		}

		if (fixed) {
			summaryFragments.push(`formatted ${fixed} reference${fixed > 1 ? "s" : ""}`);
		}

		if (removed) {
			summaryFragments.push(`removed ${removed} dead or unreliable reference${removed > 1 ? "s" : ""}`);
		}

		if (dateTemplate === "mdy" || dateTemplate === "dmy") {
			const monthNames = ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"];
			const now = new Date();
			const date = `${monthNames[now.getMonth()]} ${now.getFullYear()}`;
			content = `{{use ${dateTemplate} dates|date=${date}}}\n` + content;
			summaryFragments.push(`added {{use ${dateTemplate} dates}} template`);
		}

		if (removeTemplates && PresentBareURLTemplates.length) {
			for (const template of PresentBareURLTemplates) {
				content = content.replaceAll(template, "");
			}

			summaryFragments.push(`removed bare URL template${PresentBareURLTemplates.length > 1 ? "s" : ""}`);
		}

		if (addArchives) {
			const citePattern = /<ref(?: name="?(?:[^\/]+?)"?)?>({{cite .+?}})<\/ref>/gi;
			const citations = [...content.matchAll(citePattern)].map((match) => match[1]);

			for (const citation of citations) {
				const lowered = citation.toLowerCase();

				if (lowered.includes("archive-url") || lowered.includes("archiveurl")) {
					continue;
				}

				if (lowered.includes("cite book") || lowered.includes("cite journal")) {
					continue;
				}

				const urlMatch = citation.match(/\|url=([^\|}]+)/i);
				// The capture runs up to the next "|" and usually ends in a space.
				const citedURL = urlMatch ? urlMatch[1].trim() : "";

				if (!citedURL || citedURL.toLowerCase().includes("archive.org")) {
					continue;
				}

				const archive = await ReferenceFixerGetArchive(citedURL);

				if (!archive || !archive.url) {
					continue;
				}

				let toAdd = ` |archive-url=${archive.url} |archive-date=${GetDate(archive.year, archive.month, archive.day)}`;

				// Do not add a second url-status when the citation has one.
				if (!/\|\s*url-status\s*=/i.test(citation)) {
					toAdd += " |url-status=live";
				}

				// Insert the new parameters just before the closing braces.
				const newCite = `${citation.slice(0, -2)}${toAdd}}}`;
				archived++;

				content = content.replace(citation, () => newCite);
			}
		}

		if (archived) {
			summaryFragments.push(`archived ${archived} reference${archived > 1 ? "s" : ""}`);
		}

		const cleaned = FixPunctuation(content)
			.replaceAll("”", "\"")
			.replaceAll("“", "\"")
			.replaceAll("‘", "'")
			.replaceAll("’", "'");

		if (cleaned !== content) {
			content = cleaned;
			summaryFragments.push("cleaned up punctuation");
		}

		content = ReplaceMultipleIssues(content);

		const summary = summaryFragments.join(", ") + " ([[User:Ingenuity/ReferenceFixer.js|ReferenceFixer]])";

		saveButton.innerHTML = "Saving...";
		await SavePageWikitext(content, summary);
	} catch (error) {
		// Let the user retry instead of leaving a permanently disabled button.
		saveButton.innerHTML = "Save";
		saveButton.disabled = false;
		throw error;
	}

	CloseReferenceFixer();
	location.reload();
}

/**
 * Saves new wikitext to the current page as a minor edit.
 *
 * @param {string} content full wikitext to store
 * @param {string} summary edit summary
 * @returns {Promise<*>} whatever the API's postWithEditToken call resolves to
 */
async function SavePageWikitext(content, summary) {
	const client = new mw.Api();
	const editRequest = {
		action: "edit",
		title: mw.config.get("wgPageName"),
		text: content,
		summary: summary,
		minor: true
	};

	return client.postWithEditToken(editRequest);
}

/**
 * Unwraps the first {{multiple issues|...}} template when it contains
 * exactly one inner template, replacing the wrapper with that template.
 *
 * @param {string} content page wikitext
 * @returns {string} the wikitext, changed only in the single-template case
 */
function ReplaceMultipleIssues(content) {
	// Only the first wrapper found on the page is considered.
	const firstWrapper = content.matchAll(/{{multiple issues\|((?:\s+)?(?:{{[^\n]+?}}(?:\s+)?)+)}}/gmis).next().value;

	if (firstWrapper === undefined) {
		return content;
	}

	const wrapperText = firstWrapper[0];
	const wrappedPart = firstWrapper[1];
	const innerTemplates = Array.from(wrappedPart.matchAll(/{{.+?}}/gi), (found) => found[0]);

	// Anything other than exactly one inner template is left untouched.
	return innerTemplates.length === 1
		? content.replace(wrapperText, innerTemplates[0])
		: content;
}

/**
 * Escapes regular-expression metacharacters so the text can be embedded in
 * a RegExp and matched literally.
 *
 * @param {string} string text to escape
 * @returns {string} the text with each special character backslash-prefixed
 */
function EscapeRegExp(string) {
	const prefixWithBackslash = (specialChar) => "\\" + specialChar;
	return string.replace(/[.*+?^${}()|[\]\\]/g, prefixWithBackslash);
}

/**
 * Tidies spacing and punctuation around references: drops a space directly
 * before a <ref> tag and moves , . ? ! ; that follows a reference to the
 * front of it. The rules are re-applied until the text stops changing.
 *
 * @param {string} content page wikitext
 * @returns {string} the tidied wikitext
 */
function FixPunctuation(content) {
	const rules = [
		// space before <ref>...</ref>
		[/ ?(<ref(?: name ?= ?"[^<>]{0,30}")?>(?:(?!<\/ref>).)+?<\/ref>)/gs, "$1"],
		// space before <ref name="..." />
		[/ ?(<ref name ?= ?"[^<>]{0,30}" ?\/>)/gs, "$1"],
		// punctuation after <ref>...</ref>
		[/(<ref(?: name ?= ?"[^<>]{0,30}")?>(?:(?!<\/ref>).)+?<\/ref>)[\n ]?([,\.\?\!\;])/gs, "$2$1"],
		// punctuation after <ref name="..." />
		[/(<ref name ?= ?"[^<>]{0,30}" ?\/>)[\n ]?([,\.\?\!\;])/gs, "$2$1"]
	];

	let before;
	let after = content;

	do {
		before = after;
		after = RunRegexes(before, rules);
	} while (after !== before);

	return after;
}

/**
 * Applies a list of [pattern, replacement] pairs to a text, in order, each
 * via String.prototype.replaceAll.
 *
 * @param {string} text input text
 * @param {Array<Array>} list pairs of search pattern and replacement
 * @returns {string} the text after all replacements
 */
function RunRegexes(text, list) {
	return list.reduce(
		(current, [pattern, replacement]) => current.replaceAll(pattern, replacement),
		text
	);
}

// Offer the tool only on pages in namespaces 0, 2 and 118
// (presumably articles, user pages and drafts on English Wikipedia).
if ([0, 2, 118].includes(mw.config.get("wgNamespaceNumber"))) {
	// mw.util and mw.Api live in ResourceLoader modules that are not
	// guaranteed to be loaded when a user script runs, so wait for them.
	mw.loader.using(["mediawiki.util", "mediawiki.api"]).then(() => {
		const portletLink = mw.util.addPortletLink("p-cactions", "#", "ReferenceFixer", "ca-reffixer");

		// addPortletLink yields nothing when the portlet is missing in the
		// current skin; there is nothing to attach to in that case.
		if (!portletLink) {
			return;
		}

		portletLink.addEventListener("click", (event) => {
			// The link target is "#": stop the browser from jumping to the
			// top of the page and changing the URL.
			event.preventDefault();
			ReferenceFixer().catch((error) => {
				console.error("ReferenceFixer: could not open the dialog", error);
			});
		});
	});
}

// </nowiki>