User:Harej/citation-watchlist-staging.js
Appearance
Code that you insert on this page could contain malicious content capable of compromising your account. If you import a script from another page with "importScript", "mw.loader.load", "iusc", or "lusc", take note that this causes you to dynamically load a remote script, which could be changed by others. Editors are responsible for all edits and actions they perform, including by scripts. User scripts are not centrally supported and may malfunction or become inoperable due to software changes. A guide to help you find broken scripts is available. If you are unsure whether code you are adding to this page is safe, you can ask at the appropriate village pump. This code will be executed when previewing this page. |
Documentation for this user script can be added at User:Harej/citation-watchlist-staging. |
/*
Wiki Configuration for Citation Watchlist
Leave the "new Set()" lines alone.
*/
// Wiki to operate on; together these select the project host name.
const LANGUAGE = 'en';
const FAMILY = 'wikipedia';

// Action API endpoint of that wiki.
const actionApiEndpoint = `https://${LANGUAGE}.${FAMILY}.org/w/api.php`;

// On-wiki pages holding the Public Suffix List and the index of domain lists.
const publicSuffixList = "Wikipedia:Citation_Watchlist/Public_Suffix_List";
const listOfLists = "Wikipedia:Citation_Watchlist/Lists";

/*
Indicator levels, keyed by type. For each level:
  msg      - label shown in the tooltip
  emoji    - marker placed in front of a flagged entry
  section  - heading line that introduces this level's domains on a list page
  priority - when a domain is on several lists, the highest number wins
  list     - domains of this level; starts empty and is filled at runtime
*/
const indicators = {
  warning: {
    msg: "Warning",
    emoji: '\u2757',
    section: "==Warn==",
    priority: 3,
    list: new Set(),
  },
  caution: {
    msg: "Caution",
    emoji: '\u270B',
    section: "==Caution==",
    priority: 2,
    list: new Set(),
  },
  inspect: {
    msg: "Inspect",
    emoji: '\uD83D\uDD0E',
    section: "==Inspect==",
    priority: 1,
    list: new Set(),
  },
};
/*
Citation Watchlist Script – Highlights watchlist entries when questionable
sources are added
author: Hacks/Hackers
license: GPL 3.0
*/
// Public Suffix List entries; starts empty and is replaced by analyzeView().
let publicSuffixSet = new Set();

// Title prefixes ("Name:") of every namespace except numbers 0 and 118,
// with spaces turned into underscores. isNotArticle() tests titles against
// these prefixes.
const namespacesObj = mw.config.get('wgFormattedNamespaces');
const namespaces = Object.keys(namespacesObj)
  .filter(num => !['0', '118'].includes(num))
  .map(num => `${namespacesObj[num].replace(/ /g, '_')}:`);
/**
 * Entry point of the script.
 *
 * Runs only in namespaces -1, 0 and 118. Loads the Public Suffix List and the
 * domain lists, then walks every `.mw-changeslist-links` container on the
 * page, works out which revision(s) each entry refers to from its diff /
 * hist / prev / cur link, and passes them to analyzeRevision(). When no such
 * link exists anywhere, the first revision of the current page is analyzed
 * instead.
 *
 * @returns {Promise<void>}
 */
async function analyzeView() {
  const ns = mw.config.get('wgNamespaceNumber');
  if (![-1, 0, 118].includes(ns)) {
    return;
  }
  publicSuffixSet = await fetchPublicSuffixList();
  if (publicSuffixSet.size === 0) {
    console.error('Public Suffix List loading failed');
    return;
  }
  console.log("Welcome to Citation Watchlist");
  const listPages = await fetchDomainListPages(listOfLists);
  if (listPages) {
    const lists = await fetchAndOrganizeDomainLists(listPages);
    if (lists) {
      // Copy every fetched domain into the matching indicator set.
      for (const type in indicators) {
        lists[type].list.forEach(indicators[type].list.add, indicators[type].list);
      }
    }
  }
  const entriesContainers = document.querySelectorAll('.mw-changeslist-links');
  let noLinks = true;
  for (const container of entriesContainers) {
    const diffLink = container.querySelector('a.mw-changeslist-diff');
    const histLink = container.querySelector('a.mw-changeslist-history');
    const prevLink = container.querySelector('a.mw-history-histlinks-previous');
    const curLink = container.querySelector('a.mw-history-histlinks-current');
    let revision = null;
    if (diffLink) {
      noLinks = false;
      const urlParams = new URL(diffLink.href).searchParams;
      const pageTitle = urlParams.get('title');
      // Skip entries whose link carries no title: the namespace cannot be
      // checked, and isNotArticle() would throw on null.
      if (!pageTitle || isNotArticle(pageTitle)) continue;
      revision = {
        oldrevision: urlParams.get('diff'),
        newrevision: urlParams.get('oldid'),
        element: diffLink.parentNode.parentNode
      };
      if (revision.oldrevision === 'prev') { // This happens on user contributions pages
        const previousRevisionMap = await fetchPreviousRevisionIds(
          [revision.newrevision]);
        revision.oldrevision = revision.newrevision;
        revision.newrevision = previousRevisionMap[revision.newrevision];
      }
    } else if (histLink) {
      noLinks = false;
      const urlParams = new URL(histLink.href).searchParams;
      const pageTitle = urlParams.get('title');
      if (!pageTitle || isNotArticle(pageTitle)) continue;
      const firstID = await fetchFirstRevisionId(pageTitle);
      if (!firstID) continue;
      revision = {
        oldrevision: firstID,
        element: histLink.parentNode.parentNode
      };
    } else if (prevLink) {
      noLinks = false;
      // Parse the query string of the URL, not the whole href: feeding the
      // full href to URLSearchParams folds "https://…?" into the first key.
      const revId = new URL(prevLink.href).searchParams.get('oldid');
      const previousRevisionMap = await fetchPreviousRevisionIds([revId]);
      revision = {
        oldrevision: revId,
        newrevision: previousRevisionMap[revId],
        element: prevLink.parentNode.parentNode
      };
    } else if (curLink) {
      noLinks = false;
      revision = {
        oldrevision: new URL(curLink.href).searchParams.get('oldid'),
        element: curLink.parentNode.parentNode
      };
    }
    if (revision) {
      await analyzeRevision(revision);
    }
  }
  // If no links were found, analyze the first revision of the current page.
  if (noLinks) {
    const element = entriesContainers[0];
    // Without a container there is nothing to attach an indicator to.
    if (element) {
      // wgPageName keeps the namespace prefix; wgTitle would drop it and
      // make the lookup hit a main-namespace page of the same name.
      const pageTitle = mw.config.get('wgPageName');
      const firstID = await fetchFirstRevisionId(pageTitle);
      if (firstID) {
        await analyzeRevision({ oldrevision: firstID, element });
      }
    }
  }
}
/**
 * Finds the URLs a revision added, matches their root domains against the
 * indicator lists, and marks the revision's element with one emoji per
 * indicator level that matched.
 *
 * @param {{oldrevision: *, newrevision: *, element: Element}} revision
 *   Revision ID(s) to compare and the element to annotate. Without
 *   `newrevision`, every URL of the single revision counts as added.
 * @returns {Promise<void>}
 */
async function analyzeRevision(revision) {
  const revisionIds = revision.newrevision
    ? [revision.oldrevision, revision.newrevision]
    : [revision.oldrevision];
  // NOTE(review): fetchRevisionContent() assigns old/new by position in the
  // API response, not by the order of IDs passed here — confirm the
  // response ordering.
  const wikitext = await fetchRevisionContent(revisionIds);
  const fromURLs = new Set(extractAddedURLs(wikitext.oldrevision) || []);
  const toURLs = new Set(extractAddedURLs(wikitext.newrevision) || []);
  const addedURLs = revision.newrevision
    ? [...toURLs].filter(url => !fromURLs.has(url))
    : Array.from(fromURLs);
  console.log(`Revision element: ${revision.element.innerHTML}\nAdded URLs: ${addedURLs.join(' ')}\n`);

  const types = Object.keys(indicators);
  const matchedDomains = {};
  for (const type of types) {
    matchedDomains[type] = [];
  }

  for (const url of addedURLs) {
    const domain = getRootDomain(new URL(url).hostname, publicSuffixSet);

    // Pick the highest-priority indicator whose list contains the domain.
    let winner = null;
    for (const type of types) {
      if (!indicators[type].list.has(domain)) continue;
      if (winner === null ||
          indicators[type].priority > indicators[winner].priority) {
        winner = type;
      }
    }
    if (winner === null || matchedDomains[winner].includes(domain)) continue;

    matchedDomains[winner].push(domain);
    // A domain is reported once, under its highest-priority level only.
    for (const type of types) {
      if (indicators[type].priority < indicators[winner].priority) {
        matchedDomains[type] = matchedDomains[type].filter(d => d !== domain);
      }
    }
  }

  for (const type of types) {
    if (matchedDomains[type].length > 0) {
      prependEmojiWithTooltip(revision.element, type, matchedDomains[type]);
    }
  }
}
/**
 * Inserts the emoji of the given indicator type directly before `element`,
 * with a tooltip naming the matched domains. Each element is marked at most
 * once per type; unknown types are ignored.
 *
 * @param {Element} element - Element to annotate.
 * @param {string} type - Key into `indicators`.
 * @param {string[]} domains - Domains listed in the tooltip.
 */
function prependEmojiWithTooltip(element, type, domains) {
  const config = indicators[type];
  if (!config) {
    return;
  }
  // The attribute records that this element already carries this marker.
  const processedFlag = `data-processed-${type}`;
  if (element.getAttribute(processedFlag) === 'true') {
    return;
  }
  const marker = document.createElement('span');
  marker.textContent = config.emoji + " ";
  marker.title = `${config.msg}: ${domains.join(", ")}`;
  element.parentNode.insertBefore(marker, element);
  element.setAttribute(processedFlag, 'true');
}
/**
 * Returns the first entry of `data.query.pages`.
 *
 * @param {?Object} data - API response; may be null or lack `query.pages`.
 * @returns {Promise<?Object>} The first page object, null when the response
 *   has no `query.pages`, or undefined when `pages` is empty.
 */
async function getFirstPage(data) {
  const pages = data && data.query && data.query.pages;
  if (!pages) {
    return null;
  }
  const [firstPage] = Object.values(pages);
  return firstPage;
}
/**
 * Returns the first element of `page.revisions`.
 *
 * @param {Object} page - Page object from an API response.
 * @returns {Promise<?Object>} The first revision, or null when the page has
 *   no (or an empty) `revisions` list.
 */
async function getFirstRevision(page) {
  const { revisions } = page;
  const hasRevisions = Boolean(revisions) && revisions.length > 0;
  return hasRevisions ? revisions[0] : null;
}
/**
 * Fetches the main-slot wikitext of one or two revisions.
 *
 * The first revision in the API response becomes `oldrevision`, the second
 * (if any) becomes `newrevision`.
 * NOTE(review): callers appear to rely on the API returning revisions in
 * ascending revision-ID order rather than in the order requested — confirm.
 *
 * @param {Array} revIds - Revision IDs to fetch.
 * @returns {Promise<{oldrevision: ?string, newrevision: ?string}>}
 *   Wikitext per position; both are null when the request failed or the
 *   response holds no revisions.
 */
async function fetchRevisionContent(revIds) {
  const data = await fetchRevisionData({
    revids: revIds,
    rvprop: ['content'],
    rvslots: ['main']
  });
  const page = await getFirstPage(data);
  const wikitext = { oldrevision: null, newrevision: null };
  // getFirstPage() yields null when the request failed; pages without
  // revisions have no `revisions` key. Report "no content" in both cases
  // instead of throwing.
  if (!page || !page.revisions || page.revisions.length === 0) {
    return wikitext;
  }
  // Content is absent when a revision's text is unavailable.
  const mainSlotText = rev =>
    (rev && rev.slots && rev.slots.main && rev.slots.main['*']) || null;
  wikitext.oldrevision = mainSlotText(page.revisions[0]);
  if (page.revisions.length > 1) {
    wikitext.newrevision = mainSlotText(page.revisions[1]);
  }
  return wikitext;
}
/**
 * Looks up the parent revision of each given revision.
 *
 * Only the first page of the API response is read.
 *
 * @param {Array} revisionIds - Revision IDs to look up.
 * @returns {Promise<Object>} Map of revision ID to parent revision ID;
 *   empty when the response contains no page.
 */
async function fetchPreviousRevisionIds(revisionIds) {
  const response = await fetchRevisionData({
    revids: revisionIds,
    rvprop: ['ids']
  });
  const page = await getFirstPage(response);
  const parentOf = {};
  if (page) {
    page.revisions.forEach(({ revid, parentid }) => {
      parentOf[revid] = parentid;
    });
  }
  return parentOf;
}
/**
 * Fetches the ID of a page's earliest revision by asking for a single
 * revision in `newer` direction.
 *
 * @param {string} pageTitle - Title of the page.
 * @returns {Promise<?number>} The revision ID, or null when the page or its
 *   revisions are not in the response.
 */
async function fetchFirstRevisionId(pageTitle) {
  const query = {
    titles: [pageTitle],
    rvlimit: 1,
    rvdir: 'newer',
    rvprop: ['ids'],
  };
  const page = await getFirstPage(await fetchRevisionData(query));
  if (!page) {
    return null;
  }
  const earliest = await getFirstRevision(page);
  if (!earliest) {
    return null;
  }
  return earliest.revid;
}
/**
 * Reads the index page that links to the individual domain-list pages and
 * returns the linked titles. Results are cached in localStorage for four
 * hours.
 *
 * A title is taken from every line starting with `* [[`; for piped links
 * (`[[Page|label]]`) only the part before the pipe is kept.
 *
 * @param {string} pageName - Title of the index page.
 * @returns {Promise<string[]>} Linked page titles; empty when the index page
 *   cannot be read.
 */
async function fetchDomainListPages(pageName) {
  const cacheKey = `citationWatchlistFetchDomainListPages_${pageName}`;
  const cacheExpiration = 4 * 60 * 60 * 1000;
  const now = Date.now();
  const cachedData = localStorage.getItem(cacheKey);
  const cachedTimestamp = localStorage.getItem(`${cacheKey}_timestamp`);
  if (cachedData && cachedTimestamp && (now - parseInt(cachedTimestamp, 10)) <
    cacheExpiration) {
    console.log("Loaded list of lists from cache");
    return JSON.parse(cachedData);
  }
  const data = await fetchRevisionData({
    titles: [pageName],
    rvprop: ['content'],
    rvslots: ['*']
  });
  const page = await getFirstPage(data);
  // A missing page comes back without `revisions`; treat it like a failed
  // request instead of throwing.
  const revision = page && page.revisions && page.revisions[0];
  const content = revision && revision.slots && revision.slots.main &&
    revision.slots.main['*'];
  if (typeof content !== 'string') return [];
  const pageTitles = [];
  for (const line of content.split('\n')) {
    if (!line.startsWith('* [[')) continue;
    // Matches the first instance of [[Page Title]]
    const match = line.match(/\[\[([^\]]+)\]\]/);
    if (!match) continue;
    // Drop a link label: titles are later joined with "|", so a label would
    // be sent to the API as an extra title.
    const title = match[1].split('|')[0].trim();
    if (title) {
      pageTitles.push(title);
    }
  }
  localStorage.setItem(cacheKey, JSON.stringify(pageTitles));
  localStorage.setItem(`${cacheKey}_timestamp`, now.toString());
  console.log("Loaded from API and stored in cache");
  return pageTitles;
}
/**
 * Fetches the given domain-list pages and adds their domains to the matching
 * `indicators[type].list` sets.
 *
 * Within a page, a line equal to an indicator's `section` heading selects
 * that indicator's list; every following line starting with `*` is added to
 * it (asterisk stripped, whitespace trimmed) until another known heading
 * appears.
 *
 * @param {string[]} pageNames - Titles of the list pages.
 * @returns {Promise<?Object>} The `indicators` object, or null when there
 *   was nothing to fetch or the request failed.
 */
async function fetchAndOrganizeDomainLists(pageNames) {
  if (!pageNames || pageNames.length === 0) {
    return null;
  }
  const data = await fetchRevisionData({
    titles: pageNames,
    rvprop: ['content'],
    rvslots: ['*'],
  });
  // fetchRevisionData() resolves to null on failure.
  if (!data || !data.query || !data.query.pages) {
    console.error('Domain list pages could not be loaded');
    return null;
  }
  for (const page of Object.values(data.query.pages)) {
    // Missing pages carry no revisions; skip them rather than abort.
    const revision = page.revisions && page.revisions[0];
    const content = revision && revision.slots && revision.slots.main &&
      revision.slots.main['*'];
    if (typeof content !== 'string') continue;
    let currentList = null;
    for (const line of content.split('\n')) {
      const trimmed = line.trim();
      for (const type in indicators) {
        if (trimmed === indicators[type].section) {
          currentList = indicators[type].list;
          break;
        }
      }
      if (line.startsWith('*') && currentList) {
        currentList.add(line.substring(1).trim());
      }
    }
  }
  return indicators;
}
/**
 * Downloads the raw text of the on-wiki Public Suffix List page and returns
 * its rules.
 *
 * Blank lines and lines starting with `//` are skipped; every other line is
 * trimmed and stored as is.
 *
 * @returns {Promise<Set<string>>} The rules; empty when the download fails,
 *   the server answers with an HTTP error, or the page is empty.
 */
async function fetchPublicSuffixList() {
  const pslUrl =
    `https://${LANGUAGE}.${FAMILY}.org/wiki/${publicSuffixList}?action=raw`;
  console.log(`Raw page text request: ${pslUrl}`);
  // Do the whole download inside safeFetch so that a failed body read or an
  // HTTP error status is logged and reported as "no content" instead of
  // rejecting, or being parsed as if it were the list.
  const content = await safeFetch(async function loadPublicSuffixList(url) {
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`HTTP ${response.status} for ${url}`);
    }
    return response.text();
  }, pslUrl);
  if (!content) return new Set();
  const suffixSet = new Set();
  for (const line of content.split('\n')) {
    const rule = line.trim();
    if (rule && !rule.startsWith('//')) {
      suffixSet.add(rule);
    }
  }
  return suffixSet;
}
/**
 * Runs a `prop=revisions` query through mw.Api.
 *
 * @param {Object} data - Query options. `rvprop`, `revids`, `titles` and
 *   `rvslots` may be arrays (joined with `|`) or plain values; `rvlimit` and
 *   `rvdir` are passed through, with `rvdir` defaulting to 'older'.
 * @returns {Promise<?Object>} The API response, or null when the request
 *   failed (see safeFetch).
 */
async function fetchRevisionData(data) {
  const params = {
    action: 'query',
    prop: 'revisions',
    format: 'json',
    rvdir: data.rvdir || 'older',
    origin: '*'
  };
  if (data.rvlimit) {
    params.rvlimit = data.rvlimit;
  }
  for (const key of ['rvprop', 'revids', 'titles', 'rvslots']) {
    const value = data[key];
    if (!value) continue;
    params[key] = Array.isArray(value) ? value.join('|') : value;
  }
  const api = new mw.Api();
  return await safeFetch(api.get.bind(api), params);
}
/**
 * Calls `fn(...args)` and awaits it; failures are logged and turned into
 * null instead of propagating.
 *
 * @param {Function} fn - Function to call; may return a promise.
 * @param {...*} args - Arguments forwarded to `fn`.
 * @returns {Promise<*>} The awaited result of `fn`, or null when it threw
 *   or rejected.
 */
async function safeFetch(fn, ...args) {
  let outcome = null;
  try {
    outcome = await fn(...args);
  } catch (error) {
    console.error(`Error during ${fn.name}:`, error);
    outcome = null;
  }
  return outcome;
}
/**
 * Extracts all http/https URLs from a piece of wikitext.
 *
 * A URL ends at whitespace, at `<`, `>` or `"`, or at one of the wikitext
 * delimiters `[ ] { } |`, so template and link syntax around a URL (for
 * example `|url=https://example.com|title=…}}`) is not swallowed into it.
 * Sentence punctuation (`, ; . : ! ?`) at the very end is dropped as well.
 * Each candidate is normalized through `URL`; candidates it rejects are
 * logged and skipped.
 *
 * @param {?string} wikitext - Text to scan; null/undefined yields no URLs.
 * @returns {string[]} Normalized URLs (`URL#href`) in order of appearance,
 *   duplicates included.
 */
function extractAddedURLs(wikitext) {
  if (wikitext === null || wikitext === undefined) {
    return [];
  }
  const urlRegex = /https?:\/\/[^\s<>"\[\]{}|]+/g;
  const candidates = String(wikitext).match(urlRegex) || [];
  const addedURLs = [];
  for (const candidate of candidates) {
    // "See https://example.com." — the final dot belongs to the sentence.
    const cleaned = candidate.replace(/[,;.:!?]+$/, '');
    try {
      addedURLs.push(new URL(cleaned).href);
    } catch (error) {
      console.error(`Invalid URL rejected: ${candidate}`);
    }
  }
  return addedURLs;
}
/**
 * Reduces a hostname to its registrable domain: the matching public suffix
 * plus one more label to its left.
 *
 * Candidates are tried from the full hostname down to the last label, so the
 * longest matching rule wins. Three rule forms from the suffix set are
 * honoured:
 *   - plain rules (`co.uk`): the candidate is a public suffix;
 *   - wildcard rules (`*.ck`): any single label in front of `ck` forms a
 *     public suffix;
 *   - exception rules (`!www.ck`): the candidate itself is the registrable
 *     domain.
 *
 * @param {string} hostname - Host name to reduce.
 * @param {Set<string>} publicSuffixSet - Public Suffix List rules.
 * @returns {string} The registrable domain; the hostname unchanged when it
 *   is itself a public suffix or no rule matches.
 */
function getRootDomain(hostname, publicSuffixSet) {
  const domainParts = hostname.split('.');
  for (let i = 0; i < domainParts.length; i++) {
    const candidate = domainParts.slice(i).join('.');
    if (publicSuffixSet.has(`!${candidate}`)) {
      return candidate;
    }
    const parent = domainParts.slice(i + 1).join('.');
    const isPublicSuffix = publicSuffixSet.has(candidate) ||
      (parent !== '' && publicSuffixSet.has(`*.${parent}`));
    if (isPublicSuffix) {
      // At i === 0 the whole hostname is a suffix and there is no label to
      // its left; slice(-1) would wrongly return just the last label.
      return i === 0 ? hostname : domainParts.slice(i - 1).join('.');
    }
  }
  return hostname;
}
/**
 * Tells whether a title starts with one of the namespace prefixes collected
 * in `namespaces`.
 *
 * @param {string} pageTitle - Page title to test.
 * @returns {boolean} true when the title carries one of those prefixes.
 */
function isNotArticle(pageTitle) {
  for (const prefix of namespaces) {
    if (pageTitle.startsWith(prefix)) {
      return true;
    }
  }
  return false;
}
// Start the script. Failures are logged rather than left as an unhandled
// promise rejection.
analyzeView()
  .then(() => console.log('Citation Watchlist script finished executing'))
  .catch(error => console.error('Citation Watchlist script failed:', error));