Jump to content

User:Harej/citation-watchlist-staging.js

From Wikipedia, the free encyclopedia
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
/*

Wiki Configuration for Citation Watchlist
Leave the "new Set()" lines alone.

*/
// Language subdomain and project family of the wiki this script is set up for.
const LANGUAGE = "en";
const FAMILY = "wikipedia";
// Action API URL assembled from the two values above.
const actionApiEndpoint = "https://" + LANGUAGE + "." + FAMILY + ".org/w/api.php";
// Title of the wiki page whose raw text is loaded as the public suffix list.
const publicSuffixList = 'Wikipedia:Citation_Watchlist/Public_Suffix_List';
// Title of the wiki page that links to the individual domain list pages.
const listOfLists = 'Wikipedia:Citation_Watchlist/Lists';
// One entry per alert level:
//   msg      - label shown in the tooltip
//   emoji    - marker inserted next to a flagged revision
//   section  - heading line that starts this level's domains on a list page
//   priority - when a domain is on several lists, the highest priority wins
//   list     - domains for this level; starts empty and is filled at runtime
const indicators = {
  warning: {
    msg: 'Warning',
    emoji: '\u{2757}',
    section: '==Warn==',
    priority: 3,
    list: new Set()
  },
  caution: {
    msg: 'Caution',
    emoji: '\u{270B}',
    section: '==Caution==',
    priority: 2,
    list: new Set()
  },
  inspect: {
    msg: 'Inspect',
    emoji: '\u{1F50E}',
    section: '==Inspect==',
    priority: 1,
    list: new Set()
  }
};

/*
Citation Watchlist Script – Highlights watchlist entries when questionable
sources are added

author: Hacks/Hackers
license: GPL 3.0
*/

// Public suffix entries; starts empty and is replaced by analyzeView() once
// the list has been fetched.
let publicSuffixSet = new Set();
// Namespace number -> display name, as provided by MediaWiki for this wiki.
const namespacesObj = mw.config.get('wgFormattedNamespaces');
// Title prefixes ("Name:", spaces written as underscores) of every namespace
// except numbers 0 and 118; isNotArticle() matches titles against these.
const namespaces = [];
for (const [number, label] of Object.entries(namespacesObj)) {
  if (number === '0' || number === '118') {
    continue;
  }
  namespaces.push(label.replace(/ /g, '_') + ':');
}

/**
 * Entry point of the script: loads the public suffix list and the domain
 * lists, then walks every `.mw-changeslist-links` container on the page,
 * works out which revision(s) the entry refers to and hands them to
 * analyzeRevision().
 *
 * Runs only when wgNamespaceNumber is -1, 0 or 118, and stops early when the
 * public suffix list could not be loaded.
 *
 * Each container is matched against four link kinds, in this order: diff
 * link, history link, "previous" link, "current" link. Note the field names:
 * for a diff link `oldrevision` receives the `diff` parameter and
 * `newrevision` the `oldid` parameter; analyzeRevision() only uses the two
 * fields as the set of revision ids to fetch.
 *
 * When no container holds any of those links, the first revision of the
 * current page is analyzed instead and the marker is attached to the first
 * container (if there is one).
 *
 * @returns {Promise<void>}
 */
async function analyzeView() {
  const ns = mw.config.get('wgNamespaceNumber');
  if (![-1, 0, 118].includes(ns)) {
    return;
  }
  publicSuffixSet = await fetchPublicSuffixList();
  if (publicSuffixSet.size === 0) {
    console.error('Public Suffix List loading failed');
    return;
  }
  console.log("Welcome to Citation Watchlist");
  const listPages = await fetchDomainListPages(listOfLists);
  if (listPages) {
    const lists = await fetchAndOrganizeDomainLists(listPages);
    if (lists) {
      for (const type in indicators) {
        if (!lists[type] || !lists[type].list) continue;
        lists[type].list.forEach(indicators[type].list.add, indicators[type].list);
      }
    }
  }
  const entriesContainers = document.querySelectorAll('.mw-changeslist-links');
  let noLinks = true;
  for (const container of entriesContainers) {
    const diffLink = container.querySelector('a.mw-changeslist-diff');
    const histLink = container.querySelector('a.mw-changeslist-history');
    const prevLink = container.querySelector(
      'a.mw-history-histlinks-previous');
    const curLink = container.querySelector('a.mw-history-histlinks-current');
    // A failure on one entry (bad link, failed request) must not stop the
    // remaining entries from being checked.
    try {
      let revision = null;
      if (diffLink) {
        noLinks = false;
        const urlParams = new URL(diffLink.href).searchParams;
        const pageTitle = urlParams.get('title');
        // Without a title the namespace cannot be checked, so skip the entry.
        if (!pageTitle || isNotArticle(pageTitle)) continue;
        revision = {
          oldrevision: urlParams.get('diff'),
          newrevision: urlParams.get('oldid'),
          element: diffLink.parentNode.parentNode
        };
        if (revision.oldrevision === 'prev') { // This happens on user contributions pages
          const previousRevisionMap = await fetchPreviousRevisionIds(
            [revision.newrevision]);
          revision.oldrevision = revision.newrevision;
          revision.newrevision = previousRevisionMap[revision.newrevision];
        }
      } else if (histLink) {
        noLinks = false;
        const urlParams = new URL(histLink.href).searchParams;
        const pageTitle = urlParams.get('title');
        if (!pageTitle || isNotArticle(pageTitle)) continue;
        const firstID = await fetchFirstRevisionId(pageTitle);
        if (!firstID) continue;
        revision = {
          oldrevision: firstID,
          element: histLink.parentNode.parentNode
        };
      } else if (prevLink) {
        noLinks = false;
        // Parse the href as a URL first: feeding the whole href to
        // URLSearchParams would fold everything before the first "=" into
        // the first key and lose `oldid` whenever it is the first parameter.
        const oldid = new URL(prevLink.href).searchParams.get('oldid');
        if (!oldid) continue;
        const previousRevisionMap = await fetchPreviousRevisionIds([oldid]);
        revision = {
          oldrevision: oldid,
          newrevision: previousRevisionMap[oldid],
          element: prevLink.parentNode.parentNode
        };
      } else if (curLink) {
        noLinks = false;
        const oldid = new URL(curLink.href).searchParams.get('oldid');
        if (!oldid) continue;
        revision = {
          oldrevision: oldid,
          element: curLink.parentNode.parentNode
        };
      }
      if (revision) {
        await analyzeRevision(revision);
      }
    } catch (error) {
      console.error('Citation Watchlist: could not analyze entry', error);
    }
  }
  // If no links were found, extract the first revision ID
  if (noLinks) {
    const fallbackElement = entriesContainers[0];
    // Nothing to attach a marker to.
    if (!fallbackElement) return;
    // wgPageName keeps the namespace prefix; wgTitle drops it, which would
    // look up the same-named main-namespace page for a page in namespace 118.
    const pageTitle = mw.config.get('wgPageName');
    const firstID = await fetchFirstRevisionId(pageTitle);
    if (!firstID) return;
    await analyzeRevision({
      oldrevision: firstID,
      element: fallbackElement
    });
  }
}

/**
 * Finds the URLs a revision introduced, maps each to its root domain and
 * marks the revision's element with one indicator per alert level that has at
 * least one matching domain.
 *
 * With both revision ids present, the added URLs are those found in the
 * second fetched text but not in the first; with only `oldrevision`, every
 * URL in that text counts as added.
 *
 * @param {{oldrevision: *, newrevision: *, element: Element}} revision
 * @returns {Promise<void>}
 */
async function analyzeRevision(revision) {
  const revisionIds = revision.newrevision
    ? [revision.oldrevision, revision.newrevision]
    : [revision.oldrevision];
  const wikitext = await fetchRevisionContent(revisionIds);
  const fromURLs = new Set(extractAddedURLs(wikitext.oldrevision) || []);
  const toURLs = new Set(extractAddedURLs(wikitext.newrevision) || []);
  const addedURLs = revision.newrevision
    ? [...toURLs].filter(url => !fromURLs.has(url))
    : Array.from(fromURLs);
  console.log(`Revision element: ${revision.element.innerHTML}
  Added URLs: ${addedURLs.join(' ')}
  `);

  // One (initially empty) bucket of matched domains per alert level.
  const matchedDomains = {};
  for (const key of Object.keys(indicators)) {
    matchedDomains[key] = [];
  }

  for (const url of addedURLs) {
    const domain = getRootDomain(new URL(url).hostname, publicSuffixSet);

    // Pick the level with the highest priority among those listing the
    // domain; on equal priority the level seen first is kept.
    let winner = null;
    for (const type in indicators) {
      if (!indicators[type].list.has(domain)) {
        continue;
      }
      if (winner === null ||
        indicators[type].priority > indicators[winner].priority) {
        winner = type;
      }
    }
    if (winner === null || matchedDomains[winner].includes(domain)) {
      continue;
    }

    matchedDomains[winner].push(domain);
    // Make sure the domain is not also reported under a lower level.
    for (const type in indicators) {
      if (indicators[type].priority < indicators[winner].priority) {
        matchedDomains[type] = matchedDomains[type].filter(
          listed => listed !== domain);
      }
    }
  }

  for (const type in indicators) {
    if (matchedDomains[type].length === 0) {
      continue;
    }
    prependEmojiWithTooltip(revision.element, type, matchedDomains[type]);
  }
}

/**
 * Inserts an emoji marker for the given alert level directly before
 * `element`, with a tooltip naming the matched domains.
 *
 * Does nothing for an unknown level, or when the element already carries the
 * `data-processed-<type>` attribute set by an earlier call.
 *
 * @param {Element} element - Node the marker is placed in front of.
 * @param {string} type - Key into `indicators`.
 * @param {string[]} domains - Domains listed in the tooltip.
 */
function prependEmojiWithTooltip(element, type, domains) {
  const indicator = indicators[type];
  if (!indicator) {
    return;
  }
  const processedFlag = `data-processed-${type}`;
  if (element.getAttribute(processedFlag) === 'true') {
    return;
  }
  const marker = document.createElement('span');
  marker.title = `${indicator.msg}: ${domains.join(", ")}`;
  marker.textContent = indicator.emoji + " ";
  element.parentNode.insertBefore(marker, element);
  element.setAttribute(processedFlag, 'true');
}

/**
 * Picks the first page object out of a query response.
 *
 * @param {?Object} data - Response expected to carry `query.pages`.
 * @returns {Promise<?Object>} The first value of `query.pages`, or null when
 *   `data`, `data.query` or `data.query.pages` is missing.
 */
async function getFirstPage(data) {
  const pages = data && data.query && data.query.pages;
  if (!pages) {
    return null;
  }
  const [firstPage] = Object.values(pages);
  return firstPage;
}

/**
 * Returns the first entry of a page object's `revisions` array.
 *
 * @param {Object} page - Page object that may carry a `revisions` array.
 * @returns {Promise<?Object>} The first revision, or null when the page has
 *   no revisions.
 */
async function getFirstRevision(page) {
  const { revisions } = page;
  return revisions && revisions.length > 0 ? revisions[0] : null;
}

/**
 * Fetches the main-slot wikitext of one or two revisions.
 *
 * The first revision returned for the page is reported as `oldrevision` and
 * the second (if any) as `newrevision`.
 * NOTE(review): which revision comes first depends on the order the API
 * returns them in, not on the order of `revIds` — confirm against the API.
 *
 * A failed request, a missing page or a revision without readable main-slot
 * text yields null for the affected field instead of throwing.
 *
 * @param {Array} revIds - Revision ids to look up.
 * @returns {Promise<{oldrevision: ?string, newrevision: ?string}>}
 */
async function fetchRevisionContent(revIds) {
  const data = await fetchRevisionData({
    revids: revIds,
    rvprop: ['content'],
    rvslots: ['main']
  });
  const page = await getFirstPage(data);
  // getFirstPage() yields null when the request failed; treat that and a
  // page without revisions as "no text available".
  const revisions = (page && page.revisions) || [];
  const textOf = (revision) =>
    (revision && revision.slots && revision.slots.main &&
      revision.slots.main['*']) || null;
  return {
    oldrevision: textOf(revisions[0]),
    newrevision: textOf(revisions[1])
  };
}

/**
 * Looks up the parent revision id of each given revision.
 *
 * Revisions from every page in the response are collected, so ids belonging
 * to different pages are all mapped. Pages without a `revisions` array and
 * failed requests contribute nothing.
 *
 * @param {Array} revisionIds - Revision ids to look up.
 * @returns {Promise<Object>} Map of revision id -> parent revision id; empty
 *   when nothing could be fetched.
 */
async function fetchPreviousRevisionIds(revisionIds) {
  const data = await fetchRevisionData({
    revids: revisionIds,
    rvprop: ['ids']
  });
  const revisionMap = {};
  const pages = data && data.query && data.query.pages;
  if (!pages) return revisionMap;
  for (const page of Object.values(pages)) {
    // A page entry without revisions (e.g. unknown id) has nothing to map.
    for (const revision of page.revisions || []) {
      revisionMap[revision.revid] = revision.parentid;
    }
  }
  return revisionMap;
}

/**
 * Fetches the id of the first revision returned for a page when revisions
 * are requested with `rvdir: 'newer'` and `rvlimit: 1`.
 *
 * @param {string} pageTitle - Title of the page to look up.
 * @returns {Promise<?number>} The revision id, or null when no page or no
 *   revision came back.
 */
async function fetchFirstRevisionId(pageTitle) {
  const request = {
    titles: [pageTitle],
    rvlimit: 1,
    rvdir: 'newer',
    rvprop: ['ids'],
  };
  const page = await getFirstPage(await fetchRevisionData(request));
  if (!page) {
    return null;
  }
  const earliest = await getFirstRevision(page);
  if (!earliest) {
    return null;
  }
  return earliest.revid;
}

/**
 * Returns the titles of the domain list pages linked from `pageName`.
 *
 * Only lines that start with `* [[` are considered, and the first wikilink
 * on such a line supplies the title. For a piped link (`[[Title|label]]`)
 * only the part before the pipe is kept, because the titles are later joined
 * with `|` when they are sent to the API.
 *
 * The result is cached in localStorage for four hours. An unreadable or
 * corrupted cache entry is ignored and the page is fetched again; a failure
 * to write the cache is logged but does not affect the returned value.
 *
 * @param {string} pageName - Title of the page listing the domain lists.
 * @returns {Promise<string[]>} Page titles; empty when the page could not be
 *   loaded.
 */
async function fetchDomainListPages(pageName) {
  const cacheKey = `citationWatchlistFetchDomainListPages_${pageName}`;
  const timestampKey = `${cacheKey}_timestamp`;
  const cacheExpiration = 4 * 60 * 60 * 1000;
  const now = Date.now();
  try {
    const cachedData = localStorage.getItem(cacheKey);
    const cachedTimestamp = localStorage.getItem(timestampKey);
    if (cachedData && cachedTimestamp &&
      (now - parseInt(cachedTimestamp, 10)) < cacheExpiration) {
      const cachedTitles = JSON.parse(cachedData);
      if (Array.isArray(cachedTitles)) {
        console.log("Loaded list of lists from cache");
        return cachedTitles;
      }
    }
  } catch (error) {
    // Storage may be unavailable or hold invalid JSON; fall back to the API.
    console.error('Ignoring unreadable list-of-lists cache:', error);
  }
  const data = await fetchRevisionData({
    titles: [pageName],
    rvprop: ['content'],
    rvslots: ['*']
  });
  const page = await getFirstPage(data);
  const revision = page && page.revisions && page.revisions[0];
  const content = revision && revision.slots && revision.slots.main &&
    revision.slots.main['*'];
  if (typeof content !== 'string') return [];
  const pageTitles = [];
  for (const line of content.split('\n')) {
    if (!line.startsWith('* [[')) continue;
    // First [[Page Title]] or [[Page Title|label]] on the line; capture
    // stops at the pipe so the label never becomes part of the title.
    const match = line.match(/\[\[([^\]|]+)(?:\|[^\]]*)?\]\]/);
    if (match) {
      pageTitles.push(match[1].trim());
    }
  }
  try {
    localStorage.setItem(cacheKey, JSON.stringify(pageTitles));
    localStorage.setItem(timestampKey, now.toString());
    console.log("Loaded from API and stored in cache");
  } catch (error) {
    console.error('Loaded from API but could not store in cache:', error);
  }
  return pageTitles;
}

/**
 * Loads the given domain list pages and adds their entries to the matching
 * `indicators[...].list` sets.
 *
 * Within a page, a line equal (after trimming) to an indicator's `section`
 * heading switches the current list; every following line that starts with
 * `*` is added to it. Entries are lowercased because they are compared with
 * URL hostnames, which the URL parser always reports in lowercase.
 *
 * Pages that are missing or carry no main-slot text are skipped.
 * NOTE(review): all titles are sent in a single request; the API limits how
 * many titles one request may carry — confirm the list of lists stays below
 * that limit.
 *
 * @param {string[]} pageNames - Titles of the domain list pages.
 * @returns {Promise<?Object>} The `indicators` object, or null when the
 *   request failed.
 */
async function fetchAndOrganizeDomainLists(pageNames) {
  // Nothing to load; avoid sending an empty `titles` parameter.
  if (!pageNames || pageNames.length === 0) return indicators;
  const data = await fetchRevisionData({
    titles: pageNames,
    rvprop: ['content'],
    rvslots: ['*'],
  });
  const pages = data && data.query && data.query.pages;
  if (!pages) {
    console.error('Domain lists could not be loaded');
    return null;
  }
  for (const pageId in pages) {
    const revisions = pages[pageId].revisions;
    const revision = revisions && revisions[0];
    const content = revision && revision.slots && revision.slots.main &&
      revision.slots.main['*'];
    if (typeof content !== 'string') continue;
    let currentList = null;
    for (const line of content.split('\n')) {
      for (const type in indicators) {
        if (line.trim() === indicators[type].section) {
          currentList = indicators[type].list;
          break;
        }
      }
      if (line.startsWith('*') && currentList) {
        const domain = line.substring(1).trim().toLowerCase();
        if (domain) {
          currentList.add(domain);
        }
      }
    }
  }
  return indicators;
}

/**
 * Downloads the raw text of the public suffix list page and returns its
 * entries.
 *
 * Blank lines and lines starting with `//` are skipped; every other line is
 * trimmed and stored as-is.
 *
 * An empty set is returned when the request fails, when the server answers
 * with a non-success status (fetch does not reject on HTTP errors, so an
 * error page would otherwise be parsed as suffixes), or when the body cannot
 * be read.
 *
 * @returns {Promise<Set<string>>}
 */
async function fetchPublicSuffixList() {
  const pslUrl =
    `https://${LANGUAGE}.${FAMILY}.org/wiki/${publicSuffixList}?action=raw`;
  console.log(`Raw page text request: ${pslUrl}`);
  const response = await safeFetch(fetch, pslUrl);
  if (!response || !response.ok) return new Set();
  const readBody = () => response.text();
  const content = await safeFetch(readBody);
  if (!content) return new Set();
  const suffixSet = new Set();
  for (const line of content.split('\n')) {
    const entry = line.trim();
    if (entry && !entry.startsWith('//')) {
      suffixSet.add(entry);
    }
  }
  return suffixSet;
}

/**
 * Sends a `prop=revisions` query through mw.Api and returns the response.
 *
 * `rvprop`, `revids`, `titles` and `rvslots` are copied from `data` when
 * present, with arrays joined by `|`. `rvdir` defaults to 'older' and
 * `rvlimit` is only sent when given.
 *
 * @param {Object} data - Query options as described above.
 * @returns {Promise<?Object>} Whatever safeFetch() returns for the request.
 */
async function fetchRevisionData(data) {
  const params = {
    action: 'query',
    prop: 'revisions',
    format: 'json',
    rvdir: data.rvdir || 'older',
    origin: '*'
  };
  if (data.rvlimit) {
    params.rvlimit = data.rvlimit;
  }
  for (const key of ['rvprop', 'revids', 'titles', 'rvslots']) {
    const value = data[key];
    if (!value) {
      continue;
    }
    params[key] = Array.isArray(value) ? value.join('|') : value;
  }
  const api = new mw.Api();
  const response = await safeFetch(api.get.bind(api), params);
  return response;
}

/**
 * Calls `fn` with the given arguments and awaits its result; any error is
 * logged together with the function's name and turned into a null result.
 *
 * @param {Function} fn - Function to call (may return a promise).
 * @param {...*} args - Arguments passed to `fn`.
 * @returns {Promise<*>} The awaited result of `fn`, or null on failure.
 */
async function safeFetch(fn, ...args) {
  let outcome = null;
  try {
    outcome = await fn(...args);
  } catch (error) {
    console.error(`Error during ${fn.name}:`, error);
  }
  return outcome;
}

/**
 * Extracts every http(s) URL from a piece of wikitext.
 *
 * A URL ends at whitespace, at `<`, `>` or `"`, and at the wikitext
 * delimiters `[`, `]`, `{`, `}` and `|`. Without the latter, a link such as
 * `[https://example.com]` or a template parameter such as
 * `|url=https://example.com|title=…` would drag the following markup into
 * the URL and be rejected or compared incorrectly.
 *
 * Each candidate is normalized through the URL parser; candidates it cannot
 * parse are logged and dropped.
 *
 * @param {?string} wikitext - Text to scan; non-string or empty input yields
 *   an empty array.
 * @returns {string[]} Normalized URLs in order of appearance (duplicates
 *   kept).
 */
function extractAddedURLs(wikitext) {
  if (typeof wikitext !== 'string' || wikitext.length === 0) {
    return [];
  }
  const urlRegex = /https?:\/\/[^\s<>"\[\]{}|]+/g;
  const addedURLs = [];
  for (const candidate of wikitext.match(urlRegex) || []) {
    try {
      addedURLs.push(new URL(candidate).href);
    } catch (error) {
      console.error(`Invalid URL rejected: ${candidate}`);
    }
  }
  return addedURLs;
}

/**
 * Reduces a hostname to its registrable domain (public suffix plus one
 * label) using the entries in `publicSuffixSet`.
 *
 * Candidates are tried from the full hostname down to the last label:
 *  - an exception entry (`!candidate`) makes the candidate itself the
 *    result;
 *  - a plain entry (`candidate`) or a wildcard entry covering it
 *    (`*.<candidate minus its first label>`) marks the candidate as a public
 *    suffix, and the result is the candidate plus the label in front of it.
 *
 * When the hostname itself is a public suffix, or no entry matches, the
 * hostname is returned unchanged.
 *
 * @param {string} hostname - Hostname to reduce.
 * @param {Set<string>} publicSuffixSet - Public suffix list entries.
 * @returns {string}
 */
function getRootDomain(hostname, publicSuffixSet) {
  const domainParts = hostname.split('.');
  for (let i = 0; i < domainParts.length; i++) {
    const candidate = domainParts.slice(i).join('.');
    // Exception rule: the candidate is registrable despite a wildcard above it.
    if (publicSuffixSet.has(`!${candidate}`)) {
      return candidate;
    }
    const parent = domainParts.slice(i + 1).join('.');
    const isPublicSuffix = publicSuffixSet.has(candidate) ||
      (parent !== '' && publicSuffixSet.has(`*.${parent}`));
    if (isPublicSuffix) {
      // With i === 0 there is no label in front of the suffix; slice(-1)
      // would wrongly return just the last label.
      return i === 0 ? hostname : domainParts.slice(i - 1).join('.');
    }
  }
  return hostname;
}

/**
 * Tells whether a page title should be skipped because it is not in one of
 * the analyzed namespaces.
 *
 * A title counts as "not an article" when it starts with one of the prefixes
 * in `namespaces`. A missing or empty title (for example when a link carries
 * no `title` parameter) is also reported as not an article, since its
 * namespace cannot be determined.
 *
 * @param {?string} pageTitle - Title with underscores instead of spaces.
 * @returns {boolean}
 */
function isNotArticle(pageTitle) {
  if (typeof pageTitle !== 'string' || pageTitle === '') {
    return true;
  }
  return namespaces.some(namespace => pageTitle.startsWith(namespace));
}

// Start the analysis. The rejection handler keeps a failure inside
// analyzeView() from surfacing as an unhandled promise rejection.
analyzeView()
  .then(() => console.log('Citation Watchlist script finished executing'))
  .catch((error) => console.error('Citation Watchlist script failed:', error));