Jump to content

User:Daask/MultiResolver/en.wikipedia.js

From Wikipedia, the free encyclopedia
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
"use strict";

/*
 * NOTE: At the moment, this includes all of
 * [[User:Daask/MultiResolver/MultiResolver.js]].
 * In the future, that will probably be packaged separately as a module.
 *
 * Written in 2024 by https://en.wikipedia.org/wiki/User:Daask
 * Status: stable
 * License:: GNU AGPLv3+
 * Kinds of invocation
 *   page query parameters (MultiResolver-standalone.html)
 *   popup invoked by a form (MultiResolver-modal.html)
 *   popup invoked by links automatically generated
 * TODO: Advertise on https://wikitech.wikimedia.org/wiki/Portal:Toolforge
 *
 * Future features
 * 1) Add preprocessor so display format on Wikipedia can be
 *    different than the way the identifier is stored on Wikidata, eg. EIN
 * 2) Display qualifiers.
 * 3) Exit modal with escape button.
 * 4) Exit modal with close button.
 */

importStylesheet( 'User:Daask/MultiResolver/MultiResolver.css' );
importStylesheet( 'User:Daask/MultiResolver/MultiResolver-modal.css' );

// Polyfill: install document.createEntityReference(name) when the browser
// does not provide it. The helper returns a text node holding the character(s)
// that the HTML entity `&name;` decodes to.
if (typeof document.createEntityReference === 'undefined') {
  Reflect.defineProperty(document, 'createEntityReference', {
    value: (entityRefName) => {
      // Let the HTML parser decode the entity inside a scratch element,
      // then copy the decoded text into a fresh text node.
      const scratch = document.createElement('span');
      scratch.innerHTML = `&${entityRefName};`;
      const decoded = scratch.innerText;
      return document.createTextNode(decoded);
    },
    enumerable: false,
    configurable: true
  });
}

// Polyfill: add Element.prototype.clearChildren() when it is not defined.
// The method removes every child node of the element it is called on.
if (typeof Element.prototype.clearChildren === 'undefined') {
  Reflect.defineProperty(Element.prototype, 'clearChildren', {
    value() {
      // Peel children off from the end until none remain.
      let last = this.lastChild;
      while (last) {
        last.remove();
        last = this.lastChild;
      }
    },
    enumerable: false,
    configurable: true
  });
}

// Elements already handed out by getOrCreateElement, keyed by element id.
const elementTableById = {};
// Label promises, keyed by the item id concatenated with the requested
// language string (see fetchLabelForWikidataItemCached).
const cachedPropertyLabels = {};
// Statement-value promises, keyed by the item id concatenated with the
// property id (see fetchWikidataStatementValuesCached).
const cachedWikidataStatementValues = {};

/**
 * Return the element with the given id, creating it on first use.
 *
 * Lookup order: the elementTableById memo, then the document, then a newly
 * created element. Whatever is found or created is remembered in the memo.
 *
 * @param {string} id - Id of the wanted element.
 * @param {?Node} [parent=null] - Where to append a newly created element;
 *   ignored when the element already exists.
 * @param {string} [tag='div'] - Tag name used when the element must be created.
 * @returns {Element} The memoised, existing, or newly created element.
 */
function getOrCreateElement(id, parent = null, tag = 'div') {
  const remembered = elementTableById[id];
  if (remembered) { return remembered; }

  let node = document.getElementById(id);
  if (node) {
    elementTableById[id] = node;
    return node;
  }

  node = document.createElement(tag);
  node.id = id;
  if (parent) { parent.appendChild(node); }
  elementTableById[id] = node;
  return node;
}

/**
 * Produce a precise error message for +getLabelFromWikidataQueryJSON+.
 *
 * Walks the response step by step and throws an Error describing the first
 * thing that is missing. This function never returns.
 *
 * @param {*} json - Parsed body of the Wikidata label request.
 * @param {string} item - Entity id whose labels were requested.
 * @param {Error} err - Error that prompted the diagnosis; rethrown when none
 *   of the checks below finds a more specific problem.
 * @throws {Error} Always.
 */
function detailedErrorsForGetLabelFromWikidataQueryJSON(json, item, err){
  // Guard first: reading properties of null/undefined would otherwise raise
  // an unrelated TypeError and hide the real problem.
  if (json === null || typeof json !== 'object') {
    throw new Error(`Wikidata query JSON output is not structured as expected. Response is not an object: ${JSON.stringify(json)}`);
  }
  if (json.success !== 1) {
    throw new Error(`Wikidata server indicates error code ${json.success} in their response to label request.`);
  }
  const entities = json.entities;
  if (!entities){
    throw new Error(`Wikidata query JSON output is not structured as expected. No entities found in JSON: ${JSON.stringify(json)}`);
  }
  const propertyData = entities[item];
  if (!propertyData){
    throw new Error(`No information found for property ${item} in Wikidata query JSON output.`);
  }
  const labels = propertyData.labels;
  if (!labels){
    // Serialise explicitly; interpolating the object prints "[object Object]".
    throw new Error(`Wikidata query JSON output is not structured as expected. No labels found in ${JSON.stringify(propertyData)}`);
  }
  // Nothing more specific was found, so surface the caller's original error.
  throw err;
}

/**
 * Extract the label of +item+ from a Wikidata wbgetentities response.
 *
 * @param {*} json - Parsed response body.
 * @param {string} langstr - Language codes joined by commas, in order of
 *   preference. Whitespace around each code is ignored.
 * @param {string} item - Entity id whose label is wanted.
 * @returns {string} The label in the first requested language that has one.
 * @throws {Error} When the response holds no labels for +item+ (details come
 *   from +detailedErrorsForGetLabelFromWikidataQueryJSON+), or when none of
 *   the requested languages has a label.
 */
function getLabelFromWikidataQueryJSON(json, langstr, item) {
  const labels = json?.entities?.[item]?.labels;
  if (!labels) {
    // Also covers an entity entry without a `labels` key, which would
    // otherwise fail below with a bare TypeError. The helper always throws.
    detailedErrorsForGetLabelFromWikidataQueryJSON(
      json,
      item,
      new TypeError(`No labels available for Wikidata item ${item}.`)
    );
  }

  // One pass and one lookup per language: return the first label found.
  for (const lang of langstr.split(',')) {
    const label = labels[lang.trim()];
    if (label) { return label.value; }
  }
  throw new Error(`Wikidata information found for property ${item}, but does not include a label in the requested language(s): ${langstr}.`);
}

/**
 * Asynchronously obtain the label of a Wikidata entity.
 *
 * If the label cannot be obtained for any reason (network failure, HTTP error
 * status, unparsable body, missing label), the promise resolves to the
 * Wikidata entity identifier instead, so the returned promise does not reject.
 *
 * @param {string} item - Wikidata entity id, e.g. 'P1297'.
 * @param {string} langstr - Language codes joined by a comma.
 * @returns {Promise<string>} The label, or +item+ as a fallback.
 */
function fetchLabelForWikidataItem(item, langstr) {
  // A comma-joined list is not sent as the `languages` filter; with several
  // languages all labels are requested and the choice is made client-side.
  const languageFilter = langstr.includes(',')
    ? ''
    : `&languages=${encodeURIComponent(langstr)}`;
  const url = `https://www.wikidata.org/w/api.php?action=wbgetentities&ids=${encodeURIComponent(item)}${languageFilter}&props=labels&format=json&origin=*`;

  return fetch(url)
    .then((response) => {
      if (!response.ok) {
        throw new Error(`HTTP error, status = ${response.status}`);
      }
      return response.json();
    })
    .then((json) => getLabelFromWikidataQueryJSON(json, langstr, item))
    .catch((err) => {
      // Single fallback point for every failure mode above.
      console.log(err);
      console.log('Unable to determine Wikidata item label. Using identifier instead...');
      return item;
    });
}

/**
 * Cached wrapper around +fetchLabelForWikidataItem+.
 *
 * Labels are stored in the cache by requested languages, not by the language
 * actually retrieved. The promise itself is cached, so concurrent callers
 * share one request. An entry is evicted when the lookup fails or falls back
 * to the bare identifier, so a later call can retry.
 *
 * @param {string} item - Wikidata entity id.
 * @param {string} langstr - Language codes joined by a comma.
 * @returns {Promise<string>} Promise produced by +fetchLabelForWikidataItem+.
 */
function fetchLabelForWikidataItemCached(item, langstr) {
  const cacheKey = item + langstr;
  const cached = cachedPropertyLabels[cacheKey];
  if (cached) { return cached; }

  const labelPromise = fetchLabelForWikidataItem(item, langstr);
  cachedPropertyLabels[cacheKey] = labelPromise;

  const evict = () => {
    // Only drop our own entry; a newer request may have replaced it.
    if (cachedPropertyLabels[cacheKey] === labelPromise) {
      Reflect.deleteProperty(cachedPropertyLabels, cacheKey);
    }
  };

  // Side chain used only for cache upkeep. It handles rejection as well, so a
  // failed lookup is neither cached forever nor reported as unhandled here.
  labelPromise.then(
    (label) => {
      if (!label || label === item){
        console.log(`There was an issue with the label for Wikidata item ${item} and language(s) ${langstr}.`);
        evict();
      }
    },
    (err) => {
      console.log(`There was an issue with the label for Wikidata item ${item} and language(s) ${langstr}.`);
      console.log(err);
      evict();
    }
  );
  return labelPromise;
}

// Matches an http:// or https:// prefix, skips an optional "www.", and
// captures what follows up to the first '/', '?', '#', ':' or tab as the named
// group `domain`. The pattern is not anchored, so it can match mid-string.
const domainRegexp = /https?:\/\/(?:www\.)?(?<domain>[^/?#:\t]+)/u;

/**
 * Replace the contents of +container+ with a heading and an ordered list of
 * links, one per usable formatter URL.
 *
 * Formatter URLs that do not start with http:// or https:// are logged and
 * skipped. When no usable URL remains, +container+ is left empty.
 *
 * @param {Iterable<string>} formatterURLs - URL templates with `$1` marking
 *   where the identifier goes.
 * @param {Element} container - Element whose children are replaced.
 * @param {string} headingText - Text of the heading placed above the list.
 * @param {string} id_value - Identifier substituted for every `$1`.
 */
function addIdentifierLinks(formatterURLs, container, headingText, id_value) {
  container.clearChildren();
  const ol = document.createElement('ol');
  let linkCount = 0;

  for (const formatterURL of formatterURLs) {
    const regexpMatch = formatterURL.match(domainRegexp);
    // Require the scheme at the very start; a string that merely contains
    // "http://" somewhere is not accepted as a link target.
    if (!regexpMatch || regexpMatch.index !== 0){
      console.log(`ERROR: formatter URL isn't a recognizable URL: ${formatterURL}. Skipping...`);
      continue;
    }
    const a = document.createElement('a');
    a.innerText = regexpMatch.groups.domain;
    // A replacer function inserts the identifier verbatim: sequences such as
    // "$&" in it are not treated as replacement patterns, and replaceAll
    // fills every "$1" rather than only the first.
    a.href = formatterURL.replaceAll('$1', () => id_value);
    // this should also display qualifiers
    const li = document.createElement('li');
    li.appendChild(a);
    ol.appendChild(li);
    linkCount += 1;
  }

  // Avoid a heading with nothing underneath it.
  if (linkCount === 0) { return; }
  const subheader = document.createElement('h3');
  subheader.innerText = headingText;
  container.appendChild(subheader);
  container.appendChild(ol);
}

/**
 * Build the SPARQL query selecting the values of property +prop+ on +item+.
 *
 * @param {string} item - Wikidata entity id, used with the `wd:` prefix.
 * @param {string} prop - Wikidata property id, used with the `wdt:` prefix.
 * @param {string} varName - Name of the SPARQL variable bound to each value.
 * @returns {string} The query text, with no leading spaces on any line.
 */
function sparqlQueryForWikidataStatementValues(item, prop, varName) {
  const selectClause = `SELECT ?${varName} WHERE {`;
  const triplePattern = `wd:${item} wdt:${prop} ?${varName}`;
  const query = [selectClause, triplePattern, '}'].join('\n');
  // Strip leading spaces from every line of the assembled text, including
  // lines that start inside an interpolated value.
  return query.replace(/^ +/gmu, '');
}

/**
 * Build the Wikidata Query Service URL that returns, as JSON, the result of
 * the query made by +sparqlQueryForWikidataStatementValues+.
 *
 * @param {string} item - Wikidata entity id.
 * @param {string} prop - Wikidata property id.
 * @param {string} varName - SPARQL variable name used in the query.
 * @returns {string} URL with the query percent-encoded in `query`.
 */
function urlForWikidataStatementValuesJSON(item, prop, varName) {
  const encodedQuery = encodeURIComponent(
    sparqlQueryForWikidataStatementValues(item, prop, varName)
  );
  return `https://query.wikidata.org/sparql?format=json&query=${encodedQuery}`;
}

/**
 * Fetch the values of +property+ on +item+ from the Wikidata Query Service.
 *
 * @param {string} item - Wikidata entity id.
 * @param {string} property - Wikidata property id.
 * @returns {Promise<string[]>} The `value` of each result binding. Rejects
 *   with an Error when the HTTP response is not ok.
 */
function fetchWikidataStatementValues(item, property) {
  const varName = "statementValue";
  const requestUrl = urlForWikidataStatementValuesJSON(item, property, varName);

  const parseBody = (response) => {
    if (response.ok) { return response.json(); }
    throw new Error(`HTTP error, status = ${response.status}`);
  };
  const extractValues = (data) =>
    data.results.bindings.map((binding) => binding[varName].value);

  return fetch(requestUrl).then(parseBody).then(extractValues);
}

/**
 * Cached wrapper around +fetchWikidataStatementValues+.
 * This uses fetch and returns a promise, and is intended to replace
 * use_values_of_wikidata_statement.
 *
 * The promise is cached as soon as the request starts, so callers asking for
 * the same item and property while a request is in flight share it. An entry
 * is dropped again when the request fails, so a later call can retry.
 *
 * @param {string} item - Wikidata entity id.
 * @param {string} property - Wikidata property id.
 * @returns {Promise<string[]>} Promise for the statement values; rejects when
 *   the underlying request fails.
 */
function fetchWikidataStatementValuesCached(item, property) {
  const cachekey = item + property;
  const cached = cachedWikidataStatementValues[cachekey];
  if (cached){ return cached; }

  const valuesPromise = fetchWikidataStatementValues(item, property);
  cachedWikidataStatementValues[cachekey] = valuesPromise;

  const evict = () => {
    // Only drop our own entry; a newer request may have replaced it.
    if (cachedWikidataStatementValues[cachekey] === valuesPromise) {
      console.log(`Deleting cache of failed retrieval of item ${item} and property ${property}.`);
      Reflect.deleteProperty(cachedWikidataStatementValues, cachekey);
    }
  };

  // Side chain used only for cache upkeep; the caller still receives the
  // original promise, rejection included.
  valuesPromise.then(
    (values) => { if (!values) { evict(); } },
    () => { evict(); }
  );
  return valuesPromise;
}

/**
 * Build the footer paragraph crediting MultiResolver and inviting the reader
 * to edit the Wikidata item, and append it to the 'multiresolver-footer'
 * element obtained through getOrCreateElement with +div+ as parent.
 *
 * @param {Element} div - Parent passed to getOrCreateElement for the footer.
 * @returns {Element} The 'edit-link' anchor; its href is left for the caller
 *   to set.
 */
function generateFooter(div){
  const makeLink = (text) => {
    const link = document.createElement('a');
    link.innerText = text;
    return link;
  };

  const toolLink = makeLink('MultiResolver');
  toolLink.href = "https://en.wikipedia.org/wiki/User:Daask/MultiResolver";

  const wikidataLink = makeLink('Wikidata item');
  wikidataLink.id = 'edit-link';

  const paragraph = document.createElement('p');
  paragraph.append(
    'This content is automatically generated from Wikidata by ',
    toolLink,
    ". If some of these links look wrong or don't work, you may want to edit the ",
    wikidataLink,
    '.'
  );

  getOrCreateElement('multiresolver-footer', div).appendChild(paragraph);
  return wikidataLink;
}

/**
 * Write the identifier's name and value into the header element and,
 * optionally, into the document title.
 *
 * @param {string} propertyName - Human-readable name of the identifier.
 * @param {string} id_value - The identifier value.
 * @param {Element} headerProper - Element whose innerText becomes the heading.
 * @param {boolean} changeTitle - When truthy, document.title is updated too.
 */
function setHeadings(propertyName, id_value, headerProper, changeTitle){
  const subject = `${propertyName} ${id_value}`;
  headerProper.innerText = `Sources for ${subject}`;
  if (!changeTitle) { return; }
  document.title = `MultiResolver for ${subject}`;
}

/**
 * Build (or refresh) the MultiResolver content for one identifier.
 *
 * The label and both formatter-URL lookups are started first. The container
 * elements are then obtained through getOrCreateElement, the footer is
 * generated if no 'edit-link' element exists yet, and a placeholder heading
 * is shown until the label arrives.
 *
 * @param {Object} params
 * @param {string} params.id_value - The identifier value to resolve.
 * @param {string} params.item - Wikidata id of the identifier's property.
 * @param {string} [params.languages] - Language codes joined by a comma;
 *   'en,mul' is used when this is missing or empty.
 * @param {boolean} [params.changeTitle=false] - Also update document.title.
 * @returns {Array} [div, whenMultiResolverBuilt, whenMultiResolverHasContent]
 *   div is the top-level element for all MultiResolver content.
 *   whenMultiResolverBuilt settles once both resolver lists were generated
 *     (the property label might still get edited afterwards).
 *   whenMultiResolverHasContent fulfils as soon as either the official or
 *     the third-party resolver list has been generated, and is probably when
 *     you should display the MultiResolver in modal mode.
 */
function generateMultiResolver({ id_value, item, languages, changeTitle = false }) {
  const requestedLanguages = languages || 'en,mul';

  // Start all network lookups before touching the DOM.
  const whenLabel = fetchLabelForWikidataItemCached(item, requestedLanguages);
  const whenOfficialURLs = fetchWikidataStatementValuesCached(item, 'P1630');
  const whenThirdPartyURLs = fetchWikidataStatementValuesCached(item, 'P3303');

  const root = getOrCreateElement('multiresolver-main');
  const headerBox = getOrCreateElement('multiresolver-header', root);
  const heading = getOrCreateElement('multiresolver-header-proper', headerBox, 'h2');
  const body = getOrCreateElement('multiresolver-body', root);
  const officialBox = getOrCreateElement('primary-resolvers', body);
  const thirdPartyBox = getOrCreateElement('third-party-resolvers', body);

  const editLink = document.getElementById('edit-link') || generateFooter(root);
  editLink.href = `https://www.wikidata.org/wiki/Special:EntityData/${item}`;

  // Placeholder name first; the real label replaces it once known.
  const showHeading = (name) => {
    setHeadings(name, id_value, heading, changeTitle);
  };
  showHeading('identifier');
  whenLabel.then(showHeading);

  const renderInto = (box, title) => (formatterURLs) => {
    addIdentifierLinks(formatterURLs, box, title, id_value);
  };
  const listsRendered = [
    whenOfficialURLs.then(renderInto(officialBox, "Official sources")),
    whenThirdPartyURLs.then(renderInto(thirdPartyBox, "Third-party sources")),
  ];

  const whenAllRendered = Promise.all(listsRendered);
  const whenAnyRendered = Promise.any(listsRendered);
  return [root, whenAllRendered, whenAnyRendered];
}

/**
 * Build the MultiResolver from parameters in the page URL.
 *
 * `property`, `languages` and `value` are read from the query string, falling
 * back to the same names in the fragment when the query-string value is
 * missing or empty.
 *
 * @param {boolean} [changeTitle=true] - Passed on to generateMultiResolver.
 * @returns {*} The first element of generateMultiResolver's result.
 */
function generateMultiResolverFromURL(changeTitle = true){
  const { search, hash } = window.location;
  const fromQuery = new URLSearchParams(search);
  // Present the fragment as a query string so it parses the same way.
  const fromHash = new URLSearchParams(hash.replace(/^#/u, '?'));
  const lookup = (name) => fromQuery.get(name) || fromHash.get(name);

  const [container] = generateMultiResolver({
    item: lookup("property"),
    languages: lookup("languages"),
    id_value: lookup("value"),
    changeTitle,
  });
  return container;
}

/**
 * Show the MultiResolver for one identifier in a modal dialog.
 *
 * The dialog is displayed as soon as at least one of the resolver lists has
 * been generated, so a failure of the other lookup does not keep it closed.
 * It is closed by the close button or by a click on the shade around it.
 *
 * @param {Object} queryParams - Passed on to generateMultiResolver; must
 *   contain the keys `item` and `id_value`.
 * @returns {boolean} Always true.
 * @throws {Error} When `item` or `id_value` is not a key of +queryParams+.
 */
function displayMultiResolverModal(queryParams) {
  for (const key of ['item', 'id_value']) {
    if (!(key in queryParams)) {
      throw new Error(`Required parameter ${key} not provided to displayMultiResolverModal.`);
    }
  }

  const [containerNode, whenBuilt, whenHasContent] = generateMultiResolver(queryParams);
  const modalShade = getOrCreateElement('modal', document.body);
  modalShade.className = "modal";
  modalShade.appendChild(containerNode);

  const hideModal = () => { modalShade.style.display = "none"; };

  // A partial failure (one list could not be loaded) is only worth a log line.
  whenBuilt.catch((err) => {
    console.log('MultiResolver could not load every list of sources.');
    console.log(err);
  });

  whenHasContent.then(() => {
    const headerContainer = document.getElementById('multiresolver-header');
    const closeButton = getOrCreateElement('close', headerContainer, 'span');
    closeButton.replaceChildren(document.createEntityReference('times'));

    // When the user clicks on <span> (x), close the modal dialog
    closeButton.onclick = hideModal;

    // When the user clicks on the shade outside of the dialog, close it.
    // The handler lives on the shade itself so that the page-wide
    // window.onclick is left alone.
    modalShade.onclick = (event) => {
      if (event.target === modalShade) { hideModal(); }
    };

    // This is the only place where modalShade is displayed.
    modalShade.style.display = "block";
  }).catch((err) => {
    // Neither list could be produced, or setting up the dialog failed:
    // make sure the shade is not left covering the page.
    hideModal();
    console.log('Unable to display MultiResolver.');
    console.log(err);
  });
  return true;
}

/**
 * Make +anchor+ open the MultiResolver modal for the given identifier.
 *
 * The values are handed to displayMultiResolverModal from a click handler.
 * They are never spliced into script text, so an identifier containing
 * quotes or other punctuation cannot be interpreted as code.
 *
 * @param {HTMLAnchorElement} anchor - Link to activate.
 * @param {string} property - Wikidata id of the identifier's property.
 * @param {string} identifier - The identifier value.
 */
function linkWikidataIdentifierModal(anchor, property, identifier){
  anchor.href = '#';
  // Assigning onclick (rather than adding a listener) keeps repeated calls
  // on the same anchor from stacking handlers.
  anchor.onclick = (event) => {
    // Suppress the navigation to "#".
    event.preventDefault();
    displayMultiResolverModal({ item: property, id_value: identifier });
  };
}

/**
 * Turn the identifier inside every '.identifier-linkable-via-wikidata'
 * element into a link that opens the MultiResolver modal.
 *
 * The property id is read from the element's data-wikidata-property
 * attribute, and the identifier is the trimmed text of its last child node.
 * That node is replaced by a non-breaking space followed by the link.
 * An element that cannot be processed is logged, together with the error,
 * and skipped.
 */
function activateWikidataPropertyLinks() {
  for (const el of document.querySelectorAll('.identifier-linkable-via-wikidata')) {
    try {
      const item = el.attributes['data-wikidata-property'].value;
      const identifierNode = el.lastChild;
      const id_value = identifierNode.textContent.trim();
      const a = document.createElement('a');
      a.innerText = id_value;
      linkWikidataIdentifierModal(a, item, id_value);
      identifierNode.replaceWith(document.createEntityReference('nbsp'), a);
    } catch (err) {
      // Keep going with the remaining elements, but report what went wrong.
      console.log(`Error while adding link to element: ${el.outerHTML}`, err);
    }
  }
}

/**
 * Find identifiers that directly follow a label link and turn each into a
 * link that opens the MultiResolver modal.
 *
 * For every `a` element whose href equals +href+, the node following it is
 * examined. If its text starts (after optional whitespace) with something
 * matching +identifier_regexp+, the node is replaced by a non-breaking space,
 * a link around the identifier, and the untouched remainder of the text.
 *
 * In theory, both identifier_regexp and href could be pulled from Wikidata.
 * In practice, any script providing the property can provide the others as
 * well to improve performance.
 *
 * @param {string} property - Wikidata id of the identifier's property.
 * @param {string} identifier_regexp - RegExp source matching one identifier.
 * @param {string} href - Exact href of the label links to look for.
 */
function findAndLinkIdentifiersByHref(property, identifier_regexp, href){
  // Named groups keep the result correct when identifier_regexp contains
  // capturing groups of its own. [\s\S]* (unlike .*) also spans line breaks,
  // so no trailing text is lost when the node is replaced.
  const identifierPlusRegexp = new RegExp(
    `^\\s*(?<multiResolverIdentifier>${identifier_regexp})(?<multiResolverRest>[\\s\\S]*)`
  );
  const labelLinks = $(`a[href='${href}']`);
  const followingNodes = labelLinks.map((i, el) => el.nextSibling);
  for (const textNode of followingNodes){
    const regexpMatch = textNode.textContent.match(identifierPlusRegexp);
    if (!regexpMatch){
      console.log(`Text doesn't match RegExp for property ${property}: ${textNode.textContent}`);
      continue;
    }
    const id_value = regexpMatch.groups.multiResolverIdentifier;
    const textAfterIdentifier = regexpMatch.groups.multiResolverRest;
    const anchor = document.createElement('a');
    anchor.href = '#';
    anchor.innerText = id_value;
    const replacementNodes = [document.createEntityReference('nbsp'), anchor];
    if (textAfterIdentifier){
      replacementNodes.push(document.createTextNode(textAfterIdentifier));
    }
    textNode.replaceWith(...replacementNodes);
    linkWikidataIdentifierModal(anchor, property, id_value);
  }
}

// Begin en.wikipedia-specific code.

/**
 * Link every identifier that follows a link to /wiki/EIN_(identifier),
 * using Wikidata property P1297 and treating a run of digits as the
 * identifier.
 */
function findEINs(){
  findAndLinkIdentifiersByHref('P1297', '\\d+', '/wiki/EIN_(identifier)');
}

// Register the function itself. Writing findEINs() here would run it
// immediately, possibly before the page content exists, and would hand its
// return value (undefined) to jQuery instead of a handler.
$( document ).ready(findEINs);