User:Phlsph7/ListUnreferencedParagraphs.js
Appearance
Code that you insert on this page could contain malicious content capable of compromising your account. If you import a script from another page with "importScript", "mw.loader.load", "iusc", or "lusc", take note that this causes you to dynamically load a remote script, which could be changed by others. Editors are responsible for all edits and actions they perform, including by scripts. User scripts are not centrally supported and may malfunction or become inoperable due to software changes. A guide to help you find broken scripts is available. If you are unsure whether code you are adding to this page is safe, you can ask at the appropriate village pump. This code will be executed when previewing this page. |
This user script seems to have a documentation page at User:Phlsph7/ListUnreferencedParagraphs. |
(function(){
const scriptName = 'List Unreferenced Paragraphs';

// Once mediawiki.util is loaded and the DOM is ready, add one toolbox link per tool.
$.when(mw.loader.using('mediawiki.util'), $.ready).then(function(){
  // Each entry pairs the link label with the function the link runs.
  const toolboxEntries = [
    [scriptName, listUnreferencedParagraphs],
    ['Highlight Unreferenced Paragraphs', highlightUnreferencedParagraphs]
  ];
  for(const [label, runTool] of toolboxEntries){
    const portletLink = mw.util.addPortletLink('p-tb', '#', label, label + 'Id');
    portletLink.onclick = function(e){
      // The link target is '#', so suppress the default navigation.
      e.preventDefault();
      runTool();
    };
  }
});
function listUnreferencedParagraphs(){
// Delay in milliseconds between starting the processing of consecutive articles.
const timeout = 50;
// Set to true by the Stop button; checked before each article is processed.
let stopProcessing = false;
// Hide the regular page content while the tool is shown; the Close button shows it again.
const content = document.getElementById('content');
const contentContainer = content.parentElement;
content.style.display = 'none';
// Append a placeholder div next to the content and replace it with the tool's markup via outerHTML.
let scriptContainer = document.createElement('div');
contentContainer.appendChild(scriptContainer);
// The markup holds the title input (taList), the overview table (tbodyCounter) and the
// Start/Stop/Copy/Close buttons that the handlers below look up by id.
// NOTE(review): the embedded <style> uses bare textarea/button selectors, so it is not scoped to #scriptContainer.
scriptContainer.outerHTML = `
<div id="scriptContainer" style="display:flex; flex-direction: column;">
<style>
textarea {
resize: none;
padding: 5px;
}
button {
margin: 5px;
}
</style>
<h1>Unreferenced Paragraph Counter</h1>
<div style="display:flex;">
<div style="flex: 1; display:flex; flex-direction: column; margin: 5px; height: 50vh; overflow-y: auto;">
<label for="taList">Article Titles</label>
<textarea id="taList" style="height: 100%;"></textarea>
</div>
<div style="flex: 2; display:flex; flex-direction: column; margin: 5px; height: 50vh; overflow-y: auto;">
<label for="tableCounter">Overview table</label>
<table id="tableCounter" class="wikitable" style="height: 100%; margin: 0px; width: 100%; border-collapse: collapse;">
<thead>
<tr>
<th>Article title</th>
<th title="paragraphs that require and lack references">Paragraphs without references</th>
<th>Maintenance tags</th>
</tr>
</thead>
<tbody id="tbodyCounter">
</tbody>
</table>
</div>
</div>
<div style="display:flex; flex-direction: column">
<div style="display:flex;">
<button id="btStart" style="flex: 1;">Start</button>
<button id="btStop" disabled style="flex: 1;">Stop</button>
<button id="btCopy" style="flex: 1;">Copy</button>
</div>
<div>
<button id="btClose" style="width: 100%;">Close</button>
</div>
</div>
</div>
`;
// Start button: read the entered titles, rebuild the overview table with one row per
// title and start processing the articles one after another.
const btStart = $('#btStart');
btStart.click(function(){
  stopProcessing = false;
  btStart.prop("disabled", true);
  btStop.prop("disabled", false);

  // One title per line. Lines are trimmed and blank lines dropped, so an empty
  // textarea or stray blank lines do not create rows or requests for an empty title.
  const enteredTitles = $('#taList').val()
    .split('\r').join('')
    .split('\n')
    .map(function(line){ return line.trim(); })
    .filter(function(line){ return line.length > 0; });

  // remove duplicates
  const articleTitles = [...new Set(enteredTitles)];

  // populate table; the row index is part of the cell ids that processArticle fills in
  $("#tbodyCounter").empty();
  for(let i = 0; i < articleTitles.length; i++){
    const linkHTML = getLinkHTML(articleTitles[i]);
    const row = `<tr><td>${linkHTML}</td><td id="td_unref_${i}" style="text-align: center;">-</td><td id="td_tags_${i}"></td></tr>`;
    $("#tbodyCounter").append(row);
  }

  recursivelyProcessArticles(articleTitles, 0, timeout);

  // Returns the HTML of a link to the article. The title is set via textContent,
  // so the serialized markup is escaped before it is placed into the row string.
  function getLinkHTML(articleTitle) {
    const link = document.createElement('a');
    link.href = 'https://en.wikipedia.org/wiki/' + encodeURIComponent(articleTitle);
    link.textContent = articleTitle;
    return link.outerHTML;
  }
});
// Stop button: hand control back to the Start button and raise the flag that
// recursivelyProcessArticles checks before each article.
const btStop = $('#btStop');
btStop.click(() => {
  btStart.prop("disabled", false);
  btStop.prop("disabled", true);
  stopProcessing = true;
});
// Copy button: put the text of the overview table body on the clipboard and show a notification.
const btCopy = $('#btCopy');
btCopy.click(() => {
  copyToClipboard(getTextViaSelection());
  mw.notify("The table was copied to the clipboard.");

  // Selects the contents of the table body and returns the selection as a string.
  function getTextViaSelection(){
    const tableRange = document.createRange();
    tableRange.selectNodeContents($('#tbodyCounter')[0]);
    const pageSelection = window.getSelection();
    pageSelection.removeAllRanges();
    pageSelection.addRange(tableRange);
    return pageSelection.toString();
  }

  // Copies the text by selecting it inside a temporary textarea and running the 'copy' command.
  function copyToClipboard(text) {
    const scratchArea = document.createElement('textarea');
    scratchArea.value = text;
    document.body.appendChild(scratchArea);
    scratchArea.select();
    document.execCommand('copy');
    document.body.removeChild(scratchArea);
  }
});
// Close button: stop processing, remove the tool from the page and show the page content again.
const btClose = $('#btClose');
btClose.click(() => {
  // Reuse the Stop handler so the stop flag and button states are updated.
  btStop.trigger('click');
  const toolRoot = document.getElementById('scriptContainer');
  const toolParent = toolRoot.parentElement;
  toolParent.removeChild(toolRoot);
  content.style.display = '';
});
// Processes the article at `index` and schedules the next one after `timeout` milliseconds.
// When the stop flag is set or the list is exhausted, the Stop label is reset and the
// Stop button's click handler is triggered.
function recursivelyProcessArticles(articleTitles, index, timeout){
  const isFinished = stopProcessing || index >= articleTitles.length;
  if(isFinished){
    btStop.text(`Stop`);
    btStop.trigger('click');
    return;
  }
  // Show the progress on the Stop button.
  btStop.text(`Stop (${index}/${articleTitles.length})`);
  processArticle(articleTitles, index);
  setTimeout(function(){
    recursivelyProcessArticles(articleTitles, index + 1, timeout);
  }, timeout);
}
// Requests the parsed HTML of one article from the English Wikipedia API and writes the
// number of unreferenced paragraphs and the maintenance-tag summary into the article's row.
//   articleTitles - array of article titles
//   index         - position of the article; also part of the ids of the cells to fill
// Both cells show 'error' when the response carries no parsed text or when the request or
// the analysis fails.
function processArticle(articleTitles, index){
  const articleTitle = articleTitles[index];
  const cellUnrefId = `td_unref_${index}`;
  const cellTagsId = `td_tags_${index}`;
  const articleSearchTerm = encodeURIComponent(articleTitle);
  const wikiApiUrl = `https://en.wikipedia.org/w/api.php?action=parse&page=${articleSearchTerm}&format=json`;

  function markRowAsFailed(){
    $('#' + cellUnrefId).text('error');
    $('#' + cellTagsId).text('error');
  }

  fetch(wikiApiUrl).then(function(response){
    return response.json();
  }).then(function(data){
    if (!(data && data.parse && data.parse.text && data.parse.text['*'])) {
      markRowAsFailed();
      return;
    }
    // Parse the article into a separate document so the current page is left untouched.
    const doc = new DOMParser().parseFromString(data.parse.text['*'], 'text/html');
    const paragraphContainer = $(doc).find('.mw-parser-output').eq(0);
    const unreferencedParagraphs = getParagraphInfo(paragraphContainer).unreferencedParagraphs;
    $('#' + cellUnrefId).text(`${unreferencedParagraphs.length}`);
    // The summary is built from text found in the article, so insert it as text, not as HTML.
    $('#' + cellTagsId).text(getMaintenanceTagString(paragraphContainer));
  }).catch(function(error){
    // Network failures, invalid JSON and errors thrown during the analysis end up here.
    console.error(`Could not process "${articleTitle}"`, error);
    markRowAsFailed();
  });
}
// Counts the article message boxes ('.ambox') and inline templates ('.Inline-Template')
// inside `element` (a jQuery object) and returns a summary of the form
// "<count>x <type>, <count>x <type>", or '' when none are found.
function getMaintenanceTagString(element){
  // Map keeps the order in which the types are first seen.
  const templateCounts = new Map();

  for(const ambox of getAmboxes(element)){
    countTemplate(getAmboxTyp(ambox));
  }
  for(const inlineTemplate of getInlineTemplates(element)){
    countTemplate(getInlineTemplateType(inlineTemplate));
  }
  return getOverviewString();

  function getInlineTemplates(element){
    return element.find('.Inline-Template').toArray();
  }

  // The type is the template's text without its first and last character.
  function getInlineTemplateType(inlineTemplate){
    const innerText = inlineTemplate.innerText;
    return innerText.substring(1, innerText.length - 1);
  }

  function getAmboxes(element){
    return element.find('.ambox').toArray();
  }

  // The type is taken from the first 'box-…' class, with underscores turned into spaces.
  function getAmboxTyp(ambox){
    for(const entry of ambox.classList){
      if(entry.substring(0,4) === 'box-'){
        return entry.substring(4).split('_').join(' ');
      }
    }
    // No 'box-…' class: fall back to the text of the box itself.
    return ambox.innerText.trim();
  }

  function countTemplate(type){
    templateCounts.set(type, (templateCounts.get(type) || 0) + 1);
  }

  function getOverviewString(){
    const parts = [];
    for(const [type, count] of templateCounts){
      parts.push(count + 'x ' + type);
    }
    return parts.join(', ');
  }
}
}
// Colours the analysed paragraphs of the current page: '#faa' for paragraphs without
// a reference, '#afa' for the others, then reports the number of unreferenced ones.
function highlightUnreferencedParagraphs(){
  const articleBody = $('#mw-content-text').find('.mw-parser-output').eq(0);
  const { includedParagraphs, unreferencedParagraphs } = getParagraphInfo(articleBody);
  includedParagraphs.forEach(function(paragraph){
    const lacksReference = unreferencedParagraphs.includes(paragraph);
    paragraph.style.background = lacksReference ? '#faa' : '#afa';
  });
  console.log(unreferencedParagraphs);
  mw.notify(`${unreferencedParagraphs.length} unreferenced paragraphs found`);
}
function getParagraphInfo(paragraphContainer){
const minimalParagraphLength = 100;
hideRefs(paragraphContainer[0]);
combineMathBlocks(paragraphContainer.children().toArray());
addElementsFollowingParagraphs(paragraphContainer.children().toArray());
addElementsPrecedingParagraphs(paragraphContainer.children().toArray());
showRefs(paragraphContainer[0]);
const children = paragraphContainer.children();
const releventChildren = [];
for(let child of children){
if(child.tagName.toLowerCase() === 'p'){
releventChildren.push(child);
}
else if(child.classList.contains('mw-heading2')){
releventChildren.push(child);
}
}
const articleObject = convertToObject(releventChildren);
removeIrrelevantSections(articleObject);
const paragraphsInRelevantSections = convertToSimpleArray(articleObject);
const includedParagraphs = removeShortParagraphs(paragraphsInRelevantSections);
const unreferencedParagraphs = getUnreferencedParagraphs(includedParagraphs);
return {
'includedParagraphs': includedParagraphs,
'unreferencedParagraphs': unreferencedParagraphs
};
// Sets display:none on every reference and inline template inside the element.
function hideRefs(element){
  element.querySelectorAll('.reference, .Inline-Template').forEach(function(node){
    node.style.display = 'none';
  });
}
// Clears the inline display value of every reference and inline template inside the element.
function showRefs(element){
  element.querySelectorAll('.reference, .Inline-Template').forEach(function(node){
    node.style.display = '';
  });
}
// An element consisting only of a math formula artificially divides a single paragraph
// into parts, so the formula and the element after it are moved into the element before it.
// The first and the last element of the list are never treated as formulas.
function combineMathBlocks(elements){
  // True when the first child is a math element and carries all of the element's text.
  const consistsOnlyOfFormula = function(candidate){
    const first = candidate.firstChild;
    if(!first || !first.classList){
      return false;
    }
    return first.classList.contains('mwe-math-element') && candidate.innerText === first.innerText;
  };

  for(let position = 1; position < elements.length - 1; position++){
    const current = elements[position];
    if(!consistsOnlyOfFormula(current)){
      continue;
    }
    const target = elements[position - 1];
    target.appendChild(current);
    target.appendChild(elements[position + 1]);
  }
}
// If the meaning of a passage does not end with the HTML paragraph (its text ends with
// ',', ':' or a letter), move the next element into the paragraph. When that next element
// is a STYLE or LINK element, the element after it is moved as well.
function addElementsFollowingParagraphs(elements){
  const nonEndingCharacters = [',', ':'];

  for(let i = 0; i < elements.length-1; i++){
    const element = elements[i];
    if(element.tagName !== 'P'){
      continue;
    }
    // Work on a copy without <style> elements so their text cannot end up as the last character.
    const clone = element.cloneNode(true);
    removeStyleElements(clone);
    const innerText = clone.innerText.trim();
    if(innerText.length === 0){
      continue;
    }
    const lastCharacter = innerText[innerText.length-1];
    if(!nonEndingCharacters.includes(lastCharacter) && !isLetter(lastCharacter)){
      continue;
    }
    const nextElement = elements[i+1];
    element.appendChild(nextElement);
    if(nextElement.tagName === 'STYLE' || nextElement.tagName === 'LINK'){
      // NOTE(review): this bound never moves the last element of the list;
      // `i+2 < elements.length` may have been intended - confirm before changing.
      if(i+2 < elements.length -1){
        element.appendChild(elements[i+2]);
      }
    }
  }

  // A character counts as a letter when it has distinct lower and upper case forms.
  function isLetter(character){
    return character.toLowerCase() !== character.toUpperCase();
  }

  function removeStyleElements(element){
    // getElementsByTagName returns a live collection: removing nodes while iterating it
    // skips every other element, so iterate over a snapshot instead.
    for(const styleElement of Array.from(element.getElementsByTagName('style'))){
      styleElement.remove();
    }
  }
}
// If a paragraph's text starts with a lower-case letter, it starts in the middle of a
// passage: the element before it is moved to the front of the paragraph.
function addElementsPrecedingParagraphs(elements){
  // Lower-case letter: has distinct case forms and equals its own lower-case form.
  const isLowerCaseLetter = function(character){
    const lower = character.toLowerCase();
    return lower !== character.toUpperCase() && character === lower;
  };

  elements.forEach(function(element, position){
    if(position === 0){
      return; // the first element has no predecessor
    }
    const text = element.innerText.trim();
    if(element.tagName !== 'P' || text.length === 0){
      return;
    }
    if(isLowerCaseLetter(text[0])){
      element.insertBefore(elements[position - 1], element.firstChild);
    }
  });
}
// Groups the elements by section: a 'mw-heading2' element starts a new section named after
// its text without '[edit]'; every other element is added to the current section.
// Elements before the first heading belong to "Lead".
function convertToObject(elementArray){
  const sections = { Lead: [] };
  let activeSection = 'Lead';
  elementArray.forEach(function(node){
    if(!node.classList.contains('mw-heading2')){
      sections[activeSection].push(node);
      return;
    }
    activeSection = node.innerText.split('[edit]').join('');
    sections[activeSection] = [];
  });
  return sections;
}
// Deletes the sections whose names are on the exclusion list from the article object (in place).
function removeIrrelevantSections(articleObject){
  const excludedSections = ['Lead', 'Plot', 'Plots', 'Plot summary', 'Plot synopsis', 'Synopsis', 'Storylines', 'Appearances', 'Further reading', 'See also', 'External links', 'References', 'Bibliography', 'Notes', 'Selected publications', 'Selected works', 'Cited sources', 'Sources', 'Footnotes'];
  Object.keys(articleObject)
    .filter(function(sectionName){ return excludedSections.includes(sectionName); })
    .forEach(function(sectionName){ delete articleObject[sectionName]; });
}
// Concatenates the element lists of all sections into one array, in section order.
function convertToSimpleArray(articleObject){
  return Object.keys(articleObject).reduce(function(collected, sectionName){
    return collected.concat(articleObject[sectionName]);
  }, []);
}
// Returns the paragraphs whose text has at least minimalParagraphLength characters.
function removeShortParagraphs(paragraphArray){
  return paragraphArray.filter(function(candidate){
    return candidate.innerText.length >= minimalParagraphLength;
  });
}
// Returns the paragraphs for which isUnreferenced reports true, in their original order.
function getUnreferencedParagraphs(paragraphArray){
  return paragraphArray.filter(function(candidate){
    return isUnreferenced(candidate);
  });
}
// A paragraph is unreferenced when it contains neither a '.reference' element
// nor a link whose href starts with '#CITEREF'.
function isUnreferenced(paragraph){
  const $paragraph = $(paragraph);
  if($paragraph.find('.reference').length > 0){
    return false;
  }
  const hasHarvRef = $paragraph.find('a').toArray().some(function(link){
    const target = link.getAttribute('href');
    return Boolean(target) && target.startsWith('#CITEREF');
  });
  return !hasHarvRef;
}
}
})();