User:Brighterorange/punctuation.js
Appearance
Code that you insert on this page could contain malicious content capable of compromising your account. If you import a script from another page with "importScript", "mw.loader.load", "iusc", or "lusc", take note that this causes you to dynamically load a remote script, which could be changed by others. Editors are responsible for all edits and actions they perform, including by scripts. User scripts are not centrally supported and may malfunction or become inoperable due to software changes. A guide to help you find broken scripts is available. If you are unsure whether code you are adding to this page is safe, you can ask at the appropriate village pump. This code will be executed when previewing this page. |
Documentation for this user script can be added at User:Brighterorange/punctuation. |
/* <nowiki> */
// Release identifier of this script.
var punctuationVersion = "19 April 2008";
// Running counter from which edit-chunk ids are drawn.
var punctuationID = 1;
// Chunk list produced by the most recent run (read by the undo/hide handlers).
var punctuationEdits = undefined;
// Edit-summary text captured at the end of a run, before the "(auto: ...)" note is written.
var punctuationOriginalSummary = undefined;
// Compared with punctuationOriginalSummary when deciding whether to tick "minor edit".
var punctuationPageOriginalSummary = undefined;

// How many characters of surrounding context are shown next to a change.
var puCONTEXT = 40;

// Edit kinds. Each value is an index into puDESCRIPTIONS.
var puENDASH = 0;
var puSPELL = 1;
var puEMDASH = 2;
var puCOMMA = 3;
var puPERCENT = 4;
var puBORN = 5;
var puLINKSPACE = 6;
var puDECADE = 7;
var puPAREN = 8;
var puXHTML = 9;
var puREF = 10;
var puSEMICOLON = 11;
var puCITYSTATE = 12;

// Human-readable label for every edit kind, in kind order.
var puDESCRIPTIONS = [
  "en dash",
  "spelling",
  "em dash",
  "comma",
  "percent",
  "born",
  "link space",
  "decade",
  "paren",
  "xhtml",
  "ref",
  "semicolon",
  "city-state"
];
// Number of edit kinds.
var puNDESC = puDESCRIPTIONS.length;
// TODO:
// The TODO list is maintained in the development version, at [[User:Brighterorange/punctuation2.js]].
// Feel free to make new suggestions on my [[User talk:Brighterorange|talk page]].
function doPunctuation() {
  // We just need some prominent element for progress messages; the
  // "From Wikipedia" subtitle is used.
  var statusBox = document.getElementById('siteSub');
  var showStatus = function (label) {
    statusBox.innerHTML = '<span style="border : 1px solid #333399; padding : 4px; margin : 4px;">' + label + '</span>';
  };

  showStatus('Running autopunctuation...');
  puDisableEditing(true);

  // The document is a list of chunks. A chunk is either raw text (no
  // replacement suggested) or an edit (replacement text, reason, original
  // text, hidden flag). Start from the singleton raw chunk.
  var edits = new puCons(puRaw(document.editform.wpTextbox1.value), undefined);

  // Passes in execution order. Each label is displayed before its pass runs,
  // and each pass runs from its own 50ms timeout so the label can appear first.
  var passes = [
    { label: 'References...', run: function (l) { return puRawMapConcat(puRef, l); } },
    { label: 'Spelling...',   run: function (l) { return puSpell(l); } },
    { label: 'Born style...', run: function (l) { return puBorn(l); } },
    { label: 'Em dashes...',  run: function (l) { return puRawMapConcat(puEmDash, l); } },
    { label: 'En dashes...',  run: function (l) { return puRawMapConcat(puEnDash, l); } },
    { label: 'Commas...',     run: function (l) { return puRawMapConcat(puComma, l); } },
    { label: 'Semicolons...', run: function (l) { return puRawMapConcat(puSemicolon, l); } },
    { label: 'Link space...', run: function (l) { return puRawMapConcat(puLinkSpace, l); } },
    { label: 'Decade...',     run: function (l) { return puRawMapConcat(puDecade, l); } },
    { label: 'Parens...',     run: function (l) { return puRawMapConcat(puParen, l); } },
    { label: 'XHTML...',      run: function (l) { return puXhtml(l); } },
    { label: 'City-State...', run: function (l) { return puCityState(l); } }
  ];

  // Publish the result: remember the chunk list, rewrite the textarea and
  // the summary, then show the interface for undos.
  var finish = function () {
    punctuationEdits = edits;
    punctuationOriginalSummary = document.editform.wpSummary.value;
    document.editform.wpTextbox1.value = puRewrite(edits);
    document.editform.wpSummary.value = puSummary(edits);
    puShowChanges("", edits);
  };

  var runPass = function (index) {
    edits = passes[index].run(edits);
    var next = index + 1;
    if (next < passes.length) {
      showStatus(passes[next].label);
      setTimeout(function () { runPass(next); }, 50);
    } else {
      finish();
    }
  };

  showStatus(passes[0].label);
  setTimeout(function () { runPass(0); }, 50);
}
// Dims the main edit textarea (flag true) or restores its normal look
// (flag false) as a visual "busy" cue.
// We deliberately don't use the textbox's "disable" field, since it makes
// the form submit an empty textbox, blanking the article!
function puDisableEditing(flag) {
  var box = document.editform.wpTextbox1;
  if (flag) {
    box.style.opacity = "0.5";
    // legacy IE equivalent of opacity
    box.style.filter = "Alpha(Opacity=50)";
  } else {
    // Use the empty string to clear the inline declarations. Assigning
    // undefined is converted to the string "undefined", which is not a valid
    // value and is ignored, so the textarea would stay dimmed.
    box.style.opacity = "";
    box.style.filter = "";
  }
}
// Builds the edit-summary text for a chunk list: the summary captured in
// punctuationOriginalSummary followed by "(auto: N kind; M kind...)".
// With no edits the captured summary is returned unchanged.
function puSummary(edits) {
  // one counter per edit kind
  var tally = [];
  for (var k = 0; k < puNDESC; k++) tally[k] = 0;
  for (var cell = edits; cell != undefined; cell = cell.tail) {
    if (cell.head.israw) continue;
    tally[cell.head.what]++;
  }

  var parts = [];
  for (var kind = 0; kind < puNDESC; kind++) {
    if (tally[kind] > 0) parts.push(tally[kind] + " " + puDESCRIPTIONS[kind]);
  }
  var auto = parts.join("; ");
  if (auto == "") return punctuationOriginalSummary;

  if (punctuationOriginalSummary == punctuationPageOriginalSummary) {
    // user never did anything except run punctuation, so minor
    document.editform.wpMinoredit.checked = true;
  }
  var separator = (punctuationOriginalSummary == "") ? "" : " ";
  return punctuationOriginalSummary + separator + "(auto: " + auto + ")";
}
// Builds an HTML table with one cell per edit kind that occurs in the chunk
// list; each cell shows the count and ON / OFF / HIDE controls for that kind.
function puKindButtons(edits) {
  var tally = [];
  for (var k = 0; k < puNDESC; k++) tally[k] = 0;
  for (var cell = edits; cell != undefined; cell = cell.tail) {
    if (!cell.head.israw) tally[cell.head.what]++;
  }

  // shared opening of every clickable control
  var control = '<span style="cursor : hand; cursor : pointer;" onClick="';
  var html = "<table><tr>";
  // for any edit kind we did do, give buttons for it
  for (var kind = 0; kind < puNDESC; kind++) {
    if (!(tally[kind] > 0)) continue;
    html = html +
      '<td><div style="padding : 3px; margin-right: 6px; border : 2px solid #333377; background : #DDDDFF"><b><center>' +
      tally[kind] + " " + puDESCRIPTIONS[kind] + '</center></b>' +
      '<br/> ' + control + 'puAllOn(' + kind + ');">ON</span> ' +
      control + 'puAllOff(' + kind + ');">OFF</span> ' +
      control + 'puAllHide(' + kind + ');">HIDE</span>' +
      '</div></td>';
  }
  return html + '</tr></table>';
}
// Appends ne to the running context ol and keeps only the last puCONTEXT
// characters of the result.
function puContextBefore(ol, ne) {
  var joined = ol + ne;
  return (joined.length < puCONTEXT) ? joined : joined.substring(joined.length - puCONTEXT);
}
// Collects up to puCONTEXT characters of the text that follows, reading raw
// chunks as their text and edit chunks as their replacement.
function puContextAfter(l) {
  var collected = "";
  var cell = l;
  while (cell != undefined) {
    collected = collected + (cell.head.israw ? cell.head.text : cell.head.rep);
    if (collected.length >= puCONTEXT) return collected.substr(0, puCONTEXT);
    cell = cell.tail;
  }
  return collected;
}
// Creates the menu shown while in showchanges mode.
// For now it is just a 'done' button that calls puDoneClick().
function puMenu() {
  var css = 'cursor:hand; cursor:pointer; border:2px outset #559955;' +
    'padding:4px;margin:4px;background:#DDFFDD';
  return '<div onclick="puDoneClick();" style="' + css + '">click this when done with changes</div>';
}
// Click handler of the 'done' button: re-enables the textbox and gets rid of
// all the shown changes by emptying the status element.
function puDoneClick() {
  puDisableEditing(false);
  document.getElementById('siteSub').innerHTML = '';
}
// From a chunk list l, puts an HTML summary with edit buttons into the
// status element. c is the context of some previous characters.
// XXX actually, an all-deactivated list should probably count as "no changes" too...
function puShowChanges(c, l) {
  var target = document.getElementById('siteSub');
  target.innerHTML = (l == undefined)
    ? '<p>Punctuation: no changes.</p>'
    : puShowSomeChanges(c, l);
}
// Renders a non-empty chunk list as HTML: menu, per-kind buttons, then one
// entry per chunk, then the menu again. Raw chunks show as "(...)", hidden
// edits as "(hidden)", and visible edits as a clickable "src→dst" span
// (click calls puUndo) with leading and trailing context around it.
// c is the context text preceding the first chunk.
function puShowSomeChanges(c, l) {
  var out = puMenu() + puKindButtons(l) + "<br />";
  var context = c;
  for (var cell = l; cell != undefined; cell = cell.tail) {
    var chunk = cell.head;
    if (chunk.israw) {
      out = out + '<span style="color:#AAAAAA">(...)</span>';
      context = puContextBefore(context, chunk.text);
      continue;
    }
    if (chunk.hidden) {
      out = out + '<span style="color:#AAAAAA">(hidden)</span>';
      context = puContextBefore(context, chunk.rep);
      continue;
    }
    // XXX hover could select in edit box??
    // An edit may carry display-only source/destination strings.
    var shownSrc = (chunk.dispsrc == undefined) ? chunk.orig : chunk.dispsrc;
    var shownDst = (chunk.dispdst == undefined) ? chunk.rep : chunk.dispdst;
    var following = puContextAfter(cell.tail);
    out = out + '<br/> (' + puHighlightContext(puEscape(context)) +
      '<span id="puEdit' + chunk.id + '" style="border : 1px solid #FF9999; background : #FFDDDD; cursor : hand; cursor : pointer;"' +
      ' onClick="puUndo(' + chunk.id + ');">' +
      puHighlight(puEscape(shownSrc)) + "→" + puHighlight(puEscape(shownDst)) + '</span>' +
      puHighlightContext(puEscape(following)) +
      ') ';
    // the context handed to later chunks includes this replacement
    context = puContextBefore(context, chunk.rep);
  }
  return out + puMenu();
}
// Prepares an (already HTML-escaped) source/replacement string for display.
// Spaces are shown as light underscores, since many of the edits involve the
// deletion/insertion of spaces, and the __PUREF__ placeholder is shown as a
// coloured "<REF>" marker.
function puHighlight(s) {
  // spaces first, or the replacement would mess up the spaces in our own html
  s = s.replace(/ /g, '<span style="color:#888888">_</span>');
  // The marker text must be entity-escaped: a literal <REF> would be parsed
  // as an (empty) element by innerHTML and nothing would be displayed.
  return s.replace(/__PUREF__/g, '<span style="color:#AA55AA">&lt;REF&gt;</span>');
}
// Colours the wiki-syntax characters [ ] { } | and a few keywords in a piece
// of context text so that likely false positives stand out.
function puHighlightContext(s) {
  // Applied in this order; every replacement is a coloured span.
  var rules = [
    [/\[/g, '<span style="color:#FF0000">[</span>'],
    [/\]/g, '<span style="color:#FF0000">]</span>'],
    [/\{/g, '<span style="color:#00FF00">{</span>'],
    [/\}/g, '<span style="color:#00FF00">}</span>'],
    [/\|/g, '<span style="color:#0000FF">|</span>'],
    // these occur next to false positives for en dashes, commonly
    [/issn/gi, '<span style="color:#FF7722">ISSN</span>'],
    [/isbn/gi, '<span style="color:#FF7722">ISBN</span>'],
    // template requires literal dash
    [/scotus/gi, '<span style="color:#FF7722">SCOTUS</span>']
  ];
  var out = s;
  for (var r = 0; r < rules.length; r++) {
    out = out.replace(rules[r][0], rules[r][1]);
  }
  return out;
}
// Escapes a piece of wikitext so that it can be inserted into innerHTML and
// displayed literally. The text comes from the article being edited, so any
// markup in it (e.g. <ref>, <br>, &nbsp;) must not be interpreted by the browser.
function puEscape(s) {
  // & must be handled first so the entities produced below are not re-escaped
  return s.replace(/&/g, "&amp;")
          .replace(/</g, "&lt;")
          .replace(/>/g, "&gt;");
}
// Called from the generated html. Marks every chunk of kind k as hidden
// (hidden edits are just not displayed), then refreshes the summary field and
// the change list. The textarea is not rewritten here.
function puAllHide(k) {
  var cell = punctuationEdits;
  while (cell != undefined) {
    if (cell.head.what == k) cell.head.hidden = true;
    cell = cell.tail;
  }
  // always keep these up to date
  document.editform.wpSummary.value = puSummary(punctuationEdits);
  puShowChanges("", punctuationEdits);
}
// Called from the generated html. Undoes the edit with id i: the chunk is
// turned into a raw chunk holding its original text, the textarea and summary
// are rewritten, and the edit's entry in the change list is greyed out.
// Alerts if no chunk has that id.
function puUndo(i) {
  var cell = punctuationEdits;
  while (cell != undefined && cell.head.id != i) cell = cell.tail;

  if (cell == undefined) {
    alert("Oops, can't undo? " + i + " ... " + punctuationEdits);
    return;
  }

  cell.head.text = cell.head.orig;
  cell.head.israw = true;
  // undo edit where it matters
  document.editform.wpTextbox1.value = puRewrite(punctuationEdits);
  document.editform.wpSummary.value = puSummary(punctuationEdits);
  var entry = document.getElementById('puEdit' + i);
  entry.style.border = "none";
  entry.style.opacity = "0.5";
  entry.style.filter = "Alpha(Opacity=50)";
}
// Generates the raw text from a chunk list: raw chunks contribute their text,
// edit chunks their replacement. A chunk whose relevant field is missing
// contributes "???".
function puRewrite(l) {
  var pieces = [];
  for (var cell = l; cell != undefined; cell = cell.tail) {
    var chunk = cell.head;
    var piece = chunk.israw ? chunk.text : chunk.rep;
    pieces.push(piece != undefined ? piece : "???");
  }
  return pieces.join("");
}
// Given a function (f : string -> chunk list) and (l : chunk list), builds a
// new list in which every raw chunk of l is replaced by the chunks of f
// applied to its text (flattened into the result). Edit chunks are carried
// over unmodified. f is applied to the raw chunks from left to right.
function puRawMapConcat(f, l) {
  if (l == undefined) return l;

  // 1. collect the heads of the result, in order
  var heads = [];
  var cell;
  for (cell = l; cell != undefined; cell = cell.tail) {
    if (cell.head.israw) {
      for (var sub = f(cell.head.text); sub != undefined; sub = sub.tail) {
        heads.push(sub.head);
      }
    } else {
      heads.push(cell.head);
    }
  }

  // 2. rebuild from the right with puCons; cell now holds the list terminator
  var result = cell;
  for (var i = heads.length - 1; i >= 0; i--) {
    result = puCons(heads[i], result);
  }
  return result;
}
// Concatenates two chunk lists. Every cell of l1 is rebuilt with puCons in
// front of l2; l2 itself is reused as the tail.
function puAppend(l1, l2) {
  var heads = [];
  for (var cell = l1; cell != undefined; cell = cell.tail) heads.push(cell.head);
  var result = l2;
  for (var i = heads.length - 1; i >= 0; i--) result = puCons(heads[i], result);
  return result;
}
// Lists are represented as head/tail cons cells, with nil = undefined.
// puCons(h, t) puts chunk h in front of list t. When h and the first chunk of
// t are both raw they are flattened into a single raw chunk, so a list built
// with puCons never has two adjacent raw chunks.
function puCons(h, t) {
  var bothRaw = t != undefined && t.head.israw && h.israw;
  if (!bothRaw) {
    return { head: h, tail: t };
  }
  return {
    head: { israw: true, text: h.text + t.head.text },
    tail: t.tail
  };
}
// Makes a raw chunk (plain text with no replacement suggested) holding s.
function puRaw(s) {
  return { israw: true, text: s };
}
// puCleave(small, large)
// Finds the first match of small in large and returns a two-element array:
// the string preceding the match and the string following it.
// Returns undefined if there are no matches.
function puCleave(small, large) {
  var at = large.indexOf(small);
  if (at == -1) return undefined;
  var before = large.substr(0, at);
  var after = large.substring(at + small.length);
  return [before, after];
}
// "Born" style pass: expands the abbreviation "(b. " to "(born " in every
// raw chunk.
function puBorn(edits) {
  var expandBorn = puSpellRep("(b. ", "(born ", puBORN);
  return puRawMapConcat(expandBorn, edits);
}
// XHTML pass: rewrites "<br>" and "<BR>" as "<br />" in every raw chunk.
function puXhtml(edits) {
  var unclosed = ["<br>", "<BR>"];
  for (var i = 0; i < unclosed.length; i++) {
    edits = puRawMapConcat(puSpellRep(unclosed[i], "<br />", puXHTML), edits);
  }
  return edits;
}
// Spelling pass: applies a fixed list of misspelling -> correction
// substitutions, one after another, to every raw chunk.
function puSpell(edits) {
  // [misspelling, correction], applied in this order
  var fixes = [
    ["seperat", "separat"],
    ["embarass", "embarrass"],
    ["existance", "existence"],
    ["supercede", "supersede"],
    ["accomodat", "accommodat"],
    ["foreward", "foreword"],
    ["liason", "liaison"],
    ["millenium", "millennium"],
    ["accomoda", "accommoda"],
    ["occassion", "occasion"],
    ["occurrance", "occurrence"],
    ["privelege", "privilege"],
    ["priviledge", "privilege"],
    ["withold", "withhold"]
  ];
  for (var i = 0; i < fixes.length; i++) {
    edits = puRawMapConcat(puSpellRep(fixes[i][0], fixes[i][1], puSPELL), edits);
  }
  return edits;
}
// Returns a (string -> chunk list) function that replaces every occurrence of
// src with dst, tagging the edits with kind wh.
function puSpellRep(src, dst, wh) {
  return function (t) {
    // spelling is kinda slow, and most misspellings never appear at all, so
    // return the text as a single raw chunk unless src occurs in it
    return (t.indexOf(src) == -1)
      ? puCons(puRaw(t), undefined)
      : puSpellOne(t, src, dst, wh);
  };
}
// Splits t at every occurrence of src and returns the chunk list
// raw, edit(src -> dst), raw, edit, ..., raw. The final raw chunk may be empty.
function puSpellOne(t, src, dst, wh) {
  var halves = puCleave(src, t);
  if (halves == undefined) return puCons(puRaw(t), undefined);
  // create this edit before recursing so edits are numbered left to right
  var fix = puEdit(src, dst, wh);
  var remainder = puSpellOne(halves[1], src, dst, wh);
  return puCons(puRaw(halves[0]), puCons(fix, remainder));
}
// City-state pass: runs the "[[City, State]]" rewrite once for every US
// state, in alphabetical order. (Could do countries here, too.)
function puCityState(edits) {
  // [state name, optional link target used instead of the bare state name]
  var states = [
    ["Alabama"], ["Alaska"], ["Arizona"], ["Arkansas"], ["California"],
    ["Colorado"], ["Connecticut"], ["Delaware"], ["Florida"],
    ["Georgia", "Georgia (U.S. state)|Georgia"],
    ["Hawaii"], ["Idaho"], ["Illinois"], ["Indiana"], ["Iowa"],
    ["Kansas"], ["Kentucky"], ["Louisiana"], ["Maine"], ["Maryland"],
    ["Massachusetts"], ["Michigan"], ["Minnesota"], ["Mississippi"], ["Missouri"],
    ["Montana"], ["Nebraska"], ["Nevada"], ["New Hampshire"], ["New Jersey"],
    ["New Mexico"], ["New York"], ["North Carolina"], ["North Dakota"], ["Ohio"],
    ["Oklahoma"], ["Oregon"], ["Pennsylvania"], ["Rhode Island"], ["South Carolina"],
    ["South Dakota"], ["Tennessee"], ["Texas"], ["Utah"], ["Vermont"],
    ["Virginia"], ["Washington"], ["West Virginia"], ["Wisconsin"], ["Wyoming"]
  ];
  for (var i = 0; i < states.length; i++) {
    edits = puRawMapConcat(puCityStateFn(states[i][0], states[i][1]), edits);
  }
  return edits;
}
// Returns a (string -> chunk list) function doing the city-state rewrite for
// one state. statelink, if given, is the link target to use for the state.
function puCityStateFn(state, statelink) {
  var marker = ', ' + state + ']]';
  return function (t) {
    // citystate is kind of slow and there are 50 states; only run a state
    // if it appears at all...
    if (t.indexOf(marker) != -1) return puCityStateOne(t, state, statelink);
    return puCons(puRaw(t), undefined);
  };
}
// Splits s into [everything up to the trailing run of spaces, that run of
// spaces]. Only the space character counts. For an all-space (or empty)
// string the first part is empty.
function puSplitWhiteEnd(s) {
  var end = s.length;
  while (end > 0 && s.charAt(end - 1) == ' ') end--;
  return [s.substr(0, end), s.substring(end)];
}
// Splits s into [leading run of spaces, the rest]. Only the space character
// counts. For an all-space (or empty) string the second part is empty.
function puSplitWhiteStart(s) {
  var start = 0;
  while (start < s.length && s.charAt(start) == ' ') start++;
  return [s.substr(0, start), s.substring(start)];
}
// Returns the run of digits and hyphens at the end of s. Square brackets are
// skipped over (years are often linked); any other character ends the scan.
// XXX allow decimal places
function puNumberEnd(s) {
  var number = "";
  for (var pos = s.length - 1; pos >= 0; pos--) {
    var ch = s.charAt(pos);
    if ((ch >= '0' && ch <= '9') || ch == '-') {
      number = ch + number;
      continue;
    }
    if (ch != '[' && ch != ']') break;
  }
  return number;
}
// Returns the first token of s: everything up to the first space, newline or
// '|', with any square brackets left out.
// XXX despite the name, the token is not required to be numeric
function puNumberStart(s) {
  var token = "";
  for (var pos = 0; pos < s.length; pos++) {
    var ch = s.charAt(pos);
    if (ch == '[' || ch == ']') continue;
    if (ch == ' ' || ch == '\n' || ch == '|') break;
    token = token + ch;
  }
  return token;
}
// Does this string end with a (partial) http link? True when the last "http"
// in s is not followed by a space or ']' (which would terminate the link).
function puEndsHTTP(s) {
  // search only for "http" since we want to catch https too
  var linkStart = s.lastIndexOf('http');
  if (linkStart == -1) return false;
  var terminated = s.lastIndexOf(' ') > linkStart || s.lastIndexOf(']') > linkStart;
  return !terminated;
}
// Are we inside an HTML character entity? True when the last '&' in s is not
// followed by a space or ';' (which would terminate it).
function puIsElement(s) {
  var amp = s.lastIndexOf('&');
  if (amp == -1) return false;
  var terminated = s.lastIndexOf(' ') > amp || s.lastIndexOf(';') > amp;
  return !terminated;
}
// En dash pass for one raw string: looks at each hyphen in turn and suggests
// replacing it (with any spaces around it) by an en dash when it sits between
// a number and a number (or one of the words accepted by puEnDashAftOK).
function puEnDash(t) {
  // split on the next dash
  var halves = puCleave("-", t);
  if (halves == undefined) return puCons(puRaw(t), undefined);

  var left = puSplitWhiteEnd(halves[0]);
  var right = puSplitWhiteStart(halves[1]);
  // token before the dash (digits and hyphens) and token after it
  var leftTok = puNumberEnd(left[0]);
  var rightTok = puNumberStart(right[1]);
  var lo = leftTok * 1;
  var hi = rightTok * 1;

  var isRange =
    leftTok.length > 0 && rightTok.length > 0 &&
    // exclude ISBNs and certain dates by making sure the number doesn't have a dash in it
    puEnDashBefOK(leftTok) && puEnDashAftOK(rightTok) &&
    !puInLink(halves[0], halves[1]) &&
    !puEndsHTTP(left[0]) &&
    // ranges are usually lo-hi, but sometimes we see 1987-8
    (isNaN(lo) || isNaN(hi) || lo <= hi ||
      (lo >= 1000 && lo <= 9999 && hi <= 99));

  if (!isRange) {
    // Doesn't match. But if we found dashes to the right, we shouldn't look
    // at those again. (e.g. in ISBN 01-1234-6789, once we look at the first
    // dash and reject it, we don't want to then consider 1234-6789, which
    // looks like a match.)
    var skip = puEnSkip(right[1]);
    return puCons(puRaw(halves[0] + "-" + right[0] + skip[0]), puEnDash(skip[1]));
  }

  // src has whitespace around the dash, the replacement does not
  // (note unicode en dash); create the edit before recursing so edits are
  // numbered left to right
  var fix = puEdit(left[1] + "-" + right[0], "–", puENDASH);
  return puCons(puRaw(left[0]), puCons(fix, puEnDash(right[1])));
}
// The token before the dash is acceptable only if it has no more hyphens in
// it (like when considering the second dash in ISBN 01-1234-6789).
function puEnDashBefOK(s) {
  var hyphenAt = s.indexOf('-');
  return hyphenAt == -1;
}
// Sees if the position between a (text before) and b (text after) is in a
// link. That means in a {{ template }}, or {{ template | with args }} (but
// not in the argument part), or a [[wiki link]], or the [[target of a
// piped|link]] (but not when in the display portion).
function puInLink(a, b) {
  var delimiters = ["}}", "]]", "{{", "[[", "|"];
  var opener = puFindAnyLeft(a, delimiters);
  var closer = puFindAnyRight(b, delimiters);
  if (opener == "{{") return closer == "}}" || closer == "|";
  if (opener == "[[") return closer == "|" || closer == "]]";
  return false;
}
// Of the strings in finds, returns the one whose last occurrence in str is
// furthest to the right; undefined if none of them occurs.
function puFindAnyLeft(str, finds) {
  var winner = undefined;
  var winnerAt = -1;
  for (var k = 0; k < finds.length; k++) {
    var candidate = finds[k];
    var at = str.lastIndexOf(candidate);
    if (at <= winnerAt) continue;
    winner = candidate;
    winnerAt = at;
  }
  return winner;
}
// Of the strings in finds, returns the one whose first occurrence in str is
// furthest to the left; undefined if none of them occurs.
function puFindAnyRight(str, finds) {
  var earliest = undefined;
  var earliesti = str.length;
  for (var i = 0; i < finds.length; i++) {
    var x = str.indexOf(finds[i]);
    // indexOf reports "not found" as -1, which would otherwise compare as
    // earlier than every real position and make an absent string win
    if (x != -1 && x < earliesti) {
      earliest = finds[i];
      earliesti = x;
    }
  }
  return earliest;
}
// Is the token following a dash acceptable as the right-hand side of a range?
// A token starting with a digit is accepted unless it contains another hyphen
// or one of a few file extensions; otherwise the token must start
// (case-insensitively) with a month name, "today", "bc" or "present".
function puEnDashAftOK(s) {
  var first = s.charCodeAt(0);
  var startsWithDigit = first >= '0'.charCodeAt(0) && first <= '9'.charCodeAt(0);

  if (startsWithDigit) {
    // some prefix is a number, but we should avoid certain stuff...
    var forbidden = ['-', '.htm', '.pdf', '.png', '.jpg', '.gif', '.svg', '.stm'];
    for (var f = 0; f < forbidden.length; f++) {
      if (s.indexOf(forbidden[f]) != -1) return false;
    }
    return true;
  }

  // otherwise something special:
  var lower = s.toLowerCase();
  var words = [
    "january", "february", "march", "april", "may", "june",
    "july", "august", "september", "october", "november", "december",
    "today", "bc", "present"
  ];
  for (var w = 0; w < words.length; w++) {
    if (puStartswith(lower, words[w])) return true;
  }
  return false;
}
// Does the string lng begin with the string sht?
function puStartswith(lng, sht) {
  // searching backwards from index 0 can only match at the very start
  return lng.lastIndexOf(sht, 0) == 0;
}
// After not matching a dash for en dash replacement, splits a string into
// two parts: the first is what we should skip (the leading run of digits,
// hyphens and square brackets), the rest is what we should look for more
// dashes within.
function puEnSkip(s) {
  var cut = 0;
  while (cut < s.length) {
    var ch = s.charAt(cut);
    var skippable = (ch >= '0' && ch <= '9') || ch == '-' || ch == '[' || ch == ']';
    if (!skippable) break;
    cut++;
  }
  return [s.substr(0, cut), s.substring(cut)];
}
// Makes an edit chunk replacing src with dst, of kind what, with no
// display-only source/destination strings.
function puEdit(src, dst, what) {
  var noDisplayOverride = undefined;
  return puEditExt(src, dst, what, noDisplayOverride, noDisplayOverride);
}
// Makes an edit chunk: src is the original text, dst the replacement, what
// the edit kind. dispsrc / dispdst, when not undefined, are shown in the
// change list instead of src / dst. Each chunk gets a fresh id taken from the
// global punctuationID counter (incremented first).
function puEditExt(src, dst, what, dispsrc, dispdst) {
  return {
    orig: src,
    rep: dst,
    israw: false,
    what: what,
    hidden: false,
    dispsrc: dispsrc,
    dispdst: dispdst,
    id: ++punctuationID
  };
}
/* Fix faux em dashes.
   "--" almost anywhere should almost always be a real em dash (unless there
   are four of them or it is part of an html comment). The "--" and any
   spaces around it are replaced by a single em dash when the nearest
   non-space characters on both sides pass puEmOKChar.
   TODO: " - " between words should usually be an em dash.
*/
function puEmDash(t) {
  var halves = puCleave("--", t);
  if (halves == undefined) return puCons(puRaw(t), undefined);

  // must be preceded by a word and followed by a word
  var left = puSplitWhiteEnd(halves[0]);
  var right = puSplitWhiteStart(halves[1]);
  var wordAfter = right[1].length > 0 && puEmOKChar(right[1].charAt(0));
  var wordBefore = left[0].length > 0 && puEmOKChar(left[0].charAt(left[0].length - 1));

  if (!(wordAfter && wordBefore)) {
    /* not an em dash. */
    return puCons(puRaw(halves[0] + "--"), puEmDash(halves[1]));
  }
  // create the edit before recursing so edits are numbered left to right
  var fix = puEdit(left[1] + "--" + right[0], "—", puEMDASH);
  return puCons(puRaw(left[0]), puCons(fix, puEmDash(right[1])));
}
// May character c sit directly next to a "--" that is turned into an em
// dash? Everything is allowed except > ! < - and |.
function puEmOKChar(c) {
  switch (c) {
    case '>':
    case '!':
    case '<':
    case '-':
    case '|':
      return false;
    default:
      return true;
  }
}
// Is the first character of c an ASCII digit 0-9? (false for the empty string)
function puIsDigit(c) {
  var code = c.charCodeAt(0);
  var zero = '0'.charCodeAt(0);
  var nine = '9'.charCodeAt(0);
  return code >= zero && code <= nine;
}
// Rewrites links of the form [[City, State]] so that city and state are
// linked separately: the ", State]]" ending is replaced by
// ", State|]], [[State]]" (or [[statelink]] when statelink is given).
// XXX could be improved by generating the pipe trick expansion automatically
//     (the pipe trick doesn't work in ref tags, etc.), but that makes it a
//     little trickier because we'd have to find the city name and might fail
//     (because of other edits)
// XXX when doing that should detect Image: and Category:
function puCityStateOne(t, state, statelink) {
  var ending = ", " + state + "]]";
  var halves = puCleave(ending, t);
  if (halves == undefined) return puCons(puRaw(t), undefined);

  var target = (statelink == undefined) ? state : statelink;
  // create the edit before recursing so edits are numbered left to right
  var fix = puEdit(ending, ", " + state + "|]], [[" + target + "]]", puCITYSTATE);
  return puCons(puRaw(halves[0]), puCons(fix, puCityStateOne(halves[1], state, statelink)));
}
// 1980's to 1980s ([[Wikipedia:Manual of Style (dates and numbers)]])
// Note this isn't always a mistake:
//   "1981 was a cold year compared to 1980's record temperatures" would be okay
// so some context awareness is appropriate (but it is almost always wrong).
// The "0's" is only changed when it ends a 4-digit or 2-digit number and is
// followed by a character accepted by puDecadeOKChar.
function puDecade(t) {
  var halves = puCleave("0's", t);
  if (halves == undefined) return puCons(puRaw(t), undefined);

  var before = halves[0];
  var after = halves[1];
  var n = before.length;

  // date before? (only do it for 4 or 2 digit dates; the final digit is the
  // "0" of the match, so before must end in exactly 3 or exactly 1 digits)
  var fourDigit = n >= 4 &&
    puIsDigit(before.charAt(n - 1)) &&
    puIsDigit(before.charAt(n - 2)) &&
    puIsDigit(before.charAt(n - 3)) &&
    !puIsDigit(before.charAt(n - 4));
  var twoDigit = n >= 2 &&
    puIsDigit(before.charAt(n - 1)) &&
    !puIsDigit(before.charAt(n - 2));
  // safe to correct?
  var safe = after.length > 0 && puDecadeOKChar(after.charAt(0));

  if (!((fourDigit || twoDigit) && safe)) {
    /* no problem. */
    return puCons(puRaw(before + "0's"), puDecade(after));
  }
  // create the edit before recursing so edits are numbered left to right
  var fix = puEdit("0's", "0s", puDECADE);
  return puCons(puRaw(before), puCons(fix, puDecade(after)));
}
// May character c directly follow a decade ("1980's") that is being
// corrected? It should mark the end of a word.
function puDecadeOKChar(c) {
  switch (c) {
    case '\n': case ' ': case ',': case '.':
    case '&': case '—': case '-': case '–':
    // text in tables?
    case '|': case '\t': case '<': case ')':
    case ';': case '!': case "'": case ':':
    case '/':
      return true;
    default:
      return false;
  }
}
// space before/around(parentheses )
// Closing parens are basically the same as commas: a ")" with spaces before
// it, or with no space after it, is replaced (together with the surrounding
// spaces) by ") " when the neighbouring characters pass puRParenOKChar.
function puParen(t) {
  var halves = puCleave(")", t);
  if (halves == undefined) return puCons(puRaw(t), undefined);

  // must be preceded by a word and followed by a word
  var left = puSplitWhiteEnd(halves[0]);
  var right = puSplitWhiteStart(halves[1]);

  // needs correction?
  var misSpaced = left[1].length > 0 || right[0].length == 0;
  // safe to correct?
  var fixable = misSpaced &&
    right[1].length > 0 && puRParenOKChar(right[1].charAt(0)) &&
    left[0].length > 0 && puRParenOKChar(left[0].charAt(left[0].length - 1));

  if (!fixable) {
    /* no problem. */
    return puCons(puRaw(halves[0] + ")"), puParen(halves[1]));
  }
  // create the edit before recursing so edits are numbered left to right
  var fix = puEdit(left[1] + ")" + right[0], ") ", puPAREN);
  return puCons(puRaw(left[0]), puCons(fix, puParen(right[1])));
}
// May character c sit next to a closing paren whose spacing is being
// corrected? Returns false for characters next to which the correction is
// not considered safe, true for everything else.
// XXX perhaps should be okay-on-right and okay-on-left; this may be too conservative
function puRParenOKChar(c) {
  if (c == ")" || c == "(" || c == '|' ||
      // otherwise we undo our linkspace fix ;)
      c == ']' ||
      // title markup
      c == '=' ||
      // sometimes people do
      c == '&' ||
      // quotes, obviously
      c == '"' || c == '”' || c == '’' || c == "'" ||
      // History of Russia (1900-1950)#World War II
      c == "#" ||
      // other stuff
      c == '\n' || c == ':' || c == ';' || c == '.' || c == '-' || c == '—' || c == ',' ||
      // every alternative must compare against c: a bare '{' here is a
      // truthy constant and would make the function return false for
      // every character
      c == '}' || c == '{' || c == '<') return false;
  else return true;
}
// Comma spacing pass for one raw string (see puCommaLike).
function puComma(t) {
  var mark = ',';
  return puCommaLike(mark, puCOMMA, t);
}
// Semicolon spacing pass for one raw string (see puCommaLike).
function puSemicolon(t) {
  var mark = ';';
  return puCommaLike(mark, puSEMICOLON, t);
}
// Spacing fix for a comma-like punctuation mark ch (edits are tagged with
// kind what): a mark with spaces before it, or with no space after it, is
// replaced (together with the surrounding spaces) by the mark plus one space.
// The fix is skipped inside a (partial) http link or HTML entity and when the
// neighbouring characters do not pass puCommaOKChar.
// TODO: very important to filter out URL hits, since comma appears in lots of news URLs
function puCommaLike(ch, what, t) {
  var halves = puCleave(ch, t);
  if (halves == undefined) return puCons(puRaw(t), undefined);

  // must be preceded by a word and followed by a word
  var left = puSplitWhiteEnd(halves[0]);
  var right = puSplitWhiteStart(halves[1]);

  // needs correction?
  var misSpaced = left[1].length > 0 || right[0].length == 0;
  // safe to correct?
  var fixable = misSpaced &&
    !puEndsHTTP(left[0]) &&
    !puIsElement(left[0]) &&
    right[1].length > 0 && puCommaOKChar(right[1].charAt(0)) &&
    left[0].length > 0 && puCommaOKChar(left[0].charAt(left[0].length - 1));

  if (!fixable) {
    /* no problem. */
    return puCons(puRaw(halves[0] + ch), puCommaLike(ch, what, halves[1]));
  }
  // create the edit before recursing so edits are numbered left to right
  var fix = puEdit(left[1] + ch + right[0], ch + ' ', what);
  return puCons(puRaw(left[0]), puCons(fix, puCommaLike(ch, what, right[1])));
}
// Link-space pass for one raw string: removes the spaces just before the
// closing "]]" of a link, e.g. "[[Foo  ]]" becomes "[[Foo]]".
function puLinkSpace(t) {
  var a = puCleave(" ]]", t);
  if (a == undefined) return puCons(puRaw(t), undefined);
  // maybe multiple spaces...
  var bef = puSplitWhiteEnd(a[0]);
  // Filter out the common idiom [[Category:United States| ]] (a blank sort
  // key). The test looks at the last non-space character, so the idiom is
  // also recognised when it is written with several spaces; checking the
  // last character of a[0] would miss that case and turn the link into
  // [[Category:United States|]], which means something else.
  if (a[0].length > 0 && bef[0].charAt(bef[0].length - 1) != '|') {
    return puCons(puRaw(bef[0]),
                  puCons(puEdit(bef[1] + " ]]", "]]", puLINKSPACE),
                         puLinkSpace(a[1])));
  } else {
    return puCons(puRaw(a[0] + " ]]"), puLinkSpace(a[1]));
  }
}
/// XXX not hooked up -- did I finish implementing this?
// Intended purpose: between number and %, remove space.
// NOTE(review): as written this mirrors the comma fix, i.e. the "%" and the
// spaces around it are replaced by "% ". The predicates puPercentBeforeChar
// and puPercentAfterChar are not defined in the part of the file reviewed
// here; confirm they exist before hooking this pass up.
function puPercent(t) {
  var halves = puCleave("%", t);
  if (halves == undefined) return puCons(puRaw(t), undefined);

  // must be preceded by a word and followed by a word
  var left = puSplitWhiteEnd(halves[0]);
  var right = puSplitWhiteStart(halves[1]);

  // needs correction?
  var misSpaced = left[1].length > 0 || right[0].length == 0;
  // safe to correct?
  var fixable = misSpaced &&
    right[1].length > 0 && puPercentBeforeChar(right[1].charAt(0)) &&
    left[0].length > 0 && puPercentAfterChar(left[0].charAt(left[0].length - 1));

  if (!fixable) {
    /* no problem. */
    return puCons(puRaw(halves[0] + "%"), puPercent(halves[1]));
  }
  // create the edit before recursing so edits are numbered left to right
  var fix = puEdit(left[1] + "%" + right[0], "% ", puPERCENT);
  return puCons(puRaw(left[0]), puCons(fix, puPercent(right[1])));
}
// May character c sit next to a comma/semicolon whose spacing is being
// corrected? Returns false for digits and for the characters listed below,
// true for everything else.
function puCommaOKChar(c) {
  // definitely not inside numbers
  var code = c.charCodeAt(0);
  if (code >= '0'.charCodeAt(0) && code <= '9'.charCodeAt(0)) return false;

  switch (c) {
    // text in tables?
    case '|':
    // quotes, obviously
    case '"': case '”': case '’': case "'":
    // link w/ underscores instead of spaces
    case '_':
    case '\n': case '&': case ',':
    // ref tags
    case '{': case '<':
      return false;
    default:
      return true;
  }
}
// Decides whether a space may be inserted in front of the character c.
// puRef asks this about the first character that follows a ref tag.
//
// c: a one-character string.
// Returns false for the characters listed below, true for everything else.
function puRefSpaceOKChar(c) {
    var noSpaceBefore = [
        '|',                  // text in tables?
        ')',                  // parenthetical
        ' ',                  // or space already...
        ']',                  // ending image: tags
        '}',                  // ending template text
        '—',                  // before em dashes (see MOS)
        '"', '”', '’', "'",   // ending quotes...
        '\n', '&', ',',
        '{', '<'              // ref tags
    ];
    var k = noSpaceBefore.length;
    while (k-- > 0) {
        if (c == noSpaceBefore[k]) return false;
    }
    return true;
};
// for references, we want to find the ref tags, but
// they can appear in several common forms:
// <ref>...</ref>
// <ref name="first">...</ref>
// <ref name="reused" />
// this function returns a three-element array consisting of
// [the text before the first ref tag, the ref tag, the text following]
// (or it returns undefined if there are no ref tags to be found)
// Locates the first ref tag in t.
//
// t: the text to search.
// Returns [text before the tag, the complete tag, text after the tag],
// or undefined when no ref tag is found or when the first bracketing
// tag found has no closing tag.
//
// Recognised forms: a self-closing tag ending in "/>" and a bracketing
// tag running up to its closing "</ref>". "<references" is never treated
// as a ref tag.
function puGetRef(t) {
    var opener = '<ref';
    // but not this tag!
    var lookalike = '<references';

    for (var i = t.indexOf(opener); i != -1; i = t.indexOf(opener, i + 1)) {
        if (t.substr(i, lookalike.length) == lookalike) continue;

        // now, decide what kind of ref appearance this is. keep looking
        // at characters until we see
        //   >   (bracketing)
        // or
        //   />  (unitary)
        for (var j = i + opener.length; j < t.length; j++) {
            var ch = t.charAt(j);
            if (ch == '/' && t.charAt(j + 1) == '>') {
                // unitary tag: it ends right after the "/>".
                return [t.slice(0, i), t.slice(i, j + 2), t.slice(j + 2)];
            }
            if (ch == '>') {
                // found bracketing ref tag.
                // so now eat until </ref> is encountered.
                var a = puCleave('</ref>', t.slice(j));
                if (a == undefined) {
                    // XXX warn: unclosed ref tag??
                    return undefined;
                }
                // a[0] starts with the ">" that closes the opening tag.
                return [t.slice(0, i), t.slice(i, j) + a[0] + '</ref>', a[1]];
            }
            // Anything else -- including a "/" that is not followed by
            // ">", as in name="a/b" -- is part of the opening tag, so keep
            // scanning. Giving up here would hide every later ref tag in
            // the text from the caller.
        }
        // ran off the end of the text without closing this tag; try the
        // next candidate (there can only be one if tags overlap oddly).
    }
    // none found...
    return undefined;
};
// If we find a ref tag, we need to ensure the following:
// 1. there should never be any space before the tag.
// 2. the ref tag should appear after punctuation (except dashes)
// UNLESS the reference is to a specific term rather than
// to the sentence or comma/semicolon-separated phrase
// (we'll leave it up to the user to reject these false positives)
// 3. there shouldn't be double punctuation before/after the ref
// 4. there should be space after the ref
// UNLESS the reference is followed by another reference
// (or a dash, or legal punctuation as above)
//
// (this is according to the manual of style at [[wikipedia:footnotes]];
// and conforms to the Chicago Manual of Style)
//
// So, we grab any punctuation that follows the reference,
// erase all space before the reference,
// insert space after the ref if needed
// and insert any trailing punctuation before the reference,
// unless there is already punctuation there.
// Walks t one ref tag at a time (via puGetRef) and proposes a fix for each
// tag whose surroundings need one.
//
// t: the text to scan.
// Returns a puCons list of puRaw pieces and puEditExt proposals (kind
// puREF). Each proposal carries a second old/new pair in which the tag is
// replaced by the placeholder '__PUREF__', for display.
//
// A proposal is made when any of these holds:
//   - there is whitespace before the tag;
//   - there is no whitespace after the tag although the next character
//     allows a space (puRefSpaceOKChar);
//   - a punctuation character (puRefPuncChar) follows the tag;
//   - the tag is followed by a blank line and the character before it
//     needs punctuation (puRefNeedsPunc).
// The proposed text drops whitespace before the tag, moves trailing
// punctuation in front of the tag (or drops it if punctuation is already
// there), supplies '.' before the tag in the blank-line case, and puts one
// space after the tag when the next character allows it.
function puRef(t) {
    var found = puGetRef(t);
    if (found == undefined) return puCons(puRaw(t), undefined);

    var lead = puSplitWhiteEnd(found[0]);
    var tag = found[1];
    var trail = puSplitWhiteStart(found[2]);
    var text = lead[0];
    var gapBefore = lead[1];
    var gapAfter = trail[0];
    var rest = trail[1];

    // insist on two newlines since people frequently put refs on their own lines.
    var endsParagraph = rest.length > 1 &&
                        rest.charAt(0) == '\n' && rest.charAt(1) == '\n';

    // '' when there is no text before the tag; guarded by the length tests.
    var lastChar = text.charAt(text.length - 1);
    var lacksPunc = text.length == 0 || !puRefPuncChar(lastChar);
    var wantsPunc = lacksPunc && text.length > 0 && puRefNeedsPunc(lastChar);

    // punctuation directly after the tag, or '' if there is none. When
    // present it is taken off the front of the remaining text.
    var movedPunc = '';
    if (rest.length > 0 && puRefPuncChar(rest.charAt(0))) {
        movedPunc = rest.charAt(0);
        rest = rest.slice(1);
    }

    var padAfter = rest.length > 0 && puRefSpaceOKChar(rest.charAt(0));
    var spacer = padAfter ? ' ' : '';

    var somethingToFix =
        gapBefore.length > 0 ||                 // whitespace before?
        (gapAfter.length == 0 && padAfter) ||   // missing necessary whitespace after?
        movedPunc != '' ||                      // punctuation after?
        (endsParagraph && wantsPunc);           // unpunctuated end of paragraph

    if (!somethingToFix) {
        // no change
        return puCons(puRaw(found[0] + found[1]), puRef(found[2]));
    }

    // what gets inserted in front of the tag
    var prefix;
    if (endsParagraph && wantsPunc) {
        // assume period at end of paragraph.
        // XXX note, this will put the period before only the last
        // reference in a series of references at the end of
        // a paragraph, sigh
        prefix = '.';
    } else {
        prefix = lacksPunc ? movedPunc : '';
    }

    var oldTail = gapAfter + movedPunc;
    // XXX: should elide contents of ref in display somehow.
    return puCons(puRaw(text),
                  puCons(puEditExt(gapBefore + tag + oldTail,
                                   prefix + tag + spacer,
                                   puREF,
                                   // display versions elide the ref itself:
                                   gapBefore + '__PUREF__' + oldTail,
                                   prefix + '__PUREF__' + spacer),
                         puRef(rest)));
};
// Tells whether c is one of the punctuation characters that puRef treats
// as belonging in front of a ref tag: . ; , ? ! :
//
// c: a one-character string.
// Returns true or false.
function puRefPuncChar(c) {
    return c == '.' || c == ',' || c == ';' || c == ':' ||
           c == '?' || c == '!';
};
// Tells whether c is a character after which puRef should supply
// punctuation when a ref tag ends a paragraph: an ASCII letter, an ASCII
// digit, or a closing square bracket.
//
// c: a one-character string (only its first character is classified for
//    the letter/digit tests).
// Returns true or false.
function puRefNeedsPunc(c) {
    // Compare character codes with character codes. Comparing the numeric
    // code against a string such as 'a' coerces the string to NaN (or,
    // for '0' and '9', to the numbers 0 and 9), so letters and digits
    // would never match.
    var code = c.charCodeAt(0);
    return (code >= 'a'.charCodeAt(0) && code <= 'z'.charCodeAt(0)) ||
           (code >= 'A'.charCodeAt(0) && code <= 'Z'.charCodeAt(0)) ||
           (code >= '0'.charCodeAt(0) && code <= '9'.charCodeAt(0)) ||
           c == ']';
};
// ----------------------------------------------
// install it..
// Once the page has loaded, schedule addPunctuation -- but only when the
// window title says this is an edit page and does not look like a talk
// page.
// NOTE(review): this relies on the title text (it looks for "Editing "
// and for talk-namespace prefixes); confirm against the titles the wiki
// actually produces.
addOnloadHook(function() {
    var title = document.title;
    // not on talk pages...
    // A lower-case "talk:" covers prefixes such as "User talk:". The plain
    // article talk prefix is capitalised, so it is checked separately and
    // only directly after the leading "Editing " to avoid matching
    // articles that merely have "Talk:" somewhere in their name.
    if (title.indexOf("talk:") != -1 || title.indexOf("Editing Talk:") == 0) {
        return;
    }
    if (title.indexOf("Editing ") != -1) {
        addOnloadHook(addPunctuation);
    }
});
// Sets up the "punctuation" tab on an edit page.
//
// Stores the current contents of the edit form's summary field in the
// global punctuationPageOriginalSummary, adds a tab whose link runs
// doPunctuation(), and then calls akeytt().
// NOTE(review): addTab and akeytt are defined outside this file section;
// their exact behaviour is not visible here.
function addPunctuation() {
    var summaryField = document.editform.wpSummary;
    // need to see later if user has done any editing...
    punctuationPageOriginalSummary = summaryField.value;

    addTab("javascript:doPunctuation()",
           "punctuation",
           "ca-punctuation",
           "Punctuation",
           "");
    akeytt();
};
/* </nowiki> */