Jump to content

User:Brighterorange/punctuation.js

From Wikipedia, the free encyclopedia
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
/* <nowiki> */
 // Release date of this script, as a human-readable string.
 var punctuationVersion = "19 April 2008";
 // Counter used to give every edit chunk a unique id; puEditExt increments it
 // before assigning, so the first id handed out is 2.
 var punctuationID = 1;
 // Chunk list produced by the last doPunctuation run (undefined until then).
 var punctuationEdits = undefined;
 // Contents of the summary field captured at the end of doPunctuation.
 var punctuationOriginalSummary = undefined;
 // Compared with punctuationOriginalSummary in puSummary to decide whether to
 // tick "minor edit".
 // NOTE(review): not assigned anywhere in the part of the file reviewed here --
 // presumably set elsewhere; confirm.
 var punctuationPageOriginalSummary = undefined;
 // Number of characters of surrounding text shown on each side of an edit.
 var puCONTEXT = 40;

 // Edit kinds. Each value is stored in an edit chunk's `what` field and is
 // also the index of its label in puDESCRIPTIONS.
 var puENDASH = 0;
 var puSPELL = 1;
 var puEMDASH = 2;
 var puCOMMA = 3;
 var puPERCENT = 4;
 var puBORN = 5;
 var puLINKSPACE = 6;
 var puDECADE = 7;
 var puPAREN = 8;
 var puXHTML = 9;
 var puREF = 10;
 var puSEMICOLON = 11;
 var puCITYSTATE = 12;
 // Labels used in the edit summary and on the per-kind buttons, indexed by kind.
 var puDESCRIPTIONS = ["en dash", "spelling", "em dash", "comma", "percent", "born", "link space", "decade", "paren", "xhtml", "ref", "semicolon", "city-state"];
 // Number of edit kinds; used as the loop bound over puDESCRIPTIONS.
 var puNDESC = 13;

 // TODO:
 // The TODO list is maintained in the development version, at [[User:Brighterorange/punctuation2.js]].
 // Feel free to make new suggestions on my [[User talk:Brighterorange|talk page]].

 // Entry point. Runs every correction pass over the edit box's text, showing
 // progress in the 'siteSub' element, then writes the rewritten text and the
 // generated summary back into the form and lists the changes for undoing.
 function doPunctuation() {
  // just need some prominent element to put our messages in. We use the
  // "From Wikipedia" header.
  var banner = document.getElementById('siteSub');
  var report = function (msg) {
    banner.innerHTML = '<span style="border : 1px solid #333399; padding : 4px; margin : 4px;">' + msg + '</span>';
  };

  report('Running autopunctuation...');
  puDisableEditing(true);

  // The document is represented as a list of chunks. A chunk is either raw
  // text (no replacement suggested) or an edit (suggested replacement, reason,
  // original text, flags). Start from the singleton raw chunk.
  var edits = puCons(puRaw(document.editform.wpTextbox1.value), undefined);

  // [progress label, pass], in execution order. Each pass is wrapped in a
  // function so the pass itself is only looked up when its turn comes.
  var passes = [
    ['References...', function (l) { return puRawMapConcat(puRef, l); }],
    ['Spelling...',   function (l) { return puSpell(l); }],
    ['Born style...', function (l) { return puBorn(l); }],
    ['Em dashes...',  function (l) { return puRawMapConcat(puEmDash, l); }],
    ['En dashes...',  function (l) { return puRawMapConcat(puEnDash, l); }],
    ['Commas...',     function (l) { return puRawMapConcat(puComma, l); }],
    ['Semicolons...', function (l) { return puRawMapConcat(puSemicolon, l); }],
    ['Link space...', function (l) { return puRawMapConcat(puLinkSpace, l); }],
    ['Decade...',     function (l) { return puRawMapConcat(puDecade, l); }],
    ['Parens...',     function (l) { return puRawMapConcat(puParen, l); }],
    ['XHTML...',      function (l) { return puXhtml(l); }],
    ['City-State...', function (l) { return puCityState(l); }]
  ];

  // Runs right after the last pass, in the same timer callback.
  var finish = function () {
    punctuationEdits = edits;
    punctuationOriginalSummary = document.editform.wpSummary.value;

    document.editform.wpTextbox1.value = puRewrite(edits);
    document.editform.wpSummary.value = puSummary(edits);

    // finally, show interface for undos
    puShowChanges("", edits);
  };

  // Announce pass i, then run it from a 50 ms timer and chain to the next one.
  var runFrom = function (i) {
    report(passes[i][0]);
    setTimeout(function () {
      edits = passes[i][1](edits);
      if (i + 1 < passes.length) runFrom(i + 1);
      else finish();
    }, 50);
  };

  runFrom(0);
 };

 // Dims (flag true) or restores (flag false) the edit textbox to signal that
 // it should not be edited while changes are being reviewed.
 // don't use textbox's "disable" field, since
 // it makes the form submit an empty textbox,
 // blanking the article!
 function puDisableEditing(flag) {
   var e = document.editform.wpTextbox1;
   if (flag) {
         e.style.opacity = "0.5";
         e.style.filter = "Alpha(Opacity=50)";
   } else {
         // Assigning undefined to a style property is stringified to
         // "undefined", which is not a valid value and is ignored, leaving
         // the box dimmed. The empty string removes the declaration.
         e.style.opacity = "";
         e.style.filter = "";
   }
 };

 // Builds the edit-summary text for a chunk list: the summary captured in
 // punctuationOriginalSummary followed by "(auto: N kind; M kind...)".
 // Returns the captured summary unchanged when the list has no edit chunks.
 // Side effect: ticks "minor edit" when there are edits and the captured
 // summary equals punctuationPageOriginalSummary.
 function puSummary(edits) {
  // tally[k] = number of edit chunks of kind k
  var tally = [];
  for (var k = 0; k < puNDESC; k++) tally[k] = 0;

  var cell = edits;
  while (cell != undefined) {
    if (!cell.head.israw) tally[cell.head.what]++;
    cell = cell.tail;
  }

  var parts = [];
  for (var kind = 0; kind < puNDESC; kind++) {
    if (tally[kind] > 0) parts.push(tally[kind] + " " + puDESCRIPTIONS[kind]);
  }
  var listing = parts.join("; ");

  if (listing == "") return punctuationOriginalSummary;

  if (punctuationOriginalSummary == punctuationPageOriginalSummary) {
    // user never did anything except run punctuation, so minor
    document.editform.wpMinoredit.checked = true;
  }

  var glue = (punctuationOriginalSummary == "") ? "" : " ";
  return punctuationOriginalSummary + glue + "(auto: " + listing + ")";
 };

 // Returns the HTML for a one-row table with a box per edit kind that occurs
 // in the chunk list. Each box shows the count and label plus ON / OFF / HIDE
 // controls wired to puAllOn, puAllOff and puAllHide for that kind.
 function puKindButtons(edits) {
  var perKind = [];
  for (var n = 0; n < puNDESC; n++) perKind[n] = 0;

  for (var cell = edits; cell != undefined; cell = cell.tail) {
    if (cell.head.israw) continue;
    perKind[cell.head.what]++;
  }

  // now for any edit kind we did do, give buttons for them.
  var clickable = '<span style="cursor : hand; cursor : pointer;" onClick="';
  var html = ["<table><tr>"];
  for (var kind = 0; kind < puNDESC; kind++) {
    if (!(perKind[kind] > 0)) continue;
    html.push(
      '<td><div style="padding : 3px; margin-right: 6px; border : 2px solid #333377; background : #DDDDFF"><b><center>',
      perKind[kind] + " " + puDESCRIPTIONS[kind],
      '</center></b>',
      '<br/> ' + clickable + 'puAllOn(' + kind + ');">ON</span>&nbsp;',
      clickable + 'puAllOff(' + kind + ');">OFF</span>&nbsp;',
      clickable + 'puAllHide(' + kind + ');">HIDE</span>',
      '</div></td>');
  }
  html.push('</tr></table>');
  return html.join("");
 };

 // Appends ne to the running context ol and returns at most the last
 // puCONTEXT characters of the result.
 function puContextBefore(ol, ne) {
   var joined = ol + ne;
   return (joined.length < puCONTEXT)
     ? joined
     : joined.substring(joined.length - puCONTEXT);
 };

 // Returns up to puCONTEXT characters of the text that follows, read from
 // chunk list l: raw chunks contribute their text, edit chunks their
 // replacement.
 function puContextAfter(l) {
   var acc = "";
   var cell = l;
   while (cell != undefined) {
     acc += cell.head.israw ? cell.head.text : cell.head.rep;
     if (acc.length >= puCONTEXT) return acc.substr(0, puCONTEXT);
     cell = cell.tail;
   }
   return acc;
 };

 // Returns the HTML of the menu shown while reviewing changes.
 // For now it is just a 'done' button that calls puDoneClick.
 function puMenu() {
   var pieces = [
     '<div onclick="puDoneClick();" style="cursor:hand; cursor:pointer; border:2px outset #559955;',
     'padding:4px;margin:4px;background:#DDFFDD">click this when done with changes</div>'
   ];
   return pieces.join('');
 };

 // Click handler for the 'done' button: re-enables the textbox and clears
 // the list of shown changes from the 'siteSub' element.
 function puDoneClick() {
   puDisableEditing(false);
   document.getElementById('siteSub').innerHTML = '';
 };

 // Renders chunk list l into the 'siteSub' element as an HTML summary with
 // edit buttons. c is the context: some characters that precede the list.
 // XXX actually, should also report "no changes" if all are deactivated...
 function puShowChanges(c, l) {
   var target = document.getElementById('siteSub');
   target.innerHTML = (l == undefined)
     ? '<p>Punctuation: no changes.</p>'
     : puShowSomeChanges(c, l);
 };

 // Returns the review HTML for a non-empty chunk list: menu, per-kind
 // buttons, then one entry per chunk, then the menu again. Raw chunks show as
 // "(...)", hidden edits as "(hidden)", and visible edits as a clickable
 // "original -> replacement" span (calling puUndo) with context either side.
 // c is the text preceding the list.
 function puShowSomeChanges(c, l) {
   var out = [puMenu(), puKindButtons(l), "<br />"];
   var before = c;

   for (var cell = l; cell != undefined; cell = cell.tail) {
     var chunk = cell.head;
     if (chunk.israw) {
       out.push('<span style="color:#AAAAAA">(...)</span>');
       before = puContextBefore(before, chunk.text);
     } else if (chunk.hidden) {
       out.push('<span style="color:#AAAAAA">(hidden)</span>');
       before = puContextBefore(before, chunk.rep);
     } else {
       // XXX hover could select in edit box??
       var after = puContextAfter(cell.tail);
       // an edit may carry display-only versions of its two sides
       var shownSrc = (chunk.dispsrc == undefined) ? chunk.orig : chunk.dispsrc;
       var shownDst = (chunk.dispdst == undefined) ? chunk.rep : chunk.dispdst;
       out.push(
         '<br/> (' + puHighlightContext(puEscape(before)) +
         '<span id="puEdit' + chunk.id + '" style="border : 1px solid #FF9999; background : #FFDDDD; cursor : hand; cursor : pointer;"' +
         ' onClick="puUndo(' + chunk.id + ');">' +
         puHighlight(puEscape(shownSrc)) + "&rarr;" + puHighlight(puEscape(shownDst)) + '</span>' +
         puHighlightContext(puEscape(after)) +
         ') ');
       before = puContextBefore(before, chunk.rep);
     }
   }

   out.push(puMenu());
   return out.join("");
 };

 // Marks up (already escaped) edit text for display: spaces become grey
 // underscores, since many edits insert or delete spaces, and the
 // __PUREF__ placeholder becomes a coloured <REF>.
 function puHighlight(s) {
   // spaces must go first, or we would rewrite the spaces inside our own html
   var spaced = s.split(' ').join('<span style="color:#888888">_</span>');
   return spaced.split('__PUREF__').join('<span style="color:#AA55AA">&lt;REF&gt;</span>');
 };

 // Colours the wiki markup characters [ ] { } | in (already escaped) context
 // text, and flags ISSN / ISBN / SCOTUS (any case, shown upper-cased).
 function puHighlightContext(s) {
   var rules = [
     [/\[/g, '<span style="color:#FF0000">[</span>'],
     [/\]/g, '<span style="color:#FF0000">]</span>'],
     [/\{/g, '<span style="color:#00FF00">{</span>'],
     [/\}/g, '<span style="color:#00FF00">}</span>'],
     [/\|/g, '<span style="color:#0000FF">|</span>'],
     // these occur next to false positives for en dashes, commonly
     [/issn/gi, '<span style="color:#FF7722">ISSN</span>'],
     [/isbn/gi, '<span style="color:#FF7722">ISBN</span>'],
     // template requires literal dash
     [/scotus/gi, '<span style="color:#FF7722">SCOTUS</span>']
   ];
   var out = s;
   for (var i = 0; i < rules.length; i++) {
     out = out.replace(rules[i][0], rules[i][1]);
   }
   return out;
 };

 // Escapes text for insertion into HTML element content, so that wikitext is
 // displayed literally in the change list.
 // Returns s with & < > replaced by their entities.
 function puEscape(s) {
   // '&' must be escaped, and escaped first: otherwise entities that appear
   // in the article source (&nbsp;, &lt;, ...) are rendered by the browser
   // instead of being shown as written.
   var s0 = s.replace(/&/g, "&amp;");
   var s1 = s0.replace(/</g, "&lt;");
   var s2 = s1.replace(/>/g, "&gt;");
   return s2;
 };
 
 // Called from the generated HTML. Marks every chunk of kind k in
 // punctuationEdits as hidden (not displayed), then refreshes the summary
 // field and the change list. The textbox is not rewritten here.
 function puAllHide(k) {
    var cell = punctuationEdits;
    while (cell != undefined) {
       if (cell.head.what == k) cell.head.hidden = true;
       cell = cell.tail;
    }
    // always keep these up to date
    document.editform.wpSummary.value = puSummary(punctuationEdits);
    puShowChanges("", punctuationEdits);
 };

 // Called from the generated HTML. Undoes the edit with id i: the chunk is
 // turned back into a raw chunk holding its original text, the textarea and
 // summary are regenerated, and the edit's span in the list is greyed out.
 // Alerts if no chunk has that id.
 function puUndo(i) {
    var cell = punctuationEdits;
    while (cell != undefined && cell.head.id != i) cell = cell.tail;

    if (cell == undefined) {
       alert("Oops, can't undo? " + i + " ... " + punctuationEdits);
       return;
    }

    cell.head.text = cell.head.orig;
    cell.head.israw = true;

    // undo edit where it matters
    document.editform.wpTextbox1.value = puRewrite(punctuationEdits);
    document.editform.wpSummary.value = puSummary(punctuationEdits);

    var shown = document.getElementById('puEdit' + i);
    shown.style.border = "none";
    shown.style.opacity = "0.5";
    shown.style.filter = "Alpha(Opacity=50)";
 };

 // Generates the article text from a chunk list: raw chunks contribute their
 // text, edit chunks their replacement. A chunk missing the field it should
 // have contributes "???".
 function puRewrite(l) {
   var pieces = [];
   for (var cell = l; cell != undefined; cell = cell.tail) {
     var chunk = cell.head;
     if (chunk.israw && chunk.text != undefined) {
       pieces.push(chunk.text);
     } else if (!chunk.israw && chunk.rep != undefined) {
       pieces.push(chunk.rep);
     } else {
       pieces.push("???");
     }
   }
   return pieces.join("");
 };

 // Given f : string -> chunk list and l : chunk list, builds a new list in
 // which every raw chunk of l is replaced by the chunks f returns for its
 // text (flattened in place). Edit chunks are carried over unmodified.
 // f is applied to the raw chunks in list order.
 function puRawMapConcat(f, l) {
   if (l == undefined) return l;
   if (!l.head.israw) return puCons(l.head, puRawMapConcat(f, l.tail));

   var expanded = f(l.head.text);
   var rest = puRawMapConcat(f, l.tail);
   return puAppend(expanded, rest);
 };

 // Returns the list made of l1's elements followed by l2. Cells are rebuilt
 // with puCons, so a raw chunk ending l1 merges with a raw chunk starting l2.
 function puAppend (l1, l2) {
   var heads = [];
   for (var cell = l1; cell != undefined; cell = cell.tail) heads.push(cell.head);

   // cons the collected heads back on, last one first
   var out = l2;
   for (var i = heads.length - 1; i >= 0; i--) out = puCons(heads[i], out);
   return out;
 };

 // Lists are head/tail cons cells, with nil = undefined.
 // Returns a cell with head h and tail t -- except that when h and the first
 // element of t are both raw chunks, they are merged into one raw chunk
 // holding the concatenated text, followed by the rest of t.
 function puCons(h, t) {
   var mergeable = (t != undefined) && t.head.israw && h.israw;
   if (!mergeable) return { head: h, tail: t };

   return {
     head: { israw: true, text: h.text + t.head.text },
     tail: t.tail
   };
 }

 // Wraps the string s in a raw chunk (text with no suggested replacement).
 function puRaw(s) {
   return { israw: true, text: s };
 };

 // puCleave(small, large)
 // Splits large around the first occurrence of small.
 // Returns a two-element array: the text before the match and the text
 // after it. Returns undefined when small does not occur in large.
 function puCleave(small, large) {
   var at = large.indexOf(small);
   if (at < 0) return undefined;
   return [large.slice(0, at), large.slice(at + small.length)];
 };

 // Pass that suggests expanding "(b. " to "(born " in every raw chunk.
 function puBorn(edits) {
  var expandBorn = puSpellRep("(b. ", "(born ", puBORN);
  return puRawMapConcat(expandBorn, edits);
 };

 // Pass that suggests replacing "<br>" and "<BR>" with "<br />".
 function puXhtml(edits) {
  var variants = ["<br>", "<BR>"];
  for (var i = 0; i < variants.length; i++) {
    edits = puRawMapConcat(puSpellRep(variants[i], "<br />", puXHTML), edits);
  }
  return edits;
 };

 // Pass that suggests fixes for a fixed list of common misspellings.
 // Entries are substrings (often word stems) and are applied in list order.
 function puSpell(edits) {
  var fixes = [
    ["seperat", "separat"],
    ["embarass", "embarrass"],
    ["existance", "existence"],
    ["supercede", "supersede"],
    ["accomodat", "accommodat"],
    ["foreward", "foreword"],
    ["liason", "liaison"],
    ["millenium", "millennium"],
    ["accomoda", "accommoda"],
    ["occassion", "occasion"],
    ["occurrance", "occurrence"],
    ["privelege", "privilege"],
    ["priviledge", "privilege"],
    ["withold", "withhold"]
  ];
  for (var i = 0; i < fixes.length; i++) {
    edits = puRawMapConcat(puSpellRep(fixes[i][0], fixes[i][1], puSPELL), edits);
  }
  return edits;
 };

 // Returns a string -> chunk list function (for puRawMapConcat) that suggests
 // replacing every occurrence of src with dst, as edits of kind wh.
 function puSpellRep(src, dst, wh) {
   return function (t) {
     // spelling is kinda slow, and most misspellings never appear at all,
     // so check cheaply before doing the real work
     return (t.indexOf(src) != -1)
       ? puSpellOne(t, src, dst, wh)
       : puCons(puRaw(t), undefined);
   };
 };

 // Splits t into a chunk list in which every occurrence of src is an edit
 // chunk (kind wh) proposing dst, with the text in between as raw chunks.
 function puSpellOne (t, src, dst, wh) {
   var halves = puCleave(src, t);
   if (halves == undefined) return puCons(puRaw(t), undefined);

   // create this edit before recursing so ids follow document order
   var fix = puEdit(src, dst, wh);
   var rest = puSpellOne(halves[1], src, dst, wh);
   return puCons(puRaw(halves[0]), puCons(fix, rest));
 };

 // Pass that runs the city-state link fix once for every US state.
 // (could do countries here, too.)
 function puCityState(edits) {
  var states = [
    "Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado",
    "Connecticut", "Delaware", "Florida", "Georgia", "Hawaii", "Idaho",
    "Illinois", "Indiana", "Iowa", "Kansas", "Kentucky", "Louisiana",
    "Maine", "Maryland", "Massachusetts", "Michigan", "Minnesota",
    "Mississippi", "Missouri", "Montana", "Nebraska", "Nevada",
    "New Hampshire", "New Jersey", "New Mexico", "New York",
    "North Carolina", "North Dakota", "Ohio", "Oklahoma", "Oregon",
    "Pennsylvania", "Rhode Island", "South Carolina", "South Dakota",
    "Tennessee", "Texas", "Utah", "Vermont", "Virginia", "Washington",
    "West Virginia", "Wisconsin", "Wyoming"
  ];
  // states whose link target differs from the plain state name
  var linkOverrides = { "Georgia": "Georgia (U.S. state)|Georgia" };

  for (var i = 0; i < states.length; i++) {
    var name = states[i];
    var link = linkOverrides.hasOwnProperty(name) ? linkOverrides[name] : undefined;
    edits = puRawMapConcat(puCityStateFn(name, link), edits);
  }
  return edits;
 };

 // Returns a string -> chunk list function (for puRawMapConcat) applying the
 // city-state fix for one state. statelink is the optional link target to
 // use for the state instead of its name.
 function puCityStateFn(state, statelink) {
   var needle = ', ' + state + ']]';
   return function (t) {
     // citystate is kind of slow and there are 50 states; only run a state
     // if it appears at all...
     if (t.indexOf(needle) != -1) return puCityStateOne(t, state, statelink);
     return puCons(puRaw(t), undefined);
   };
 };

 // Splits s into [everything up to its last non-space character, the
 // trailing run of spaces]. Only ' ' counts as whitespace here.
 function puSplitWhiteEnd(s) {
   var end = s.length;
   while (end > 0 && s.charAt(end - 1) == ' ') end--;

   // all whitespace!
   if (end == 0) return ["", s];
   return [s.substr(0, end), s.substring(end)];
 };

 // Splits s into [the leading run of spaces, the rest]. Only ' ' counts as
 // whitespace here.
 function puSplitWhiteStart(s) {
   var start = 0;
   while (start < s.length && s.charAt(start) == ' ') start++;

   if (start == s.length) return [s, ""];
   return [s.substr(0, start), s.substring(start)];
 };

 // Returns the run of digits and '-' characters that ends s, ignoring any
 // '[' and ']' along the way (years are often linked). Returns "" when s
 // does not end in such a run.
 // XXX allow decimal places
 function puNumberEnd(s) {
   var kept = [];
   for (var i = s.length - 1; i >= 0; i--) {
     var ch = s.charAt(i);
     if (ch == '[' || ch == ']') continue;
     var numeric = (ch >= '0' && ch <= '9') || ch == '-';
     if (!numeric) break;
     kept.unshift(ch);
   }
   return kept.join("");
 };

 // Returns the token that starts s: every character up to the first space,
 // newline or '|', with '[' and ']' dropped.
 // XXX despite the name this does not require digits.
 function puNumberStart(s) {
   var token = "";
   for (var i = 0; i < s.length; i++) {
     var ch = s.charAt(i);
     if (ch == '[' || ch == ']') continue;
     if (ch == ' ' || ch == '\n' || ch == '|') break;
     token += ch;
   }
   return token;
 };

 // Does this string end with a (partial) http link? True when the last
 // "http" in s is not followed by a space or a ']'.
 function puEndsHTTP (s) {
    // only "http" since we want to catch https too
    var start = s.lastIndexOf('http');
    if (start == -1) return false;
    // a later space or ] would have terminated the link
    return !(s.lastIndexOf(' ') > start || s.lastIndexOf(']') > start);
 };

 // Are we inside an HTML entity such as &nbsp; ? True when the last '&' in
 // s is not followed by a space or a ';'.
 function puIsElement(s) {
    var amp = s.lastIndexOf('&');
    if (amp == -1) return false;
    // a later space or ; would have terminated it
    return !(s.lastIndexOf(' ') > amp || s.lastIndexOf(';') > amp);
 };

 // Splits t into a chunk list in which hyphens that look like range dashes
 // (number - number, or number - month/"present"/...) become edits proposing
 // a Unicode en dash with the surrounding spaces removed.
 function puEnDash (t) {
   // look at each dash in turn
   var parts = puCleave("-", t);
   if (parts == undefined) return puCons(puRaw(t), undefined);

   var left = puSplitWhiteEnd(parts[0]);
   var right = puSplitWhiteStart(parts[1]);

   // the number before the dash and the token after it
   var lo = puNumberEnd(left[0]);
   var hi = puNumberStart(right[1]);
   var loNum = lo * 1;
   var hiNum = hi * 1;

   var isRange =
       lo.length > 0 && hi.length > 0 &&
       // exclude ISBNs and certain dates: neither side may contain a dash
       puEnDashBefOK(lo) && puEnDashAftOK(hi) &&
       !puInLink(parts[0], parts[1]) &&
       !puEndsHTTP(left[0]) &&
       // ranges are usually lo-hi, but sometimes we see 1987-8
       (isNaN(loNum) || isNaN(hiNum) || loNum <= hiNum ||
        (loNum >= 1000 && loNum <= 9999 && hi <= 99));

   if (!isRange) {
     // no match. Dashes immediately to the right must not be looked at again
     // (e.g. in ISBN 01-1234-6789, once the first dash is rejected we don't
     // want to consider 1234-6789, which looks like a match).
     var skip = puEnSkip(right[1]);
     return puCons(puRaw(parts[0] + "-" + right[0] + skip[0]), puEnDash(skip[1]));
   }

   // src has whitespace around the dash, the replacement does not
   // (note unicode en dash)
   var fix = puEdit(left[1] + "-" + right[0], "–", puENDASH);
   return puCons(puRaw(left[0]), puCons(fix, puEnDash(right[1])));
 };

 // The number before a dash is acceptable only if it has no hyphen of its
 // own (as when considering the second dash in ISBN 01-1234-6789).
 function puEnDashBefOK(s) {
   return s.indexOf('-') < 0;
 };

 // Decides whether the point between a (text before) and b (text after) is
 // inside a link target: a {{ template }} name, the name part of
 // {{ template | with args }} (not the arguments), a [[wiki link]], or the
 // target of a [[target|piped link]] (not its display portion).
 function puInLink(a,b) {
    var markers = ["}}", "]]", "{{", "[[", "|"];
    var opener = puFindAnyLeft(a, markers);
    var closer = puFindAnyRight(b, markers);

    if (opener == "{{") return closer == "}}" || closer == "|";
    if (opener == "[[") return closer == "|" || closer == "]]";
    return false;
 };

 // Returns whichever of the strings in finds has its last occurrence
 // furthest right in str, or undefined when none of them occurs.
 function puFindAnyLeft(str, finds) {
    var winner = undefined;
    var bestAt = -1;
    for (var i = 0; i < finds.length; i++) {
       var at = str.lastIndexOf(finds[i]);
       if (at <= bestAt) continue;
       bestAt = at;
       winner = finds[i];
    }
    return winner;
 };

 // Returns whichever of the strings in finds occurs earliest in str, or
 // undefined when none of them occurs.
 function puFindAnyRight(str, finds) {
    var earliest = undefined;
    var earliesti = str.length;
    for(var i = 0; i < finds.length; i ++) {
       var x = str.indexOf(finds[i]);
       // indexOf reports "not found" as -1, which would otherwise compare
       // as earlier than every real position and win.
       if (x == -1) continue;
       if (x < earliesti) {
        earliest = finds[i];
        earliesti = x;
       }
    }
    return earliest;
 };

 // Is the token after a dash acceptable as the right side of a range?
 // A token starting with a digit is accepted unless it contains another
 // hyphen or looks like a file name. Any other token is accepted only if it
 // starts (case-insensitively) with a month name, "today", "bc" or "present".
 function puEnDashAftOK(s) {
   var first = s.charCodeAt(0);

   // some prefix has to be a number... (48..57 are the codes of '0'..'9')
   if (first >= 48 && first <= 57) {
      // but we should avoid certain stuff...
      var banned = ['-', '.htm', '.pdf', '.png', '.jpg', '.gif', '.svg', '.stm'];
      for (var i = 0; i < banned.length; i++) {
         if (s.indexOf(banned[i]) != -1) return false;
      }
      return true;
   }

   // otherwise something special:
   var lowered = s.toLowerCase();
   var openers = [
      "january", "february", "march", "april", "may", "june", "july",
      "august", "september", "october", "november", "december",
      "today", "bc", "present"
   ];
   for (var j = 0; j < openers.length; j++) {
      if (lowered.indexOf(openers[j]) == 0) return true;
   }
   return false;
 };

 // True when the string lng begins with sht.
 function puStartswith(lng, sht) {
    var at = lng.indexOf(sht);
    return at == 0;
 };

 // Used after a dash was rejected for en dash replacement. Splits s in two:
 // the leading run of digits, '-', '[' and ']' (to be skipped), and the rest
 // (where we should go on looking for dashes).
 function puEnSkip(s) {
   var i = 0;
   while (i < s.length && "0123456789-[]".indexOf(s.charAt(i)) != -1) i++;
   return [s.substr(0, i), s.substring(i)];
 };

 // Creates an edit chunk of kind what that replaces src with dst and has no
 // separate display strings.
 function puEdit(src, dst, what) {
     var noDisplayText = undefined;
     return puEditExt(src, dst, what, noDisplayText, noDisplayText);
 };

 // Creates an edit chunk: src is the original text, dst the replacement and
 // what the edit kind. dispsrc / dispdst, when not undefined, are shown in
 // the change list instead of src / dst. Each chunk gets a fresh id, taken
 // from the punctuationID counter after incrementing it.
 function puEditExt(src, dst, what, dispsrc, dispdst) {
     punctuationID ++;
     return {
       orig: src,
       rep: dst,
       israw: false,
       what: what,
       hidden: false,
       dispsrc: dispsrc,
       dispdst: dispdst,
       id: punctuationID
     };
 };

 /* Fix faux em dashes.
    "--" almost anywhere should almost always be a real em dash (unless there
         are four of them, or it is part of an html comment).
    Splits t into a chunk list in which each qualifying "--", together with
    the spaces around it, becomes an edit proposing a Unicode em dash.
    TODO: " - " between words should usually be an em dash.
 */
 function puEmDash(t) {
   var parts = puCleave("--", t);
   if (parts == undefined) return puCons(puRaw(t), undefined);

   var left = puSplitWhiteEnd(parts[0]);
   var right = puSplitWhiteStart(parts[1]);

   // must be followed by a word and preceded by a word
   var wordAfter = right[1].length > 0 && puEmOKChar(right[1].charAt(0));
   var wordBothSides = wordAfter &&
       left[0].length > 0 && puEmOKChar(left[0].charAt(left[0].length - 1));

   if (!wordBothSides) {
     /* not an em dash. */
     return puCons(puRaw(parts[0] + "--"), puEmDash(parts[1]));
   }

   var fix = puEdit(left[1] + "--" + right[0], "—", puEMDASH);
   return puCons(puRaw(left[0]), puCons(fix, puEmDash(right[1])));
 };

 // May character c sit next to a "--" that gets turned into an em dash?
 // Everything is allowed except > ! < - and |.
 function puEmOKChar(c) {
   return c != '>' && c != '!' && c != '<' && c != '-' && c != '|';
 };

 // True when the first character of c is an ASCII digit. The empty string
 // gives false.
 function puIsDigit(c) {
    var code = c.charCodeAt(0);
    // 48..57 are the character codes of '0'..'9'
    return code >= 48 && code <= 57;
 };

 // [[Pittsburgh, Pennsylvania]] to [[Pittsburgh, Pennsylvania|]], [[Pennsylvania]]
 // (the empty pipe is expanded to "Pittsburgh" by the pipe trick on save).
 // Splits t into a chunk list with one such edit per ", <state>]]" found.
 // statelink, when given, is used as the target of the added state link.
 // XXX could be improved by generating the pipe trick expansion ourselves
 // (the pipe trick doesn't work in ref tags, etc.), but then we have to find
 // "Pittsburgh" in the text and might fail (because of other edits).
 // XXX when doing that should detect Image: and Category:
 function puCityStateOne(t, state, statelink) {
  var suffix = ", " + state + "]]";
  var parts = puCleave(suffix, t);
  if (parts == undefined) return puCons(puRaw(t), undefined);

  var target = (statelink == undefined) ? state : statelink;

  var fix = puEdit(suffix, ", " + state + "|]], [[" + target + "]]", puCITYSTATE);
  var rest = puCityStateOne(parts[1], state, statelink);
  return puCons(puRaw(parts[0]), puCons(fix, rest));
 };

 // 1980's to 1980s ([[Wikipedia:Manual of Style (dates and numbers)]])
 // Note this isn't always a mistake: "1981 was a cold year compared to
 // 1980's record temperatures" would be okay, so some context awareness is
 // appropriate (but it is almost always wrong).
 // Splits t into a chunk list with an edit for each qualifying "0's".
 function puDecade(t) {
   var parts = puCleave("0's", t);
   if (parts == undefined) return puCons(puRaw(t), undefined);

   var lead = parts[0];
   var n = lead.length;
   // is the character `back` places before the "0's" a digit?
   var digitAt = function (back) { return puIsDigit(lead.charAt(n - back)); };

   // date before? (only do it for 4 or 2 digit dates; the "0" of "0's" is
   // the date's last digit)
   var fourDigit = n >= 4 && digitAt(1) && digitAt(2) && digitAt(3) && !digitAt(4);
   var twoDigit = n >= 2 && digitAt(1) && !digitAt(2);

   // safe to correct?
   var safe = parts[1].length > 0 && puDecadeOKChar(parts[1].charAt(0));

   if ((fourDigit || twoDigit) && safe) {
     var fix = puEdit("0's", "0s", puDECADE);
     return puCons(puRaw(lead), puCons(fix, puDecade(parts[1])));
   }

   /* no problem. */
   return puCons(puRaw(lead + "0's"), puDecade(parts[1]));
 };

 // May character c directly follow a decade such as "1980's" that is about
 // to be corrected? It should mark the end of a word.
 function puDecadeOKChar(c) {
   var enders = [
     '\n', ' ', ',', '.',
     '&', '—', '-', '–',
     // text in tables?
     '|', '\t', '<', ')',
     ';', '!', "'", ':',
     '/'
   ];
   for (var i = 0; i < enders.length; i++) {
     if (c == enders[i]) return true;
   }
   return false;
 };

 // space before/around(parentheses )
 // Closing parens are handled basically the same way as commas: spaces
 // before ")" are removed and exactly one space is put after it.
 // Splits t into a chunk list with an edit for each ")" that needs it.
 function puParen(t) {
   var parts = puCleave(")", t);
   if (parts == undefined) return puCons(puRaw(t), undefined);

   var left = puSplitWhiteEnd(parts[0]);
   var right = puSplitWhiteStart(parts[1]);

   var fixable =
       // needs correction? (space before the paren, or none after it)
       (left[1].length > 0 || right[0].length == 0) &&
       // safe to correct? must be followed by a word and preceded by a word
       right[1].length > 0 && puRParenOKChar(right[1].charAt(0)) &&
       left[0].length > 0 && puRParenOKChar(left[0].charAt(left[0].length - 1));

   if (!fixable) {
     /* no problem. */
     return puCons(puRaw(parts[0] + ")"), puParen(parts[1]));
   }

   var fix = puEdit(left[1] + ")" + right[0], ") ", puPAREN);
   return puCons(puRaw(left[0]), puCons(fix, puParen(right[1])));
 };

 // May character c sit next to a ")" whose spacing is about to be corrected?
 // Returns false for the characters listed below, true for anything else.
 // XXX perhaps should be okay-on-right and okay-on-left; this may be too conservative
 function puRParenOKChar(c) {
   if (c == ")" || c == "(" || c == '|' ||
       // otherwise we undo our linkspace fix ;)
       c == ']' ||
       // title markup
       c == '=' ||
       // sometimes people do &nbsp;
       c == '&' ||
       // quotes, obviously
       c == '"' || c == '”' || c == '’' || c == "'" ||
       // History of Russia (1900-1950)#World War II
       c == "#" ||
       // other stuff
       c == '\n' || c == ':' || c == ';' || c == '.' || c == '-' || c == '—' || c == ',' ||
       // (the '{' test used to be a bare '{', which is always truthy and
       // made this function reject every character)
       c == '}' || c == '{' || c == '<') return false;
   else return true;
 };

 // string -> chunk list pass fixing the spacing around commas.
 function puComma(t) {
   var mark = ',';
   return puCommaLike(mark, puCOMMA, t);
 };

 // Semicolon pass: runs the shared comma-like spacing check over t with
 // ';' as the separator, tagging any edits with puSEMICOLON.
 function puSemicolon(t) {
   var separator = ';';
   return puCommaLike(separator, puSEMICOLON, t);
 };

 // TODO: very important to filter out URL hits, since comma appears in lots of news URLs
 //
 // Shared spacing pass for comma-like separators.
 //   ch:   the separator character to look for
 //   what: the edit kind handed to puEdit (the callers in this file
 //         pass puCOMMA or puSEMICOLON)
 //   t:    the text to scan
 // Returns a puCons list of raw chunks and edit chunks; each edit
 // rewrites "<space>ch<space>" to ch followed by a single space.
 function puCommaLike(ch, what, t) {
   var parts = puCleave(ch, t);
   // separator not present: everything that remains is raw text
   if (parts == undefined) return puCons(puRaw(t), undefined);

   // separate the whitespace that touches the separator from the words around it
   var lead = puSplitWhiteEnd(parts[0]);
   var trail = puSplitWhiteStart(parts[1]);

   // needs correction: whitespace before the separator, or none after it
   var misSpaced = lead[1].length > 0 || trail[0].length == 0;
   // safe to correct: not rejected by the puEndsHTTP / puIsElement checks,
   // and both neighbouring characters pass puCommaOKChar
   var fixable = misSpaced &&
                 !puEndsHTTP(lead[0]) &&
                 !puIsElement(lead[0]) &&
                 trail[1].length > 0 && puCommaOKChar(trail[1].charAt(0)) &&
                 lead[0].length > 0 && puCommaOKChar(lead[0].charAt(lead[0].length - 1));

   if (!fixable) {
     /* no problem: keep the text through the separator and move on. */
     return puCons(puRaw(parts[0] + ch), puCommaLike(ch, what, parts[1]));
   }

   return puCons(puRaw(lead[0]),
                 puCons(puEdit(lead[1] + ch + trail[0], ch + ' ', what),
                        puCommaLike(ch, what, trail[1])));
 };

 // Removes whitespace that sits directly before the "]]" closing a link.
 //
 // t: the text to scan.
 // Returns a puCons list of raw chunks and puLINKSPACE edit chunks, each
 // edit replacing "<whitespace> ]]" with "]]".
 function puLinkSpace(t) {
   var a = puCleave(" ]]", t);
   if (a == undefined) return puCons(puRaw(t), undefined);

   // maybe multiple spaces...
   // (assumes puSplitWhiteEnd returns [text, trailing whitespace], which is
   //  how the other passes in this file use it -- confirm against its definition)
   var bef = puSplitWhiteEnd(a[0]);

   // filter out the common idiom <nowiki>[[Category:United States| ]]</nowiki>
   // The pipe test looks at the last non-whitespace character (bef[0]) rather
   // than the raw end of a[0]; otherwise "|  ]]" with two or more spaces slips
   // past the filter and would be rewritten to "|]]", dropping the blank sort key.
   if (a[0].length > 0 && bef[0].charAt(bef[0].length - 1) != '|') {
     return puCons(puRaw(bef[0]),
                   puCons(puEdit(bef[1] + " ]]", "]]", puLINKSPACE),
                          puLinkSpace(a[1])));
   } else {
     return puCons(puRaw(a[0] + " ]]"), puLinkSpace(a[1]));
   }
 };

 /// XXX not hooked up -- did I finish implementing this?
 // between number and %, remove space.
 // NOTE(review): the replacement text built below is "% " (whitespace
 // before the sign removed, one space after it), following the same shape
 // as the comma / paren passes. Confirm the intended behaviour before
 // enabling this pass.
 //
 // t: the text to scan.
 // Returns a puCons list of raw chunks and puPERCENT edit chunks.
 function puPercent(t) {
   var parts = puCleave("%", t);
   // no "%" left: the remainder is a single raw chunk
   if (parts == undefined) return puCons(puRaw(t), undefined);

   // separate the whitespace that touches the sign from the text around it
   var lead = puSplitWhiteEnd(parts[0]);
   var trail = puSplitWhiteStart(parts[1]);

   // needs correction: whitespace before the sign, or none after it
   var misSpaced = lead[1].length > 0 || trail[0].length == 0;
   // safe to correct: text exists on both sides and the neighbouring
   // characters are accepted by the two percent-specific predicates
   var fixable = misSpaced &&
                 trail[1].length > 0 && puPercentBeforeChar(trail[1].charAt(0)) &&
                 lead[0].length > 0 && puPercentAfterChar(lead[0].charAt(lead[0].length - 1));

   if (!fixable) {
     /* no problem: keep the text through the sign and continue after it. */
     return puCons(puRaw(parts[0] + "%"), puPercent(parts[1]));
   }

   return puCons(puRaw(lead[0]),
                 puCons(puEdit(lead[1] + "%" + trail[0], "% ", puPERCENT),
                        puPercent(trail[1])));
 };

 // Decides whether c may sit next to a comma-like separator whose spacing
 // is about to be corrected. Returns false for digits and for the markup /
 // quote characters listed below; true for everything else.
 function puCommaOKChar(c) {
   // definitely not inside numbers
   var code = c.charCodeAt(0);
   if (code >= '0'.charCodeAt(0) && code <= '9'.charCodeAt(0)) return false;

   var blockers = [
     // text in tables?
     '|',
     // quotes, obviously
     '"', '”', '’', "'",
     // link w/ underscores instead of spaces
     '_',
     '\n', '&', ',',
     // ref tags
     '{', '<'
   ];
   for (var k = 0; k < blockers.length; k++) {
     if (c == blockers[k]) return false;
   }
   return true;
 };

 // Decides whether c, when it is the first character after a ref tag,
 // is one that should have a space in front of it. Returns false for the
 // characters listed below and true for anything else.
 function puRefSpaceOKChar(c) {
   var noSpaceBefore = [
     // text in tables?
     '|',
     // parenthetical
     ')',
     // or space already...
     ' ',
     // ending image: tags
     ']',
     // ending template text
     '}',
     // before em dashes (see MOS)
     '—',
     // ending quotes...
     '"', '”', '’', "'",
     '\n', '&', ',',
     // ref tags
     '{', '<'
   ];
   for (var k = 0; k < noSpaceBefore.length; k++) {
     if (c == noSpaceBefore[k]) return false;
   }
   return true;
 };

 // Locates the first reference in t. References come in several
 // common forms:
 //   <ref>...</ref>
 //   <ref name="first">...</ref>
 //   <ref name="reused" />
 // Returns a three-element array
 //   [text before the ref, the whole ref, text after the ref]
 // or undefined when no ref is found. It also returns undefined when the
 // first candidate is malformed: a "/" inside the opening tag that is not
 // followed by ">", or a bracketing tag with no closing </ref>.
 function puGetRef(t) {
   var opener = '<ref';
   // but not this tag!
   var lookalike = '<references';

   for (var start = t.indexOf(opener); start != -1; start = t.indexOf(opener, start + 1)) {
     if (t.slice(start, start + lookalike.length) == lookalike) continue;

     // Walk the opening tag until we can tell which form this is:
     //   ">"  bracketing tag, content runs to </ref>
     //   "/>" unitary (self-closing) tag
     for (var k = start + opener.length; k < t.length; k++) {
       var ch = t.charAt(k);

       if (ch == '/') {
         if (k + 1 < t.length && t.charAt(k + 1) == '>') {
           return [t.slice(0, start), t.slice(start, k + 2), t.slice(k + 2)];
         }
         // XXX report problem?
         return undefined;
       }

       if (ch == '>') {
         // found bracketing ref tag, so now eat until </ref> is encountered.
         var split = puCleave('</ref>', t.slice(k));
         if (split == undefined) {
           // XXX warn: unclosed ref tag??
           return undefined;
         }
         return [t.slice(0, start),
                 t.slice(start, k) + split[0] + '</ref>',
                 split[1]];
       }
     }
     // ran off the end of t inside this tag; try the next candidate
   }

   // none found...
   return undefined;
 };

 // If we find a ref tag, we need to ensure the following:
 //  1. there should never be any space before the tag.
 //  2. the ref tag should appear after punctuation (except dashes)
 //       UNLESS the reference is to a specific term rather than
 //       to the sentence or comma/semicolon-separated phrase
 //       (we'll leave it up to the user to reject these false positives)
 //  3. there shouldn't be double punctuation before/after the ref
 //  4. there should be space after the ref
 //       UNLESS the reference is followed by another reference
 //       (or a dash, or legal punctuation as above)
 //
 // (this is according to the manual of style at [[wikipedia:footnotes]];
 //  and conforms to the Chicago Manual of Style)
 //
 // So, we grab any punctuation that follows the reference,
 // erase all space before the reference,
 // insert space after the ref if needed
 // and insert any trailing punctuation before the reference,
 // unless there is already punctuation there.
 //
 // t: the text to scan.
 // Returns a puCons list: raw chunks for untouched text and one puREF
 // edit chunk (built with puEditExt) per reference that needs fixing.
 function puRef(t) {
   var a = puGetRef(t);
   // no (well-formed) ref left: the remainder is a single raw chunk
   if (a == undefined) return puCons(puRaw(t), undefined);
   // bef / aft are indexed as [0], [1] below; bef[1] and aft[0] are treated
   // as the whitespace touching the tag (presumably what the split helpers
   // return -- confirm against their definitions)
   var bef = puSplitWhiteEnd(a[0]);
   var tag = a[1];
   var aft = puSplitWhiteStart(a[2]);
 
   // boolean flags
   // insist on two newlines since people frequently put refs on their own lines.
   var parend = aft[1].length > 1 && aft[1].charAt(0) == '\n' && aft[1].charAt(1) == '\n';
   // true when the text before the tag is empty or does not end in punctuation
   var nopuncbefore = bef[0].length == 0 || !(puRefPuncChar(bef[0].charAt(bef[0].length - 1)));
   // true when, additionally, the last character before the tag is one that
   // puRefNeedsPunc says should be followed by punctuation
   var needspuncbefore = nopuncbefore && bef[0].length > 0 && puRefNeedsPunc(bef[0].charAt(bef[0].length - 1));

   // the punctuation char or undefined if none
   var puncafter =  (aft[1].length > 0)?aft[1].charAt(0):undefined;
   if (puncafter != undefined && !puRefPuncChar(puncafter)) puncafter = undefined;
   if (puncafter != undefined) {
      // consume the punctuation character: from here on aft[1] starts after
      // it, which affects both needspaceafter and the recursive call below
      aft[1] = aft[1].substr(1, aft[1].length - 1);
   }
 
   // must be computed after the punctuation has been stripped from aft[1]
   var needspaceafter = aft[1].length > 0 && puRefSpaceOKChar(aft[1].charAt(0));
 
   // DEBUG
   // var what = '';
   // if (nopuncbefore) what = what + " NOPUNCBEFORE.";
   // if (parend) what = what + " PAREND.";
   // if (puncafter != undefined) what = what + " puncafter: " + puncafter;
   // if (needspaceafter) what = what + " NEEDSPACEAFTER.";
   // alert(what);
 
   if (// whitespace before?
       bef[1].length > 0 ||
       // missing necessary whitespace after?
       (aft[0].length == 0 && needspaceafter) ||
       // punctuation after?
       (puncafter != undefined) ||
       // or there is no punctuation at all and this is
       // the end of the paragraph
       (parend && needspuncbefore)) {
 
         // There's something to fix.
         // the before part will be whatever's before, plus any additional punctuation,
         // but minus any whitespace.
         var befplus;
         if (parend  // implies no punctuation after ref
             && needspuncbefore) {
            // assume period at end of paragraph.
            // XXX note, this will put the period before only the last
            // reference in a series of references at the end of
            // a paragraph, sigh
            befplus = '.';
         } else if (nopuncbefore && puncafter != undefined) {
            // move the trailing punctuation in front of the ref
            befplus = puncafter;
         } else befplus = '';
 
         // the punctuation that was consumed above is part of the old text
         var aftoldplus = '';
         if (puncafter != undefined) aftoldplus = puncafter;
 
         // XXX: should elide contents of ref in display somehow.
         return puCons(puRaw(bef[0]),
                       puCons(puEditExt(// old: 
                                        bef[1] + tag + aft[0] + aftoldplus,
                                        // new:
                                        befplus + tag + (needspaceafter?' ':''),
                                        puREF,
                                        // display versions elide the ref itself:
                                        bef[1] + '__PUREF__' + aft[0] + aftoldplus,
                                        befplus + '__PUREF__' + (needspaceafter?' ':'')),
                              puRef(aft[1]) ));
   } else {
     // no change: emit everything through the ref as raw text and
     // continue with the text after it
     return puCons(puRaw(a[0] + a[1]), puRef(a[2]));
   }
 };

 // Reports whether c is one of the punctuation marks the ref pass
 // recognises: . ; , ? ! :
 // Returns true for those, false for anything else.
 function puRefPuncChar(c) {
   var marks = ['.', ';', ',', '?', '!', ':'];
   for (var k = 0; k < marks.length; k++) {
     if (c == marks[k]) return true;
   }
   return false;
 };

 // Reports whether c (the last character before a ref tag) is one after
 // which punctuation is expected: an ASCII letter, an ASCII digit, or a
 // closing square bracket. Returns a boolean.
 //
 // Fix: the character code used to be compared against string literals
 // ('a', 'z', '0', ...). Those coerce to NaN (letters) or to the numbers
 // 0..9 (digits), so letters and digits never matched. Both sides of each
 // comparison are now character codes.
 function puRefNeedsPunc(c) {
   var code = c.charCodeAt(0);
   return (code >= 'a'.charCodeAt(0) && code <= 'z'.charCodeAt(0)) ||
          (code >= 'A'.charCodeAt(0) && code <= 'Z'.charCodeAt(0)) ||
          (code >= '0'.charCodeAt(0) && code <= '9'.charCodeAt(0)) ||
          c == ']';
 };


 // ----------------------------------------------

 // install it..
 // On load, queue addPunctuation (via a second addOnloadHook call) when the
 // page title contains "Editing " and does not contain "talk:".
 addOnloadHook(function() {
   var title = document.title;
   // not on talk pages...
   var onTalkPage = title.indexOf("talk:") != -1;
   var onEditPage = title.indexOf("Editing ") != -1;
   if (!onTalkPage && onEditPage) {
     addOnloadHook(addPunctuation);
   }
 });

 // Adds the "punctuation" tab (which runs doPunctuation) to the edit page
 // and records the edit summary as it stands at this point.
 function addPunctuation() {
   // need to see later if user has done any editing...
   var summaryField = document.editform.wpSummary;
   punctuationPageOriginalSummary = summaryField.value;

   addTab("javascript:doPunctuation()", "punctuation", "ca-punctuation", "Punctuation", "");
   akeytt();
 };

/* </nowiki> */