Jump to content

User:Interiot2/Tool2/code.js

From Wikipedia, the free encyclopedia
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
// see http://paperlined.org/apps/wikipedia/Tool2/ for instructions on adding this to your monobook.js

// To run this tool on other servers:
//	1. copy this script to the target server (this is required because of javascript cross-site security restrictions)

//	2. update the following URL
//		for example: "User:Interiot/Tool2/code.js"
var tool2_url = "User:Interiot2/Tool2/code.js";

//	3. update this namespace list, extracted from something like http://en.wikipedia.org/wiki/Special:Export//
//			These *should not* have colons after them.
var namespaces = [
"Talk",
"User",
"User talk",
"Wikiquote",
"Wikiquote talk",
"Image",
"Image talk",
"MediaWiki",
"MediaWiki talk",
"Template",
"Template talk",
"Help",
"Help talk",
"Category",
"Category talk",
		// 3b. these two project project entries are not added by Special:Export, and might or might not need to be updated
"Wikipedia",
"Wikipedia talk"
];

//	4. update this date-parser to match the format and language of your specific wiki.  Feel free to contact Interiot regarding this, if you can't find another
//		copy of this script that uses the same language.
// input: a text string from Special:Contributions.    output: a javascript Date object
// documentation:  http://www.quirksmode.org/js/introdate.html#parse, http://www.elated.com/tutorials/programming/javascript/dates/
function date_parse(text) {
	var matches = text.match(/^([0-9:]+), +([0-9]+) +([a-z]+) +([0-9]+)$/i);
	if (!matches) {
		//dump_text("XXX");			// for debugging
		return matches;
	}

	parseme = matches[3] + ", " + matches[2] + " "  + matches[4] + " " + matches[1] + ":00";

	//dump_text(parseme);				// for debugging

	var dt = new Date();
	dt.setTime( Date.parse(parseme));

	//dump_text(dt.toLocaleString());		// for debugging

	return dt;
}

// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ end of server-specific configuration ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^



// TODO:
//	- the current document.location method doesn't work when the page is accessed sans-mod_rewrite
//	- test with non-ASCII characters
//		- non-ascii usernames
//		- ??



var prefix = "";
var params = parse_params();

var path_len = document.location.pathname.length;
// trigger once we view the right page
if (document.location.pathname.substring(path_len - tool2_url.length, path_len) == tool2_url) {
	// get the prefix (needs to be fixed to work sans-mod_rewrite
	prefix = document.location.protocol + "//" + document.location.host + "/"
		+ document.location.pathname.substring(1, path_len - tool2_url.length);

	writeCSS();


	addOnloadFunction(function() {
	    // blank the inner contents of the page
	    var bodyContent = document.getElementById("bodyContent");
	    while (bodyContent.childNodes.length > 0) bodyContent.removeChild(bodyContent.lastChild);

	    if (document.location.search.length == 0) {
	      generate_input_form(bodyContent);
	    } else {
	      generate_main_report(bodyContent);
	    }
	});
}

function generate_input_form(bodyContent) {
  bodyContent.innerHTML =
            "<form><table class='IT_form'>" +
            "             <tr><td>Username <td><input maxlength=128 name=username value='' id=username title='username'>" +
            "             <tr><td>         <td><input type=submit value='Submit'>" +
            "</table></form>";

  var form = bodyContent.getElementsByTagName("form")[0];
  form.method = "get";
  form.action = document.location;

  document.getElementById("username").focus();
}

function generate_main_report() {
  fetch_data(params["username"], "", output_main_report, 0, []);
}


	function add_stats_row(left_col, right_col) {
		var row = document.createElement("tr");
		var left = document.createElement("td");
		var right = document.createElement("td");
	
		document.getElementById("basic_stats").appendChild(row);
		row.appendChild(left);
		row.appendChild(right);
		//left.innerHTML = left_col;
		left.appendChild( document.createTextNode(left_col) );
		right.appendChild( document.createTextNode(right_col) );
		return row;
	}

function output_main_report(history) {
	// -- generate summary statistics
	var unique_articles = new Array();
	var namespace_numedits = new Array();
	for (var i=0; i<namespaces.length; i++) {
		namespace_numedits[ namespaces[i] ] = 0;
	}
	namespace_numedits[""] = 0;
	for (var i=0; i<history.length; i++) {
		var h = history[i];
		unique_articles[  h["title"] ]++;
		namespace_numedits[  h["namespace"] ]++;
	}
	var unique_articles_keys = keys(unique_articles);

	// -- output report
	var table = document.createElement("table");
	table.id = "basic_stats";
	document.getElementById("bodyContent").appendChild(table);

	add_stats_row("Username", params["username"]);
	add_stats_row("Total edits", history.length);
	add_stats_row("Distinct pages edited", unique_articles_keys.length);
	add_stats_row("Average edits/page", new Number(history.length / unique_articles_keys.length).toFixed(3));
	add_stats_row("First edit", history[ history.length-1 ]["date_text"] );

	// add a blank row
	add_stats_row("", "").childNodes[0].style.height = "1em";

	add_stats_row("(main)", namespace_numedits[""]);
	for (var i=0; i<namespaces.length; i++) {
		var nmspc = namespaces[i];
		if (namespace_numedits[nmspc]) {
			add_stats_row(nmspc, namespace_numedits[nmspc]);
		}
	}
}



// ===================================== HTML-scraping backend =========================================

function add_loading_notice() {
	if (document.getElementById("loading_notice"))
		return;
	var loading = document.createElement("div");
	loading.id = "loading_notice";
	loading.innerHTML = "<br><br>Retrieving data<blink>...</blink>";
	document.getElementById("bodyContent").appendChild(loading);
}
function remove_loading_notice() {
	var loading = document.getElementById("loading_notice");
	if (!loading) return;
	loading.parentNode.removeChild(loading);
}

var offset_regexp = /href="[^"]+:Contributions[^"]+offset=(\d+)/gi;
function fetch_data(username, end_date, handler, offset, page_list) {
	add_loading_notice();
	var url = prefix + "Special:Contributions/" + username + "?offset=" + offset + "&limit=5000";
	loadXMLDoc(url, 
		function (request) {
			var next_offset = 0;
			if (request.readyState != 4)   return;
			if (request.status == 200) {
				page_list.push(request.responseText);
				//dump_text(request.responseText);

				// see if there's another pageful to get
				var matches = map( function(p){
						return p.match( /(\d+)$/ )[0];
					}, request.responseText.match( offset_regexp ) );
				for (var i=0; i<matches.length; i++) {
					var v = matches[i] * 1;
					if (v != 0 && (offset == 0 || v < offset)) {
						next_offset = v;
						break;
					}
				}
			}

			//next_offset = 0;			// for testing only, retrieve just the first page of results

			if (next_offset == 0) {
				parse_data(page_list, handler);
			} else {
				// tail recurse
				fetch_data(username, end_date, handler, next_offset, page_list);
			}
		});
}


// input: a list of strings, each string containing the HTML from a single page
// output: a list, where each individual entry is a specific edit from history
function parse_data(page_list, handler) {
	//var total_len = 0;
	//for (var i=0; i<page_list.length; i++) total_len += page_list[i].length;
	//alert("parsing " + page_list.length + " pages comprising " + total_len + " total bytes");

	var last_history_ent = [];
	last_history_ent["title"] = "";
	last_history_ent["oldid"] = "";

	var edit_history = new Array();
	for (var pagecnt=0; pagecnt<page_list.length; pagecnt++) {
		var matches = page_list[pagecnt].match( /^<li>[^(]+\(<a href="[^"]+action=history.*/gim );
		//dump_lines(matches);
		for (var matchcnt=0; matchcnt<matches.length; matchcnt++) {
			var history_text = matches[matchcnt];

			var history_entry = new Array();
			history_entry["date_text"] = history_text.match( /^<li>([^(<]+)/i )[1]
					.replace( / +$/, "");
			history_entry["date"] = date_parse( history_entry["date_text"] );
			history_entry["title"] = history_text.match( /title="([^"]+)"/i )[1]
					.replace( /&quot;/g, "\"")
					.replace( /&amp;/g, "&");
			var find_comment = history_text.replace(/<span class="autocomment">.*?<\/span> ?/, "");
			history_entry["comment"] = ifmatch(find_comment.match( /<span class='comment'>(.*?)<\/span>/ ))
					.replace(/^\((.*)\)$/, "$1");
			history_entry["minor"] = /<span class="minor"/.test(history_text);
			history_entry["oldid"] = ifmatch(history_text.match(/oldid=([0-9]+)/i));

			history_entry["namespace"] = "";
			for (var nmspc_ctr=0; nmspc_ctr<namespaces.length; nmspc_ctr++) {
				var nmspc = namespaces[nmspc_ctr] + ":";
				if (history_entry["title"].substring(0, nmspc.length) == nmspc) {
					history_entry["namespace"] = namespaces[nmspc_ctr];
					break;
				}
			}

			//dump_object(history_entry);

			if (history_entry["title"] != last_history_ent["title"] || history_entry["oldid"] != last_history_ent["oldid"])
				edit_history.push(history_entry);
			last_history_ent = history_entry;
		}
	}

	remove_loading_notice();

	handler(edit_history);
}




// ===================================== test/debug functions =========================================

function dump_text(text) {
  //alert("dump_text, with text of size " + text.length);

  var pre = document.createElement("pre");

  var div = document.createElement("div");
  div.style.width = "60em";
  div.style.maxHeight = "40em";
  div.style.overflow = "auto";

  pre.appendChild(document.createTextNode(text));
  div.appendChild(pre);
  document.getElementById("bodyContent").appendChild(div);
}

function dump_lines(ary) {
  dump_text("--> " + ary.join("\n--> "));
}

function dump_object(obj) {
	var toString = "";
	for (var prop in obj) {
		toString += prop + ": " + obj[prop] + "\n";
	}
	dump_text(toString);
}


// ===================================== utility functions =========================================

function addOnloadFunction(f) {
  if (window.addEventListener) window.addEventListener("load",f,false);
  else if (window.attachEvent) window.attachEvent("onload",f);
  else {
    var oldOnload='_old_onload_'+addOnloadFunction.uid;
    addOnloadFunction[oldOnload] = window.onload ? window.onload : function () {};
    window.onload = function() { addOnloadFunction[oldOnload]();  f(); }
    ++addOnloadFunction.uid;
  }
}


function parse_params() {
  var pairs = document.location.search.substring(1).split("&");
  var ret = [];
  for (var i=0; i < pairs.length; i++) {
    var values = pairs[i].split("=");
    ret[values[0]] = unescape(values[1]);
  }
  return ret; 
}


function loadXMLDoc(url, handler)
{
    // branch for native XMLHttpRequest object
    if (window.XMLHttpRequest) {
        req = new XMLHttpRequest();
	req.onreadystatechange = function () {handler(req)};
        req.open("GET", url, true);
        req.send(null);
    // branch for IE/Windows ActiveX version
    } else if (window.ActiveXObject) {
        req = new ActiveXObject("Microsoft.XMLHTTP");
        if (req) {
            req.onreadystatechange = function () {handler(req)};
            req.open("GET", url, true);
            req.send();
        }
    }
}


// see http://search.cpan.org/dist/perl/pod/perlfunc.pod#map
function map (handler, list) {
  var ret = new Array();
  for (var i=0; i<list.length; i++) {
    ret[i] = handler( list[i] );
    // ret.push( handler( list[i] ) );
  }
  return ret;
}

// see http://search.cpan.org/dist/perl/pod/perlfunc.pod#keys
function keys (obj) {
	var ret = new Array();
	for (var key in obj) {
		ret.push(key);
	}
	return ret;
}


function ifmatch(ary) {
	if (ary && ary.length >= 2) {
		return ary[1];
	} else {
		return "";
	}
}


// ===================================== other =========================================

function writeCSS () {
	document.write(
		"<style>" +
		"  table.IT_form td {background-color:purple} " +
		"</style>"
	);
}