Jump to content

User:Proteins/articlestructure.js

From Wikipedia, the free encyclopedia
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
//<pre>
// Analyze the article's structure
// With kind regards to Dr. pda, whose excellent prosesizebytes.js script was the inspiration.
//
// To use this script, add "importScript('User:Proteins/articlestructure.js');" to the monobook.js
// subpage of your user page; see User:Proteins/monobook.js for an example.

function articleStructure() {
	var alert_string = "";
	var diagnostic_string = "";
	var read_entire_article = true;

	var show_lead_diagnostics = true;
	var show_section_diagnostics = false;
	var display_individual_words = false;

	var using_Internet_Explorer = false;

	var spaced_text = "";
	var untagged_text = "";
	var stripped_text = "";
	var unescaped_text = "";

	var anchors;
	var temp_anchor;
	var section_name = "";
	var temp_anchor_name = "";

	var num_anchors = 0;
	var anchor_index = 0;
	var anchor_level = 0;
	var prev_anchor_level = 0;
	var num_H2_anchors = 0;
	var H2_anchor_index = 0;

	var cutoff_anchor_index = 0;
	var cutoff_H2_anchor_index = 0;
	var cutoff_child_node_index = 0;
	var last_P_child_node_index = 0;
	var cutoff_element_node_index = 0;

	var num_sections = 0;
	var section_index = 0;

	var element_node;
	var num_element_nodes = 0;
	var element_node_index = 0;

	var temp_node_name = "";

	var parent_node;
	var grandparent_node;
	var greatgrandparent_node;

	var sibling_node;
	var next_sibling_node;

	var child_node;
	var child_nodes;
	var prev_child_node;
	var num_child_nodes = 0;
	var child_node_index = 0;
	var child_node_name = "";
	var num_prose_counted_nodes = 0;

	var grandchild_node;
	var grandchild_nodes;
	var num_grandchild_nodes = 0;
	var grandchild_node_index = 0;

	var path_names;
	var file_name = "";

	var num_characters = 0;
	var del_num_characters = 0;
	var temp_num_characters = 0;

	var temp_word = "";
	var num_words = 0;
	var word_count = 0;
	var word_index = 0;
	var nonempty_word_index = 0;
	var tentative_num_words = 0;

	var num_spaces = 0;
	var paragraph_count = 0;
	var list_item_count = 0;
	var prose_size_bytes = 0;
	var total_word_count = 0;
	var total_paragraph_count = 0;
	var total_list_item_count = 0;
	var total_prose_size_bytes = 0;
	var section_word_count = new Array();
	var section_paragraph_count = new Array();
	var section_list_item_count = new Array();
	var section_prose_size_bytes = new Array();

	var word_count_string = "";
	var paragraph_count_string = "";
	var list_item_count_string = "";
	var prose_size_bytes_string = "";

	var temp_paragraph;
	var text_paragraphs;
	var num_paragraphs = 0;
	var paragraph_index = 0;

	var temp_list_item;
	var text_list_items;
	var num_list_items = 0;
	var list_item_index = 0;

	var temp_image;
	var num_pixels = 0;
	var image_index = 0;
	var image_counter = 0;
	var num_raw_images = 0;
	var num_nonicon_images = 0;

	var num_anchors = 0;
	var num_raw_links = 0;
	var num_raw_tables = 0;
	var num_raw_references = 0;


// check for Internet Explorer browser 

	using_Internet_Explorer = false;
	if (navigator.userAgent.indexOf("MSIE") > -1) { 
		using_Internet_Explorer = true;
//		alert_string = "This script works correctly in every browser — except Internet Explorer.  Please be patient!"
//		window.alert(alert_string);	
	}

// Find the cutoff H2 anchor index, where we stop counting things
	alert_string = "";
	num_H2_anchors = 0;
	section_name = "lead section";
	prev_anchor_level = 1;  //begin at the H1 heading
	read_entire_article = true;
	anchors = document.anchors;
	num_anchors = anchors.length;
	for (anchor_index=1; anchor_index<num_anchors; anchor_index++) {
		temp_anchor = anchors[anchor_index];

		parent_node = temp_anchor.parentNode; 
		if (!parent_node) { continue; }

		sibling_node = parent_node.nextSibling;
		if (!sibling_node) { continue; }

// Check headings for jumps upwards in heading level
		anchor_level = 0;
		if (sibling_node.nodeName == "H1") {
			alert_string += "  WARNING: Illegal H1 heading in this section\n";
		} else if (sibling_node.nodeName == "H2") {
			anchor_level = 2;	
		} else if (sibling_node.nodeName == "H3") {
			anchor_level = 3;	
		} else if (sibling_node.nodeName == "H4") {
			anchor_level = 4;	
		} else if (sibling_node.nodeName == "H5") {
			anchor_level = 5;	
		}  else { 
			next_sibling_node = sibling_node.nextSibling;
			if (!next_sibling_node) { continue; }

// Check headings for jumps upwards in heading level
			if (next_sibling_node.nodeName == "H1") {
				alert_string += "  WARNING: Illegal H1 heading in this section\n";
			} else if (next_sibling_node.nodeName == "H2") {
				anchor_level = 2;	
			} else if (next_sibling_node.nodeName == "H3") {
				anchor_level = 3;	
			} else if (next_sibling_node.nodeName == "H4") {
				anchor_level = 4;	
			} else if (next_sibling_node.nodeName == "H5") {
				anchor_level = 5;	
			}
		} // closes assignment of the anchor level, if any 
 	
		if (((anchor_level - prev_anchor_level) > 1) && (prev_anchor_level != 0)) {
			if (num_H2_anchors == 0) {
				alert_string += "  WARNING: H" + prev_anchor_level + " to H" + anchor_level + " jump in the lead\n";
			} else { 
				alert_string += "  WARNING: H" + prev_anchor_level + " to H" + anchor_level + " jump in \"" + section_name.replace(/(_+)/ig, " ") + "\"\n";
			}
		}
		if (anchor_level > 0) { prev_anchor_level = anchor_level; }

//Check major section headings for closing sections
		if (anchor_level == 2) { 
			num_H2_anchors++;
			section_name = temp_anchor.name;
			temp_anchor_name = temp_anchor.name;

			alert_string += "Section " + num_H2_anchors + " : " + section_name.replace(/(_+)/ig, " ") + "\n";
//			alert_string += "Section " + num_H2_anchors + " : " + section_name.replace(/(_+)/ig, " ") + " " + temp_anchor.parentNode.nodeName + " " + sibling_node.nodeName + "\n";

			temp_anchor_name = temp_anchor_name.replace(/:$/ig,""); // eliminate colons at end
			temp_anchor_name = temp_anchor_name.replace(/s$/ig,""); // eliminate plurals at end
			temp_anchor_name = temp_anchor_name.replace(/See_also/ig,"");
			temp_anchor_name = temp_anchor_name.replace(/Related_topic/ig,"");
			temp_anchor_name = temp_anchor_name.replace(/Related_article/ig,"");
			temp_anchor_name = temp_anchor_name.replace(/Further_reading/ig,"");
			temp_anchor_name = temp_anchor_name.replace(/External_link/ig,"");
			temp_anchor_name = temp_anchor_name.replace(/Footnote/ig,"");
			temp_anchor_name = temp_anchor_name.replace(/Note/ig,"");
			temp_anchor_name = temp_anchor_name.replace(/Reference/ig,"");
			temp_anchor_name = temp_anchor_name.replace(/Citation/ig,"");
			temp_anchor_name = temp_anchor_name.replace(/Source/ig,"");
			temp_anchor_name = temp_anchor_name.replace(/Link/ig,"");
			temp_anchor_name = temp_anchor_name.replace(/s([_\s]+)and([_\s]+)/ig,"");
			temp_anchor_name = temp_anchor_name.replace(/([_\s]+)and([_\s]+)/ig,"");
			temp_anchor_name = temp_anchor_name.replace(/([_\s]+)/ig,"");
			if (temp_anchor_name == "") { break; }

//			diagnostic_string = "Section " + num_H2_anchors + " : " + temp_anchor_name + " L: " + temp_anchor_name.length;
//			window.alert(diagnostic_string);

		} // closes check for H2 anchor
	} // closes loop over the anchors
	cutoff_anchor_index = anchor_index;
	cutoff_H2_anchor_index = num_H2_anchors;
	if (cutoff_anchor_index < num_anchors) {
		read_entire_article = false;
		alert_string += "\nProse counting will stop before the \"" + temp_anchor.name.replace(/(_+)/ig, " ") + "\" section.\n";
	} else {
		read_entire_article = true;
		alert_string += "\nProse counting will cover the entire article.\n";
	}
	window.alert(alert_string);


// Count child and element nodes 
	alert_string = "";
	num_element_nodes = 0;
	child_nodes = document.getElementById("bodyContent").childNodes;
	num_child_nodes = child_nodes.length;
//	if (num_child_nodes > 40) { num_child_nodes = 40;} // truncate loop for testing

	for (child_node_index=0; child_node_index < num_child_nodes; child_node_index++) {
		child_node = child_nodes[child_node_index];
		if (child_node.nodeType != 1) {
//			alert_string += "Child node " + child_node_index + " : " + child_node.nodeName + "\n"; 
			continue; 
		} // examine only Element nodes
		num_element_nodes++;
//		alert_string += "Element node " + num_element_nodes + " : " + child_node.nodeName + "\n"; 
	} // closes loop counting the element nodes
//	window.alert(alert_string);

// Determine the corresponding childNode index cutoff
	alert_string = "";
	if (read_entire_article == true) {
		cutoff_child_node_index = num_child_nodes;
		cutoff_element_node_index = num_element_nodes;
	} else {
		H2_anchor_index = 0;
		element_node_index = 0;
		last_P_child_node_index = -1;
		last_P_element_node_index = -1;
		for (child_node_index=0; child_node_index < num_child_nodes; child_node_index++) {
			child_node = child_nodes[child_node_index];
			if (child_node.nodeType != 1) { continue; } // examine only Element nodes
			element_node_index++;

			if (child_node.nodeName == "P") {
				last_P_child_node_index = child_node_index;
				last_P_element_node_index = num_element_nodes;
			} else if (child_node.nodeName == "H2") {
				H2_anchor_index++;
				if (H2_anchor_index == cutoff_H2_anchor_index) { 
					cutoff_child_node_index = last_P_child_node_index;
					cutoff_element_node_index = last_P_element_node_index;
					break; 
				}
			}
//			alert_string += "Section " + H2_anchor_index + ", Element node " + num_element_nodes + " : " + child_node.nodeName + " " + child_node.childNodes.length + "\n";
//			if (num_element_nodes > 45) { break; } // for debugging
		} // closes loop over the childNodes of the Document
		if (last_P_child_node_index < 0) { // if no cutoff was discovered; should never happen
			cutoff_child_node_index = num_child_nodes;
			cutoff_element_node_index = num_element_nodes;
		}
	} // closes check whether to read entire article
	alert_string = "\nThe child_node_index and element_node_index cutoffs are " + cutoff_child_node_index + " and " + cutoff_element_node_index + ", respectively.\n";
//	window.alert(alert_string);


// Count the words, paragraphs and prose size bytes by section
	word_count = 0;
	paragraph_count = 0;
	list_item_count = 0;
	prose_size_bytes = 0;
	num_prose_counted_nodes = 0;

	H2_anchor_index = 0;
	for (child_node_index=0; child_node_index < cutoff_child_node_index; child_node_index++) {
		child_node = child_nodes[child_node_index];
		if (child_node.nodeType != 1) { continue; } // examine only Element nodes
		element_node_index++;

		if (child_node.nodeName == "H2") {
			section_word_count.push(word_count);
			section_paragraph_count.push(paragraph_count);
			section_list_item_count.push(list_item_count);
			section_prose_size_bytes.push(prose_size_bytes);

			H2_anchor_index++;
			word_count = 0;
			paragraph_count = 0;
			list_item_count = 0;
			prose_size_bytes = 0;
		}

// if the child node meets the criteria, add to the prose size, word and paragraph counts
		if ((child_node.nodeName == "P") || (child_node.nodeName == "PRE")) {

			untagged_text = child_node.innerHTML;
			untagged_text = untagged_text.replace(/<sup>/ig,""); // keep simple superscript text
			untagged_text = untagged_text.replace(/(<sup([^>]+)>)(.*?<\/sup>)/ig,""); // remove superscript text
			untagged_text = untagged_text.replace(/(<([^>]+)>)/ig,""); // remove remaining tags
			untagged_text = untagged_text.replace(/&gt;/ig, ">"); // convert &gt; to a single character >
			untagged_text = untagged_text.replace(/&lt;/ig, "<"); // convert &lt; to a single character <
			untagged_text = untagged_text.replace(/&amp;/ig, "&"); // convert &amp; to a single character &
			untagged_text = untagged_text.replace(/—/ig, ", "); // replace em-dashes with comma+space

			spaced_text = untagged_text.replace(/&nbsp;/ig, " ");  // convert non-breaking spaces to spaces
			spaced_text = spaced_text.replace(/&#160;/ig, " ");  // convert non-breaking spaces to spaces
			spaced_text = spaced_text.replace(/\s+/ig, " "); // convert all whitespace to a single space

//			spaced_text = filterStringForProseSizeCounting(untagged_text);
			words = spaced_text.split(' ');		
			tentative_num_words = words.length;
			if (tentative_num_words > 0) { // verify that the paragraph contributes text
				num_words = 0;
				num_characters = 0;
				for (word_index=0; word_index<tentative_num_words; word_index++) {
					temp_word = words[word_index];
					del_num_characters = temp_word.length;
					if (del_num_characters > 0) {
						num_words++;
						num_characters += del_num_characters; 
					}
				}
				if (num_words > 0) {
					paragraph_count++;
					num_prose_counted_nodes++;
					word_count += num_words;
					prose_size_bytes += num_characters;
					num_spaces = num_words - 1;
					prose_size_bytes += num_spaces; // add spaces to character count
					child_node.style.cssText = "background-color:yellow";
					
					// Code for testing output
					if ((!show_section_diagnostics) && ((!show_lead_diagnostics) || (H2_anchor_index != 0))) { 
						continue;
					}
					diagnostic_string = "";
					nonempty_word_index = 0;
					temp_num_characters = 0; 
					for (word_index=0; word_index<tentative_num_words; word_index++) {
						if ((word_index%45 == 1) && (word_index>45) && (display_individual_words)) {
							window.alert(diagnostic_string);
							diagnostic_string = "Continued from previous screen:\n\n";
						}
						temp_word = words[word_index];
						del_num_characters = temp_word.length;
						if (del_num_characters > 0) {
							nonempty_word_index++;
							temp_num_characters += del_num_characters;  
							diagnostic_string += "Section " + H2_anchor_index + ", Paragraph " + paragraph_count + ", Word " + nonempty_word_index + " : " + temp_word + " " + del_num_characters + "  " + temp_num_characters + "\n";
						}
					}
					temp_num_characters += num_spaces;
					diagnostic_string += "Added " + num_spaces + " spaces to the byte count.\n\n";
					if (display_individual_words) {
						diagnostic_string += "\nEND of paragraph " + paragraph_count + " of Section " + H2_anchor_index + ": character count = " + temp_num_characters + " total= " + prose_size_bytes + "\n"; 
						window.alert(diagnostic_string); 
					}
				} // closes check for non-empty paragraph
			} // tentative check for words
		} else if ((child_node.nodeName == "UL") || (child_node.nodeName == "OL")) { // unordered and ordered lists
			grandchild_nodes = child_node.childNodes; // not all LI elements because of possible nesting
			num_grandchild_nodes = grandchild_nodes.length;
			for (grandchild_node_index=0; grandchild_node_index<num_grandchild_nodes; grandchild_node_index++) {
				grandchild_node = grandchild_nodes[grandchild_node_index];
				if (grandchild_node.nodeName == "LI") {
					untagged_text = grandchild_node.innerHTML;
					untagged_text = untagged_text.replace(/<sup>/ig,""); // keep simple superscript text
					untagged_text = untagged_text.replace(/(<sup([^>]+)>)(.*?<\/sup>)/ig,""); // remove superscript text
					untagged_text = untagged_text.replace(/(<([^>]+)>)/ig,""); // remove remaining tags
					untagged_text = untagged_text.replace(/&gt;/ig, ">"); // convert &gt; to a single character >
					untagged_text = untagged_text.replace(/&lt;/ig, "<"); // convert &lt; to a single character <
					untagged_text = untagged_text.replace(/&amp;/ig, "&"); // convert &amp; to a single character &
					untagged_text = untagged_text.replace(/—/ig, ", "); // replace em-dashes with comma+space

					spaced_text = untagged_text.replace(/&nbsp;/ig, " ");  // convert non-breaking spaces to spaces
					spaced_text = spaced_text.replace(/&#160;/ig, " ");  // convert non-breaking spaces to spaces
					spaced_text = spaced_text.replace(/\s+/ig, " "); // convert all whitespace to a single space

					words = spaced_text.split(' ');		
					tentative_num_words = words.length;
					if (tentative_num_words > 0) { // verify that the list item contributes text
						num_words = 0;
						num_characters = 0;
						for (word_index=0; word_index<tentative_num_words; word_index++) {
							temp_word = words[word_index];
							del_num_characters = temp_word.length;
							if (del_num_characters > 0) {
								num_words++;
								num_characters += del_num_characters; 
							}
						}
						if (num_words > 0) {
							list_item_count++;
							num_prose_counted_nodes++;
							word_count += num_words;
							prose_size_bytes += num_characters;
							num_spaces = num_words - 1;
							prose_size_bytes += num_spaces; // add spaces to character count
							child_node.style.cssText = "background-color:yellow";

							// Code for testing output
							if ((!show_section_diagnostics) && ((!show_lead_diagnostics) || (H2_anchor_index != 0))) { 
								continue;
							}
							diagnostic_string = "";
							nonempty_word_index = 0; 
							temp_num_characters = 0; 
							for (word_index=0; word_index<tentative_num_words; word_index++) {
								if ((word_index%45 == 1) && (word_index>45) && (display_individual_words)) {
									window.alert(diagnostic_string);
									diagnostic_string = "Continued from previous screen:\n\n";
								}
								temp_word = words[word_index];
								del_num_characters = temp_word.length;
								if (del_num_characters > 0) {
									nonempty_word_index++;
									temp_num_characters += del_num_characters;  
									diagnostic_string += "Section " + H2_anchor_index + ", Paragraph " + paragraph_count + ", List item " + list_item_count + ", Word " + nonempty_word_index + " : " + temp_word + " " + del_num_characters + "\n";
								}
							}
							temp_num_characters += num_spaces;
							diagnostic_string += "Added " + num_spaces + " spaces to the byte count.\n\n";
							if (display_individual_words) {
								diagnostic_string += "\nEND of list item " + list_item_count + " of Section " + H2_anchor_index + ": character count = " + temp_num_characters + " total= " + prose_size_bytes + "\n"; 
								window.alert(diagnostic_string); 
							}
						} // closes check for non-empty list item
					} // tentative check for words
				} // closes check for a list item (LI) node
			} // closes loop over grandchild nodes of an ordered (OL) or unordered (UL) list
		} else if (child_node.nodeName == "DL") { // discursive lists
			grandchild_nodes = child_node.childNodes;			
			num_grandchild_nodes = grandchild_nodes.length;
			for (grandchild_node_index=0; grandchild_node_index<num_grandchild_nodes; grandchild_node_index++) {
				grandchild_node = grandchild_nodes[grandchild_node_index];
				if ((grandchild_node.nodeName == "DT") || (grandchild_node.nodeName == "DD")) {
					// Exceptions that shouldn't be counted
					if (grandchild_node.childNodes.length > 0) {
						temp_node_name = grandchild_node.childNodes[0].nodeName;
						if ((temp_node_name == "DIV") || (temp_node_name == "SPAN")) { continue; }
					}
					if (grandchild_node.childNodes.length > 1) {
						temp_node_name = grandchild_node.childNodes[1].nodeName;
						if ((temp_node_name == "DIV") || (temp_node_name == "SPAN")) { continue; }
					}

					untagged_text = grandchild_node.innerHTML;
					untagged_text = untagged_text.replace(/<sup>/ig,""); // keep simple superscript text
					untagged_text = untagged_text.replace(/(<sup([^>]+)>)(.*?<\/sup>)/ig,""); // remove superscript text
					untagged_text = untagged_text.replace(/(<([^>]+)>)/ig,""); // remove remaining tags
					untagged_text = untagged_text.replace(/&gt;/ig, ">"); // convert &gt; to a single character >
					untagged_text = untagged_text.replace(/&lt;/ig, "<"); // convert &lt; to a single character <
					untagged_text = untagged_text.replace(/&amp;/ig, "&"); // convert &amp; to a single character &
					untagged_text = untagged_text.replace(/—/ig, ", "); // replace em-dashes with comma+space

					spaced_text = untagged_text.replace(/&nbsp;/ig, " ");  // convert non-breaking spaces to spaces
					spaced_text = spaced_text.replace(/&#160;/ig, " ");  // convert non-breaking spaces to spaces
					spaced_text = spaced_text.replace(/\s+/ig, " "); // convert all whitespace to a single space

					words = spaced_text.split(' ');		
					tentative_num_words = words.length;
					if (tentative_num_words > 0) { // verify that the list item contributes text
						num_words = 0;
						num_characters = 0;
						for (word_index=0; word_index<tentative_num_words; word_index++) {
							temp_word = words[word_index];
							del_num_characters = temp_word.length;
							if (del_num_characters > 0) {
								num_words++;
								num_characters += del_num_characters; 
							}
						}
						if (num_words > 0) {
							list_item_count++;
							num_prose_counted_nodes++;
							word_count += num_words;
							prose_size_bytes += num_characters;
							num_spaces = num_words - 1;
							prose_size_bytes += num_spaces; // add spaces to character count
							child_node.style.cssText = "background-color:yellow";

							// Code for testing output
							if ((!show_section_diagnostics) && ((!show_lead_diagnostics) || (H2_anchor_index != 0))) { 
								continue;
							}
							diagnostic_string = "";
							nonempty_word_index = 0;
							temp_num_characters = 0;  
							for (word_index=0; word_index<tentative_num_words; word_index++) {
								if ((word_index%45 == 1) && (word_index>45) && (display_individual_words)) {
									window.alert(diagnostic_string);
									diagnostic_string = "Continued from previous screen:\n\n";
								}
								temp_word = words[word_index];
								del_num_characters = temp_word.length;
								if (del_num_characters > 0) {
									nonempty_word_index++;
									temp_num_characters += del_num_characters;  
									diagnostic_string += "Section " + H2_anchor_index + ", Paragraph " + paragraph_count + ", List item " + list_item_count + ", Word " + nonempty_word_index + " : " + temp_word + " " + del_num_characters + "\n";								}
							}
							temp_num_characters += num_spaces;
							diagnostic_string += "Added " + num_spaces + " spaces to the byte count.\n\n";
							if (display_individual_words) {
								diagnostic_string += "\nEND of discursive list item " + list_item_count + " of Section " + H2_anchor_index + ": character count = " + temp_num_characters + " total= " + prose_size_bytes + "\n"; 
								window.alert(diagnostic_string); 
							}
						} // closes check for non-empty list item
					} // tentative check for words
				} // closes check for a discursive list item (DT or DD) node
			} // closes loop over grandchild nodes of a discursive list DL
		} else if (child_node.nodeName == "BLOCKQUOTE") {
			grandchild_nodes = child_node.getElementsByTagName("P");			
			num_grandchild_nodes = grandchild_nodes.length;
			for (grandchild_node_index=0; grandchild_node_index<num_grandchild_nodes; grandchild_node_index++) {
				grandchild_node = grandchild_nodes[grandchild_node_index];
				if (grandchild_node.nodeName == "P") {
					untagged_text = grandchild_node.innerHTML;
					untagged_text = untagged_text.replace(/<sup>/ig,""); // keep simple superscript text
					untagged_text = untagged_text.replace(/(<sup([^>]+)>)(.*?<\/sup>)/ig,""); // remove superscript text
					untagged_text = untagged_text.replace(/(<([^>]+)>)/ig,""); // remove remaining tags
					untagged_text = untagged_text.replace(/&gt;/ig, ">"); // convert &gt; to a single character >
					untagged_text = untagged_text.replace(/&lt;/ig, "<"); // convert &lt; to a single character <
					untagged_text = untagged_text.replace(/&amp;/ig, "&"); // convert &amp; to a single character &
					untagged_text = untagged_text.replace(/—/ig, ", "); // replace em-dashes with comma+space

					spaced_text = untagged_text.replace(/&nbsp;/ig, " ");  // convert non-breaking spaces to spaces
					spaced_text = spaced_text.replace(/&#160;/ig, " ");  // convert non-breaking spaces to spaces
					spaced_text = spaced_text.replace(/\s+/ig, " "); // convert all whitespace to a single space

					words = spaced_text.split(' ');		
					tentative_num_words = words.length;
					if (tentative_num_words > 0) { // verify that the list item contributes text
						num_words = 0;
						num_characters = 0;
						for (word_index=0; word_index<tentative_num_words; word_index++) {
							temp_word = words[word_index];
							del_num_characters = temp_word.length;
							if (del_num_characters > 0) {
								num_words++;
								num_characters += del_num_characters; 
							}
						}
						if (num_words > 0) {
							// don't count blockquotes, for now
							num_prose_counted_nodes++;
							word_count += num_words;
							prose_size_bytes += num_characters;
							num_spaces = num_words - 1;
							prose_size_bytes += num_spaces; // add spaces to character count
							child_node.style.cssText = "background-color:yellow";

							// Code for testing output
							if ((!show_section_diagnostics) && ((!show_lead_diagnostics) || (H2_anchor_index != 0))) { 
								continue;
							}
							diagnostic_string = "";
							nonempty_word_index = 0;
							temp_num_characters = 0;  
							for (word_index=0; word_index<tentative_num_words; word_index++) {
								if ((word_index%45 == 1) && (word_index>45) && (display_individual_words)) {
									window.alert(diagnostic_string);
									diagnostic_string = "Continued from previous screen:\n\n";
								}
								temp_word = words[word_index];
								del_num_characters = temp_word.length;
								if (del_num_characters > 0) {
									nonempty_word_index++;
									temp_num_characters += del_num_characters;  
									diagnostic_string += "Section " + H2_anchor_index + ", Paragraph " + paragraph_count + ", Word " + nonempty_word_index + " : " + temp_word + " " + del_num_characters + "\n";
								}
							}
							temp_num_characters += num_spaces;
							diagnostic_string += "Added " + num_spaces + " spaces to the byte count.\n\n";
							if (display_individual_words) {
								diagnostic_string += "\nEND of BLOCKQUOTE in Section " + H2_anchor_index + ": character count = " + temp_num_characters + " total= " + prose_size_bytes + "\n"; 
								window.alert(diagnostic_string); 
							}
						} // closes check for non-empty list item
					} // tentative check for words
				} // closes check for a paragraph (P) node in a BLOCKQUOTE
			} // closes loop over grandchild nodes in a BLOCKQUOTE
		} else if (child_node.nodeName == "TABLE") {
			if (child_node.className != "cquote") { continue; } // count only tables that are cquotes
			grandchild_nodes = child_node.getElementsByTagName("TD");			
			num_grandchild_nodes = grandchild_nodes.length;
			for (grandchild_node_index=0; grandchild_node_index<num_grandchild_nodes; grandchild_node_index++) {
				grandchild_node = grandchild_nodes[grandchild_node_index];
				if (grandchild_node.nodeName == "TD") {
					untagged_text = grandchild_node.innerHTML;
					untagged_text = untagged_text.replace(/<sup>/ig,""); // keep simple superscript text
					untagged_text = untagged_text.replace(/(<sup([^>]+)>)(.*?<\/sup>)/ig,""); // remove superscript text
					untagged_text = untagged_text.replace(/(<([^>]+)>)/ig,""); // remove remaining tags
					untagged_text = untagged_text.replace(/&gt;/ig, ">"); // convert &gt; to a single character >
					untagged_text = untagged_text.replace(/&lt;/ig, "<"); // convert &lt; to a single character <
					untagged_text = untagged_text.replace(/&amp;/ig, "&"); // convert &amp; to a single character &
					untagged_text = untagged_text.replace(/—/ig, ", "); // replace em-dashes with comma+space

					spaced_text = untagged_text.replace(/&nbsp;/ig, " ");  // convert non-breaking spaces to spaces
					spaced_text = spaced_text.replace(/&#160;/ig, " ");  // convert non-breaking spaces to spaces
					spaced_text = spaced_text.replace(/\s+/ig, " "); // convert all whitespace to a single space

					words = spaced_text.split(' ');		
					tentative_num_words = words.length;
					if (tentative_num_words > 0) { // verify that the list item contributes text
						num_words = 0;
						num_characters = 0;
						for (word_index=0; word_index<tentative_num_words; word_index++) {
							temp_word = words[word_index];
							del_num_characters = temp_word.length;
							if (del_num_characters > 0) {
								num_words++;
								num_characters += del_num_characters; 
							}
						}
						if (num_words > 0) {
							// don't count cquotes, for now
							num_prose_counted_nodes++;
							word_count += num_words;
							prose_size_bytes += num_characters;
							num_spaces = num_words - 1;
							prose_size_bytes += num_spaces; // add spaces to character count
							child_node.style.cssText = "background-color:yellow";

							// Code for testing output
							if ((!show_section_diagnostics) && ((!show_lead_diagnostics) || (H2_anchor_index != 0))) { 
								continue;
							}
							diagnostic_string = "";
							nonempty_word_index = 0; 
							temp_num_characters = 0; 
							for (word_index=0; word_index<tentative_num_words; word_index++) {
								if ((word_index%45 == 1) && (word_index>45) && (display_individual_words)) {
									window.alert(diagnostic_string);
									diagnostic_string = "Continued from previous screen:\n\n";
								}
								temp_word = words[word_index];
								del_num_characters = temp_word.length;
								if (del_num_characters > 0) {
									nonempty_word_index++;
									temp_num_characters += del_num_characters;  
									diagnostic_string += "Section " + H2_anchor_index + ", Paragraph " + paragraph_count + ", Word " + nonempty_word_index + " : " + temp_word + " " + del_num_characters + "\n";
								}
							}
							temp_num_characters += num_spaces;
							diagnostic_string += "Added " + num_spaces + " spaces to the byte count.\n\n";
							if (display_individual_words) {
								diagnostic_string += "\nEND of CQUOTE paragraph in Section " + H2_anchor_index + ": character count = " + temp_num_characters + " total= " + prose_size_bytes + "\n"; 
								window.alert(diagnostic_string); 
							}
						} // closes check for non-empty list item
					} // tentative check for words
				} // closes check for a paragraph (P) node in a CQUOTE
			} // closes loop over grandchild nodes in a CQUOTE
		} else if (child_node.nodeName == "DIV") { // Poems
			if (child_node.className != "poem") { continue; } // allow only poem DIV's
			grandchild_nodes = child_node.getElementsByTagName("P");			
			num_grandchild_nodes = grandchild_nodes.length;
			for (grandchild_node_index=0; grandchild_node_index<num_grandchild_nodes; grandchild_node_index++) {
				grandchild_node = grandchild_nodes[grandchild_node_index];
				if (grandchild_node.nodeName == "P") {
					untagged_text = grandchild_node.innerHTML;
					untagged_text = untagged_text.replace(/<sup>/ig,""); // keep simple superscript text
					untagged_text = untagged_text.replace(/(<sup([^>]+)>)(.*?<\/sup>)/ig,""); // remove superscript text
					untagged_text = untagged_text.replace(/(<([^>]+)>)/ig,""); // remove remaining tags
					untagged_text = untagged_text.replace(/&gt;/ig, ">"); // convert &gt; to a single character >
					untagged_text = untagged_text.replace(/&lt;/ig, "<"); // convert &lt; to a single character <
					untagged_text = untagged_text.replace(/&amp;/ig, "&"); // convert &amp; to a single character &
					untagged_text = untagged_text.replace(/—/ig, ", "); // replace em-dashes with comma+space

					spaced_text = untagged_text.replace(/&nbsp;/ig, " ");  // convert non-breaking spaces to spaces
					spaced_text = spaced_text.replace(/&#160;/ig, " ");  // convert non-breaking spaces to spaces
					spaced_text = spaced_text.replace(/\s+/ig, " "); // convert all whitespace to a single space

					words = spaced_text.split(' ');		
					tentative_num_words = words.length;
					if (tentative_num_words > 0) { // verify that the list item contributes text
						num_words = 0;
						num_characters = 0;
						for (word_index=0; word_index<tentative_num_words; word_index++) {
							temp_word = words[word_index];
							del_num_characters = temp_word.length;
							if (del_num_characters > 0) {
								num_words++;
								num_characters += del_num_characters; 
							}
						}
						if (num_words > 0) {
							// don't count blockquotes, for now
							num_prose_counted_nodes++;
							word_count += num_words;
							prose_size_bytes += num_characters;
							num_spaces = num_words - 1;
							prose_size_bytes += num_spaces; // add spaces to character count
							child_node.style.cssText = "background-color:yellow";

							// Code for testing output
							if ((!show_section_diagnostics) && ((!show_lead_diagnostics) || (H2_anchor_index != 0))) { 
								continue;
							}
							diagnostic_string = "";
							nonempty_word_index = 0; 
							temp_num_characters = 0; 
							for (word_index=0; word_index<tentative_num_words; word_index++) {
								if ((word_index%45 == 1) && (word_index>45) && (display_individual_words)) {
									window.alert(diagnostic_string);
									diagnostic_string = "Continued from previous screen:\n\n";
								}
								temp_word = words[word_index];
								del_num_characters = temp_word.length;
								if (del_num_characters > 0) {
									nonempty_word_index++;
									temp_num_characters += del_num_characters;  
									diagnostic_string += "Section " + H2_anchor_index + ", Paragraph " + paragraph_count + ", Word " + nonempty_word_index + " : " + temp_word + " " + del_num_characters + "\n";
								}
							}
							temp_num_characters += num_spaces;
							diagnostic_string += "Added " + num_spaces + " spaces to the byte count.\n\n";
							if (display_individual_words) {
								diagnostic_string += "\nEND of <poem> in Section " + H2_anchor_index + ": character count = " + temp_num_characters + " total= " + prose_size_bytes + "\n"; 
								window.alert(diagnostic_string); 
							}
						} // closes check for non-empty list item
					} // tentative check for words
				} // closes check for a paragraph (P) node in a poem
			} // closes loop over grandchild nodes in a poem
		} // closes check for appropriate elements
	} // closes loop over the child nodes
	section_word_count.push(word_count);
	section_paragraph_count.push(paragraph_count);
	section_list_item_count.push(list_item_count);
	section_prose_size_bytes.push(prose_size_bytes);


// Output the various counts

	word_count_string = " word";
	paragraph_count_string = " paragraph";
	list_item_count_string = " list item";
	prose_size_bytes_string = " byte";
	if (section_word_count[0] != 1) { word_count_string += "s";}
	if (section_paragraph_count[0] != 1) { paragraph_count_string += "s";}
	if (section_list_item_count[0] != 1) { list_item_count_string += "s";}
	if (section_prose_size_bytes[0] != 1) { prose_size_bytes_string += "s";}

	alert_string = "Lead section: " + section_paragraph_count[0] + paragraph_count_string + ", " + section_list_item_count[0] + list_item_count_string + ", " + section_word_count[0] + word_count_string + ", " + section_prose_size_bytes[0] + prose_size_bytes_string + "\n\n";

	total_word_count = section_word_count[0];
	total_paragraph_count = section_paragraph_count[0];
	total_list_item_count = section_list_item_count[0];
	total_prose_size_bytes = section_prose_size_bytes[0];

	num_sections = section_word_count.length;
	for (section_index=1; section_index<num_sections; section_index++) {
		total_word_count += section_word_count[section_index];
		total_paragraph_count += section_paragraph_count[section_index];
		total_list_item_count += section_list_item_count[section_index];
		total_prose_size_bytes += section_prose_size_bytes[section_index];
	
		word_count_string = " word";
		paragraph_count_string = " paragraph";
		list_item_count_string = " list item";
		prose_size_bytes_string = " byte";
		if (section_word_count[section_index] != 1) { word_count_string += "s";}
		if (section_paragraph_count[section_index] != 1) { paragraph_count_string += "s";}
		if (section_list_item_count[section_index] != 1) { list_item_count_string += "s";}
		if (section_prose_size_bytes[section_index] != 1) { prose_size_bytes_string += "s";}

		alert_string += "Section " + section_index + " : " + section_paragraph_count[section_index] + paragraph_count_string + ", " + section_list_item_count[section_index] + list_item_count_string + ", " + section_word_count[section_index] + word_count_string + ", " + section_prose_size_bytes[section_index] + prose_size_bytes_string + "\n";
	}
	if (num_sections>1) {alert_string += "\n";} // Make space for the totals

	word_count_string = " word";
	paragraph_count_string = " paragraph";
	list_item_count_string = " list item";
	prose_size_bytes_string = " byte";
	if (total_word_count != 1) { word_count_string += "s";}
	if (total_paragraph_count != 1) { paragraph_count_string += "s";}
	if (total_list_item_count != 1) { list_item_count_string += "s";}
	if (total_prose_size_bytes != 1) { prose_size_bytes_string += "s";}
	alert_string += "Totals: " + total_paragraph_count + paragraph_count_string + ", " + total_list_item_count + list_item_count_string + ", " + total_word_count + word_count_string + ", " + total_prose_size_bytes + prose_size_bytes_string + "\n";
	window.alert(alert_string);


// Count the article images
	num_nonicon_images = 0;
	num_raw_images = document.images.length;
	alert_string = "This document has " + num_raw_images + " images.\n";
	for (image_index=0; image_index<num_raw_images; image_index++) {
		temp_image = document.images[image_index];
		parent_node = temp_image.parentNode;
		grandparent_node = parent_node.parentNode;
		greatgrandparent_node = grandparent_node.parentNode;

		num_pixels = temp_image.width * temp_image.height;
		if (temp_image.src.match(/Replace_this_image_male\.svg/)) { continue; }
		if (temp_image.src.match(/Replace_this_image_female\.svg/)) { continue; }
		if (num_pixels > 5000) { num_nonicon_images++; }
	}
	if (num_nonicon_images == 1) {
		alert_string = "This document has 1 image with more than 5000 pixels.\n\n";
	} else {
		alert_string = "This document has " + num_nonicon_images + " images with more than 5000 pixels.\n\n";
	}
	image_counter = 0;
	for (image_index=0; image_index<num_raw_images; image_index++) {
		temp_image = document.images[image_index];
		parent_node = temp_image.parentNode;
		grandparent_node = parent_node.parentNode;
		greatgrandparent_node = grandparent_node.parentNode;

		num_pixels = temp_image.width * temp_image.height;
		if (temp_image.src.match(/Replace_this_image_male\.svg/)) { continue; }
		if (temp_image.src.match(/Replace_this_image_female\.svg/)) { continue; }
		if (num_pixels < 5001) { continue; }

		image_counter++;
		alert_string += image_counter + " " + temp_image.width + "x" + temp_image.height + " " + num_pixels + " ";
		path_names = temp_image.src.split("/");
		file_name = path_names.pop();
		file_name = file_name.replace(/^(\d+)px-/, "");
		alert_string += file_name + "\n";
	}
	window.alert(alert_string);
	return;


// Count the article tables and check for infoboxes and navigation templates 
	num_raw_tables = document.getElementsByTagName("table").length;

	// Check for className = "infobox vcard" or "navbox-group" 

	alert_string = "This document has " + num_raw_tables + " tables.\n";
	window.alert(alert_string);

// Count the article references
	num_raw_references = document.getElementsByTagName("li").length;

// Count the article interwikis
	num_raw_interwikis = document.getElementsByTagName("li").length;

// Count the article categories
	num_raw_categories = document.getElementsByTagName("table").length;

// Count the article anchors; for each anchor...
	alert_string = "This document has " + document.anchors.length + " anchors:\n";
	for (anchor_index=0; anchor_index<document.anchors.length; anchor_index++) {
		temp_anchor = document.anchors[anchor_index];
		alert_string += "Name " + anchor_index + ": " + temp_anchor.name + "\n";
	}
	window.alert(alert_string);

} // closes function articleStructure() 

// Register the "structure" tab once the DOM is ready.
// The legacy addOnloadHook() helper is deprecated, so the jQuery ready handler
// is used instead. The mediawiki.util module is requested explicitly because
// mw.util is not guaranteed to be loaded before a user script runs.
$(function () {
	mw.loader.using('mediawiki.util').then(function () {
		// Arguments: portlet id, href, link text, link id, tooltip, access key, next node.
		mw.util.addPortletLink('p-cactions', 'javascript:articleStructure()', 'structure', 'ca-structure', 'Structure of the article', 'g', '');
	});
});

//</pre>