User:MastCell/dermimages.py
Appearance
# File: Dermimages.py
# By MastCell
# Released for any and all reuse and modification
# Use at your own risk.
# ========================
# This script does the following:
#   1. Load all articles linked to the Dermatology Task Force,
#      using Category:Dermatology task force articles.
#   2. Check each page for images, removing those which are part of
#      common templates
#   3. Output the results in a sortable wikitable which can be
#      cut-and-pasted onto Wikipedia.
#
# The goal is to assess how many dermatology-related articles currently
# lack images, and to assess the overall prevalence of images across
# derm-related articles.
#
# The script can be run from the command line. It will create a file
# called "dermimages_output.txt" in the same directory where the script
# itself resides. Note that if such a file already exists, it will be
# overwritten. The output format is meant to be cut-and-pasted into
# Wikipedia as a wikitable.
# =========================

# MWclient module for Wikimedia API calls
import mwclient

# Global set of image names to exclude.
# These are images that arrive via templates, featured-article stars and
# similar page furniture, so they should not be counted as "content" images.
# Entries are bare file names (no namespace prefix). Modify as needed.
global_exclude_list = {
    "Normal Epidermis and Dermis with Intradermal Nevus 10x.JPG",
    "LinkFA-star.png",
    "Featured article star.svg",
    "Symbol support vote.svg",
    "Rod of Asclepius2.svg",
    "Mergefrom.svg",
    "Gray944.png",
    "Question book-new.svg",
    "Ambox contradict.svg",
    "Mitotic spindle color micrograph.gif",
    "Ambox content.png",
    "Text document with red question mark.svg",
    "Edit-clear.svg",
    "UK-Medical-Bio-Stub.svg",
    "Flag of Germany.svg",
    "Commons-logo.svg",
    "Wiki letter w.svg",
    "Chromosome.svg",
    "DNA stub.png",
    "Merge-arrow.svg",
}

# Global dictionary to count how many pages have zero images, one image, two images, ...
global_imagenums = {
    "Total pages": 0,
    "Pages with zero images": 0,
    "Pages with one image": 0,
    "Pages with two images": 0,
    "Pages with three images": 0,
    "Pages with four or more images": 0,
}

# Open the site and collect pages from category
# (Note that these will generally be article talk pages, since that's
# where the Derm task force template is typically placed)
wpHandle = mwclient.Site('en.wikipedia.org')
dermTalkPages = wpHandle.Pages['Category:Dermatology task force articles']


# Main program loop: load and process each page
def main_program():
    """Write the whole report to the global ``outputFile``.

    Emits the table header, then one table row per member of
    ``dermTalkPages``, then the table footer and the per-bucket totals.
    ``global_imagenums["Total pages"]`` is incremented once per member.
    """
    setUpTable()
    for talkPage in dermTalkPages:
        # Make sure we're dealing with the article page, rather than the
        # talk page: look the title up again without its namespace prefix.
        # NOTE(review): this assumes every category member is a "Talk:" page
        # of a main-namespace article -- confirm for other namespaces.
        page = wpHandle.Pages[talkPage.page_title]

        # Increment the total page count
        global_imagenums["Total pages"] += 1

        # Load images and process them
        processPage(page, page.images())
    closeTable()
    outputDictionary()


def _asciiWikitext(text):
    """Return ``text`` as pure-ASCII text for the output file.

    Characters outside ASCII are replaced by numeric character references
    (``&#NNN;``), so writing the result can never raise an encoding error,
    whatever encoding the output file was opened with. The result is text,
    not bytes, so it can be concatenated with other strings and written to
    a text-mode file.
    """
    return text.encode("ascii", "xmlcharrefreplace").decode("ascii")


# Page processing function
def processPage(page, imageList):
    """Write one wikitable row for ``page`` and record its image count.

    The row has three cells: a link to the page, links to each counted
    image separated by ``<br>``, and the number of counted images. Images
    whose ``page_title`` appears in ``global_exclude_list`` are skipped and
    not counted.

    Parameters:
        page: object exposing a ``name`` attribute (the page title).
        imageList: iterable of objects exposing ``page_title`` and ``name``.

    Side effects: writes to the global ``outputFile`` and updates the
    counters through ``incrementPageCounter``.
    """
    imageLinks = [
        "[[:" + _asciiWikitext(image.name) + "]]"
        for image in imageList
        if image.page_title not in global_exclude_list
    ]
    imageCount = len(imageLinks)

    outputFile.write("|-\n")
    outputFile.write("| [[" + _asciiWikitext(page.name) + "]] ||")
    # One image per visual line inside the cell
    outputFile.write("<br>\n".join(imageLinks))
    outputFile.write("\n" + ' || ' + str(imageCount) + "\n")
    incrementPageCounter(imageCount)


# Function to update the dictionary of pages
def incrementPageCounter(numImages): if (numImages is 0): global_imagenums["Pages with zero images"] += 1 elif (numImages is 1): global_imagenums["Pages with one image"] += 1 elif (numImages is 2): global_imagenums["Pages with two images"] += 1 elif (numImages is 3): global_imagenums["Pages with three images"] += 1 else: global_imagenums["Pages with four or more images"] += 1 # Output the table header boilerplate def setUpTable(): outputFile.write('{| class="wikitable sortable" border="1"') outputFile.write("\n") outputFile.write('! Page !! class="unsortable" | Images !! Number of images') outputFile.write("\n") # Output the table footer boilerplate def closeTable(): outputFile.write('|}') # Output the dictionary counts def outputDictionary(): outputFile.write("\n== Totals by number of images ==\n") for key, value in global_imagenums.iteritems(): outputFile.write(key + ": ") outputFile.write(str(value)) outputFile.write("\n") ######################################## # Main program # ------------ # Opens a handle to the output file, then runs the main loop ######################################## with open('dermimages_output.txt', 'w') as outputFile: main_program()