User:Broc/update table FEB24.py
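"""Update the February 2024 unreferenced-articles backlog drive leaderboard.

Reads the leaderboard table from [[User:BaranBOT/FEB24DriveLeaderboard]],
recounts each participant's tagged articles (from sigma.toolforge.org edit
summaries containing "FEB24") and their reviews (from the drive's Reviews
page), rewrites the N1/N2 columns and the "Last updated" timestamp, and
saves the result back with pywikibot.
"""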
import requests
import re
import time
from bs4 import BeautifulSoup
from datetime import datetime
import pywikibot
def download_from_wikipedia(page_title):
    site = pywikibot.Site('en', 'wikipedia')  # Change 'en' to the language code of the Wikipedia you're working with
    page = pywikibot.Page(site, page_title)
    content = page.text
    return content
def upload_to_wikipedia(page_title, modified_text):
    site = pywikibot.Site('en', 'wikipedia')  # Change 'en' to the language code of the Wikipedia you're working with
    page = pywikibot.Page(site, page_title)
    # Edit the page with the modified text
    page.text = modified_text
    page.save("Updated leaderboard using pywikibot")
def get_n1_value(username, session):
    # Construct the URL for the sigma tool
    url = f'https://sigma.toolforge.org/summary.py?name={username}&search=FEB24&max=500&server=enwiki&ns=Main&enddate=20240201&startdate='
    # Send a GET request to the URL using the session
    response = session.get(url)
    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        # Parse the HTML content
        soup = BeautifulSoup(response.content, 'html.parser')
        # Find all unordered lists on the page
        ul_elements = soup.find_all('ul')
        # Check if there is a second <ul>
        if len(ul_elements) >= 2:
            second_ul_element = ul_elements[1]
            # Find all list items in the second <ul>
            li_elements = second_ul_element.find_all('li')
            # List to store unique PAGENAMEs
            unique_pagename_list = []
            # Iterate through list items
            for li_element in li_elements:
                # Find all <a> elements within the <li>
                a_elements = li_element.find_all('a')
                # Check if there are at least 4 <a> elements
                if len(a_elements) >= 4:
                    # Get the URL in the 4th <a> element
                    pagename_url = a_elements[3].get('href', '')
                    # Extract PAGENAME from the URL (you may need to adjust this based on the actual URL structure)
                    pagename_match = re.search(r'/wiki/([^?&#]+)', pagename_url)
                    if pagename_match:
                        pagename = pagename_match.group(1)
                        # Find the <i> tag following the last <a> tag
                        i_tag = a_elements[-1].find_next('i')
                        # Check if "feb24" is contained in the <i> tag and "feb24review" is not
                        if i_tag and 'feb24' in i_tag.get_text().lower() and 'feb24review' not in i_tag.get_text().lower():
                            unique_pagename_list.append(pagename)
            # Print the list of PAGENAMEs per user
            #print(f"User: {username}, PAGENAMEs: {unique_pagename_list}")
            # Count the number of unique PAGENAMEs
            n1_value = len(set(unique_pagename_list))
            return n1_value, unique_pagename_list
    # If the request was not successful or no second <ul> was found, return None
    return None, None
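# Illustrative shape of one <li> the parser above expects (reconstructed from
# the parsing logic, not copied from sigma.toolforge.org's actual output):
#
#   <li>
#     <a ...>…</a> <a ...>…</a> <a ...>…</a>
#     <a href="https://en.wikipedia.org/wiki/PAGENAME">PAGENAME</a>
#     ... <i>edit summary mentioning FEB24</i>
#   </li>
#
# The 4th <a>'s href yields PAGENAME, and the following <i> (the edit summary)
# must contain "feb24" but not "feb24review" for the page to be counted.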
def parse_table(text, review_text):
    # Create a session for making requests
    session = requests.Session()
    # Keep track of processed usernames to avoid duplicate processing
    processed_usernames = set()
    # Find the starting index of the table
    start_index = text.find('{| class="wikitable sortable"')
    # Check if the table exists in the text
    if start_index != -1:
        # Find the ending index of the table
        end_index = text.find('|}', start_index)
        # Extract the table content
        table_content = text[start_index:end_index]
        # Split the table content into lines
        lines = table_content.split('\n')
        # Iterate through lines
        for i, line in enumerate(lines):
            if '![[User:' in line:
                # Extract username from the line
                username = re.search(r'\[\[User:([^]]+)', line).group(1)
                # Check if the username has already been processed
                if username not in processed_usernames:
                    # Get N1 value using the external function
                    n1_value, _ = get_n1_value(username, session)
                    n2_value = count_reviews_in_section(review_text, username)
                    if n1_value is not None:
                        # N1 is two lines after the username, N2 one line after that
                        n1_line_index = i + 2
                        n2_line_index = i + 3
                        if n2_line_index < len(lines):
                            # Update the N1 value in the line
                            lines[n1_line_index] = f"| {n1_value}"
                            # Each review is worth half a point; ":g" drops a trailing ".0"
                            lines[n2_line_index] = f"| {n2_value / 2:g}"
                            # Debug prints
                            print(f"User: {username}")
                            print(f"New N1 line: {lines[n1_line_index]}")
                            print(f"New N2 line: {lines[n2_line_index]}")
                    # Mark the username as processed
                    processed_usernames.add(username)
                    # Introduce a delay between calls (adjust as needed)
                    time.sleep(0.1)  # 0.1-second delay
        # Join the lines back together
        modified_table = '\n'.join(lines)
        # Replace the original table with the modified one
        text = text[:start_index] + modified_table + text[end_index:]
    # Define the regex pattern to match the date string
    date_pattern = re.compile(r'Last updated ([A-Za-z]+ \d{1,2}, \d{1,2}:\d{2} UTC\.)')
    # Find the date string in the content
    match = date_pattern.search(text)
    print(match)
    if match:
        # Extract the matched date string
        old_date_string = match.group(0)
        # Get the current timestamp in the same format
        current_timestamp = datetime.utcnow().strftime('Last updated %B %d, %H:%M UTC.')
        print(current_timestamp)
        # Replace the old date string with the current timestamp
        text = text.replace(old_date_string, current_timestamp)
    # Save the modified content to a new file
    with open('updated_table.txt', 'w') as new_file:
        new_file.write(text)
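# Illustrative leaderboard row layout assumed by the i+2 / i+3 offsets above
# (a sketch reconstructed from the code, not copied from the live page):
#
#   |-
#   ! [[User:ExampleUser|ExampleUser]]   <- line i (username header cell)
#   | ...                                <- line i + 1
#   | 42                                 <- line i + 2: N1 (articles referenced)
#   | 3.5                                <- line i + 3: N2 (reviews / 2)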
def count_reviews_in_section(file_content, section_name):
    # Count numbered list items ("#" lines) in the "== section_name ==" section,
    # skipping "#:" continuation/comment lines.
    lines = file_content.split('\n')
    in_target_section = False
    item_count = 0
    for line in lines:
        section_match = re.match(r'^==\s*([^=]+)\s*==$', line)
        if section_match:
            current_section = section_match.group(1).strip()
            in_target_section = (current_section == section_name)
        elif in_target_section and line.startswith('#') and not line.startswith('#:'):
            item_count += 1
    return item_count
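# Illustrative Reviews-page markup counted above (a sketch, assuming one
# "== Username ==" section per participant):
#
#   == ExampleUser ==
#   # [[Some article]] - looks good
#   #: follow-up note (starts with "#:", so it is not counted)
#   # [[Another article]] - needs more sources
#
# This would return 2 for section_name == "ExampleUser".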
if __name__ == "__main__":
    wikipedia_page_title = "User:BaranBOT/FEB24DriveLeaderboard"  # Replace with the title of the Wikipedia page you want to edit
    content = download_from_wikipedia(wikipedia_page_title)
    reviews_page = "Wikipedia:WikiProject_Unreferenced_articles/Backlog_drives/February 2024/Reviews"
    reviews_text = download_from_wikipedia(reviews_page)
    parse_table(content, reviews_text)
    with open('updated_table.txt', 'r') as file:
        updated_content = file.read()
    upload_to_wikipedia(wikipedia_page_title, updated_content)
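# Running this script requires a working pywikibot setup: a user-config.py with
# credentials for an account allowed to edit the leaderboard page (typically a
# bot account). A minimal invocation, assuming the file is saved locally as
# update_table_feb24.py:
#
#   python update_table_feb24.py
#
# The modified wikitext is written to updated_table.txt before being re-read
# and uploaded, so it can also be inspected locally after a run.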