User:Rick Bot/scripts/gethopefuls
Appearance
< User:Rick Bot | scripts
#!/bin/bash
# Command used to fetch a URL. It is expanded unquoted as `$WGET "<url>"`, so
# it may carry options, and it must write the response body to stdout.
#   -s -S : no progress meter on stderr, but still report errors
#   -L    : follow HTTP redirects (the wget variant below does so by default)
WGET="/usr/bin/curl -s -S -L" # on a mac OS X
# WGET="wget -q -O -" # on a linux box with wget

# files
# Result lists; each line is "<user name> || <day Month year>".
ACTIVE="hopeful.active"
SEMIACTIVE="hopeful.semi-active"
INACTIVE="hopeful.inactive"
# Prefix of the per-user contribution cache: "$CONTRIBS.<urlname>" and
# "$CONTRIBS.<urlname>.earliest".
CONTRIBS=".contrib.times"
# List of all pages under Requests_for_adminship.
ALLRFAS="rfas"
# Prefix of the per-user RFA list: "$RFAS.<urlname>".
RFAS=".rfas"
# Local copy of the raw wikitext of the hopefuls page.
WPHopefuls="wphopefuls"
prevmonth() {
  # Print the English name of the month that comes before the month named
  # in $1 (January wraps around to December).
  # A name that is not one of the twelve months prints nothing.
  local -a cycle=(December January February March April May June July
    August September October November December)
  local idx
  for ((idx = 1; idx < ${#cycle[@]}; idx++)); do
    if [[ "$1" == "${cycle[idx]}" ]]; then
      # cycle[idx - 1] is the predecessor; slot 0 holds December for January
      echo "${cycle[idx - 1]}"
      return
    fi
  done
  return 0
}
function inactive () {
  # Decide whether a user counts as inactive.
  #
  # Arguments (six words; callers pass two "day Month year" strings unquoted
  # so that each splits into three words):
  #   $1 is day number of latest contrib
  #   $2 is month of latest contrib
  #   $3 is year of latest contrib
  #   $4 - $6 are day, month, year for today
  #
  # Returns 1 (not inactive) when the latest contrib falls in the current
  # month, in one of the two calendar months before it, or in the third
  # month back on a day number greater than today's day number.
  # Returns 0 (inactive) in every other case, including a wrong arg count.
  #
  # Working variables are local so a call does not disturb the caller's
  # globals.
  local month year back

  # if no contribs, arg count is not 6
  [ $# -ne 6 ] && return 0

  # if latest contrib is this month, not inactive
  [ "$2" = "$5" ] && [ "$3" = "$6" ] && return 1

  month=$5
  year=$6
  for back in 1 2 3; do
    month=$(prevmonth "$month")
    # stepping back from January lands in December of the previous year
    [ "$month" = "December" ] && year=$((year - 1))
    if [ "$2" = "$month" ] && [ "$3" = "$year" ]; then
      # one or two months back: not inactive
      [ "$back" -lt 3 ] && return 1
      # three months back: only if less than three months ago by day number
      [ "$1" -gt "$4" ] && return 1
    fi
  done
  return 0
}
function semiactive () {
  # Decide whether a user counts as semi-active.
  #
  # Arguments (six words; callers pass two "day Month year" strings unquoted
  # so that each splits into three words):
  #   $1 is day number of 30th most recent contrib
  #   $2 is month of 30th most recent contrib
  #   $3 is year of 30th most recent contrib
  #   $4 - $6 are day, month, year for today
  #
  # Returns 1 (not semi-active) when that contrib falls in the current
  # month, in the calendar month before it, or two months back on a day
  # number greater than today's day number.
  # Returns 0 (semi-active) in every other case.
  #
  # Working variables are local so a call does not disturb the caller's
  # globals.
  local month year back

  # Without exactly six words the comparisons below are malformed. Return
  # the same 0 the unguarded comparisons used to fall through to, but
  # without the "[" syntax errors on stderr.
  [ $# -ne 6 ] && return 0

  # if 30th most recent contrib is this month, not semi-active
  [ "$2" = "$5" ] && [ "$3" = "$6" ] && return 1

  month=$5
  year=$6
  for back in 1 2; do
    month=$(prevmonth "$month")
    # stepping back from January lands in December of the previous year
    [ "$month" = "December" ] && year=$((year - 1))
    if [ "$2" = "$month" ] && [ "$3" = "$year" ]; then
      # last month: not semi-active
      [ "$back" -lt 2 ] && return 1
      # two months back: only if less than two months ago by day number
      [ "$1" -gt "$4" ] && return 1
    fi
  done
  return 0
}
# Today's date as "day Month year". It is later expanded unquoted so that it
# splits into three words for inactive/semiactive. The C locale is forced so
# the month name is English, like the names prevmonth recognises.
TODAY=$(LC_ALL=C date +"%e %B %Y")

# Start from empty result lists; the per-user loop appends to them.
rm -f "$ACTIVE" "$INACTIVE" "$SEMIACTIVE"

# Request counter used by the per-user loop for throttling.
n=1

# Print the number of lines in file $1 as a bare integer (some wc
# implementations pad the count with blanks; the arithmetic expansion
# strips that).
count_lines() {
  local raw
  raw=$(wc -l <"$1")
  echo $((raw))
}

# Write the "User..." entries of the hopefuls category to ./hopefuls.
fetch_hopefuls() {
  ./listcat Wikipedia_administrator_hopefuls | grep "^User" >hopefuls
}

# NOTE(review): the failure paths below end the script with status 0 (the
# bare "exit" reuses the status of the echo before it). That is kept as-is
# in case whatever runs this script depends on it -- confirm whether a
# non-zero status would be preferable.

# Fetch the hopefuls list, retrying once if it looks truncated.
fetch_hopefuls
HOPEFULSSIZE=$(count_lines hopefuls)
if [ "$HOPEFULSSIZE" -lt 500 ]; then
  fetch_hopefuls
  HOPEFULSSIZE=$(count_lines hopefuls)
fi
if [ "$HOPEFULSSIZE" -lt 500 ]; then
  echo "Can't fetch hopefuls list!"
  exit 0
fi

# make a list of current admins
: >alreadyadmins
cat ../Admins/A* ../Admins/G* ../Admins/P* ../Admins/Semi-active ../Admins/Inactive \
  | grep "# {{user3" \
  | sed -e "s/# {{user3.//" -e "s/}}.*//" >adminlist
# The count lives in its own variable: LINES is a name bash itself may
# assign (terminal height), so it is not used as scratch space here.
admin_count=$(count_lines adminlist)
if [ "$admin_count" -lt 1000 ]; then
  echo "Don't have good list of admins"
  exit 0
fi

# get a list of all RFAs
# Retry once when fewer than 3000 lines come back; after the retry, give up
# only when there are fewer than 2000.
./getallpages "Requests_for_adminship" 4 >"$ALLRFAS"
rfa_count=$(count_lines "$ALLRFAS")
if [ "$rfa_count" -lt 3000 ]; then
  ./getallpages "Requests_for_adminship" 4 >"$ALLRFAS"
  rfa_count=$(count_lines "$ALLRFAS")
  if [ "$rfa_count" -lt 2000 ]; then
    echo "Problem fetching RFAs" >&2
    exit
  fi
fi
# Classify every user in ./hopefuls as active, semi-active or inactive and
# append "<name> || <date>" to $ACTIVE, $SEMIACTIVE or $INACTIVE. Users found
# in ./adminlist go to ./alreadyadmins instead.
#
# When the script's first argument is "-", cached RFA and contribution files
# are reused where present and the request throttle is skipped.

# Download the contributions listing at URL $1 and print one line per <li>
# entry, reduced to the text of its first link.
# NOTE(review): the callers cut the first 7 characters off these lines,
# presumably a "HH:MM, " prefix in front of "day Month year" -- confirm.
scrape_contribs() {
  $WGET "$1" \
    | grep "<li cl[^>]*><a href" \
    | sed -e 's/^<li[^>]*><a href[^>]*>//' -e 's/<.*//'
}

# Print $1 with every extended-regex metacharacter backslash-escaped, so the
# result matches $1 literally inside a grep -E pattern.
ere_escape() {
  printf '%s\n' "$1" | sed -e 's/[][\\.*^$(){}?+|]/\\&/g'
}

contribs_url="http://en.wikipedia.org/w/index.php?title=Special:Contributions"
last=""
# request counter for throttling; normally initialised earlier in the script
: "${n:=1}"

while read -r line; do
  # reduce "User:Name/sub" or "User talk:Name/sub" to "Name"
  realname="${line##User:}"
  realname="${realname##User talk:}"
  realname="${realname%%/*}"
  # several consecutive pages of the same user count once
  if [ "$realname" = "$last" ]; then
    continue
  fi
  last="$realname"
  urlname=$(./urlencode "$realname")

  # Exact, literal whole-line match: user names may contain characters that
  # are special in a regular expression.
  if grep -Fxq -- "$realname" adminlist; then
    printf '%s\n' "* {{admin| $realname }}" >>alreadyadmins
    continue
  fi

  # get previous RFAs
  if [ "$1" != "-" ] || [ ! -f "$RFAS.$urlname" ]; then
    # page titles ending in "/<name>" plus optional blanks and digits
    rfa_pattern="/$(ere_escape "$realname")[ 0-9]*\$"
    grep -E -- "$rfa_pattern" "$ALLRFAS" >"$RFAS.$urlname"
  fi

  # figure out if user is active based on contribs we already know about
  if [ -s "$CONTRIBS.$urlname" ]; then
    LATEST=$(head -n 1 "$CONTRIBS.$urlname" | cut -c8-)
    THIRTIETH=$(tail -n 1 "$CONTRIBS.$urlname" | cut -c8-)
    # $LATEST, $THIRTIETH and $TODAY are left unquoted on purpose: each must
    # split into day, month and year words.
    if ! inactive $LATEST $TODAY && ! semiactive $THIRTIETH $TODAY; then
      # already known to be active: no need to refetch recent contribs
      if [ ! -s "$CONTRIBS.$urlname.earliest" ]; then
        scrape_contribs "$contribs_url&target=$urlname&limit=30&dir=prev" \
          >"$CONTRIBS.$urlname.earliest"
      fi
      EARLIEST=$(tail -n 1 "$CONTRIBS.$urlname.earliest" | cut -c8-)
      printf '%s\n' "$realname || $EARLIEST" >>"$ACTIVE"
      continue
    fi
  fi

  # get latest contribs
  # pause 10 seconds after every batch of requests unless running from cache
  n=$((n + 1))
  if [ "$n" -ge 10 ] && [ "$1" != "-" ]; then
    echo "$realname"
    sleep 10
    n=1
  fi
  if [ "$1" != "-" ] || [ ! -s "$CONTRIBS.$urlname" ]; then
    # log the fetch command before running it
    echo "$WGET $contribs_url&target=$urlname&limit=30"
    scrape_contribs "$contribs_url&target=$urlname&limit=30" \
      >"$CONTRIBS.$urlname"
  fi
  LATEST=$(head -n 1 "$CONTRIBS.$urlname" | cut -c8-)
  THIRTIETH=$(tail -n 1 "$CONTRIBS.$urlname" | cut -c8-)
  if [ ! -s "$CONTRIBS.$urlname.earliest" ]; then
    scrape_contribs "$contribs_url&target=$urlname&limit=30&dir=prev" \
      >"$CONTRIBS.$urlname.earliest"
  fi
  EARLIEST=$(tail -n 1 "$CONTRIBS.$urlname.earliest" | cut -c8-)

  # inactive if LATEST contrib not within last three months
  # semi-active if 30th most recent contrib is more than two months ago
  if inactive $LATEST $TODAY; then
    printf '%s\n' "$realname || $LATEST" >>"$INACTIVE"
    continue
  fi
  if semiactive $THIRTIETH $TODAY; then
    printf '%s\n' "$realname || $LATEST" >>"$SEMIACTIVE"
  else
    printf '%s\n' "$realname || $EARLIEST" >>"$ACTIVE"
  fi
done <hopefuls
# fix the format and sort order
#
# format_hopefuls_table OUT NO_EDITS_ROW IN...
#   Sorts and de-duplicates the "<name> || <day Month year>" records of the
#   IN files into tmp.OUT, then rewrites OUT as wiki table rows:
#     |-
#     | {{user20|<name>}} || DD-Mon-YYYY || [[<rfa page>|1]] [[<rfa page>|2]] ...
#   OUT may also be one of the IN files; it is read before it is rewritten.
#   NO_EDITS_ROW=1 turns a record with an empty date into a
#   "| {{user20|<name>}} || No edits ||" row; with 0 an empty date is
#   formatted like any other date.
#   IN files that do not exist are skipped.
#   NOTE(review): handing sort a missing input can make it fail without
#   writing the records of the files that do exist -- hence the filtering.
#   Returns 1 without touching any file when OUT is empty.
format_hopefuls_table() {
  local out=$1 no_edits_row=$2 src
  local -a present=()
  shift 2
  if [ -z "$out" ]; then
    echo "format_hopefuls_table: missing output file name" >&2
    return 1
  fi
  for src in "$@"; do
    [ -f "$src" ] && present+=("$src")
  done
  if [ "${#present[@]}" -gt 0 ]; then
    sort -fdu "${present[@]}" >"tmp.$out"
  else
    : >"tmp.$out"
  fi
  # The grep pair is kept from the original pipeline: records whose "|" is
  # followed by a non-letter come first, then those where it is followed by
  # a letter.
  ( grep "[|][^a-zA-Z]" "tmp.$out" ; grep "[|][a-zA-Z]" "tmp.$out" ) \
    | awk -v no_edits_row="$no_edits_row" '
BEGIN {
  FS = " \\|\\| "
  # every abbreviation is the first three letters of the month name
  count = split("January February March April May June July August September October November December", full, " ")
  for (i = 1; i <= count; i++) {
    monthabbr[full[i]] = substr(full[i], 1, 3)
  }
}
{
  # A "=" inside a positional template argument makes MediaWiki read it as
  # name=value, so such names (e.g. Until(1 == 2)) are passed as "1=<name>".
  who = $1
  if (who ~ /=/) {
    who = "1=" who
  }
  print "|-"
  if (no_edits_row == 1 && $2 == "") {
    print "| {{user20|" who "}} || No edits ||"
    next
  }
  split($2, date, " ")
  # zero-pad single-digit day numbers
  if (date[1] < 10) {
    date[1] = "0" date[1]
  }
  printf("%s", "| {{user20|" who "}} || " date[1] "-" monthabbr[date[2]] "-" date[3] " ||")
  # Append one numbered link per line of the RFA file, stopping at the first
  # empty line or at end of file.
  # NOTE(review): the file is looked up by the plain user name, while the
  # loop that writes these files names them after the url-encoded name; the
  # two only agree when encoding leaves the name unchanged -- confirm.
  rfanum = 1
  rfafile = ".rfas." $1
  while ((getline rfa < rfafile) > 0 && rfa != "") {
    printf("%s", " [[" rfa "|" rfanum "]]")
    rfanum = rfanum + 1
  }
  close(rfafile)
  printf "\n"
}' >"$out"
}

# Active users: the date column is taken from $ACTIVE as written by the loop.
format_hopefuls_table "$ACTIVE" 0 "$ACTIVE"
# Inactive and semi-active users share one table; users with no date get a
# "No edits" row.
format_hopefuls_table "$INACTIVE" 1 "$INACTIVE" "$SEMIACTIVE"
# get the current contents of WP:admin_hopefuls
# (the raw wikitext of Wikipedia:List_of_administrator_hopefuls is saved in
# the file named by $WPHopefuls)
$WGET 'http://en.wikipedia.org/w/index.php?title=Wikipedia:List_of_administrator_hopefuls&action=raw' >$WPHopefuls
# append one empty line to the downloaded page
# NOTE(review): presumably this guarantees the awk pass below always sees a
# final line after the last list on the page -- confirm
echo >>$WPHopefuls
# remove dups from alreadyadmins
sort -fdu alreadyadmins >tmp.alreadyadmins
mv tmp.alreadyadmins alreadyadmins
# Rewrite the downloaded page into tmp.$WPHopefuls, replacing the old table
# and list contents with the freshly generated files and copying every other
# line through unchanged.
#
# The awk program keeps two pieces of state:
#   file - the generated file that feeds the current section. It advances on
#          every line starting with "== ": the first such heading selects
#          hopeful.active, the second hopeful.inactive, and every later one
#          alreadyadmins. These names are hard-coded inside the program, so
#          they have to agree with the values of $ACTIVE and $INACTIVE.
#   skip - 1 while old content is being discarded.
#
# Rules, in the order awk tries them:
#   "== " heading               : skip off, advance file (the line then falls
#                                 through to the later rules)
#   "== Users who are already"  : print the heading, skip on
#   line starting "{|"          : print it plus the following two lines
#                                 (row separator and table header), skip on
#   line starting "|}"          : skip off, print the contents of file,
#                                 then print "|}"
#   line starting "|" or "*"    : dropped while skip is on
#   line matching /^None./      : dropped while skip is on ("." is a regex
#                                 wildcard here)
#   any line that gets this far : if skip is on, turn it off and, when file
#                                 is alreadyadmins, first print the contents
#                                 of alreadyadmins, or "None." when no line
#                                 could be read from it; then print the line
#                                 itself ($0 is saved and restored around the
#                                 getline calls, which overwrite it)
awk <$WPHopefuls >tmp.$WPHopefuls '
/^== / {
skip = 0
if (file == "") {
file = "hopeful.active"
} else if (file == "hopeful.active") {
file = "hopeful.inactive"
} else {
file = "alreadyadmins"
}
}
/^== Users who are already/ {
print $0
skip = 1
next
}
/^[{][|]/ {
print $0
# next line should be "|-"
getline
print $0
# next line is the table header
getline
print $0
skip = 1
next
}
/^[|]}/ {
skip = 0
# and now print the table contents
while ( ( getline <file ) > 0 ) {
print $0
}
print "|}"
next
}
/^[|*]/ {
if (skip == 1) {
next
}
}
/^None./ {
if (skip == 1) {
next
}
}
{
if (skip == 1) {
skip = 0
line = $0
if (file == "alreadyadmins") {
none="None."
while ( ( getline <file ) > 0 ) {
none=""
print $0
}
if (none == "None.") {
print "None."
}
}
$0 = line
}
print $0
}
'