User:Dr pda/extractRefCount.stx
<?xml version="1.0"?>
<!--
  STX transformation over a MediaWiki XML export (namespace export-0.3).
  For every page whose title prefix is not one of the collected namespace
  names it writes a <page> element carrying the title, the page id and the
  number of tags in the page text that start with "ref" (but not
  "references").
-->
<stx:transform version="1.0"
               xmlns:stx="http://stx.sourceforge.net/2002/ns"
               xmlns:m="http://www.mediawiki.org/xml/export-0.3/"
               pass-through="none"
               output-method="xml"
               exclude-result-prefixes="#all">
<!-- Sequence of the <namespace> entries whose key is not 0; grown by the
     m:namespace template and consulted by the m:page template. -->
<stx:variable name="namespace-prefixes"/>

<!-- Appends the current <namespace> to $namespace-prefixes unless its
     key attribute equals 0. Produces no output. -->
<stx:template match="m:namespace">
  <stx:if test="@key != 0">
    <stx:assign name="namespace-prefixes"
                select="($namespace-prefixes, .)"/>
  </stx:if>
</stx:template>
<!-- Document root: wraps everything produced by the child templates in a
     single, namespace-less <mediawiki> result element. -->
<stx:template match="/m:mediawiki">
  <mediawiki>
    <stx:process-children/>
  </mediawiki>
</stx:template>
<!-- <siteinfo> emits nothing itself; it only descends into its children
     so that the templates for nested elements get a chance to fire. -->
<stx:template match="m:siteinfo">
  <stx:process-children/>
</stx:template>
<!-- <namespaces> emits nothing itself; it descends into its children so
     that each <namespace> reaches the m:namespace template. -->
<stx:template match="m:namespaces">
  <stx:process-children/>
</stx:template>
<!-- Values captured while a page is streamed; read by the m:page template. -->
<!-- Set by the m:title template. -->
<stx:variable name="page-title"/>
<!-- Set by the m:text template. -->
<stx:variable name="page-text"/>
<!-- Set by the m:id template. -->
<stx:variable name="page-id"/>
<!-- Stores the string value of <title> in $page-title; writes no output. -->
<stx:template match="m:title">
  <stx:assign name="page-title"
              select="string(.)"/>
</stx:template>
<!-- Stores the string value of <text> in $page-text; writes no output. -->
<stx:template match="m:text">
  <stx:assign name="page-text"
              select="string(.)"/>
</stx:template>
<!-- True until the first <revision> of the current page has been entered;
     the m:page template sets it back to true for every page. -->
<stx:variable name="first-revision" select="true()"/>

<!-- Descends into the first <revision> of a page only. The flag is cleared
     BEFORE the children are processed, so the m:id template does not fire
     for ids nested inside the revision. Later revisions are ignored. -->
<stx:template match="m:revision">
  <stx:if test="$first-revision">
    <stx:assign name="first-revision" select="false()"/>
    <stx:process-children/>
  </stx:if>
</stx:template>
<!-- Buffer that the m:page template fills and then replays through the
     "copy" group. -->
<stx:buffer name="parsed"/>
<!-- Running number of pages that were not skipped (incremented in m:page). -->
<stx:variable name="pd-count" select="0"/>
<!-- Per-page tag counter: reset in m:page, incremented by the "template"
     procedure. -->
<stx:variable name="pnd-count" select="0"/>
<!-- Assigned in m:page and in the "template" procedure; no reader is
     visible in this stylesheet. -->
<stx:variable name="found-something"/>
<!-- Records the whitespace-normalised <id> in $page-id, but only while
     $first-revision is still true, i.e. before the m:revision template has
     cleared the flag for the current page. -->
<stx:template match="m:id">
  <stx:if test="$first-revision">
    <stx:assign name="page-id"
                select="normalize-space(.)"/>
  </stx:if>
</stx:template>
<!--
  Per-page driver.

  1. Re-arms $first-revision and processes the page's children, which fills
     $page-title, $page-id and $page-text through the other templates.
  2. Skips the page when the part of the title before the first ':' equals
     one of the entries collected in $namespace-prefixes.
  3. Otherwise walks the page text tag by tag (everything between a '<' and
     the following '>') and hands each tag to the "template" procedure,
     which bumps $pnd-count for matching tags. Text starting with
     #REDIRECT / #Redirect / #redirect is not scanned.
  4. Reports "pd-count/pnd-count" via stx:message and emits a <page> element
     with <title>, <id> and the buffered <template name="PND"> result.

  The '<' and '>' characters inside attribute values are written as
  &lt; / &gt;, because a literal '<' is not allowed in an XML attribute value.
-->
<stx:template match="m:page">
  <!-- New page: let m:id and the first m:revision fire again. -->
  <stx:assign name="first-revision" select="true()"/>
  <stx:process-children/>

  <!-- Empty when the title contains no ':'. -->
  <stx:variable name="prefix" select="substring-before($page-title,':')"/>
  <stx:variable name="skip" select="false()"/>
  <stx:if test="$prefix">
    <stx:for-each-item name="p" select="$namespace-prefixes">
      <stx:if test="string($p) = string($prefix)">
        <stx:assign name="skip" select="true()"/>
      </stx:if>
    </stx:for-each-item>
  </stx:if>

  <stx:if test="not($skip)">
    <stx:assign name="found-something" select="false()"/>
    <stx:assign name="pnd-count" select="0"/>
    <stx:assign name="pd-count" select="$pd-count + 1"/>

    <!-- Everything written inside this element goes to the "parsed" buffer
         (emptied first) instead of the main result. -->
    <stx:result-buffer name="parsed" clear="yes">
      <stx:variable name="text" select="$page-text"/>
      <stx:if test="not(starts-with($text,'#REDIRECT') or starts-with($text,'#Redirect') or starts-with($text,'#redirect'))">
        <!-- Each pass consumes the text up to and including the next tag.
             When no '<' or no '>' is left, substring-after yields the empty
             string and the loop ends. -->
        <stx:while test="string-length($text) &gt; 0">
          <stx:assign name="text" select="substring-after($text,'&lt;')"/>
          <stx:call-procedure name="template">
            <stx:with-param name="content" select="substring-before($text,'&gt;')"/>
          </stx:call-procedure>
          <stx:assign name="text" select="substring-after($text,'&gt;')"/>
        </stx:while>
        <template name="PND">
          <param>
            <stx:value-of select="normalize-space($pnd-count)"/>
          </param>
        </template>
      </stx:if>
    </stx:result-buffer>

    <!-- Progress report: pages handled so far / tags counted on this page. -->
    <stx:message>
      <stx:value-of select="$pd-count"/>
      <stx:text>/</stx:text>
      <stx:value-of select="$pnd-count"/>
    </stx:message>

    <stx:text> </stx:text>
    <page>
      <title><stx:value-of select="$page-title"/></title>
      <id><stx:value-of select="$page-id"/></id>
      <stx:text> </stx:text>
      <revision>
        <parsed>
          <stx:process-buffer name="parsed" group="copy"/>
        </parsed>
        <stx:text> </stx:text>
      </revision>
    </page>
    <stx:text> </stx:text>
  </stx:if>
</stx:template>
<!-- Template group used when the "parsed" buffer is replayed: reproduces
     elements, attributes and text nodes as they are. -->
<stx:group name="copy">

  <!-- Element: recreate it under the same name, then its attributes and
       children. -->
  <stx:template match="*">
    <stx:element name="{name(.)}">
      <stx:process-attributes/>
      <stx:process-children/>
    </stx:element>
  </stx:template>

  <!-- Attribute: recreate it with the same name and value. -->
  <stx:template match="@*">
    <stx:attribute name="{name(.)}" select="."/>
  </stx:template>

  <!-- Text node: write its value. -->
  <stx:template match="text()">
    <stx:value-of select="."/>
  </stx:template>

</stx:group>
<!--
  Inspects the inside of one tag (the text between '<' and '>').
  Parameter:
    content (required): the tag content handed over by the m:page template.
  When content starts with "ref" but not with "references", $pnd-count is
  incremented and $found-something is set to true. Writes no output.
-->
<stx:procedure name="template">
  <stx:param name="content" required="yes"/>
  <stx:if test="starts-with($content,'ref') and not(starts-with($content,'references'))">
    <stx:assign name="pnd-count" select="$pnd-count + 1"/>
    <stx:assign name="found-something" select="true()"/>
  </stx:if>
</stx:procedure>
</stx:transform>