Jump to content

User:Tom Morris/External Links Statistics

From Wikipedia, the free encyclopedia

Code

[edit]

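This Scala script was used to convert the data from the externallinks dump into the linkscount table: for each external link it extracts the host name from the URL and increments that host's count.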

import java.net.URI
import java.sql._
import com.mysql.jdbc.Driver
// Load the MySQL JDBC driver class by name and instantiate it before asking
// DriverManager for a connection (presumably so the driver registers itself;
// newer drivers may not need this step -- TODO confirm for the driver in use).
// `getDeclaredConstructor().newInstance()` replaces `Class.newInstance()`,
// which is deprecated since Java 9.
Class.forName("com.mysql.jdbc.Driver").getDeclaredConstructor().newInstance()

// Connection to the `wikipedia` database on localhost, as `root` with an
// empty password. It is shared by every statement issued below.
val conn = DriverManager.getConnection("jdbc:mysql://localhost/wikipedia", "root", "")

// Page offsets into `externallinks`: 0, 1000, 2000, ... (100001 pages of 1000 rows).
val count = (0 until 100001).map (_ * 1000)

// One parameterized upsert, prepared once and reused for every link. Binding the
// host as a parameter (instead of splicing it into the SQL text) keeps quoting
// correct no matter what characters the stored URL contains.
val upsertDomain = conn.prepareStatement(
  """INSERT INTO `linkscount` (`domain`, `count`) VALUES (?, 1)
    ON DUPLICATE KEY UPDATE `count` = `count` + 1""")

try {
  count foreach { offset =>
    // NOTE(review): there is no ORDER BY, so the database is free to return rows
    // in a different order for each page and pages may overlap or skip rows.
    // Consider ordering by the table's key column -- confirm the schema first.
    val pageStmt = conn.prepareStatement(
      s"select `el_to` from externallinks LIMIT 1000 OFFSET $offset;",
      ResultSet.TYPE_FORWARD_ONLY,
      ResultSet.CONCUR_READ_ONLY)
    try {
      pageStmt.setFetchSize(1)
      val rows = pageStmt.executeQuery()
      try {
        while (rows.next()) {
          try {
            // Both the column value and URI.getHost may be null (the latter for
            // URIs without a host component, e.g. `mailto:` links); such links
            // are skipped rather than being counted under the domain "null".
            val host = Option(rows.getString("el_to")).flatMap(raw => Option(new URI(raw).getHost))
            host.foreach { domain =>
              upsertDomain.setString(1, domain)
              upsertDomain.executeUpdate()
            }
          } catch {
            // Malformed URLs are expected in the dump; skip them quietly.
            case _: java.net.URISyntaxException => ()
            // A failed read or upsert should not abort the whole run, but it
            // should not vanish without a trace either.
            case e: SQLException =>
              Console.err.println(s"linkscount update failed at offset $offset: ${e.getMessage}")
          }
        }
      } finally {
        rows.close()
      }
    } finally {
      // The original never closed the page statement, leaking one per page.
      pageStmt.close()
    }
  }
} finally {
  upsertDomain.close()
}