yacy_search_server/source/net/yacy/document/content/dao/PhpBB3Dao.java
2010-01-10 23:09:48 +00:00

335 lines
12 KiB
Java

// PhpBB3Dao.java
// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 26.05.2009 on http://yacy.net
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.document.content.dao;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Date;
import java.util.HashMap;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import net.yacy.document.content.DCEntry;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
public class PhpBB3Dao implements Dao {
protected DatabaseConnection conn = null;
private final String urlstub, prefix;
private final HashMap<Integer, String> users;
/**
 * Creates a DAO for the posts of a phpBB3 forum database.
 *
 * @param urlstub base URL of the forum, used to build the URL of each post entry
 * @param dbType  database type, passed through to DatabaseConnection
 * @param host    database host, passed through to DatabaseConnection
 * @param port    database port, passed through to DatabaseConnection
 * @param dbname  database name, passed through to DatabaseConnection
 * @param prefix  table name prefix; queries address the tables prefix + "posts" and prefix + "users"
 * @param user    database user, passed through to DatabaseConnection
 * @param pw      database password, passed through to DatabaseConnection
 * @throws Exception propagated from the construction of the DatabaseConnection
 */
public PhpBB3Dao(
        String urlstub,
        String dbType,
        String host,
        int port,
        String dbname,
        String prefix,
        String user,
        String pw) throws Exception {
    // plain attribute setup first, the database connection last
    this.urlstub = urlstub;
    this.prefix = prefix;
    this.users = new HashMap<Integer, String>();
    this.conn = new DatabaseConnection(dbType, host, port, dbname, user, pw);
}
/**
 * Last-resort cleanup: closes the DAO when the instance is garbage collected.
 * The superclass finalizer is always invoked, even if close() fails.
 *
 * @throws Throwable anything thrown by close() or by the superclass finalizer
 */
@Override
protected void finalize() throws Throwable {
    try {
        close();
    } finally {
        super.finalize();
    }
}
/**
 * Determines the date of the oldest post.
 * The post_time column is read as seconds and converted to milliseconds.
 *
 * @return the date of the oldest post, or null if the posts table is empty
 *         or the query failed (the exception is logged)
 */
public Date first() {
    Statement stmt = null;
    ResultSet rs = null;
    try {
        stmt = conn.statement();
        rs = stmt.executeQuery("select min(post_time) from " + prefix + "posts");
        if (!rs.next()) {
            return null;
        }
        final long seconds = rs.getLong(1);
        // min() over an empty table yields SQL NULL, which getLong reports as 0;
        // without this check an empty forum would claim a first post at 1970-01-01
        if (rs.wasNull()) {
            return null;
        }
        return new Date(seconds * 1000L);
    } catch (SQLException e) {
        Log.logException(e);
        return null;
    } finally {
        // best effort: a failure while releasing the resources must not hide the result
        if (rs != null) try {rs.close();} catch (SQLException ignored) {}
        if (stmt != null) try {stmt.close();} catch (SQLException ignored) {}
    }
}
/**
 * Determines the date of the most recent post.
 * The post_time column is read as seconds and converted to milliseconds.
 *
 * @return the date of the newest post, or null if the posts table is empty
 *         or the query failed (the exception is logged)
 */
public Date latest() {
    Statement stmt = null;
    ResultSet rs = null;
    try {
        stmt = conn.statement();
        rs = stmt.executeQuery("select max(post_time) from " + prefix + "posts");
        if (!rs.next()) {
            return null;
        }
        final long seconds = rs.getLong(1);
        // max() over an empty table yields SQL NULL, which getLong reports as 0;
        // without this check an empty forum would claim a latest post at 1970-01-01
        if (rs.wasNull()) {
            return null;
        }
        return new Date(seconds * 1000L);
    } catch (SQLException e) {
        Log.logException(e);
        return null;
    } finally {
        // best effort: a failure while releasing the resources must not hide the result
        if (rs != null) try {rs.close();} catch (SQLException ignored) {}
        if (stmt != null) try {stmt.close();} catch (SQLException ignored) {}
    }
}
/**
 * Counts the entries of the posts table.
 *
 * @return the value reported by the connection's count for the table prefix + "posts"
 * @throws SQLException if the count fails
 */
public int size() throws SQLException {
    final String postsTable = prefix + "posts";
    return this.conn.count(postsTable);
}
/**
 * Loads a single post by its id.
 *
 * @param item the post_id of the wanted post
 * @return the entry for that post, or null if getOne yields nothing for the query
 */
public DCEntry get(int item) {
    final String selectPost = "select * from " + prefix + "posts where post_id = " + item;
    return getOne(selectPost);
}
/**
 * Selects posts by id range, ordered by post_id, and delivers them through a queue.
 *
 * @param from      lowest post_id to include
 * @param until     exclusive upper bound for post_id; only applied if it is greater than from,
 *                  otherwise the selection is open-ended
 * @param queueSize capacity of the returned queue
 * @return the queue produced by toQueue for the generated statement
 */
public BlockingQueue<DCEntry> query(int from, int until, int queueSize) {
    // assemble the select statement
    final StringBuilder statement = new StringBuilder(256);
    statement.append("select * from ").append(prefix).append("posts where post_id >= ").append(from);
    final boolean hasUpperBound = until > from;
    if (hasUpperBound) {
        statement.append(" and post_id < ").append(until);
    }
    statement.append(" order by post_id");

    // the rows are read and queued concurrently
    return toQueue(statement, queueSize);
}
/**
 * Selects all posts with a post_time at or after the given date, ordered by post_id,
 * and delivers them through a queue.
 *
 * @param from      earliest date to include; converted to seconds for the comparison
 * @param queueSize capacity of the returned queue
 * @return the queue produced by toQueue for the generated statement
 */
public BlockingQueue<DCEntry> query(Date from, int queueSize) {
    // post_time is compared in seconds, Date delivers milliseconds
    final long fromSeconds = from.getTime() / 1000;

    // assemble the select statement
    final StringBuilder statement = new StringBuilder(256);
    statement.append("select * from ").append(prefix).append("posts where post_time >= ");
    statement.append(fromSeconds).append(" order by post_id");

    // the rows are read and queued concurrently
    return toQueue(statement, queueSize);
}
/**
 * Runs a query and converts the first row of its result into an entry.
 *
 * @param sql the complete select statement
 * @return the parsed first row; null if there is no row, if the row yields a malformed URL,
 *         or if the query fails (exceptions are logged)
 */
private DCEntry getOne(String sql) {
    DCEntry entry = null;
    Statement statement = null;
    ResultSet rows = null;
    try {
        statement = conn.statement();
        rows = statement.executeQuery(sql);
        if (rows.next()) {
            try {
                entry = parseResultSet(rows);
            } catch (MalformedURLException e) {
                Log.logException(e);
            }
        }
    } catch (SQLException e) {
        Log.logException(e);
        entry = null;
    } finally {
        // release the database resources; failures while closing are ignored
        if (rows != null) {
            try {
                rows.close();
            } catch (SQLException e) {}
        }
        if (statement != null) {
            try {
                statement.close();
            } catch (SQLException e) {}
        }
    }
    return entry;
}
/**
 * Executes a query in a separate thread and pushes every parsed row into a bounded queue.
 * The end of the stream is marked with DCEntry.poison. The poison entry is also delivered
 * when the query fails, so that a consumer blocked in take() does not wait forever.
 *
 * @param sql       the complete select statement; its text is captured before the thread starts
 * @param queueSize capacity of the queue
 * @return the queue that the reader thread fills
 */
private BlockingQueue<DCEntry> toQueue(final StringBuilder sql, int queueSize) {
    final BlockingQueue<DCEntry> queue = new ArrayBlockingQueue<DCEntry>(queueSize);
    // snapshot the statement now; later changes to the builder must not reach the reader thread
    final String query = sql.toString();
    Thread dbreader = new Thread() {
        @Override
        public void run() {
            Statement stmt = null;
            ResultSet rs = null;
            boolean interrupted = false;
            try {
                stmt = conn.statement();
                rs = stmt.executeQuery(query);
                while (rs.next()) {
                    try {
                        queue.put(parseResultSet(rs));
                    } catch (MalformedURLException e) {
                        // skip rows that cannot be turned into a valid URL
                        Log.logException(e);
                    }
                }
            } catch (InterruptedException e) {
                Log.logException(e);
                interrupted = true;
            } catch (SQLException e) {
                Log.logException(e);
            } finally {
                if (rs != null) try {rs.close();} catch (SQLException ignored) {}
                if (stmt != null) try {stmt.close();} catch (SQLException ignored) {}
                // always terminate the stream, whatever ended the loop
                try {
                    if (interrupted) {
                        // the thread was asked to stop: do not block again, deliver if there is room
                        queue.offer(DCEntry.poison);
                    } else {
                        queue.put(DCEntry.poison);
                    }
                } catch (InterruptedException e) {
                    Log.logException(e);
                    interrupted = true;
                }
                // restore the interrupt status that was cleared by the InterruptedException
                if (interrupted) Thread.currentThread().interrupt();
            }
        }
    };
    dbreader.start();
    return queue;
}
/**
 * Converts the current row of a posts result set into an entry.
 * Reads the columns post_id, post_subject, post_text, poster_id and post_time.
 *
 * @param rs result set positioned on the row to convert
 * @return entry built from the URL, date, subject, user name and cleaned text of the post
 * @throws SQLException if a column cannot be read
 * @throws MalformedURLException if the generated post URL is not valid
 */
protected DCEntry parseResultSet(ResultSet rs) throws SQLException, MalformedURLException {
    final int postId = rs.getInt("post_id");
    // NOTE(review): the post id is passed as the 't' parameter of viewtopic.php - confirm this is intended
    final DigestURI postUrl = new DigestURI(this.urlstub + "/viewtopic.php?t=" + postId);
    final String title = rs.getString("post_subject");
    final String body = xmlCleaner(rs.getString("post_text"));
    final String author = getUser(rs.getInt("poster_id"));
    // post_time is multiplied by 1000 to get the milliseconds that Date expects
    final long postSeconds = rs.getLong("post_time");
    final Date postDate = new Date(postSeconds * 1000L);
    return new DCEntry(postUrl, postDate, title, author, body);
}
/**
 * Removes all characters that are not allowed in XML 1.0 documents and trims the result.
 * The string is processed by code point, so characters outside the basic multilingual
 * plane (stored as surrogate pairs) are kept, while unpaired surrogates are removed.
 *
 * @param s the text to clean, may be null
 * @return the cleaned and trimmed text, or null if s is null
 */
public static String xmlCleaner(String s) {
    if (s == null) return null;

    final int length = s.length();
    final StringBuilder sbOutput = new StringBuilder(length);
    int i = 0;
    while (i < length) {
        // an unpaired surrogate is returned as its own value and rejected by isXmlChar
        final int codePoint = s.codePointAt(i);
        if (isXmlChar(codePoint)) {
            sbOutput.appendCodePoint(codePoint);
        }
        i += Character.charCount(codePoint);
    }
    return sbOutput.toString().trim();
}

/** Tests a code point against the Char production of XML 1.0. */
private static boolean isXmlChar(int codePoint) {
    return (codePoint >= 0x0020 && codePoint <= 0xD7FF) ||
           (codePoint >= 0xE000 && codePoint <= 0xFFFD) ||
           (codePoint >= 0x10000 && codePoint <= 0x10FFFF) ||
           codePoint == 0x0009 ||
           codePoint == 0x000A ||
           codePoint == 0x000D;
}
/**
 * Resolves a user id to a user name. Names that were looked up once are served from the users cache.
 *
 * @param poster_id the user_id to resolve
 * @return the username column of the matching users row; an empty string if there is no such row,
 *         if the name is null, or if the query fails (the exception is logged)
 */
private String getUser(int poster_id) {
    final String cached = this.users.get(poster_id);
    if (cached != null) {
        return cached;
    }

    // not seen before: look the user up in the database
    final StringBuilder lookup = new StringBuilder(256);
    lookup.append("select * from ").append(prefix).append("users where user_id = ").append(poster_id);

    Statement statement = null;
    ResultSet row = null;
    try {
        statement = conn.statement();
        row = statement.executeQuery(lookup.toString());
        String name = row.next() ? row.getString("username") : null;
        if (name == null) {
            // unknown users are remembered with an empty name
            name = "";
        }
        this.users.put(poster_id, name);
        return name;
    } catch (SQLException e) {
        Log.logException(e);
        return "";
    } finally {
        // release the database resources; failures while closing are ignored
        if (row != null) {
            try {
                row.close();
            } catch (SQLException e) {}
        }
        if (statement != null) {
            try {
                statement.close();
            } catch (SQLException e) {}
        }
    }
}
/**
 * Takes entries from the queue until DCEntry.poison arrives and writes them as surrogate
 * XML files into the target directory. Each file is written under a temporary ".xml.prt"
 * name and renamed to ".xml" when it is complete. A new file is started after
 * maxEntriesInFile entries. No file is created if the queue delivers no entries.
 *
 * @param queue            source of the entries, terminated by DCEntry.poison
 * @param targetdir        directory for the generated files
 * @param versioninfo      name component that is placed between host name and file counter
 * @param maxEntriesInFile number of entries after which a file is closed
 * @return the number of files that were written, or 0 if an exception occurred (it is logged)
 */
public int writeSurrogates(
        BlockingQueue<DCEntry> queue,
        File targetdir,
        String versioninfo,
        int maxEntriesInFile
    ) {
    // declared outside the try block so that a half-written file can be closed on failure
    OutputStreamWriter osw = null;
    try {
        // generate output file name and attributes
        String targethost = new DigestURI(this.urlstub, null).getHost();
        int fc = 0; // number of completed files, also the index of the next file
        File outputfiletmp = null, outputfile = null;

        // write the result from the query concurrently in a file
        DCEntry e;
        int c = 0; // entries in the currently open file
        while ((e = queue.take()) != DCEntry.poison) {
            if (osw == null) {
                outputfiletmp = new File(targetdir, targethost + "." + versioninfo + "." + fc + ".xml.prt");
                outputfile = new File(targetdir, targethost + "." + versioninfo + "." + fc + ".xml");
                if (outputfiletmp.exists()) outputfiletmp.delete();
                if (outputfile.exists()) outputfile.delete();
                osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(outputfiletmp)), "UTF-8");
                osw.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<surrogates xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\n");
            }
            e.writeXML(osw);
            c++;
            if (c >= maxEntriesInFile) {
                finishSurrogateFile(osw, outputfiletmp, outputfile);
                osw = null;
                c = 0;
                fc++;
            }
        }

        // a file is only open here if entries were written since the last file was completed;
        // it is not open for an empty queue or after an exact multiple of maxEntriesInFile
        if (osw != null) {
            finishSurrogateFile(osw, outputfiletmp, outputfile);
            osw = null;
            fc++;
        }
        return fc;
    } catch (MalformedURLException e) {
        Log.logException(e);
    } catch (UnsupportedEncodingException e) {
        Log.logException(e);
    } catch (IOException e) {
        Log.logException(e);
    } catch (InterruptedException e) {
        Log.logException(e);
        // keep the interrupt visible to the caller
        Thread.currentThread().interrupt();
    } finally {
        // only non-null if writing was aborted; the original failure is already logged
        if (osw != null) try {osw.close();} catch (IOException ignored) {}
    }
    return 0;
}

/** Writes the closing tag, closes the writer and moves the temporary file to its final name. */
private static void finishSurrogateFile(OutputStreamWriter osw, File outputfiletmp, File outputfile) throws IOException {
    osw.write("</surrogates>\n");
    osw.close();
    if (!outputfiletmp.renameTo(outputfile)) {
        // report the failure but keep going, the data is still available in the temporary file
        Log.logException(new IOException("cannot rename " + outputfiletmp + " to " + outputfile));
    }
}
/**
 * Closes the database connection.
 * Does nothing if there is no connection, which is the case when the constructor failed
 * before the connection was assigned and close() is reached through finalize().
 */
public void close() {
    if (this.conn != null) {
        this.conn.close();
    }
}
/**
 * Manual test: connects to a local forum database, prints size, first and latest post date
 * and exports all posts as surrogate files. The DAO is closed when the export is done.
 *
 * @param args not used
 */
public static void main(String[] args) {
    PhpBB3Dao db = null;
    try {
        db = new PhpBB3Dao(
            "http://forum.yacy-websuche.de",
            "mysql",
            "localhost",
            3306,
            "forum",
            "forum_",
            "root",
            ""
            );
        System.out.println("Posts in database : " + db.size());
        System.out.println("First entry       : " + db.first());
        System.out.println("Last entry        : " + db.latest());
        // a relative path without directory part has no parent, so this is null and the
        // output files are resolved against the current working directory
        File targetdir = new File("x").getParentFile();
        db.writeSurrogates(db.query(0, -1, 100), targetdir, "id0-current", 3000);
    } catch (Exception e) {
        Log.logException(e);
    } finally {
        // release the database connection also if one of the steps above failed
        if (db != null) db.close();
    }
}
}