Added Solr GrepHTML writer support for responses from remote instances

2024-09-19 00:01:41 +02:00 · 2019-03-20 18:24:16 +01:00 · 2019-03-20 18:24:16 +01:00 · 74e6d6e984
commit 74e6d6e984
parent 5e6501974d
2 changed files with 211 additions and 74 deletions
--- a/source/net/yacy/cora/federate/solr/responsewriter/GrepHTMLResponseWriter.java
+++ b/source/net/yacy/cora/federate/solr/responsewriter/GrepHTMLResponseWriter.java
@ -28,10 +28,10 @@ import java.util.LinkedHashMap;
 import java.util.Set;
 import java.util.regex.Pattern;

-import net.yacy.document.SentenceReader;
-import net.yacy.search.schema.CollectionSchema;
-
 import org.apache.lucene.document.Document;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrDocumentList;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
@ -43,7 +43,12 @@ import org.apache.solr.response.SolrQueryResponse;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.search.DocIterator;
 import org.apache.solr.search.DocList;
+import org.apache.solr.search.ReturnFields;
 import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.search.SolrReturnFields;
+
+import net.yacy.document.SentenceReader;
+import net.yacy.search.schema.CollectionSchema;

 /**
 * this response writer shows a list of documents with the lines containing matches
@ -52,9 +57,9 @@ import org.apache.solr.search.SolrIndexSearcher;
 * text part and each sentence is shown as separate line. grep attributes can be used to
  * show leading and trailing lines.
 */
-public class GrepHTMLResponseWriter implements QueryResponseWriter, EmbeddedSolrResponseWriter {
+public class GrepHTMLResponseWriter implements QueryResponseWriter, SolrjResponseWriter {

-    private static final Set<String> DEFAULT_FIELD_LIST = new HashSet<String>();
+    private static final Set<String> DEFAULT_FIELD_LIST = new HashSet<>();
    private static final Pattern dqp = Pattern.compile("\"");
    static {
        DEFAULT_FIELD_LIST.add(CollectionSchema.id.getSolrFieldName());
@ -63,10 +68,6 @@ public class GrepHTMLResponseWriter implements QueryResponseWriter, EmbeddedSolr
        DEFAULT_FIELD_LIST.add(CollectionSchema.text_t.getSolrFieldName());
    }
    
-    public GrepHTMLResponseWriter() {
-        super();
-    }
-
    @Override
    public String getContentType(final SolrQueryRequest request, final SolrQueryResponse response) {
        return "text/html";
@ -78,72 +79,216 @@ public class GrepHTMLResponseWriter implements QueryResponseWriter, EmbeddedSolr

    @Override
    public void write(final Writer writer, final SolrQueryRequest request, final SolrQueryResponse rsp) throws IOException {
-        NamedList<?> values = rsp.getValues();
-        assert values.get("responseHeader") != null;
-        assert values.get("response") != null;
-
-        writer.write("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>\n");
-        writer.write("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n");
-        writer.write("<link rel=\"stylesheet\" type=\"text/css\" media=\"all\" href=\"../env/base.css\" />\n");
-        writer.write("<link rel=\"stylesheet\" type=\"text/css\" media=\"screen\" href=\"../env/style.css\" />\n");
-        SolrParams params = request.getOriginalParams();
-        String grep = params.get("grep");
-        String query = "";
-        String q = params.get(CommonParams.Q); if (q == null) q = "";
-        int p = q.indexOf(':');
-        if (p >= 0) {
-            int r = q.charAt(p + 1) == '"' ? q.indexOf(p + 2, '"') : q.indexOf(' ');
-            if (r < 0) r = q.length();
-            query = q.substring(p + 1, r);
-            if (query.length() > 0) {
-                if (query.charAt(0) == '"') query = query.substring(1);
-                if (query.charAt(query.length() - 1) == '"') query = query.substring(0, query.length() - 1);
-            }
-        }
-        if (grep == null && query.length() > 0) grep = query;
-        if (grep.length() > 0) {
-            if (grep.charAt(0) == '"') grep = grep.substring(1);
-            if (grep.charAt(grep.length() - 1) == '"') grep = grep.substring(0, grep.length() - 1);
-        }
-        NamedList<Object> paramsList = params.toNamedList();
-        paramsList.remove("wt");
-        String xmlquery = dqp.matcher("select?" + SolrParams.toSolrParams(paramsList).toString()).replaceAll("%22");
+        writeHtmlHead(writer);
        
-        DocList response = ((ResultContext) values.get("response")).getDocList();
-        final int sz = response.size();
-        if (sz > 0) {
-            SolrIndexSearcher searcher = request.getSearcher();
-            DocIterator iterator = response.iterator();
-            IndexSchema schema = request.getSchema();
-            String h1 = "Document Grep for query \"" + query + "\" and grep phrase \"" + grep + "\"";
-            writer.write("<title>" + h1 + "</title>\n</head><body>\n<h1>" + h1 + "</h1>\n");
-            writer.write("<div id=\"api\"><a href=\"" + xmlquery + "\"><img src=\"../env/grafics/api.png\" width=\"60\" height=\"40\" alt=\"API\" /></a>\n");
-            writer.write("<span>This search result can also be retrieved as XML. Click the API icon to see an example call to the search rss API.</span></div>\n");
-            for (int i = 0; i < sz; i++) {
-                int id = iterator.nextDoc();
-                Document doc = searcher.doc(id, DEFAULT_FIELD_LIST);
-                LinkedHashMap<String, String> tdoc = HTMLResponseWriter.translateDoc(schema, doc);
-                String sku = tdoc.get(CollectionSchema.sku.getSolrFieldName());
-                String title = tdoc.get(CollectionSchema.title.getSolrFieldName());
-                String text = tdoc.get(CollectionSchema.text_t.getSolrFieldName());
+        final SolrParams params = request.getOriginalParams();
+        
+        final String query = getQueryParam(params);
+        final String grep = getGrepParam(params, query);

-                ArrayList<String> sentences = new ArrayList<String>();
-                if (title != null) sentences.add(title);
-                SentenceReader sr = new SentenceReader(text);
-                StringBuilder line;
-                while (sr.hasNext()) {
-                    line = sr.next();
-                    if (line.length() > 0) sentences.add(line.toString());
+        
+        final Object responseObj = rsp.getResponse();
+        
+        if(responseObj instanceof SolrDocumentList) {
+			/*
+			 * The response object can be a SolrDocumentList when the response is partial,
+			 * for example when the allowed processing time has been exceeded
+			 */
+        	final SolrDocumentList docList = ((SolrDocumentList)responseObj);
+        	
+            writeSolrDocumentList(writer, params, query, grep, docList);
+        	
+        } else if(responseObj instanceof ResultContext) {
+        	/* Regular response object */
+        	final DocList documents = ((ResultContext)responseObj).getDocList();
+        	
+            final int sz = documents.size();
+            if (sz > 0) {
+                final SolrIndexSearcher searcher = request.getSearcher();
+                final DocIterator iterator = documents.iterator();
+                final IndexSchema schema = request.getSchema();
+                writeTitleAndHeadeing(writer, grep, query);
+                writeApiLink(writer, params);
+                for (int i = 0; i < sz; i++) {
+                    int id = iterator.nextDoc();
+                    final Document doc = searcher.doc(id, DEFAULT_FIELD_LIST);
+                    final LinkedHashMap<String, String> tdoc = HTMLResponseWriter.translateDoc(schema, doc);
+                    final String sku = tdoc.get(CollectionSchema.sku.getSolrFieldName());
+                    final String title = tdoc.get(CollectionSchema.title.getSolrFieldName());
+                    final String text = tdoc.get(CollectionSchema.text_t.getSolrFieldName());
+
+                    final ArrayList<String> sentences = extractSentences(title, text);
+                    writeDoc(writer, sku, sentences, grep);
                }
-                writeDoc(writer, sku, sentences, grep);
+            } else {
+                writer.write("<title>No Document Found</title>\n</head><body>\n");
            }
        } else {
-            writer.write("<title>No Document Found</title>\n</head><body>\n");
+        	writer.write("<title>Unable to process Solr response</title>\n</head><body>\n");
        }
        
        writer.write("</body></html>\n");
    }

+    /**
+     * Render a Solr documents list as html and append it to the output writer.
+     * When the list is null or empty, only a "No Document Found" title is written.
+     * Otherwise the title, heading and api link are written once, followed by one
+     * entry per document built from its sku, title and text_t fields.
+     * @param writer an open output writer. Must not be null.
+     * @param params the original Solr parameters
+     * @param query the query parameter value
+     * @param grep the grep parameter value
+     * @param docList the solr documents list. May be null or empty.
+     * @throws IOException when a write error occurred
+     */
+    private void writeSolrDocumentList(final Writer writer, final SolrParams params, final String query,
+            final String grep, final SolrDocumentList docList) throws IOException {
+        final boolean nothingFound = docList == null || docList.isEmpty();
+        if (nothingFound) {
+            writer.write("<title>No Document Found</title>\n</head><body>\n");
+            return;
+        }
+
+        /* The head is closed and the body opened by the title/heading part */
+        writeTitleAndHeadeing(writer, grep, query);
+        writeApiLink(writer, params);
+
+        final ReturnFields returnFields = new SolrReturnFields();
+        for (final SolrDocument document : docList) {
+            final LinkedHashMap<String, String> fields = HTMLResponseWriter.translateDoc(document, returnFields);
+            final String url = fields.get(CollectionSchema.sku.getSolrFieldName());
+            final String docTitle = fields.get(CollectionSchema.title.getSolrFieldName());
+            final String docText = fields.get(CollectionSchema.text_t.getSolrFieldName());
+
+            writeDoc(writer, url, extractSentences(docTitle, docText), grep);
+        }
+    }
+
+    /**
+     * Write the beginning of the html document : doctype, opening html and head
+     * tags, content type meta tag and the two stylesheet links. The head element
+     * is left open so that the caller can still append a title.
+     * @param writer an open output writer. Must not be null.
+     * @throws IOException when a write error occurred
+     */
+    private void writeHtmlHead(final Writer writer) throws IOException {
+        final String[] headFragments = {
+            "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>\n",
+            "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n",
+            "<link rel=\"stylesheet\" type=\"text/css\" media=\"all\" href=\"../env/base.css\" />\n",
+            "<link rel=\"stylesheet\" type=\"text/css\" media=\"screen\" href=\"../env/style.css\" />\n"
+        };
+        /* Fragments are written one by one, in declaration order */
+        for (final String fragment : headFragments) {
+            writer.write(fragment);
+        }
+    }
+
+	/**
+	 * Compute the phrase used to filter the displayed sentences : the value of the
+	 * "grep" request parameter when present, otherwise the given query value. One
+	 * leading and one trailing double quote are removed from the result.
+	 * @param params the original request parameters. Must not be null.
+	 * @param query the query parameter value, used as a fallback when no grep parameter is set. May be null.
+	 * @return the grep parameter value, eventually empty but never null
+	 */
+	private String getGrepParam(final SolrParams params, String query) {
+		String grep = params.get("grep");
+		if (grep == null) {
+			grep = query != null ? query : "";
+		}
+		/*
+		 * startsWith/endsWith are used rather than charAt : a value made of a single
+		 * double quote is empty once the leading quote is removed, and reading its
+		 * last character would then throw a StringIndexOutOfBoundsException
+		 */
+		if (grep.startsWith("\"")) {
+			grep = grep.substring(1);
+		}
+		if (grep.endsWith("\"")) {
+			grep = grep.substring(0, grep.length() - 1);
+		}
+		return grep;
+	}
+
+	/**
+	 * Extract the searched value from the "q" request parameter : the part
+	 * following the first colon, up to the closing double quote when the value is
+	 * quoted, otherwise up to the next space or the end of the parameter.
+	 * Surrounding double quotes are not part of the result.
+	 * @param params the original request parameters. Must not be null.
+	 * @return the query parameter value, empty when the q parameter is missing,
+	 *         contains no colon or has nothing after the colon. Never null.
+	 */
+	private String getQueryParam(final SolrParams params) {
+		final String q = params.get(CommonParams.Q, "");
+		final int colon = q.indexOf(':');
+		final int start = colon + 1;
+		if (colon < 0 || start >= q.length()) {
+			/* No field separator, or nothing after it : reading q.charAt(start) would fail */
+			return "";
+		}
+
+		final int end;
+		if (q.charAt(start) == '"') {
+			/* Quoted value : String.indexOf takes the character first, then the start index */
+			end = q.indexOf('"', start + 1);
+		} else {
+			/* Only a space located after the colon can terminate the value */
+			end = q.indexOf(' ', start);
+		}
+
+		String query = q.substring(start, end < 0 ? q.length() : end);
+		/* startsWith/endsWith are safe on an empty string, unlike charAt */
+		if (query.startsWith("\"")) {
+			query = query.substring(1);
+		}
+		if (query.endsWith("\"")) {
+			query = query.substring(0, query.length() - 1);
+		}
+		return query;
+	}
+
+    /**
+     * Append the response title and level 1 html heading. This also closes the
+     * head element and opens the body element.
+     * The query and grep values are derived from request parameters, so they are
+     * html-escaped before being embedded in the markup.
+     * @param writer an open output writer. Must not be null.
+     * @param grep the grep phrase
+     * @param query the search query
+     * @throws IOException when a write error occurred
+     */
+    private void writeTitleAndHeadeing(final Writer writer, final String grep, final String query) throws IOException {
+        final String h1 = "Document Grep for query \"" + escapeHtmlText(query) + "\" and grep phrase \""
+                + escapeHtmlText(grep) + "\"";
+        writer.write("<title>" + h1 + "</title>\n</head><body>\n<h1>" + h1 + "</h1>\n");
+    }
+
+    /**
+     * Replace the characters having a special meaning in html by character references.
+     * @param value the raw text. May be null.
+     * @return the escaped text, or an empty string when value is null
+     */
+    private static String escapeHtmlText(final String value) {
+        if (value == null) {
+            return "";
+        }
+        final StringBuilder escaped = new StringBuilder(value.length() + 16);
+        for (int i = 0; i < value.length(); i++) {
+            final char c = value.charAt(i);
+            switch (c) {
+                case '&':
+                    escaped.append("&amp;");
+                    break;
+                case '<':
+                    escaped.append("&lt;");
+                    break;
+                case '>':
+                    escaped.append("&gt;");
+                    break;
+                case '"':
+                    escaped.append("&quot;");
+                    break;
+                case '\'':
+                    escaped.append("&#39;");
+                    break;
+                default:
+                    escaped.append(c);
+                    break;
+            }
+        }
+        return escaped.toString();
+    }
+
+	/**
+	 * Append a block linking to the "select" api with the same request parameters,
+	 * except the "wt" parameter which is removed. Double quotes in the link are
+	 * replaced by %22 so that they do not terminate the href attribute.
+	 * @param writer an open output writer. Must not be null.
+	 * @param solrParams the original request parameters. Must not be null.
+	 * @throws IOException when a write error occurred
+	 */
+	private void writeApiLink(final Writer writer, final SolrParams solrParams) throws IOException {
+		final NamedList<Object> linkParams = solrParams.toNamedList();
+		linkParams.remove("wt");
+
+		final String rawLink = "select?" + SolrParams.toSolrParams(linkParams).toString();
+		final String apiLink = dqp.matcher(rawLink).replaceAll("%22");
+
+		writer.write("<div id=\"api\"><a href=\"" + apiLink + "\"><img src=\"../env/grafics/api.png\" width=\"60\" height=\"40\" alt=\"API\" /></a>\n");
+		writer.write("<span>This search result can also be retrieved as XML. Click the API icon to see an example call to the search rss API.</span></div>\n");
+	}
+
+	/**
+	 * Build the list of lines to display for a document : the title first when
+	 * available, then each non empty sentence read from the text.
+	 * @param title the document title. May be null.
+	 * @param text the document text. May be null.
+	 * @return a list of sentences extracted from the given document text and title.
+	 *         Eventually empty but never null.
+	 */
+	private ArrayList<String> extractSentences(final String title, final String text) {
+		final ArrayList<String> result = new ArrayList<>();
+		if (title != null) {
+			result.add(title);
+		}
+		if (text == null) {
+			return result;
+		}
+
+		final SentenceReader reader = new SentenceReader(text);
+		while (reader.hasNext()) {
+			final StringBuilder sentence = reader.next();
+			if (sentence.length() > 0) {
+				result.add(sentence.toString());
+			}
+		}
+		return result;
+	}
+    
+    /**
+     * Write the html representation of a Solrj query response : html head, then
+     * the result documents (or a "No Document Found" title), then the closing
+     * body and html tags.
+     * @param writer an open output writer. Must not be null.
+     * @param request the initial Solr request. Must not be null.
+     * @param coreName the requested Solr core name. Not used by this writer.
+     * @param rsp the Solrj response. Must not be null.
+     * @throws IOException when a write error occurred
+     */
+    @Override
+    public void write(final Writer writer, final SolrQueryRequest request, final String coreName, final QueryResponse rsp) throws IOException {
+        writeHtmlHead(writer);
+
+        final SolrParams originalParams = request.getOriginalParams();
+        final String searchQuery = getQueryParam(originalParams);
+        final String grepPhrase = getGrepParam(originalParams, searchQuery);
+        final SolrDocumentList results = rsp.getResults();
+
+        writeSolrDocumentList(writer, originalParams, searchQuery, grepPhrase, results);
+
+        writer.write("</body></html>\n");
+    }
+
    private static final void writeDoc(Writer writer, String url, ArrayList<String> sentences, String grep) throws IOException {
        writer.write("<form name=\"yacydoc" + url + "\" method=\"post\" action=\"#\" enctype=\"multipart/form-data\" accept-charset=\"UTF-8\">\n");
        writer.write("<fieldset>\n");
--- a/source/net/yacy/cora/federate/solr/responsewriter/HTMLResponseWriter.java
+++ b/source/net/yacy/cora/federate/solr/responsewriter/HTMLResponseWriter.java
@ -72,10 +72,6 @@ public class HTMLResponseWriter implements QueryResponseWriter, SolrjResponseWri

    public static final Pattern dqp = Pattern.compile("\"");
    
-    public HTMLResponseWriter() {
-        super();
-    }
-
    @Override
    public String getContentType(final SolrQueryRequest request, final SolrQueryResponse response) {
        return "text/html";
@ -288,10 +284,6 @@ public class HTMLResponseWriter implements QueryResponseWriter, SolrjResponseWri

    @Override
    public void write(final Writer writer, final SolrQueryRequest request, final SolrQueryResponse rsp) throws IOException {
-        NamedList<?> values = rsp.getValues();
-        assert values.get("responseHeader") != null;
-        assert values.get("response") != null;
-
        writeHtmlHead(writer, request);

        final String coreName = request.getCore().getName();
@ -643,7 +635,7 @@ public class HTMLResponseWriter implements QueryResponseWriter, SolrjResponseWri
 	 *            restrict the actually returned fields. May be null.
 	 * @return a map of field names to field values
 	 */
-	private static final LinkedHashMap<String, String> translateDoc(final SolrDocument doc,
+	public static final LinkedHashMap<String, String> translateDoc(final SolrDocument doc,
 			final ReturnFields returnFields) {
 		LinkedHashMap<String, String> kv = new LinkedHashMap<String, String>();
 		for (final Entry<String, Object> entry : doc) {