yacy_search_server/htroot/IndexControlURLs_p.html
Michael Peter Christen 6e59ca4ebf removed jena library and all code that depended on jena. When jena was
introduced, it was also used for search facets. The generic search
facets are now deduced from generic solr fields which makes jena as tool
for facet semantics superfluous.
2014-02-07 01:20:06 +01:00

265 lines
15 KiB
HTML

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "DTD/xhtml1-transitional.dtd">
<!-- This page is only XHTML 1.0 Transitional because target is being used in a links -->
<html xmlns="http://www.w3.org/1999/xhtml">
#(reload)#::<meta http-equiv="REFRESH" content="5; url=/IndexControlURLs_p.html">#(/reload)#
<head>
<title>YaCy '#[clientname]#': URL Database Administration</title>
#%env/templates/metas.template%#
<script type="text/javascript">
//<![CDATA[
function xmlhttpPost() {
var searchform = document.getElementById('searchform');
search(searchform.urlstring.value);
}
function search(query) {
var xmlHttpReq = false;
var self = this;
if (window.XMLHttpRequest) { // Mozilla/Safari
self.xmlHttpReq = new XMLHttpRequest();
}
else if (window.ActiveXObject) { // IE
self.xmlHttpReq = new ActiveXObject("Microsoft.XMLHTTP");
}
self.xmlHttpReq.open('GET', "/solr/select?q=sku:\"" + query + "\" OR host_s:\"" + query + "\" OR host_dnc_s:\"" + query + "\" OR host_organization_s:\"" + query + "\" OR host_organizationdnc_s:\"" + query + "\" OR host_subdomain_s:\"" + query + "\" OR url_paths_sxt:\"" + query + "\" OR url_file_name_s:\"" + query + "\"&start=0&rows=100&wt=yjson", true);
self.xmlHttpReq.setRequestHeader('Content-Type', 'application/x-www-form-urlencoded');
self.xmlHttpReq.onreadystatechange = function() {
if (self.xmlHttpReq.readyState == 4) {
updatepage(self.xmlHttpReq.responseText);
}
}
self.xmlHttpReq.send(null);
}
function updatepage(str) {
var raw = document.getElementById("raw");
if (raw != null) raw.innerHTML = str;
var rsp = eval("("+str+")");
var firstChannel = rsp.channels[0];
var totalResults = firstChannel.totalResults.replace(/[,.]/,"");
var startIndex = firstChannel.startIndex;
var itemsPerPage = firstChannel.itemsPerPage;
var navigation = firstChannel.navigation;
var html = "";
if (totalResults > 0 && firstChannel.items.length > 0) {
var item;
html += "<table class=\"networkTable\" border=\"0\" cellpadding=\"2\" cellspacing=\"1\" width=\"99%\">";
html += "<tr class=\"TableHeader\" valign=\"bottom\">";
html += "<td>URL from index (total results = " + totalResults + ")<\/td>";
for (var i = 0; i < firstChannel.items.length; i++) {
item = firstChannel.items[i];
html += "<tr class=\"TableCellLight\"><td align=\"left\"><a href=\"IndexControlURLs_p.html?urlstringsearch=&amp;urlstring=" + item.link + "\">" + item.link + "<\/a><\/td>";
}
html += "<\/table>";
}
document.getElementById("searchresults").innerHTML = html;
}
//]]>
</script>
</head>
<body id="IndexControl">
#%env/templates/header.template%#
#%env/templates/submenuIndexControl.template%#
<h2>URL Database Administration</h2>
<p>The local index currently contains #[ucount]# URL references</p>
<form action="IndexControlURLs_p.html" id="searchform" method="post" enctype="multipart/form-data" accept-charset="UTF-8" onkeyup="xmlhttpPost(); return false;">
<fieldset><legend>URL Retrieval</legend>
<dl>
<dt class="TableCellDark">Retrieve by URL:</dt>
<dd><input type="text" name="urlstring" value="#[urlstring]#" size="40" maxlength="250" />
<input type="submit" name="urlstringsearch" value="Show Details for URL" class="submitready" style="width:240px;"/><br />
<div id="searchresults"></div>
</dd>
<dt class="TableCellDark">Retrieve by URL-Hash:</dt>
<dd><input type="text" name="urlhash" value="#[urlhash]#" size="40" maxlength="12" />
<input type="submit" name="urlhashsearch" value="Show Details for URL-Hash" class="submitready" style="width:240px;"/>
</dd>
</dl>
</fieldset>
</form>
#(cleanup)#::
<form action="IndexControlURLs_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
<fieldset><legend>Cleanup</legend>
<dl>
<dt class="TableCellDark">Index Deletion</dt>
<dd><input type="checkbox" name="deleteIndex" id="deleteIndex"
onclick="x=document.getElementById('deleteIndex').checked;#(rwi)#::document.getElementById('deleteRWI').checked=x;#(/rwi)#document.getElementById('deleteRobots').checked=x;document.getElementById('deleteRobots').checked=x;document.getElementById('deleteCrawlQueues').checked=x;c='disabled';document.getElementById('deleteSearchFl').checked=x;if(x){c='';};document.getElementById('deletecomplete').disabled=c;document.getElementById('deleteCache').disabled=c;document.getElementById('deleteRobots').disabled=c;document.getElementById('deleteCrawlQueues').disabled=c;document.getElementById('deleteSearchFl').disabled=c;"
/><label for="deleteIndex">Delete local search index (embedded Solr and old Metadata)</label><br/>
#(solr)#::<input type="checkbox" name="deleteRemoteSolr" id="deleteRemoteSolr" onclick="x=document.getElementById('deleteRemoteSolr').checked;c='disabled';if(x){c='';};document.getElementById('deletecomplete').disabled=c;" /><label for="deleteRemoteSolr">Delete remote solr index</label><br/>#(/solr)#
#(rwi)#::<input type="checkbox" name="deleteRWI" id="deleteRWI" onclick="x=document.getElementById('deleteRWI').checked;c='disabled';if(x){c='';};document.getElementById('deletecomplete').disabled=c;" /><label for="deleteRWI">Delete RWI Index (DHT transmission words)</label><br/>#(/rwi)#
#(citation)#::<input type="checkbox" name="deleteCitation" id="deleteCitation" onclick="x=document.getElementById('deleteCitation').checked;c='disabled';if(x){c='';};document.getElementById('deletecomplete').disabled=c;" /><label for="deleteCitation">Delete Citation Index (linking between URLs)</label><br/>#(/citation)#
<input type="checkbox" name="deleteCache" id="deleteCache" disabled="disabled" /><label for="deleteCache">Delete HTTP &amp; FTP Cache</label><br/>
<input type="checkbox" name="deleteCrawlQueues" id="deleteCrawlQueues" disabled="disabled" /><label for="deleteCrawlQueues">Stop Crawler and delete Crawl Queues</label><br/>
<input type="checkbox" name="deleteRobots" id="deleteRobots" disabled="disabled" /><label for="deleteRobots">Delete robots.txt Cache</label><br/>
<input type="checkbox" name="deleteSearchFl" id="deleteSearchFl" disabled="disabled" /><label for="deleteSearchFl">Delete cached snippet-fetching failures during search</label><br/><br/>
<input type="submit" name="deletecomplete" id="deletecomplete" value="Delete" disabled="disabled" class="submitready" style="width:240px;"/>
</dd>
</dl>
</fieldset>
</form>
#(/cleanup)#
#(statistics)#::
<form action="IndexControlURLs_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
<fieldset><legend>Statistics about top-domains in URL Database</legend>
<dl>
<dt class="TableCellLight">&nbsp;</dt>
<dd>Show top <input type="text" name="lines" value="#[lines]#" size="6" maxlength="6" /> domains from all URLs.
<input type="submit" name="statistics" value="Generate Statistics" class="submitready" style="width:240px;"/>
</dd>
</dl>
</fieldset>
</form>
#(/statistics)#
#(statisticslines)#::
<p><em>Statistics about the top-#[domains]# domains in the database:</em></p>
<table cellpadding="2" cellspacing="1" >
<tr class="TableHeader">
<td align="center"></td>
<td><strong>Domain</strong></td>
<td><strong>URLs</strong></td>
</tr>
#{domains}#
<tr class="TableCell#(dark)#Light::Dark#(/dark)#">
<td>
<form action="IndexControlURLs_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
<div>
<input type="hidden" name="domain" value="#[domain]#" />
<input type="hidden" name="lines" value="#[lines]#" />
<input type="submit" name="deletedomain" value="delete all" class="submitready" style="width:240px;"/>
</div>
</form>
</td>
<td><a href="http://#[domain]#/" target="_blank">#[domain]#</a></td>
<td>#[count]#</td>
</tr>
#{/domains}#
</table>
#(/statisticslines)#
#(dumprestore)#::
<form action="IndexControlURLs_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
<fieldset><legend>Dump and Restore of Solr Index</legend>
<dl>
<dt class="TableCellLight">&nbsp;</dt>
<dd><input type="submit" name="indexdump" value="Create Dump" class="submitready" style="width:240px;"/>
</dd>
</dl>
<dl>
<dt class="TableCellDark">Dump File</dt>
<dd><input type="text" name="dumpfile" value="#[dumpfile]#" size="80" maxlength="250" />
</dd>
<dt class="TableCellLight">&nbsp;</dt>
<dd><input type="submit" name="indexrestore" value="Restore Dump" class="submitready" style="width:240px;"/>
</dd>
</dl>
</fieldset>
</form>
<form action="IndexControlURLs_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
<fieldset><legend>Optimize Solr</legend>
<dl>
<dt class="TableCellLight">&nbsp;</dt>
<dd>merge to max. <input type="text" name="optimizemax" value="#[optimizemax]#" size="6" maxlength="6" /> segments
<input type="submit" name="optimizesolr" value="Optimize Solr" class="submitready" style="width:240px;"/>
</dd>
</dl>
</fieldset>
</form>
<form action="IndexControlURLs_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
<fieldset><legend>Reboot Solr Core</legend>
<dl>
<dt class="TableCellLight">&nbsp;</dt>
<dd><input type="submit" name="rebootsolr" value="Shut Down and Re-Start Solr" class="submitready" style="width:240px;"/>
</dd>
</dl>
</fieldset>
</form>::
#(/dumprestore)#
#(lurlexport)#::
<form action="IndexControlURLs_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
<fieldset><legend>Loaded URL Export</legend>
<dl>
<dt class="TableCellDark">Export File</dt>
<dd><input type="text" name="exportfile" value="#[exportfile]#" size="80" maxlength="250" />
</dd>
<dt class="TableCellDark">URL Filter</dt>
<dd><input type="text" name="exportfilter" value=".*.*" size="20" maxlength="250" />
</dd>
<dt class="TableCellDark">query</dt>
<dd><input type="text" name="exportquery" value="*:*" size="20" maxlength="250" />
</dd>
<dt class="TableCellDark">Export Format</dt>
<dd>Only Domain:
<input type="radio" name="format" value="dom-text" />Plain Text List (domains only)&nbsp;&nbsp;
<input type="radio" name="format" value="dom-html" checked="checked" />HTML (domains as URLs, no title)<br />
Full URL List:
<input type="radio" name="format" value="url-text" />Plain Text List (URLs only)&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
<input type="radio" name="format" value="url-html" />HTML (URLs with title)&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
<input type="radio" name="format" value="url-rss" />XML (RSS)
<br />
</dd>
<dt class="TableCellLight">&nbsp;</dt>
<dd><input type="submit" name="lurlexport" value="Export URLs" class="submitready" style="width:240px;"/>
</dd>
</dl>
</fieldset>
</form>::
<div class="commit" style="text-decoration:blink">Export to file #[exportfile]# is running .. #[urlcount]# URLs so far</div>::
#(/lurlexport)#
#(lurlexportfinished)#::
<div class="commit">Finished export of #[urlcount]# URLs to file <a href="file://#[exportfile]#" target="_">#[exportfile]#</a></div>::
#(/lurlexportfinished)#
#(lurlexporterror)#::
<div class="error">Export to file #[exportfile]# failed: #[exportfailmsg]#</div>::
#(/lurlexporterror)#
#(indexdump)#::
<div class="commit">Stored a solr dump to file #[dumpfile]#</div>::
#(/indexdump)#
#(genUrlProfile)#
::No entry found for URL-hash #[urlhash]#
::<iframe src="/solr/select?defType=edismax&start=0&rows=3&core=collection1&wt=html&q=id:%22#[urlhash]#%22" width="100%" height="420" frameborder="0" scrolling="no"></iframe><br />
<div id="api">
<a href="/solr/select?defType=edismax&start=0&rows=3&core=collection1&wt=html&q=id:%22#[urlhash]#%22">
<img src="env/grafics/api.png" width="60" height="40" alt="API" /></a>
<span>These document details can be retrieved as <a href="http://www.w3.org/TR/xhtml-rdfa-primer/" target="_blank">XHTML+RDFa</a>
document containg <a href="http://www.w3.org/RDF/" target="_blank">RDF</a> annotations in <a href="http://dublincore.org/" target="_blank">Dublin Core</a> vocabulary.
The XHTML+RDFa data format is both a XML content format and a HTML display format and is considered as an important <a href="http://www.w3.org/2001/sw/" target="_blank">Semantic Web</a> content format.
The same content can also be retrieved as pure <a href="/api/yacydoc.xml?urlhash=#[urlhash]#">XML metadata</a> with DC tag name vocabulary.
Click the API icon to see an example call to the search rss API.
To see a list of all APIs, please visit the <a href="http://www.yacy-websuche.de/wiki/index.php/Dev:API" target="_blank">API wiki page</a>.</span>
</div>
<form action="ViewFile.html" method="get" accept-charset="UTF-8">
<input type="hidden" name="viewMode" value="parsed" />
<input type="hidden" name="show" value="Show" />
<input type="hidden" name="urlHash" value="#[urlhash]#" />
<input type="submit" value="Show Content" name="showcontent" class="submitready" style="width:240px;"/><br />
</form>
<form action="IndexControlURLs_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
<input type="hidden" name="keystring" value="" />
<input type="hidden" name="keyhash" value="" />
<input type="hidden" name="urlstring" value="" />
<input type="hidden" name="urlhash" value="#[urlhash]#" />
<input type="submit" value="Delete URL" name="urlhashdelete" class="submitready" style="width:240px;"/><br />
<span class="small">&nbsp;this may produce unresolved references at other word indexes but they do not harm</span><br /><br />
<input type="submit" value="Delete URL and remove all references from words" name="urlhashdeleteall" class="submitready" style="width:240px;"/><br />
<span class="small">&nbsp;delete the reference to this url at every other word where the reference exists (very extensive, but prevents unresolved references)</span><br />
</form>
#(/genUrlProfile)#
#[result]#
#%env/templates/footer.template%#
</body>
</html>