mirror of
https://github.com/josejuanmartinez/libreCatastro.git
synced 2024-07-06 15:22:28 +02:00
Manually closes ElasticSearch socket. Fixes / Updates tests.
This commit is contained in:
parent
9f7d5fda51
commit
50d4ad6e93
1
main.py
1
main.py
@ -12,6 +12,7 @@ if __name__ == "__main__":
|
||||
|
||||
args = parser.parse_args(sys.argv[1:])
|
||||
if args.coords:
|
||||
print(args.filenames)
|
||||
ScrapperHTML.scrap_all_coordinates_files(args.filenames)
|
||||
else:
|
||||
ScrapperXML.scrap_all_addresses(args.provinces)
|
||||
|
@ -158,6 +158,7 @@ class Address:
|
||||
return self.city
|
||||
|
||||
city_text = self.second_line.replace(self.province_parentheses, '')
|
||||
city_text = city_text.replace(config['separator'],'').strip()
|
||||
if self.cp is not None:
|
||||
city_text = city_text.replace(self.cp, '')
|
||||
|
||||
|
@ -4,8 +4,12 @@ from abc import abstractmethod
|
||||
from elasticsearch import Elasticsearch
|
||||
|
||||
from src.settings import config
|
||||
from src.utils.cadastro_logger import CadastroLogger
|
||||
from src.utils.json_encoder import JSONEncoder
|
||||
|
||||
'''Logger'''
|
||||
logger = CadastroLogger(__name__).logger
|
||||
|
||||
|
||||
class CadasterEntry:
|
||||
|
||||
@ -30,18 +34,30 @@ class CadasterEntry:
|
||||
indent=4, separators=(',', ': '))
|
||||
|
||||
def to_elasticsearch(self):
|
||||
es = Elasticsearch()
|
||||
body = json.dumps(self.to_json(), cls=JSONEncoder,sort_keys=True,
|
||||
indent=4, separators=(',', ': '))
|
||||
try:
|
||||
es = Elasticsearch()
|
||||
body = json.dumps(self.to_json(), cls=JSONEncoder,sort_keys=True,
|
||||
indent=4, separators=(',', ': '))
|
||||
#logger.debug("Sending to Elastic Search\n:{}".format(body))
|
||||
res = es.index(index=config['elasticsearch-index'], doc_type='cadaster_doc', id=self.cadaster, body=body)
|
||||
res = es.index(index=config['elasticsearch-index'], doc_type='cadaster_doc', id=self.cadaster, body=body)
|
||||
#logger.debug(res)
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
finally:
|
||||
es.transport.close()
|
||||
|
||||
return res
|
||||
|
||||
def from_elasticsearch(self):
|
||||
es = Elasticsearch()
|
||||
query = '{"query":{"bool":{"must":[{"match":{"cadaster":"' + self.cadaster + '"}}],"must_not":[],"should":[]}},"from":0,"size":10,"sort":[],"aggs":{}}'
|
||||
res = es.search(index=config['elasticsearch-index'], body=query)
|
||||
#logger.debug(res)
|
||||
res = None
|
||||
|
||||
try:
|
||||
es = Elasticsearch()
|
||||
query = '{"query":{"bool":{"must":[{"match":{"cadaster":"' + self.cadaster + '"}}],"must_not":[],"should":[]}},"from":0,"size":10,"sort":[],"aggs":{}}'
|
||||
res = es.search(index=config['elasticsearch-index'], body=query)
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
finally:
|
||||
es.transport.close()
|
||||
|
||||
return res
|
||||
|
@ -31,4 +31,7 @@ class GeoPolygon:
|
||||
return self.polygon.contains(p)
|
||||
|
||||
def get_bounding_box(self):
|
||||
pass
|
||||
if self.polygon is not None:
|
||||
return self.polygon.bounds
|
||||
else:
|
||||
return None
|
||||
|
@ -11,7 +11,6 @@ from xml.etree import ElementTree
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from src.librecatastro.domain.cadaster_entry.cadaster_entry_html import CadasterEntryHTML
|
||||
from src.librecatastro.domain.geometry.geo_bounding_box import GeoBoundingBox
|
||||
from src.librecatastro.domain.geometry.geo_polygon import GeoPolygon
|
||||
from src.librecatastro.scrapping.scrapper import Scrapper
|
||||
from src.settings import config
|
||||
@ -33,7 +32,6 @@ class ScrapperHTML(Scrapper):
|
||||
|
||||
@classmethod
|
||||
def scrap_all_coordinates_files(cls, filenames):
|
||||
|
||||
for r, d, files in os.walk(config['coordinates_path']):
|
||||
for file in files:
|
||||
|
||||
@ -52,15 +50,17 @@ class ScrapperHTML(Scrapper):
|
||||
@staticmethod
|
||||
def scrap_polygon(polygon):
|
||||
bb = polygon.get_bounding_box()
|
||||
lon_min = 0
|
||||
lon_max = 0
|
||||
lat_min = 0
|
||||
lat_max = 0
|
||||
lon_min = int(bb[0] * config['scale'])
|
||||
lon_max = int(bb[2] * config['scale'])
|
||||
lat_min = int(bb[1] * config['scale'])
|
||||
lat_max = int(bb[3] * config['scale'])
|
||||
for x in range(lon_min, lon_max):
|
||||
for y in range(lat_min, lat_max):
|
||||
|
||||
x_scaled = x / config['scale']
|
||||
y_scaled = y / config['scale']
|
||||
if not polygon.is_point_in_polygon(x_scaled, y_scaled):
|
||||
continue
|
||||
|
||||
''' Adding to tracking file'''
|
||||
logger.info('{},{}'.format(x_scaled, y_scaled))
|
||||
@ -263,7 +263,11 @@ class ScrapperHTML(Scrapper):
|
||||
descriptive_data[field_name] = field_value.text.strip()
|
||||
|
||||
'''Constructions'''
|
||||
constructions = parsed_html.find(id='ctl00_Contenido_tblLocales').find_all('tr')
|
||||
constructions_table = parsed_html.find(id='ctl00_Contenido_tblLocales');
|
||||
if constructions_table is None:
|
||||
constructions = []
|
||||
else:
|
||||
constructions = constructions_table.find_all('tr')
|
||||
header = True
|
||||
for construction in constructions:
|
||||
if header:
|
||||
@ -273,8 +277,6 @@ class ScrapperHTML(Scrapper):
|
||||
|
||||
descriptive_data[u'Construcciones'].append(dict(uso=columns[0].text, escalera=columns[1].text, planta=columns[2].text, puerta=columns[3].text, superficie=columns[4].text, tipo=columns[5].text, fecha=columns[6].text))
|
||||
|
||||
|
||||
|
||||
cadaster_entry = CadasterEntryHTML(descriptive_data)
|
||||
return cadaster_entry
|
||||
|
||||
|
@ -7,7 +7,7 @@ config = {
|
||||
"elasticsearch-index": "cadaster",
|
||||
"error_log_file": os.path.join(root_path, 'logs', 'log'),
|
||||
"tracking_log_file": os.path.join(root_path, 'logs', 'track'),
|
||||
"scale": 1000000,
|
||||
"scale": 10000,
|
||||
"coordinates_path": os.path.join(root_path, 'coordinates'),
|
||||
"not_available_via_XML": "(Not available via XML)",
|
||||
"sleep_time": 5,
|
||||
|
@ -89,8 +89,9 @@ class ScrapperHTMLTests(unittest.TestCase):
|
||||
self.assertIsNotNone(cadaster.from_elasticsearch())
|
||||
|
||||
def scrap_random_until_x_times_found(self, times):
|
||||
coord = GeoBoundingBox.get_bb_from_file(os.path.join(config['coordinates_path'], 'central_peninsulae.json'))
|
||||
cadaster_list = ScrapperHTML.scrap_results_random_x_times(times, coord[0], coord[1], coord[2], coord[3])
|
||||
polygon = GeoPolygon(os.path.join(config['coordinates_path'], 'spain_polygon.json'))
|
||||
coord = polygon.get_bounding_box()
|
||||
cadaster_list = ScrapperHTML.scrap_results_random_x_times(times, int(coord[0]*config['scale']), int(coord[2]*config['scale']), int(coord[1]*config['scale']), int(coord[3]*config['scale']))
|
||||
self.assertTrue(len(cadaster_list) >= times)
|
||||
return cadaster_list
|
||||
|
||||
@ -117,6 +118,10 @@ class ScrapperHTMLTests(unittest.TestCase):
|
||||
polygon = GeoPolygon(os.path.join(config['coordinates_path'], 'spain_polygon.json'))
|
||||
self.assertFalse(polygon.is_point_in_polygon(lon=-1.9335937500000002, lat=48.31242790407178))
|
||||
|
||||
def test_polygon_has_correct_bounding_box(self):
|
||||
polygon = GeoPolygon(os.path.join(config['coordinates_path'], 'spain_polygon.json'))
|
||||
self.assertIsNotNone(polygon.get_bounding_box())
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
Loading…
Reference in New Issue
Block a user