mirror of
https://github.com/josejuanmartinez/libreCatastro.git
synced 2024-07-06 15:22:28 +02:00
Adds a JSONEncoder class to recursively manage serialization for Elasticsearch. Changes the scraping functions to return lists (because of multiparcelas, i.e. lookups that resolve to several parcels).
This commit is contained in:
parent
103c87778b
commit
7aa3e17912
@ -141,6 +141,7 @@ class CadastroScrapper:
|
||||
descriptive_data = dict()
|
||||
descriptive_data[u'Longitud'] = x
|
||||
descriptive_data[u'Latitud'] = y
|
||||
|
||||
''' Parcela '''
|
||||
fields = description.find_all('div')
|
||||
for field in fields:
|
||||
@ -157,7 +158,6 @@ class CadastroScrapper:
|
||||
descriptive_data[field_name] = field_value.encode_contents().decode('utf-8').replace('<br/>',config['separator']).replace('<br>', config['separator'])
|
||||
|
||||
cadaster_entry = CadasterEntry(descriptive_data)
|
||||
logger.info(cadaster_entry.to_json())
|
||||
return cadaster_entry
|
||||
|
||||
@staticmethod
|
||||
|
@ -1,4 +1,3 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
from src.settings import config
|
||||
@ -9,10 +8,11 @@ logger = CadastroLogger(__name__).logger
|
||||
|
||||
|
||||
class Address:
|
||||
""" Domain class for storing Address in Catastro format"""
|
||||
def __init__(self, address):
|
||||
self.full_address = address
|
||||
logger.info("Full address: {}".format(self.full_address))
|
||||
logger.info("Separator: {}".format(config['separator']))
|
||||
|
||||
''' Initialization in case some data is not present'''
|
||||
self.first_line = None
|
||||
self.second_line = None
|
||||
self.street = None
|
||||
@ -28,6 +28,7 @@ class Address:
|
||||
self.site = None
|
||||
self.lot = None
|
||||
|
||||
''' NLP search '''
|
||||
self.first_line = self.get_first_line()
|
||||
self.second_line = self.get_second_line()
|
||||
|
||||
@ -159,5 +160,4 @@ class Address:
|
||||
return province_parentheses_text, province_text
|
||||
|
||||
def to_json(self):
|
||||
return json.dumps(self, default=lambda o: o.__dict__,
|
||||
sort_keys=True, indent=4)
|
||||
return dict(full_address=self.full_address, first_line=self.first_line, second_line=self.second_line, street=self.street, cp=self.cp, city=self.city, province_parantheses=self.province_parentheses, province=self.province, doorway=self.doorway, floor=self.floor, door=self.door, site=self.site, lot=self.lot)
|
||||
|
@ -7,6 +7,7 @@ from src.librecatastro.domain.address import Address
|
||||
from src.librecatastro.domain.location import Location
|
||||
from src.settings import config
|
||||
from src.utils.cadastro_logger import CadastroLogger
|
||||
from src.utils.json_enconder import JSONEncoder
|
||||
|
||||
logger = CadastroLogger(__name__).logger
|
||||
|
||||
@ -21,16 +22,18 @@ class CadasterEntry:
|
||||
self.use = description_data[u'Uso principal'] if u'Uso principal' in description_data else None
|
||||
self.surface = description_data[u'Superficie construida'] if u'Superficie construida' in description_data else None
|
||||
self.year = description_data[u'Año construcción'] if u'Año construcción' in description_data else None
|
||||
self.location = Location(description_data[u'Longitud'], description_data[u'Latitud']) if u'Longitud' in description_data and u'Latitud' in description_data else None
|
||||
self.location = Location(description_data[u'Longitud'], description_data[u'Latitud'])
|
||||
self.timestamp = str(datetime.now())
|
||||
|
||||
def to_json(self):
|
||||
return json.dumps(self, default=lambda o: o.__dict__,
|
||||
sort_keys=True, indent=4)
|
||||
return dict(address=self.address, cadaster=self.cadaster, type=self.type, use=self.use, surface=self.surface, year=self.year, location=self.location, timestamp=self.timestamp)
|
||||
|
||||
def to_elasticsearch(self):
|
||||
es = Elasticsearch()
|
||||
res = es.index(index=config['elasticsearch-index'], doc_type='cadaster_doc', id=self.cadaster, body=self.to_json())
|
||||
body = json.dumps(self.to_json(), cls=JSONEncoder,sort_keys=True,
|
||||
indent=4, separators=(',', ': '))
|
||||
logger.info("Sending to Elastic Search\n:{}".format(body))
|
||||
res = es.index(index=config['elasticsearch-index'], doc_type='cadaster_doc', id=self.cadaster, body=body)
|
||||
logger.info(res)
|
||||
return res
|
||||
|
||||
|
@ -1,8 +1,17 @@
|
||||
import json
|
||||
|
||||
from src.utils.cadastro_logger import CadastroLogger
|
||||
|
||||
logger = CadastroLogger(__name__).logger
|
||||
|
||||
|
||||
class Location:
    """Geographic point (longitude/latitude) used by ``CadasterEntry``."""

    def __init__(self, longitude, latitude):
        """Store both coordinates as floats.

        :param longitude: value accepted by ``float()``, or ``None`` if unknown.
        :param latitude: value accepted by ``float()``, or ``None`` if unknown.
        :raises ValueError: if a non-``None`` coordinate is not a valid number.
        """
        # Convert once here so every consumer sees numeric values (or None).
        self.lon = float(longitude) if longitude is not None else None
        self.lat = float(latitude) if latitude is not None else None

    def to_json(self):
        """Return ``{'lon': ..., 'lat': ...}``, or ``None`` when both are unset.

        A plain dict is returned (not a pre-formatted string) so the caller's
        JSON encoder can embed it as a nested object.
        """
        if self.lon is None and self.lat is None:
            return None
        return dict(lon=self.lon, lat=self.lat)
|
@ -15,54 +15,70 @@ class MyTestCase(unittest.TestCase):
|
||||
assert True
|
||||
|
||||
def test_coordinate_creates_cadaster(self):
|
||||
cadaster = CadastroScrapper.scrap_coord(-3.68, 40.47)
|
||||
cadaster_list = CadastroScrapper.scrap_coord(-3.68, 40.47)
|
||||
self.assertEqual(len(cadaster_list), 1)
|
||||
cadaster = cadaster_list[0]
|
||||
self.assertEqual(cadaster.cadaster, '2302909VK4820A0001GK')
|
||||
|
||||
def test_coordinate_multiparcela_creates_cadaster_2(self):
|
||||
cadaster = CadastroScrapper.scrap_coord(-0.33, 39.47)
|
||||
self.assertTrue(len(cadaster) > 0)
|
||||
cadaster_list = CadastroScrapper.scrap_coord(-0.33, 39.47)
|
||||
self.assertTrue(len(cadaster_list) > 1)
|
||||
|
||||
def test_coordinate_creates_cadaster_and_stores_in_elasticsearch(self):
|
||||
cadaster = CadastroScrapper.scrap_coord(-3.68, 40.47)
|
||||
cadaster_list = CadastroScrapper.scrap_coord(-3.68, 40.47)
|
||||
self.assertEqual(len(cadaster_list), 1)
|
||||
cadaster = cadaster_list[0]
|
||||
cadaster.to_elasticsearch()
|
||||
self.assertIsNotNone(cadaster.from_elasticsearch())
|
||||
|
||||
def test_cadaster_site_lot_creates_cadaster_and_sets_site_lot(self):
|
||||
cadaster = CadastroScrapper.scrap_cadaster('45134A02500003')
|
||||
cadaster_list = CadastroScrapper.scrap_cadaster('45134A02500003')
|
||||
self.assertEqual(len(cadaster_list), 1)
|
||||
cadaster = cadaster_list[0]
|
||||
self.assertEqual(cadaster.address.site, '25')
|
||||
self.assertEqual(cadaster.address.lot, '3')
|
||||
|
||||
def test_cadaster_full_creates_cadaster(self):
|
||||
cadaster = CadastroScrapper.scrap_cadaster('0083101WK2008S0001PD')
|
||||
cadaster_list = CadastroScrapper.scrap_cadaster('0083101WK2008S0001PD')
|
||||
self.assertEqual(len(cadaster_list), 1)
|
||||
cadaster = cadaster_list[0]
|
||||
self.assertEqual(cadaster.address.city, 'ALMONACID DEL MARQUESADO')
|
||||
self.assertEqual(cadaster.address.province, 'CUENCA')
|
||||
|
||||
def test_cadaster_half_creates_cadaster(self):
|
||||
cadaster = CadastroScrapper.scrap_cadaster('0183001WK2008S')
|
||||
cadaster_list = CadastroScrapper.scrap_cadaster('0183001WK2008S')
|
||||
self.assertEqual(len(cadaster_list), 1)
|
||||
cadaster = cadaster_list[0]
|
||||
self.assertEqual(cadaster.address.city, 'ALMONACID DEL MARQUESADO')
|
||||
self.assertEqual(cadaster.address.province, 'CUENCA')
|
||||
|
||||
def test_cadaster_half_creates_cadaster_2(self):
|
||||
cadaster = CadastroScrapper.scrap_cadaster('21012A03100046')
|
||||
cadaster_list = CadastroScrapper.scrap_cadaster('21012A03100046')
|
||||
self.assertEqual(len(cadaster_list), 1)
|
||||
cadaster = cadaster_list[0]
|
||||
self.assertEqual(cadaster.address.province, 'HUELVA')
|
||||
|
||||
def test_cadaster_no_cp_creates_cadaster(self):
|
||||
cadaster = CadastroScrapper.scrap_cadaster('06145A00500028')
|
||||
cadaster_list = CadastroScrapper.scrap_cadaster('06145A00500028')
|
||||
self.assertEqual(len(cadaster_list), 1)
|
||||
cadaster = cadaster_list[0]
|
||||
self.assertIsNone(cadaster.address.cp)
|
||||
self.assertEqual(cadaster.address.province, 'BADAJOZ')
|
||||
|
||||
def test_cadaster_multiparcela_returns_list(self):
|
||||
cadaster_list= CadastroScrapper.scrap_cadaster('22282A00900547')
|
||||
def test_cadaster_multiparcela_returns_list_of_2(self):
|
||||
cadaster_list = CadastroScrapper.scrap_cadaster('22282A00900547')
|
||||
self.assertEqual(len(cadaster_list), 2)
|
||||
|
||||
def test_cadaster_is_stored_in_elasticsearch(self):
|
||||
cadaster = CadastroScrapper.scrap_cadaster('0183001WK2008S')
|
||||
cadaster_list = CadastroScrapper.scrap_cadaster('0183001WK2008S')
|
||||
self.assertEqual(len(cadaster_list), 1)
|
||||
cadaster = cadaster_list[0]
|
||||
cadaster.to_elasticsearch()
|
||||
self.assertIsNotNone(cadaster.from_elasticsearch())
|
||||
|
||||
def scrap_random_until_x_times_found(self, times):
|
||||
cadaster_list = CadastroScrapper.scrap_results_random_x_times(times)
|
||||
self.assertEqual(len(cadaster_list), times)
|
||||
self.assertTrue(len(cadaster_list) >= times)
|
||||
return cadaster_list
|
||||
|
||||
def test_scrap_random_until_5_found(self):
|
||||
|
9
src/utils/json_enconder.py
Normal file
9
src/utils/json_enconder.py
Normal file
@ -0,0 +1,9 @@
|
||||
import json
|
||||
|
||||
|
||||
class JSONEncoder(json.JSONEncoder):
    """JSON encoder that serializes objects through their ``to_json()`` method.

    The value returned by ``to_json()`` is encoded again by the ``json``
    machinery, so nested objects exposing ``to_json()`` are handled
    recursively.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        :param obj: object the standard encoder could not serialize.
        :returns: the result of ``obj.to_json()`` when that method exists.
        :raises TypeError: (from the base class) when ``obj`` has no callable
            ``to_json``.
        """
        to_json = getattr(obj, 'to_json', None)
        # Only call it when it really is a method/function; otherwise fall
        # through to the base class, which raises the standard TypeError.
        if callable(to_json):
            return to_json()
        return super(JSONEncoder, self).default(obj)
|
Loading…
Reference in New Issue
Block a user