mirror of
https://github.com/josejuanmartinez/libreCatastro.git
synced 2024-06-13 04:04:37 +02:00
Adds XML multiparcela. Fixes several bugs.
This commit is contained in:
parent
ee90545bb6
commit
d5b280f6eb
1
main.py
1
main.py
|
@ -34,5 +34,4 @@ if __name__ == "__main__":
|
||||||
if args.coords:
|
if args.coords:
|
||||||
CoordinatesInput.scrap_coordinates(scrapper, filenames, pictures)
|
CoordinatesInput.scrap_coordinates(scrapper, filenames, pictures)
|
||||||
else:
|
else:
|
||||||
print(pictures)
|
|
||||||
ProvincesInput.scrap_provinces(scrapper, provinces, pictures)
|
ProvincesInput.scrap_provinces(scrapper, provinces, pictures)
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
dotmap
|
||||||
shapely
|
shapely
|
||||||
beautifulsoup4==4.8.0
|
beautifulsoup4==4.8.0
|
||||||
elasticsearch>=6.0.0,<7.0.0
|
elasticsearch>=6.0.0,<7.0.0
|
||||||
|
|
|
@ -26,9 +26,10 @@ class CadasterEntry:
|
||||||
self.constructions = cadaster_entry.constructions
|
self.constructions = cadaster_entry.constructions
|
||||||
self.picture = cadaster_entry.picture
|
self.picture = cadaster_entry.picture
|
||||||
self.timestamp = cadaster_entry.timestamp
|
self.timestamp = cadaster_entry.timestamp
|
||||||
|
logger.debug(self.to_json_recursive())
|
||||||
|
|
||||||
def to_json(self):
|
def to_json(self):
|
||||||
return dict(address=self.address, cadaster=self.cadaster, type=self.type, use=self.use, surface=self.surface, year=self.year, location=self.location, gsurface=self.gsurface, constructions=self.constructions, picture=str(self.picture), timestamp=self.timestamp)
|
return dict(address=self.address, cadaster=self.cadaster, type=self.type, use=self.use, surface=self.surface, year=self.year, location=self.location, gsurface=self.gsurface, constructions=self.constructions, picture=str(self.picture) if self.picture is not None else None, timestamp=self.timestamp)
|
||||||
|
|
||||||
def to_json_recursive(self):
|
def to_json_recursive(self):
|
||||||
return json.dumps(self.to_json(), cls=JSONEncoder, sort_keys=True,
|
return json.dumps(self.to_json(), cls=JSONEncoder, sort_keys=True,
|
||||||
|
@ -43,8 +44,9 @@ class CadasterEntry:
|
||||||
res = es.index(index=config['elasticsearch-index'], doc_type='cadaster_doc', id=self.cadaster, body=body)
|
res = es.index(index=config['elasticsearch-index'], doc_type='cadaster_doc', id=self.cadaster, body=body)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(e)
|
logger.error(e)
|
||||||
finally:
|
|
||||||
es.transport.close()
|
es.transport.close()
|
||||||
|
|
||||||
return res
|
return res
|
||||||
|
|
||||||
def from_elasticsearch(self):
|
def from_elasticsearch(self):
|
||||||
|
@ -55,7 +57,7 @@ class CadasterEntry:
|
||||||
res = es.search(index=config['elasticsearch-index'], body=query)
|
res = es.search(index=config['elasticsearch-index'], body=query)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(e)
|
logger.error(e)
|
||||||
finally:
|
|
||||||
es.transport.close()
|
es.transport.close()
|
||||||
|
|
||||||
return res
|
return res
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
import json
|
import json
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
|
from dotmap import DotMap
|
||||||
|
|
||||||
from src.librecatastro.domain.address import Address
|
from src.librecatastro.domain.address import Address
|
||||||
from src.librecatastro.domain.cadaster_entry.cadaster_entry import CadasterEntry
|
from src.librecatastro.domain.cadaster_entry.cadaster_entry import CadasterEntry
|
||||||
from src.librecatastro.domain.construction import Construction
|
from src.librecatastro.domain.construction import Construction
|
||||||
|
@ -14,33 +16,48 @@ logger = CadastroLogger(__name__).logger
|
||||||
class CadasterEntryXML(CadasterEntry):
|
class CadasterEntryXML(CadasterEntry):
|
||||||
"""Cadaster class, that stores all the information about a surface and its properties"""
|
"""Cadaster class, that stores all the information about a surface and its properties"""
|
||||||
|
|
||||||
def __init__(self, xml, lon, lat):
|
def __init__(self, xml, lon=None, lat=None, picture=None):
|
||||||
|
self.address = None
|
||||||
|
if xml.consulta_dnp.bico.bi.ldt != DotMap():
|
||||||
|
self.address = Address(xml.consulta_dnp.bico.bi.ldt)
|
||||||
|
|
||||||
self.address = Address(xml['consulta_dnp']['bico']['bi']['ldt'])
|
self.cadaster = xml.consulta_dnp.bico.bi.idbi.rc.pc1 if xml.consulta_dnp.bico.bi.idbi.rc.pc1 != DotMap() else ''
|
||||||
|
self.cadaster += xml.consulta_dnp.bico.bi.idbi.rc.pc2 if xml.consulta_dnp.bico.bi.idbi.rc.pc2 != DotMap() else ''
|
||||||
|
self.cadaster += xml.consulta_dnp.bico.bi.idbi.rc.car if xml.consulta_dnp.bico.bi.idbi.rc.car != DotMap() else ''
|
||||||
|
self.cadaster += xml.consulta_dnp.bico.bi.idbi.rc.cc1 if xml.consulta_dnp.bico.bi.idbi.rc.cc1 != DotMap() else ''
|
||||||
|
self.cadaster += xml.consulta_dnp.bico.bi.idbi.rc.cc2 if xml.consulta_dnp.bico.bi.idbi.rc.cc2 != DotMap() else ''
|
||||||
|
|
||||||
self.cadaster = xml['consulta_dnp']['bico']['bi']['idbi']['rc']['pc1'] + \
|
self.year = None
|
||||||
xml['consulta_dnp']['bico']['bi']['idbi']['rc']['pc2'] + \
|
if xml.consulta_dnp.bico.bi.debi is not None:
|
||||||
xml['consulta_dnp']['bico']['bi']['idbi']['rc']['car'] + \
|
self.year = xml.consulta_dnp.bico.bi.debi.ant
|
||||||
xml['consulta_dnp']['bico']['bi']['idbi']['rc']['cc1'] + \
|
if self.year == DotMap():
|
||||||
xml['consulta_dnp']['bico']['bi']['idbi']['rc']['cc2']
|
self.year = None
|
||||||
|
|
||||||
self.year = xml['consulta_dnp']['bico']['bi']['debi']['ant'] \
|
self.type = xml.consulta_dnp.bico.bi.idbi.cn
|
||||||
if 'debi' in xml['consulta_dnp']['bico']['bi'] and\
|
if self.type != DotMap() and self.type == 'UR':
|
||||||
'ant' in xml['consulta_dnp']['bico']['bi']['debi'] else None
|
self.type = u'Urbano'
|
||||||
|
else:
|
||||||
|
self.type = u'Rústico'
|
||||||
|
|
||||||
self.type = xml['consulta_dnp']['bico']['bi']['idbi']['cn'] if 'cn' in xml['consulta_dnp']['bico']['bi']['idbi'] else None
|
self.use = None
|
||||||
if self.type is not None:
|
if xml.consulta_dnp.bico.bi.debi is not None:
|
||||||
self.type = u'Urbano' if self.type == 'UR' else u'Rústico'
|
self.use = xml.consulta_dnp.bico.bi.debi.luso
|
||||||
|
if self.use == DotMap():
|
||||||
|
self.use = None
|
||||||
|
|
||||||
|
self.surface = None
|
||||||
|
if xml.consulta_dnp.bico.bi.debi is not None:
|
||||||
|
self.surface = xml.consulta_dnp.bico.bi.debi.sfc + 'm2'
|
||||||
|
if self.surface == DotMap():
|
||||||
|
self.surface = None
|
||||||
|
|
||||||
self.use = xml['consulta_dnp']['bico']['bi']['debi']['luso'] if 'luso' in xml['consulta_dnp']['bico']['bi']['debi'] else None
|
|
||||||
self.surface = xml['consulta_dnp']['bico']['bi']['debi']['sfc'] + 'm2' if 'sfc' in xml['consulta_dnp']['bico']['bi']['debi'] else None
|
|
||||||
self.location = Location(lon, lat)
|
self.location = Location(lon, lat)
|
||||||
self.gsurface = config['not_available_via_XML']
|
self.gsurface = config['not_available_via_XML']
|
||||||
self.constructions = []
|
self.constructions = []
|
||||||
|
|
||||||
constructions = []
|
constructions = []
|
||||||
if 'lcons' in xml['consulta_dnp']['bico']:
|
if xml.consulta_dnp.bico.lcons.cons != DotMap():
|
||||||
constructions = xml['consulta_dnp']['bico']['lcons']['cons']
|
constructions = xml.consulta_dnp.bico.lcons.cons
|
||||||
|
|
||||||
''' Bad XML design, instead of returning a list with 1 element, it returns
|
''' Bad XML design, instead of returning a list with 1 element, it returns
|
||||||
the element'''
|
the element'''
|
||||||
|
@ -48,11 +65,26 @@ class CadasterEntryXML(CadasterEntry):
|
||||||
constructions = [constructions]
|
constructions = [constructions]
|
||||||
|
|
||||||
for construction in constructions:
|
for construction in constructions:
|
||||||
use = construction['lcd'] if 'lcd' in construction else None
|
use = construction.lcd
|
||||||
doorway = construction['dt']['lourb']['loint']['es'] if 'dt' in construction else None
|
if use == DotMap():
|
||||||
floor = construction['dt']['lourb']['loint']['pt'] if 'dt' in construction else None
|
use = None
|
||||||
door = construction['dt']['lourb']['loint']['pu'] if 'dt' in construction else None
|
|
||||||
surface = construction['dfcons']['stl'] if 'dfcons' in construction and 'stl' in construction['dfcons'] else None
|
doorway = construction.dt.lourb.loint.es
|
||||||
|
if doorway == DotMap():
|
||||||
|
doorway = None
|
||||||
|
|
||||||
|
floor = construction.dt.lourb.loint.pt
|
||||||
|
if floor == DotMap():
|
||||||
|
floor = None
|
||||||
|
|
||||||
|
door = construction.dt.lourb.loint.pu
|
||||||
|
if door == DotMap():
|
||||||
|
door = None
|
||||||
|
|
||||||
|
surface = construction.dfcons.stl
|
||||||
|
if surface == DotMap():
|
||||||
|
surface = None
|
||||||
|
|
||||||
reform_type = config['not_available_via_XML']
|
reform_type = config['not_available_via_XML']
|
||||||
reform_date = config['not_available_via_XML']
|
reform_date = config['not_available_via_XML']
|
||||||
|
|
||||||
|
@ -60,5 +92,6 @@ class CadasterEntryXML(CadasterEntry):
|
||||||
dict(uso=use, escalera=doorway, planta=floor, puerta=door, superficie=surface, tipo=reform_type,
|
dict(uso=use, escalera=doorway, planta=floor, puerta=door, superficie=surface, tipo=reform_type,
|
||||||
fecha=reform_date)))
|
fecha=reform_date)))
|
||||||
|
|
||||||
|
self.picture = picture
|
||||||
self.timestamp = str(datetime.now())
|
self.timestamp = str(datetime.now())
|
||||||
super().__init__(self)
|
super().__init__(self)
|
|
@ -5,6 +5,7 @@ from time import sleep
|
||||||
from urllib.request import urlopen
|
from urllib.request import urlopen
|
||||||
from xml.etree import ElementTree
|
from xml.etree import ElementTree
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
from dotmap import DotMap
|
||||||
|
|
||||||
from src.librecatastro.domain.cadaster_entry.cadaster_entry_html import CadasterEntryHTML
|
from src.librecatastro.domain.cadaster_entry.cadaster_entry_html import CadasterEntryHTML
|
||||||
from src.librecatastro.scrapping.scrapper import Scrapper
|
from src.librecatastro.scrapping.scrapper import Scrapper
|
||||||
|
@ -12,7 +13,6 @@ from src.settings import config
|
||||||
|
|
||||||
from src.utils.cadastro_logger import CadastroLogger
|
from src.utils.cadastro_logger import CadastroLogger
|
||||||
|
|
||||||
|
|
||||||
'''Logger'''
|
'''Logger'''
|
||||||
logger = CadastroLogger(__name__).logger
|
logger = CadastroLogger(__name__).logger
|
||||||
|
|
||||||
|
@ -30,15 +30,17 @@ class ScrapperHTML(Scrapper):
|
||||||
URL_REF_FULL = "https://www1.sedecatastro.gob.es/CYCBienInmueble/OVCConCiud.aspx?RefC={}&RCCompleta={}&del={}&mun={}"
|
URL_REF_FULL = "https://www1.sedecatastro.gob.es/CYCBienInmueble/OVCConCiud.aspx?RefC={}&RCCompleta={}&del={}&mun={}"
|
||||||
|
|
||||||
'''Information to scrap from HTML'''
|
'''Information to scrap from HTML'''
|
||||||
description_field_names = [u'Referencia catastral', u'Localización', u'Clase', u'Uso principal', u'Superficie construida', u'Año construcción']
|
description_field_names = [u'Referencia catastral', u'Localización', u'Clase', u'Uso principal',
|
||||||
|
u'Superficie construida', u'Año construcción']
|
||||||
gsurface_field_names = [u'Superficie gráfica']
|
gsurface_field_names = [u'Superficie gráfica']
|
||||||
|
|
||||||
""" Scrapping calls """
|
""" Scrapping calls """
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def scrap_coord(cls, x, y, pictures=False):
|
def scrap_coord(cls, x, y, pictures=False):
|
||||||
logger.debug("====Longitude: {} Latitude: {}====".format(x, y))
|
logger.debug("====Longitude: {} Latitude: {}====".format(x, y))
|
||||||
url = cls.URL.format(x, y)
|
url = cls.URL.format(x, y)
|
||||||
logger.debug("[||| ] URL for coordinates: {}".format(url))
|
logger.debug("URL for coordinates: {}".format(url))
|
||||||
f = urlopen(url)
|
f = urlopen(url)
|
||||||
data = f.read()
|
data = f.read()
|
||||||
root = ElementTree.fromstring(data)
|
root = ElementTree.fromstring(data)
|
||||||
|
@ -46,64 +48,62 @@ class ScrapperHTML(Scrapper):
|
||||||
"{http://www.catastro.meh.es/}coordenadas//{http://www.catastro.meh.es/}coord//{http://www.catastro.meh.es/}pc//{http://www.catastro.meh.es/}pc1")
|
"{http://www.catastro.meh.es/}coordenadas//{http://www.catastro.meh.es/}coord//{http://www.catastro.meh.es/}pc//{http://www.catastro.meh.es/}pc1")
|
||||||
pc2 = root.find(
|
pc2 = root.find(
|
||||||
"{http://www.catastro.meh.es/}coordenadas//{http://www.catastro.meh.es/}coord//{http://www.catastro.meh.es/}pc//{http://www.catastro.meh.es/}pc2")
|
"{http://www.catastro.meh.es/}coordenadas//{http://www.catastro.meh.es/}coord//{http://www.catastro.meh.es/}pc//{http://www.catastro.meh.es/}pc2")
|
||||||
if pc1 is None or pc2 is None:
|
|
||||||
return []
|
results = []
|
||||||
else:
|
if pc1 is not None and pc2 is not None:
|
||||||
logger.debug("||||| ] FOUND!")
|
|
||||||
cadaster = ''.join([pc1.text, pc2.text])
|
cadaster = ''.join([pc1.text, pc2.text])
|
||||||
cadaster_entries = cls.scrap_cadaster(cadaster, None, None, x, y, pictures)
|
cadaster_entries = cls.scrap_cadaster(cadaster, None, None, x, y, pictures)
|
||||||
for cadaster_entry in cadaster_entries:
|
for cadaster_entry in cadaster_entries:
|
||||||
cadaster_entry.to_elasticsearch()
|
cadaster_entry.to_elasticsearch()
|
||||||
return cadaster_entries
|
results.append(cadaster_entry)
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def scrap_provinces(cls, prov_list, pictures=False):
|
def scrap_provinces(cls, prov_list, pictures=False):
|
||||||
"""Scraps properties by addresses"""
|
|
||||||
provinces = cls.get_provinces()['consulta_provinciero']['provinciero']['prov']
|
|
||||||
|
|
||||||
for province in provinces:
|
for prov_name, prov_num, city_name, city_num, address, tv, nv in cls.get_address_iter(prov_list):
|
||||||
prov_name = province['np']
|
|
||||||
prov_num = province['cpine']
|
|
||||||
|
|
||||||
if len(prov_list) > 0 and prov_name not in prov_list:
|
if tv == DotMap() or nv == DotMap():
|
||||||
continue
|
continue
|
||||||
|
|
||||||
cities = cls.get_cities(prov_name)['consulta_municipiero']['municipiero']['muni']
|
|
||||||
for city in cities:
|
|
||||||
city_name = city['nm']
|
|
||||||
city_num = city['locat']['cmc']
|
|
||||||
addresses = (cls.get_addresses(prov_name, city_name)['consulta_callejero']['callejero'][
|
|
||||||
'calle'])
|
|
||||||
|
|
||||||
for address in addresses:
|
|
||||||
address_dir = address['dir']
|
|
||||||
tv = address_dir['tv']
|
|
||||||
nv = address_dir['nv']
|
|
||||||
|
|
||||||
num_scrapping_fails = 10
|
num_scrapping_fails = 10
|
||||||
counter = 1
|
counter = 1
|
||||||
while num_scrapping_fails > 0:
|
while num_scrapping_fails > 0:
|
||||||
try:
|
try:
|
||||||
cadaster = cls.get_cadaster_by_address(prov_name, city_name, tv, nv, counter)
|
numerero_map = cls.get_cadaster_by_address(prov_name, city_name, tv, nv, counter)
|
||||||
if 'lerr' in cadaster['consulta_numerero'] and \
|
if numerero_map.consulta_numerero.lerr.err.cod != DotMap():
|
||||||
'err' in cadaster['consulta_numerero']['lerr'] and \
|
|
||||||
'cod' in cadaster['consulta_numerero']['lerr']['err'] and \
|
|
||||||
cadaster['consulta_numerero']['lerr']['err']['cod'] == '43':
|
|
||||||
num_scrapping_fails -= 1
|
num_scrapping_fails -= 1
|
||||||
else:
|
else:
|
||||||
logger.debug("||||| ] FOUND!")
|
|
||||||
numps = cadaster['consulta_numerero']['numerero']['nump']
|
numps = numerero_map.consulta_numerero.numerero.nump
|
||||||
|
|
||||||
if not isinstance(numps, list):
|
if not isinstance(numps, list):
|
||||||
numps = [numps]
|
numps = [numps]
|
||||||
|
|
||||||
for nump in numps:
|
for nump in numps:
|
||||||
num = nump['num']['pnp']
|
if nump.num.pnp == DotMap():
|
||||||
cadaster_num = nump['pc']['pc1'] + nump['pc']['pc2']
|
continue
|
||||||
|
|
||||||
coords = cls.get_coords_from_cadaster(prov_name, city_name,cadaster_num)
|
num = nump.num.pnp
|
||||||
lon = coords['consulta_coordenadas']['coordenadas']['coord']['geo']['xcen']
|
|
||||||
lat = coords['consulta_coordenadas']['coordenadas']['coord']['geo']['ycen']
|
if nump.pc == DotMap():
|
||||||
|
continue
|
||||||
|
|
||||||
|
if nump.pc.pc1 == DotMap() or nump.pc.pc2 == DotMap():
|
||||||
|
continue
|
||||||
|
|
||||||
|
cadaster_num = nump.pc.pc1 + nump.pc.pc2
|
||||||
|
|
||||||
|
coords_map = cls.get_coords_from_cadaster(prov_name, city_name, cadaster_num)
|
||||||
|
|
||||||
|
lon = coords_map.consulta_coordenadas.coordenadas.coord.geo.xcen
|
||||||
|
if lon == DotMap():
|
||||||
|
lon = None
|
||||||
|
|
||||||
|
lat = coords_map.consulta_coordenadas.coordenadas.coord.geo.ycen
|
||||||
|
if lat == DotMap():
|
||||||
|
lat = None
|
||||||
|
|
||||||
''' Adding to tracking file'''
|
''' Adding to tracking file'''
|
||||||
logger.info('{},{}'.format(lon, lat))
|
logger.info('{},{}'.format(lon, lat))
|
||||||
|
@ -140,6 +140,7 @@ class ScrapperHTML(Scrapper):
|
||||||
counter += 1
|
counter += 1
|
||||||
sleep(config['sleep_time'])
|
sleep(config['sleep_time'])
|
||||||
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def scrap_cadaster_full_code(cls, full_cadaster, delimitacion, municipio, x=None, y=None, picture=None):
|
def scrap_cadaster_full_code(cls, full_cadaster, delimitacion, municipio, x=None, y=None, picture=None):
|
||||||
url_ref = cls.URL_REF_FULL.format(full_cadaster, full_cadaster, delimitacion, municipio)
|
url_ref = cls.URL_REF_FULL.format(full_cadaster, full_cadaster, delimitacion, municipio)
|
||||||
|
@ -150,13 +151,14 @@ class ScrapperHTML(Scrapper):
|
||||||
parsed_html = BeautifulSoup(html, features="html.parser")
|
parsed_html = BeautifulSoup(html, features="html.parser")
|
||||||
return ScrapperHTML.parse_html_parcela(parsed_html, x, y, picture)
|
return ScrapperHTML.parse_html_parcela(parsed_html, x, y, picture)
|
||||||
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def scrap_cadaster(cls, cadaster, delimitacion=None, municipio=None, x=None, y=None, pictures=False):
|
def scrap_cadaster(cls, cadaster, delimitacion=None, municipio=None, x=None, y=None, pictures=False):
|
||||||
rc_1 = cadaster[0:7]
|
rc_1 = cadaster[0:7]
|
||||||
rc_2 = cadaster[7:14]
|
rc_2 = cadaster[7:14]
|
||||||
url_ref = cls.URL_REF.format(rc_1, rc_2)
|
url_ref = cls.URL_REF.format(rc_1, rc_2)
|
||||||
|
|
||||||
logger.debug("[|||||||| ] URL for cadastral data: {}".format(url_ref))
|
logger.debug("URL for cadastral data: {}".format(url_ref))
|
||||||
|
|
||||||
f_ref = urlopen(url_ref)
|
f_ref = urlopen(url_ref)
|
||||||
data_ref = f_ref.read()
|
data_ref = f_ref.read()
|
||||||
|
@ -191,7 +193,8 @@ class ScrapperHTML(Scrapper):
|
||||||
partial_cadaster_ref = partial_cadaster.find("b")
|
partial_cadaster_ref = partial_cadaster.find("b")
|
||||||
logger.debug("-->Partial cadaster: {}".format(partial_cadaster_ref.text))
|
logger.debug("-->Partial cadaster: {}".format(partial_cadaster_ref.text))
|
||||||
partial_cadaster_text = partial_cadaster_ref.text.strip()
|
partial_cadaster_text = partial_cadaster_ref.text.strip()
|
||||||
cadaster = ScrapperHTML.scrap_cadaster_full_code(partial_cadaster_text, delimitacion, municipio, x, y, picture)
|
cadaster = ScrapperHTML.scrap_cadaster_full_code(partial_cadaster_text, delimitacion, municipio, x, y,
|
||||||
|
picture)
|
||||||
cadasters.append(cadaster)
|
cadasters.append(cadaster)
|
||||||
sleep(config['sleep_time'])
|
sleep(config['sleep_time'])
|
||||||
|
|
||||||
|
@ -200,12 +203,10 @@ class ScrapperHTML(Scrapper):
|
||||||
|
|
||||||
cadasters.append(cadaster)
|
cadasters.append(cadaster)
|
||||||
|
|
||||||
logger.debug("[|||||||||||] SUCCESS!")
|
|
||||||
sleep(config['sleep_time'])
|
sleep(config['sleep_time'])
|
||||||
return cadasters
|
return cadasters
|
||||||
|
|
||||||
""" Parsing """
|
""" Parsing """
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def parse_html_parcela(cls, parsed_html, x=None, y=None, picture=None):
|
def parse_html_parcela(cls, parsed_html, x=None, y=None, picture=None):
|
||||||
description = parsed_html.find(id='ctl00_Contenido_tblInmueble')
|
description = parsed_html.find(id='ctl00_Contenido_tblInmueble')
|
||||||
|
@ -229,7 +230,7 @@ class ScrapperHTML(Scrapper):
|
||||||
descriptive_data[field_name] = descriptive_data[field_name].split(' ')[0]
|
descriptive_data[field_name] = descriptive_data[field_name].split(' ')[0]
|
||||||
descriptive_data[field_name] = descriptive_data[field_name].split('\xa0')[0]
|
descriptive_data[field_name] = descriptive_data[field_name].split('\xa0')[0]
|
||||||
elif field_header.text == u'Localización':
|
elif field_header.text == u'Localización':
|
||||||
descriptive_data[field_name] = field_value.encode_contents().decode('utf-8').replace('<br/>', config['separator']).replace('<br>', config['separator'])
|
descriptive_data[field_name] = field_value.encode_contents().decode('utf-8').replace('<br/>',config['separator']).replace('<br>', config['separator'])
|
||||||
|
|
||||||
'''Graphical Surface'''
|
'''Graphical Surface'''
|
||||||
fields = parsed_html.find(id='ctl00_Contenido_tblFinca').find_all('div')
|
fields = parsed_html.find(id='ctl00_Contenido_tblFinca').find_all('div')
|
||||||
|
@ -253,7 +254,9 @@ class ScrapperHTML(Scrapper):
|
||||||
continue
|
continue
|
||||||
columns = construction.find_all('span')
|
columns = construction.find_all('span')
|
||||||
|
|
||||||
descriptive_data[u'Construcciones'].append(dict(uso=columns[0].text, escalera=columns[1].text, planta=columns[2].text, puerta=columns[3].text, superficie=columns[4].text, tipo=columns[5].text, fecha=columns[6].text))
|
descriptive_data[u'Construcciones'].append(
|
||||||
|
dict(uso=columns[0].text, escalera=columns[1].text, planta=columns[2].text, puerta=columns[3].text,
|
||||||
|
superficie=columns[4].text, tipo=columns[5].text, fecha=columns[6].text))
|
||||||
|
|
||||||
cadaster_entry = CadasterEntryHTML(descriptive_data)
|
cadaster_entry = CadasterEntryHTML(descriptive_data)
|
||||||
return cadaster_entry
|
return cadaster_entry
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
import json
|
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
from urllib import error
|
from urllib import error
|
||||||
|
|
||||||
|
@ -12,6 +11,8 @@ from src.librecatastro.scrapping.scrapper import Scrapper
|
||||||
from src.settings import config
|
from src.settings import config
|
||||||
from src.utils.cadastro_logger import CadastroLogger
|
from src.utils.cadastro_logger import CadastroLogger
|
||||||
|
|
||||||
|
from dotmap import DotMap
|
||||||
|
|
||||||
'''Logger'''
|
'''Logger'''
|
||||||
logger = CadastroLogger(__name__).logger
|
logger = CadastroLogger(__name__).logger
|
||||||
|
|
||||||
|
@ -20,118 +21,98 @@ class ScrapperXML(Scrapper):
|
||||||
"""Scrapper class for Catastro XML"""
|
"""Scrapper class for Catastro XML"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
pass
|
super().__init__()
|
||||||
|
|
||||||
""" Scrapping main calls """
|
""" Scrapping main calls """
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def scrap_coord(cls, x, y, pictures=False):
|
def scrap_coord(cls, x, y, pictures=False):
|
||||||
"""Scraps properties by coordinates"""
|
"""Scraps properties by coordinates"""
|
||||||
|
|
||||||
|
results = []
|
||||||
|
|
||||||
params = {'SRS': 'EPSG:4230', 'Coordenada_X': x, 'Coordenada_Y': y}
|
params = {'SRS': 'EPSG:4230', 'Coordenada_X': x, 'Coordenada_Y': y}
|
||||||
url = cls.URL_LOCATIONS_BASE.format("/OVCCoordenadas.asmx/Consulta_RCCOOR")
|
url = cls.URL_LOCATIONS_BASE.format("/OVCCoordenadas.asmx/Consulta_RCCOOR")
|
||||||
response = requests.get(url, params=params)
|
response = requests.get(url, params=params)
|
||||||
|
|
||||||
logger.debug("====Longitude: {} Latitude: {}====".format(x, y))
|
logger.debug("====Longitude: {} Latitude: {}====".format(x, y))
|
||||||
logger.debug("[||| ] URL for coordinates: {}".format(url + '?' + urllib.parse.urlencode(params)))
|
logger.debug("URL for coordinates: {}".format(url + '?' + urllib.parse.urlencode(params)))
|
||||||
|
|
||||||
xml = response.content
|
xml_dict_map = DotMap(xmltodict.parse(response.content, process_namespaces=False, xml_attribs=False))
|
||||||
xml_dict = xmltodict.parse(xml, process_namespaces=False, xml_attribs=False)
|
|
||||||
pc1 = None
|
pc1 = None
|
||||||
pc2 = None
|
pc2 = None
|
||||||
if 'coordenadas' in xml_dict['consulta_coordenadas'] and 'coord' in xml_dict['consulta_coordenadas']['coordenadas']:
|
if xml_dict_map.consulta_coordenadas.coordenadas.coord.pc != DotMap():
|
||||||
pc1 = xml_dict['consulta_coordenadas']['coordenadas']['coord']['pc']['pc1'] if 'pc' in xml_dict['consulta_coordenadas']['coordenadas']['coord'] else None
|
pc1 = xml_dict_map.consulta_coordenadas.coordenadas.coord.pc.pc1
|
||||||
pc2 = xml_dict['consulta_coordenadas']['coordenadas']['coord']['pc']['pc2'] if 'pc' in xml_dict['consulta_coordenadas']['coordenadas']['coord'] else None
|
if pc1 == DotMap():
|
||||||
if pc1 is not None and pc2 is not None:
|
pc1 = None
|
||||||
logger.debug("||||| ] FOUND!")
|
|
||||||
|
|
||||||
entry = cls.get_cadaster_entries_by_cadaster('', '', ''.join([pc1,pc2]))
|
pc2 = xml_dict_map.consulta_coordenadas.coordenadas.coord.pc.pc2
|
||||||
cadaster_entry = CadasterEntryXML(entry, x, y)
|
if pc2 == DotMap():
|
||||||
|
pc2 = None
|
||||||
|
|
||||||
|
if pc1 is not None and pc2 is not None:
|
||||||
|
|
||||||
|
entry = cls.get_cadaster_entries_by_cadaster('', '', ''.join([pc1, pc2]))
|
||||||
|
picture = None
|
||||||
|
if entry.consulta_dnp.bico.bi.dt.loine != DotMap():
|
||||||
|
# Parcela
|
||||||
|
if pictures:
|
||||||
|
prov_num = entry.consulta_dnp.bico.bi.dt.loine.cp
|
||||||
|
city_num = entry.consulta_dnp.bico.bi.dt.cmc
|
||||||
|
if prov_num != DotMap() and city_num != DotMap():
|
||||||
|
picture = cls.scrap_site_picture(prov_num, city_num, ''.join([pc1, pc2]))
|
||||||
|
cadaster_entry = CadasterEntryXML.create_from_bico(entry, x, y, picture)
|
||||||
cadaster_entry.to_elasticsearch()
|
cadaster_entry.to_elasticsearch()
|
||||||
logger.debug("[|||||||||||] SUCCESS!")
|
|
||||||
sleep(config['sleep_time'])
|
sleep(config['sleep_time'])
|
||||||
|
results.append(cadaster_entry)
|
||||||
|
elif entry.consulta_dnp.lrcdnp.rcdnp != DotMap():
|
||||||
|
# Multiparcela
|
||||||
|
parcelas = entry.consulta_dnp.lrcdnp.rcdnp
|
||||||
|
if not isinstance(parcelas, list):
|
||||||
|
parcelas = [parcelas]
|
||||||
|
for parcela in parcelas:
|
||||||
|
if pictures:
|
||||||
|
prov_num = parcela.dt.loine.cp
|
||||||
|
city_num = parcela.dt.cmc
|
||||||
|
if prov_num != DotMap() and city_num != DotMap():
|
||||||
|
picture = cls.scrap_site_picture(prov_num, city_num, ''.join([pc1, pc2]))
|
||||||
|
|
||||||
|
cadaster = parcela.rc.pc1 if parcela.rc.pc1 != DotMap() else ''
|
||||||
|
cadaster += parcela.rc.pc2 if parcela.rc.pc2 != DotMap() else ''
|
||||||
|
cadaster += parcela.rc.car if parcela.rc.car != DotMap() else ''
|
||||||
|
cadaster += parcela.rc.cc1 if parcela.rc.cc1 != DotMap() else ''
|
||||||
|
cadaster += parcela.rc.cc2 if parcela.rc.cc2 != DotMap() else ''
|
||||||
|
|
||||||
|
parcela = cls.get_cadaster_entries_by_cadaster('', '', cadaster)
|
||||||
|
cadaster_entry = CadasterEntryXML(parcela, x, y, picture)
|
||||||
|
cadaster_entry.to_elasticsearch()
|
||||||
|
|
||||||
|
results.append(cadaster_entry)
|
||||||
|
|
||||||
|
sleep(config['sleep_time'])
|
||||||
|
return results
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def scrap_provinces(cls, prov_list, pictures=False):
|
def scrap_provinces(cls, prov_list, pictures=False):
|
||||||
"""Scraps properties by addresses"""
|
for prov_name, prov_num, city_name, city_num, address, tv, nv in cls.get_address_iter(prov_list):
|
||||||
|
if tv == DotMap() or nv == DotMap():
|
||||||
provinces = ScrapperXML.get_provinces()['consulta_provinciero']['provinciero']['prov']
|
|
||||||
|
|
||||||
for province in provinces:
|
|
||||||
prov_name = province['np']
|
|
||||||
|
|
||||||
if len(prov_list) > 0 and prov_name not in prov_list:
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
cities = ScrapperXML.get_cities(prov_name)['consulta_municipiero']['municipiero']['muni']
|
|
||||||
for city in cities:
|
|
||||||
city_name = city['nm']
|
|
||||||
addresses = ScrapperXML.get_addresses(prov_name, city_name)['consulta_callejero']['callejero'][
|
|
||||||
'calle']
|
|
||||||
for address in addresses:
|
|
||||||
address_dir = address['dir']
|
|
||||||
tv = address_dir['tv']
|
|
||||||
nv = address_dir['nv']
|
|
||||||
|
|
||||||
num_scrapping_fails = 10
|
num_scrapping_fails = 10
|
||||||
counter = 1
|
counter = 1
|
||||||
while num_scrapping_fails > 0:
|
while num_scrapping_fails > 0:
|
||||||
try:
|
try:
|
||||||
cadaster = cls.get_cadaster_by_address(prov_name, city_name, tv, nv, counter)
|
cadaster = cls.get_cadaster_by_address(prov_name, city_name, tv, nv, counter)
|
||||||
if 'lerr' in cadaster['consulta_numerero'] and \
|
res = cls.process_xml_by_address(cadaster, prov_name, city_name, tv, nv)
|
||||||
'err' in cadaster['consulta_numerero']['lerr'] and \
|
if len(res) < 1:
|
||||||
'cod' in cadaster['consulta_numerero']['lerr']['err'] and \
|
|
||||||
cadaster['consulta_numerero']['lerr']['err']['cod'] == '43':
|
|
||||||
num_scrapping_fails -= 1
|
num_scrapping_fails -= 1
|
||||||
else:
|
else:
|
||||||
logger.debug("||||| ] FOUND!")
|
|
||||||
|
|
||||||
numps = cadaster['consulta_numerero']['numerero']['nump']
|
|
||||||
|
|
||||||
if not isinstance(numps, list):
|
|
||||||
numps = [numps]
|
|
||||||
|
|
||||||
for nump in numps:
|
|
||||||
num = nump['num']['pnp']
|
|
||||||
cadaster_num = nump['pc']['pc1'] + nump['pc']['pc2']
|
|
||||||
|
|
||||||
coords = cls.get_coords_from_cadaster(prov_name, city_name,
|
|
||||||
cadaster_num)
|
|
||||||
lon = coords['consulta_coordenadas']['coordenadas']['coord']['geo']['xcen']
|
|
||||||
lat = coords['consulta_coordenadas']['coordenadas']['coord']['geo']['ycen']
|
|
||||||
|
|
||||||
''' Adding to tracking file'''
|
|
||||||
logger.info('{},{}'.format(lon, lat))
|
|
||||||
|
|
||||||
num_scrapping_fails = 10
|
num_scrapping_fails = 10
|
||||||
|
|
||||||
entry = cls.get_cadaster_entries_by_address(prov_name, city_name, tv,
|
|
||||||
nv, num)
|
|
||||||
|
|
||||||
if 'bico' in entry['consulta_dnp']:
|
|
||||||
# Parcela
|
|
||||||
cadaster_entry = CadasterEntryXML(entry, lon, lat)
|
|
||||||
cadaster_entry.to_elasticsearch()
|
|
||||||
elif 'lrcdnp' in entry['consulta_dnp']:
|
|
||||||
# Multiparcela
|
|
||||||
for site in entry['consulta_dnp']['lrcdnp']['rcdnp']:
|
|
||||||
cadaster = site['rc']['pc1'] + \
|
|
||||||
site['rc']['pc2'] + \
|
|
||||||
site['rc']['car'] + \
|
|
||||||
site['rc']['cc1'] + \
|
|
||||||
site['rc']['cc2']
|
|
||||||
sub_entry = cls.get_cadaster_entries_by_cadaster(prov_name,
|
|
||||||
city_name,
|
|
||||||
cadaster)
|
|
||||||
cadaster_entry = CadasterEntryXML(sub_entry, lon, lat)
|
|
||||||
cadaster_entry.to_elasticsearch()
|
|
||||||
sleep(config['sleep_time'])
|
|
||||||
|
|
||||||
logger.debug("[|||||||||||] SUCCESS!")
|
|
||||||
sleep(config['sleep_time'])
|
sleep(config['sleep_time'])
|
||||||
|
|
||||||
except urllib.error.HTTPError as e:
|
except urllib.error.HTTPError as e:
|
||||||
logger.error(
|
logger.error(
|
||||||
"ERROR AT ADDRESS {} {} {} {} {}".format(tv, nv, num, prov_name, city_name))
|
"ERROR AT ADDRESS {} {} {} {} {}".format(tv, nv, counter, prov_name, city_name))
|
||||||
logger.error("=============================================")
|
logger.error("=============================================")
|
||||||
logger.error(e, exc_info=True)
|
logger.error(e, exc_info=True)
|
||||||
logger.error("...sleeping...")
|
logger.error("...sleeping...")
|
||||||
|
@ -142,7 +123,7 @@ class ScrapperXML(Scrapper):
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(
|
logger.error(
|
||||||
"ERROR AT ADDRESS {} {} {} {} {}".format(tv, nv, num, prov_name, city_name))
|
"ERROR AT ADDRESS {} {} {} {} {}".format(tv, nv, counter, prov_name, city_name))
|
||||||
logger.error("=============================================")
|
logger.error("=============================================")
|
||||||
logger.error(e, exc_info=True)
|
logger.error(e, exc_info=True)
|
||||||
logger.error("=============================================")
|
logger.error("=============================================")
|
||||||
|
@ -150,3 +131,81 @@ class ScrapperXML(Scrapper):
|
||||||
|
|
||||||
counter += 1
|
counter += 1
|
||||||
sleep(config['sleep_time'])
|
sleep(config['sleep_time'])
|
||||||
|
|
||||||
|
@classmethod
def process_xml_by_address(cls, numerero_map, prov_name, city_name, tv, nv):
    """Build and store a cadaster entry for every street number of an address.

    Walks the ``nump`` records of a street-number lookup response. For each
    record that has a number and both halves of the parcel code it fetches
    the coordinates and the cadaster data, optionally scraps the site
    picture, and stores a ``CadasterEntryXML`` in Elasticsearch.

    :param numerero_map: DotMap with the street-number lookup response.
    :param prov_name: province name used in the follow-up queries.
    :param city_name: city name used in the follow-up queries.
    :param tv: street type passed to the address query.
    :param nv: street name passed to the address query.
    :return: list with every ``CadasterEntryXML`` created; empty when the
        response carries an error code.
    """
    results = []

    # DotMap creates missing keys as empty DotMaps, so "field is absent" is
    # expressed throughout as "field == DotMap()".
    if numerero_map.consulta_numerero.lerr.err.cod != DotMap():
        return results

    numps = numerero_map.consulta_numerero.numerero.nump
    if not isinstance(numps, list):
        numps = [numps]

    for nump in numps:
        if nump.num.pnp == DotMap():
            continue
        num = nump.num.pnp

        if nump.pc == DotMap():
            continue
        if nump.pc.pc1 == DotMap() or nump.pc.pc2 == DotMap():
            continue

        cadaster_num = nump.pc.pc1 + nump.pc.pc2

        coords_map = cls.get_coords_from_cadaster(prov_name, city_name, cadaster_num)
        geo = coords_map.consulta_coordenadas.coordenadas.coord.geo

        lon = geo.xcen
        if lon == DotMap():
            lon = None

        lat = geo.ycen
        if lat == DotMap():
            lat = None

        # Adding to tracking file
        logger.info('{},{}'.format(lon, lat))

        entry_map = cls.get_cadaster_entries_by_address(prov_name, city_name, tv, nv, num)

        if entry_map.consulta_dnp.bico != DotMap():
            # Parcela
            picture = None
            loine = entry_map.consulta_dnp.bico.bi.dt.loine
            prov_num = loine.cp
            city_num = loine.cm

            if prov_num != DotMap() and city_num != DotMap():
                picture = cls.scrap_site_picture(prov_num, city_num, cadaster_num)

            cadaster_entry = CadasterEntryXML(entry_map, lon, lat, picture)
            results.append(cadaster_entry)
            cadaster_entry.to_elasticsearch()

            sleep(config['sleep_time'])

        elif entry_map.consulta_dnp.lrcdnp.rcdnp != DotMap():
            # Multiparcela
            sites = entry_map.consulta_dnp.lrcdnp.rcdnp
            # A single <rcdnp> element is parsed as a mapping, not a list;
            # iterating it directly would walk its keys.
            if not isinstance(sites, list):
                sites = [sites]

            for site in sites:
                site_map = DotMap(site)

                if site_map.rc == DotMap():
                    continue

                cadaster = site_map.rc.pc1 + site_map.rc.pc2 + site_map.rc.car + site_map.rc.cc1 + site_map.rc.cc2
                sub_entry = cls.get_cadaster_entries_by_cadaster(prov_name, city_name, cadaster)

                # The province/city codes belong to the current site, not to
                # the list being iterated. NOTE(review): assumes they sit
                # under dt.loine like in the single-parcel branch; falls back
                # to a direct loine child - confirm against the service schema.
                loine = site_map.dt.loine
                if loine == DotMap():
                    loine = site_map.loine
                prov_num = loine.cp
                city_num = loine.cm

                # Reset per site so a picture from a previous site is never
                # attached to this one.
                picture = None
                if prov_num != DotMap() and city_num != DotMap():
                    picture = cls.scrap_site_picture(prov_num, city_num, cadaster)

                cadaster_entry = CadasterEntryXML(sub_entry, lon, lat, picture)
                results.append(cadaster_entry)
                cadaster_entry.to_elasticsearch()

                sleep(config['sleep_time'])

    return results
|
||||||
|
|
|
@ -4,6 +4,7 @@ from urllib.request import urlopen
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
import xmltodict
|
import xmltodict
|
||||||
|
from dotmap import DotMap
|
||||||
|
|
||||||
from src.settings import config
|
from src.settings import config
|
||||||
from src.utils.cadastro_logger import CadastroLogger
|
from src.utils.cadastro_logger import CadastroLogger
|
||||||
|
@ -36,7 +37,7 @@ class Scrapper:
|
||||||
url = cls.URL_LOCATIONS_BASE.format("/OVCCallejero.asmx/ConsultaProvincia")
|
url = cls.URL_LOCATIONS_BASE.format("/OVCCallejero.asmx/ConsultaProvincia")
|
||||||
response = requests.get(url)
|
response = requests.get(url)
|
||||||
xml = response.content
|
xml = response.content
|
||||||
return xmltodict.parse(xml, process_namespaces=False, xml_attribs=False)
|
return DotMap(xmltodict.parse(xml, process_namespaces=False, xml_attribs=False))
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_cities(cls, provincia, municipio=None):
|
def get_cities(cls, provincia, municipio=None):
|
||||||
|
@ -48,7 +49,7 @@ class Scrapper:
|
||||||
url = cls.URL_LOCATIONS_BASE.format("/OVCCallejero.asmx/ConsultaMunicipio")
|
url = cls.URL_LOCATIONS_BASE.format("/OVCCallejero.asmx/ConsultaMunicipio")
|
||||||
response = requests.get(url, params=params)
|
response = requests.get(url, params=params)
|
||||||
xml = response.content
|
xml = response.content
|
||||||
return xmltodict.parse(xml, process_namespaces=False, xml_attribs=False)
|
return DotMap(xmltodict.parse(xml, process_namespaces=False, xml_attribs=False))
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_addresses(cls, provincia, municipio, tipovia=None, nombrevia=None):
|
def get_addresses(cls, provincia, municipio, tipovia=None, nombrevia=None):
|
||||||
|
@ -66,7 +67,56 @@ class Scrapper:
|
||||||
url = cls.URL_LOCATIONS_BASE.format("/OVCCallejero.asmx/ConsultaVia")
|
url = cls.URL_LOCATIONS_BASE.format("/OVCCallejero.asmx/ConsultaVia")
|
||||||
response = requests.get(url, params=params)
|
response = requests.get(url, params=params)
|
||||||
xml = response.content
|
xml = response.content
|
||||||
return xmltodict.parse(xml, process_namespaces=False, xml_attribs=False)
|
return DotMap(xmltodict.parse(xml, process_namespaces=False, xml_attribs=False))
|
||||||
|
|
||||||
|
@classmethod
def get_address_iter(cls, prov_list=None):
    """Iterate over every street of the requested provinces.

    Queries provinces, then the cities of each province, then the streets
    of each city, and yields one tuple per street that has both a street
    type and a street name.

    :param prov_list: optional collection of province names to restrict
        the iteration to; ``None`` or empty means all provinces.
    :return: generator of ``(prov_name, prov_num, city_name, city_num,
        address_dir, tv, nv)`` tuples. When no provinces are returned a
        single ``None`` is yielded. When a province has no cities or a
        city has no streets the iteration stops.
    """
    if prov_list is None:
        prov_list = []

    # DotMap creates missing keys as empty DotMaps, so "field is absent" is
    # expressed throughout as "field == DotMap()".
    provinces = cls.get_provinces().consulta_provinciero.provinciero.prov
    if provinces == DotMap():
        logger.error("No provinces available right now (Service is down?)")
        yield None
        return

    # A single child element is parsed as a mapping instead of a list;
    # iterating it directly would walk its keys.
    if not isinstance(provinces, list):
        provinces = [provinces]

    for province in provinces:
        prov_name = province.np
        prov_num = province.cpine
        if prov_name == DotMap() or prov_num == DotMap():
            continue

        if prov_list and prov_name not in prov_list:
            continue

        cities = cls.get_cities(prov_name).consulta_municipiero.municipiero.muni
        if cities == DotMap():
            logger.error("No cities available right now (Service is down?)")
            return

        if not isinstance(cities, list):
            cities = [cities]

        for city in cities:
            city_name = city.nm
            city_num = city.locat.cmc

            if city_name == DotMap() or city_num == DotMap():
                continue

            addresses = cls.get_addresses(prov_name, city_name).consulta_callejero.callejero.calle
            if addresses == DotMap():
                logger.error("No addresses available right now (Service is down?)")
                return

            if not isinstance(addresses, list):
                addresses = [addresses]

            for address in addresses:
                address_dir = address.dir
                tv = address_dir.tv
                nv = address_dir.nv

                if tv == DotMap() or nv == DotMap():
                    continue

                yield (prov_name, prov_num, city_name, city_num, address_dir, tv, nv)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_cadaster_by_address(cls, provincia, municipio, tipovia, nombrevia, numero):
|
def get_cadaster_by_address(cls, provincia, municipio, tipovia, nombrevia, numero):
|
||||||
|
@ -79,11 +129,11 @@ class Scrapper:
|
||||||
url = cls.URL_LOCATIONS_BASE.format("/OVCCallejero.asmx/ConsultaNumero")
|
url = cls.URL_LOCATIONS_BASE.format("/OVCCallejero.asmx/ConsultaNumero")
|
||||||
|
|
||||||
logger.debug("====Dir: {} {} {} {} {}====".format(tipovia, nombrevia, numero, municipio, provincia))
|
logger.debug("====Dir: {} {} {} {} {}====".format(tipovia, nombrevia, numero, municipio, provincia))
|
||||||
logger.debug("[||| ] URL for address: {}".format(url + '?' + urllib.parse.urlencode(params)))
|
logger.debug("URL for address: {}".format(url + '?' + urllib.parse.urlencode(params)))
|
||||||
|
|
||||||
response = requests.get(url, params=params)
|
response = requests.get(url, params=params)
|
||||||
xml = response.content
|
xml = response.content
|
||||||
return xmltodict.parse(xml, process_namespaces=False, xml_attribs=False)
|
return DotMap(xmltodict.parse(xml, process_namespaces=False, xml_attribs=False))
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_cadaster_entries_by_address(cls, provincia, municipio, sigla, calle, numero, bloque=None, escalera=None,
|
def get_cadaster_entries_by_address(cls, provincia, municipio, sigla, calle, numero, bloque=None, escalera=None,
|
||||||
|
@ -111,11 +161,11 @@ class Scrapper:
|
||||||
params['Puerta'] = ''
|
params['Puerta'] = ''
|
||||||
|
|
||||||
url = cls.URL_LOCATIONS_BASE.format("/OVCCallejero.asmx/Consulta_DNPLOC")
|
url = cls.URL_LOCATIONS_BASE.format("/OVCCallejero.asmx/Consulta_DNPLOC")
|
||||||
logger.debug("[|||||||||| ] URL for entry: {}".format(url + '?' + urllib.parse.urlencode(params)))
|
logger.debug("URL for entry: {}".format(url + '?' + urllib.parse.urlencode(params)))
|
||||||
|
|
||||||
response = requests.get(url, params=params)
|
response = requests.get(url, params=params)
|
||||||
xml = response.content
|
xml = response.content
|
||||||
return xmltodict.parse(xml, process_namespaces=False, xml_attribs=False)
|
return DotMap(xmltodict.parse(xml, process_namespaces=False, xml_attribs=False))
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_cadaster_entries_by_cadaster(cls, provincia, municipio, rc):
|
def get_cadaster_entries_by_cadaster(cls, provincia, municipio, rc):
|
||||||
|
@ -125,27 +175,28 @@ class Scrapper:
|
||||||
"RC": rc}
|
"RC": rc}
|
||||||
|
|
||||||
url = cls.URL_LOCATIONS_BASE.format("/OVCCallejero.asmx/Consulta_DNPRC")
|
url = cls.URL_LOCATIONS_BASE.format("/OVCCallejero.asmx/Consulta_DNPRC")
|
||||||
logger.debug("[|||||||||| ] URL for entry: {}".format(url + '?' + urllib.parse.urlencode(params)))
|
logger.debug("URL for entry: {}".format(url + '?' + urllib.parse.urlencode(params)))
|
||||||
response = requests.get(url, params=params)
|
response = requests.get(url, params=params)
|
||||||
xml = response.content
|
xml = response.content
|
||||||
return xmltodict.parse(xml, process_namespaces=False, xml_attribs=False)
|
return DotMap(xmltodict.parse(xml, process_namespaces=False, xml_attribs=False))
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_coords_from_cadaster(cls, provincia, municipio, cadaster):
|
def get_coords_from_cadaster(cls, provincia, municipio, cadaster):
|
||||||
params = {'Provincia': provincia, 'Municipio': municipio, 'SRS': 'EPSG:4230', 'RC': cadaster}
|
params = {'Provincia': provincia, 'Municipio': municipio, 'SRS': 'EPSG:4230', 'RC': cadaster}
|
||||||
url = cls.URL_LOCATIONS_BASE.format("/OVCCoordenadas.asmx/Consulta_CPMRC")
|
url = cls.URL_LOCATIONS_BASE.format("/OVCCoordenadas.asmx/Consulta_CPMRC")
|
||||||
|
|
||||||
logger.debug("[|||||||| ] URL for coords: {}".format(url + '?' + urllib.parse.urlencode(params)))
|
logger.debug("URL for coords: {}".format(url + '?' + urllib.parse.urlencode(params)))
|
||||||
|
|
||||||
response = requests.get(url, params=params)
|
response = requests.get(url, params=params)
|
||||||
xml = response.content
|
xml = response.content
|
||||||
return xmltodict.parse(xml, process_namespaces=False, xml_attribs=False)
|
return DotMap(xmltodict.parse(xml, process_namespaces=False, xml_attribs=False))
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def scrap_site_picture(cls, prov_name, city_name, cadaster):
|
def scrap_site_picture(cls, prov_num, city_num, cadaster):
|
||||||
url_pic = cls.URL_PICTURES.format(prov_name, city_name, cadaster, config['width_px'], config['height_px'])
|
|
||||||
|
|
||||||
logger.debug("[|||||||| ] URL for picture data: {}".format(url_pic))
|
url_pic = cls.URL_PICTURES.format(prov_num, city_num, cadaster, config['width_px'], config['height_px'])
|
||||||
|
|
||||||
|
logger.debug("URL for picture data: {}".format(url_pic))
|
||||||
|
|
||||||
f_pic = urlopen(url_pic)
|
f_pic = urlopen(url_pic)
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,9 @@
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from time import sleep
|
from time import sleep
|
||||||
|
|
||||||
|
from dotmap import DotMap
|
||||||
|
|
||||||
from src.librecatastro.domain.cadaster_entry.cadaster_entry_xml import CadasterEntryXML
|
from src.librecatastro.domain.cadaster_entry.cadaster_entry_xml import CadasterEntryXML
|
||||||
from src.librecatastro.scrapping.format.scrapper_xml import ScrapperXML
|
from src.librecatastro.scrapping.format.scrapper_xml import ScrapperXML
|
||||||
from src.settings import config
|
from src.settings import config
|
||||||
|
@ -8,23 +11,32 @@ from src.settings import config
|
||||||
|
|
||||||
class ScrapperXMLTests(unittest.TestCase):
|
class ScrapperXMLTests(unittest.TestCase):
|
||||||
def test_scrapper_retrieves_dict_provinces(self):
|
def test_scrapper_retrieves_dict_provinces(self):
|
||||||
self.assertEqual(ScrapperXML.get_provinces()['consulta_provinciero']['control']['cuprov'], '48')
|
self.assertEqual(ScrapperXML.get_provinces().consulta_provinciero.control.cuprov, '48')
|
||||||
|
sleep(config['sleep_time'])
|
||||||
|
|
||||||
def test_scrapper_retrieves_dict_cities(self):
|
def test_scrapper_retrieves_dict_cities(self):
|
||||||
self.assertEqual(ScrapperXML.get_cities('ALACANT')['consulta_municipiero']['control']['cumun'],'141')
|
self.assertEqual(ScrapperXML.get_cities('ALACANT').consulta_municipiero.control.cumun, '141')
|
||||||
sleep(config['sleep_time'])
|
sleep(config['sleep_time'])
|
||||||
|
|
||||||
def test_scrapper_retrieves_dict_addresses(self):
|
def test_scrapper_retrieves_dict_addresses(self):
|
||||||
self.assertEqual(ScrapperXML.get_addresses('ALACANT','AGOST')['consulta_callejero']['control']['cuca'], '117')
|
self.assertEqual(ScrapperXML.get_addresses('ALACANT','AGOST').consulta_callejero.control.cuca, '117')
|
||||||
|
sleep(config['sleep_time'])
|
||||||
|
|
||||||
|
def test_scrapper_retrieves_dict_addresses_iter(self):
|
||||||
|
iterator = ScrapperXML.get_address_iter()
|
||||||
|
address = iterator.__next__()
|
||||||
|
self.assertEqual(address[1], '15')
|
||||||
|
self.assertEqual(address[3], '7')
|
||||||
sleep(config['sleep_time'])
|
sleep(config['sleep_time'])
|
||||||
|
|
||||||
def test_scrapper_creates_cadaster_entry(self):
|
def test_scrapper_creates_cadaster_entry(self):
|
||||||
print(ScrapperXML.get_cadaster_entries_by_cadaster('','', '6375620YH0567S0001GW'))
|
dotmap_res = ScrapperXML.get_cadaster_entries_by_cadaster('', '', '6375620YH0567S0001GW')
|
||||||
|
self.assertNotEqual(dotmap_res, DotMap())
|
||||||
sleep(config['sleep_time'])
|
sleep(config['sleep_time'])
|
||||||
|
|
||||||
def test_scrapper_creates_cadaster_entry_and_stores_in_elasticsearch(self):
|
def test_scrapper_creates_cadaster_entry_and_stores_in_elasticsearch(self):
|
||||||
entry = ScrapperXML.get_cadaster_entries_by_cadaster('', '', '6375620YH0567S0001GW')
|
entry = ScrapperXML.get_cadaster_entries_by_cadaster('', '', '6375620YH0567S0001GW')
|
||||||
cadaster_entry = CadasterEntryXML(entry, None, None)
|
cadaster_entry = CadasterEntryXML(entry)
|
||||||
cadaster_entry.to_elasticsearch()
|
cadaster_entry.to_elasticsearch()
|
||||||
self.assertIsNotNone(cadaster_entry.from_elasticsearch())
|
self.assertIsNotNone(cadaster_entry.from_elasticsearch())
|
||||||
sleep(config['sleep_time'])
|
sleep(config['sleep_time'])
|
||||||
|
@ -36,17 +48,20 @@ class ScrapperXMLTests(unittest.TestCase):
|
||||||
nv = u'ARZÓN'
|
nv = u'ARZÓN'
|
||||||
num = 21
|
num = 21
|
||||||
entry = ScrapperXML.get_cadaster_entries_by_address(prov_name, city_name, tv, nv, num)
|
entry = ScrapperXML.get_cadaster_entries_by_address(prov_name, city_name, tv, nv, num)
|
||||||
for site in entry['consulta_dnp']['lrcdnp']['rcdnp']:
|
counter = 0
|
||||||
cadaster = site['rc']['pc1'] + \
|
for site in entry.consulta_dnp.lrcdnp.rcdnp:
|
||||||
site['rc']['pc2'] + \
|
cadaster = site.rc.pc1 + \
|
||||||
site['rc']['car'] + \
|
site.rc.pc2 + \
|
||||||
site['rc']['cc1'] + \
|
site.rc.car + \
|
||||||
site['rc']['cc2']
|
site.rc.cc1 + \
|
||||||
|
site.rc.cc2
|
||||||
sub_entry = ScrapperXML.get_cadaster_entries_by_cadaster(prov_name, city_name, cadaster)
|
sub_entry = ScrapperXML.get_cadaster_entries_by_cadaster(prov_name, city_name, cadaster)
|
||||||
cadaster_entry = CadasterEntryXML(sub_entry, None, None)
|
cadaster_entry = CadasterEntryXML(sub_entry)
|
||||||
cadaster_entry.to_elasticsearch()
|
cadaster_entry.to_elasticsearch()
|
||||||
self.assertIsNotNone(cadaster_entry.from_elasticsearch())
|
self.assertIsNotNone(cadaster_entry.from_elasticsearch())
|
||||||
sleep(config['sleep_time'])
|
sleep(config['sleep_time'])
|
||||||
|
counter += 1
|
||||||
|
self.assertEqual(counter, 2)
|
||||||
|
|
||||||
def test_no_use_creates_entry_in_elasticsearch(self):
|
def test_no_use_creates_entry_in_elasticsearch(self):
|
||||||
prov_name = u'A CORUÑA'
|
prov_name = u'A CORUÑA'
|
||||||
|
@ -55,14 +70,14 @@ class ScrapperXMLTests(unittest.TestCase):
|
||||||
nv = u'BARCALA'
|
nv = u'BARCALA'
|
||||||
num = 5
|
num = 5
|
||||||
entry = ScrapperXML.get_cadaster_entries_by_address(prov_name, city_name, tv, nv, num)
|
entry = ScrapperXML.get_cadaster_entries_by_address(prov_name, city_name, tv, nv, num)
|
||||||
for site in entry['consulta_dnp']['lrcdnp']['rcdnp']:
|
for site in entry.consulta_dnp.lrcdnp.rcdnp:
|
||||||
cadaster = site['rc']['pc1'] + \
|
cadaster = site.rc.pc1 + \
|
||||||
site['rc']['pc2'] + \
|
site.rc.pc2 + \
|
||||||
site['rc']['car'] + \
|
site.rc.car + \
|
||||||
site['rc']['cc1'] + \
|
site.rc.cc1 + \
|
||||||
site['rc']['cc2']
|
site.rc.cc2
|
||||||
sub_entry = ScrapperXML.get_cadaster_entries_by_cadaster(prov_name, city_name, cadaster)
|
sub_entry = ScrapperXML.get_cadaster_entries_by_cadaster(prov_name, city_name, cadaster)
|
||||||
cadaster_entry = CadasterEntryXML(sub_entry, None, None)
|
cadaster_entry = CadasterEntryXML(sub_entry)
|
||||||
cadaster_entry.to_elasticsearch()
|
cadaster_entry.to_elasticsearch()
|
||||||
self.assertIsNotNone(cadaster_entry.from_elasticsearch())
|
self.assertIsNotNone(cadaster_entry.from_elasticsearch())
|
||||||
sleep(config['sleep_time'])
|
sleep(config['sleep_time'])
|
||||||
|
@ -74,18 +89,43 @@ class ScrapperXMLTests(unittest.TestCase):
|
||||||
nv = u'CASTELAO'
|
nv = u'CASTELAO'
|
||||||
num = 1
|
num = 1
|
||||||
entry = ScrapperXML.get_cadaster_entries_by_address(prov_name, city_name, tv, nv, num)
|
entry = ScrapperXML.get_cadaster_entries_by_address(prov_name, city_name, tv, nv, num)
|
||||||
for site in entry['consulta_dnp']['lrcdnp']['rcdnp']:
|
for site in entry.consulta_dnp.lrcdnp.rcdnp:
|
||||||
cadaster = site['rc']['pc1'] + \
|
cadaster = site.rc.pc1 + \
|
||||||
site['rc']['pc2'] + \
|
site.rc.pc2 + \
|
||||||
site['rc']['car'] + \
|
site.rc.car + \
|
||||||
site['rc']['cc1'] + \
|
site.rc.cc1 + \
|
||||||
site['rc']['cc2']
|
site.rc.cc2
|
||||||
sub_entry = ScrapperXML.get_cadaster_entries_by_cadaster(prov_name, city_name, cadaster)
|
sub_entry = ScrapperXML.get_cadaster_entries_by_cadaster(prov_name, city_name, cadaster)
|
||||||
cadaster_entry = CadasterEntryXML(sub_entry, None, None)
|
cadaster_entry = CadasterEntryXML(sub_entry)
|
||||||
cadaster_entry.to_elasticsearch()
|
cadaster_entry.to_elasticsearch()
|
||||||
self.assertIsNotNone(cadaster_entry.from_elasticsearch())
|
self.assertIsNotNone(cadaster_entry.from_elasticsearch())
|
||||||
sleep(config['sleep_time'])
|
sleep(config['sleep_time'])
|
||||||
|
|
||||||
|
def test_no_es_pt_pu_creates_entry_in_elasticsearch_2(self):
    """Every site listed for CL BEATAS 4 (MADRID, ALCALA DE HENARES) is stored and can be read back."""
    prov_name, city_name = u'MADRID', u'ALCALA DE HENARES'
    tv, nv, num = u'CL', u'BEATAS', 4

    entry = ScrapperXML.get_cadaster_entries_by_address(prov_name, city_name, tv, nv, num)

    for site in entry.consulta_dnp.lrcdnp.rcdnp:
        rc = site.rc
        # Full cadaster reference: the five reference parts in order.
        cadaster = rc.pc1 + rc.pc2 + rc.car + rc.cc1 + rc.cc2

        sub_entry = ScrapperXML.get_cadaster_entries_by_cadaster(prov_name, city_name, cadaster)
        cadaster_entry = CadasterEntryXML(sub_entry)
        cadaster_entry.to_elasticsearch()

        stored = cadaster_entry.from_elasticsearch()
        self.assertIsNotNone(stored)

        # Pause between iterations, each of which queries the service.
        sleep(config['sleep_time'])
|
||||||
|
|
||||||
|
def test_multiparcela_creates_n_entries(self):
    """Scrapping the coordinates below must produce exactly two entries."""
    lon, lat = -9.2503, 42.9723
    entries = ScrapperXML.scrap_coord(lon, lat, True)
    self.assertEqual(len(entries), 2)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
Loading…
Reference in New Issue
Block a user