Adds documentation of most of functions and methods.
This commit is contained in:
parent
5ea0da9449
commit
6c6da34adf
|
@ -1,6 +1,9 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
""" Script that initializes 'cadaster' index in ElasticSearch so that
|
||||
it is also well supported by Kibana Visualization """
|
||||
|
||||
from src.utils.elasticsearch_utils import ElasticSearchUtils
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -11,10 +11,15 @@ from src.librecatastro.scrapping.searchers.provinces_searcher import ProvincesSe
|
|||
from src.settings import config
|
||||
from src.tests.servers_health.server_health_tests import ServerHealthTests
|
||||
|
||||
""" Main executable file, that processes all the arguments with ArgumentParser
|
||||
and performs different functions, like listing provinces, cities, scrapping from HTML,
|
||||
from XML, based on coordinates files or a list of provinces, etc """
|
||||
|
||||
if __name__ == "__main__":
|
||||
''' Definition of command line arguments for ArgumentParser '''
|
||||
parser = argparse.ArgumentParser(description='Runs libreCadastro')
|
||||
parser.add_argument('--coords', action='store_true', dest='coords', default=False)
|
||||
parser.add_argument('--filenames', action='store', nargs='+', dest='filenames', default=[])
|
||||
parser.add_argument('--coords-filenames', action='store', nargs='+', dest='filenames', default=[])
|
||||
parser.add_argument('--provinces', action='store', nargs='+', dest='provinces', default=[])
|
||||
parser.add_argument('--sleep', action='store', dest='sleep', type=int, default=5)
|
||||
parser.add_argument('--html', dest='html', default=False, action='store_true')
|
||||
|
@ -25,14 +30,17 @@ if __name__ == "__main__":
|
|||
parser.add_argument('--listcities', action='store', nargs=1, dest='listcities', default=[])
|
||||
parser.add_argument('--health', action='store_true', dest='health', default=False)
|
||||
|
||||
''' Parsing of arguments from command line'''
|
||||
args = parser.parse_args(sys.argv[1:])
|
||||
|
||||
''' Configuration of parameters to be overwritten '''
|
||||
if args.sleep:
|
||||
config['sleep_time'] = args.sleep
|
||||
|
||||
if args.scale:
|
||||
config['scale'] = args.scale
|
||||
|
||||
''' Listing functionality '''
|
||||
if args.listprovinces:
|
||||
ProvincesSearcher.list_provinces()
|
||||
exit(0)
|
||||
|
@ -41,10 +49,12 @@ if __name__ == "__main__":
|
|||
ProvincesSearcher.list_cities(args.listcities[0])
|
||||
exit(0)
|
||||
|
||||
''' Cadaster server checking '''
|
||||
if args.health:
|
||||
ServerHealthTests.healthcheck()
|
||||
exit(0)
|
||||
|
||||
''' Scrapping / Parsing core functionality'''
|
||||
parser = ParserHTML if args.html else ParserXML
|
||||
|
||||
filenames = args.filenames
|
||||
|
|
|
@ -12,6 +12,7 @@ logger = CadastroLogger(__name__).logger
|
|||
|
||||
class Address:
|
||||
""" Domain class for storing Address in Catastro parsers"""
|
||||
|
||||
def __init__(self, address):
|
||||
self.full_address = address.strip()
|
||||
|
||||
|
@ -31,7 +32,7 @@ class Address:
|
|||
self.site = None
|
||||
self.lot = None
|
||||
|
||||
''' NLP searchers '''
|
||||
''' Parses address and extracts different information '''
|
||||
self.first_line = self.get_first_line()
|
||||
self.second_line = self.get_second_line()
|
||||
|
||||
|
@ -48,6 +49,7 @@ class Address:
|
|||
self.city = self.get_city()
|
||||
|
||||
def get_first_line(self):
|
||||
""" Extracts first line of the address if not yet done"""
|
||||
if self.first_line is not None:
|
||||
return self.first_line
|
||||
second_line = re.search(config['separator'], self.full_address)
|
||||
|
@ -64,6 +66,7 @@ class Address:
|
|||
else self.full_address
|
||||
|
||||
def get_second_line(self):
|
||||
""" Extracts the second line of the address if not yet done """
|
||||
if self.second_line is not None:
|
||||
return self.second_line
|
||||
|
||||
|
@ -81,9 +84,11 @@ class Address:
|
|||
else self.full_address
|
||||
|
||||
def get_street(self):
|
||||
""" Alias to get_first_line() """
|
||||
return self.get_first_line()
|
||||
|
||||
def get_doorway(self):
|
||||
""" Gets the doorway(escalera) of an address """
|
||||
if self.doorway is not None:
|
||||
return self.doorway
|
||||
|
||||
|
@ -96,6 +101,7 @@ class Address:
|
|||
return doorway_text
|
||||
|
||||
def get_door(self):
|
||||
""" Gets the door (puerta) of an address """
|
||||
if self.door is not None:
|
||||
return self.door
|
||||
|
||||
|
@ -108,6 +114,7 @@ class Address:
|
|||
return door_text
|
||||
|
||||
def get_floor(self):
|
||||
""" Gets the floor (planta) of an address """
|
||||
if self.floor is not None:
|
||||
return self.floor
|
||||
|
||||
|
@ -120,6 +127,7 @@ class Address:
|
|||
return floor_text
|
||||
|
||||
def get_site(self):
|
||||
""" Gets the site (polígono) of an address """
|
||||
if self.site is not None:
|
||||
return self.site
|
||||
|
||||
|
@ -132,6 +140,7 @@ class Address:
|
|||
return site_text
|
||||
|
||||
def get_lot(self):
|
||||
""" Gets the lot (parcela) of an address """
|
||||
if self.lot is not None:
|
||||
return self.lot
|
||||
|
||||
|
@ -144,6 +153,7 @@ class Address:
|
|||
return lot_text
|
||||
|
||||
def get_cp(self):
|
||||
""" Gets the postal code (CP) of an address """
|
||||
if self.cp is not None:
|
||||
return self.cp
|
||||
|
||||
|
@ -157,6 +167,7 @@ class Address:
|
|||
return cp_text
|
||||
|
||||
def get_city(self):
|
||||
""" Gets the city of an address """
|
||||
if self.city is not None:
|
||||
return self.city
|
||||
|
||||
|
@ -168,6 +179,7 @@ class Address:
|
|||
return city_text.strip()
|
||||
|
||||
def get_province(self):
|
||||
""" Gets the province of an address """
|
||||
if self.province_parentheses is not None and self.province is not None:
|
||||
return self.province_parentheses, self.province
|
||||
|
||||
|
@ -180,4 +192,5 @@ class Address:
|
|||
return province_parentheses_text, province_text
|
||||
|
||||
def to_json(self):
|
||||
""" Transforms an object of this class into a json dict """
|
||||
return dict(full_address=self.full_address, first_line=self.first_line, second_line=self.second_line, street=self.street, cp=self.cp, city=self.city, province_parantheses=self.province_parentheses, province=self.province, doorway=self.doorway, floor=self.floor, door=self.door, site=self.site, lot=self.lot)
|
||||
|
|
|
@ -16,7 +16,9 @@ logger = CadastroLogger(__name__).logger
|
|||
|
||||
|
||||
class CadasterEntry:
|
||||
|
||||
""" Parent class that stores information about an entry in the Cadaster.
|
||||
It's instantiated from children classes (CadasterEntryHTML and CadasterEntryXML,
|
||||
not directly)"""
|
||||
@abstractmethod
|
||||
def __init__(self, cadaster_entry):
|
||||
self.address = cadaster_entry.address
|
||||
|
@ -33,13 +35,16 @@ class CadasterEntry:
|
|||
logger.debug(self.to_json_recursive())
|
||||
|
||||
def to_json(self):
|
||||
""" Transforms an object of this class into a json dict """
|
||||
return dict(address=self.address, cadaster=self.cadaster, type=self.type, use=self.use, surface=self.surface, year=self.year, location=self.location, gsurface=self.gsurface, constructions=self.constructions, picture=str(self.picture) if self.picture is not None else None, timestamp=self.timestamp)
|
||||
|
||||
def to_json_recursive(self):
|
||||
""" Transforms recursively this object and all the objects inside that implement to_json() """
|
||||
return json.dumps(self.to_json(), cls=JSONEncoder, sort_keys=True,
|
||||
indent=4, separators=(',', ': '))
|
||||
|
||||
def to_elasticsearch(self):
|
||||
""" Gets stored in elastic search """
|
||||
es = Elasticsearch()
|
||||
res = None
|
||||
try:
|
||||
|
@ -54,6 +59,7 @@ class CadasterEntry:
|
|||
return res
|
||||
|
||||
def from_elasticsearch(self):
|
||||
""" Confirms for checking purposes that the entry has been stored in elastic search previously """
|
||||
res = False
|
||||
es = Elasticsearch()
|
||||
try:
|
||||
|
|
|
@ -13,7 +13,8 @@ logger = CadastroLogger(__name__).logger
|
|||
|
||||
|
||||
class CadasterEntryHTML(CadasterEntry):
|
||||
"""Cadaster class, that stores all the information about a surface and its properties"""
|
||||
"""Cadaster class, obtained from parsing HTML, that inherits from CadasterEntry, and
|
||||
stores all the information about a surface and its properties"""
|
||||
|
||||
def __init__(self, description_data):
|
||||
self.address = Address(description_data[u'Localización'])
|
||||
|
|
|
@ -16,7 +16,8 @@ logger = CadastroLogger(__name__).logger
|
|||
|
||||
|
||||
class CadasterEntryXML(CadasterEntry):
|
||||
"""Cadaster class, that stores all the information about a surface and its properties"""
|
||||
"""Cadaster class, obtained from parsing XML, that inherits from CadasterEntry, and
|
||||
stores all the information about a surface and its properties"""
|
||||
|
||||
def __init__(self, xml, lon=None, lat=None, picture=None):
|
||||
self.address = None
|
||||
|
@ -96,4 +97,4 @@ class CadasterEntryXML(CadasterEntry):
|
|||
|
||||
self.picture = picture
|
||||
self.timestamp = str(datetime.now())
|
||||
super().__init__(self)
|
||||
super().__init__(self)
|
||||
|
|
|
@ -5,6 +5,7 @@ from src.librecatastro.domain.reform import Reform
|
|||
|
||||
|
||||
class Construction:
|
||||
""" Class that stores constructions / reforms of a property"""
|
||||
def __init__(self, construction):
|
||||
self.use = construction[u'uso']
|
||||
self.doorway = construction[u'escalera']
|
||||
|
@ -14,4 +15,5 @@ class Construction:
|
|||
self.reform = Reform(dict(tipo=construction[u'tipo'], fecha=construction[u'fecha']))
|
||||
|
||||
def to_json(self):
|
||||
""" Transforms an object of this class into a json dict """
|
||||
return dict(use=self.use, doorway=self.doorway, floor=self.floor, door=self.door, surface=self.surface, reform=self.reform)
|
||||
|
|
|
@ -7,11 +7,14 @@ logger = CadastroLogger(__name__).logger
|
|||
|
||||
|
||||
class Location:
|
||||
""" Class that stores longitude and latitude of a property (xcen, ycen) by Cadaster
|
||||
in a format supported by Kibana (longitude=lon, latitude=lat)"""
|
||||
def __init__(self, longitude, latitude):
|
||||
self.lon = float(longitude) if longitude is not None else None
|
||||
self.lat = float(latitude) if latitude is not None else None
|
||||
|
||||
def to_json(self):
|
||||
""" Transforms an object of this class into a json dict """
|
||||
if self.lon is None and self.lat is None:
|
||||
return None
|
||||
else:
|
||||
|
|
|
@ -1,11 +1,13 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
|
||||
class Reform:
    """Class that stores the type of a reform (reforma) and its year.

    :param reform_data: mapping with the keys 'tipo' and 'fecha'; both values
        must provide a ``strip()`` method (e.g. strings).
    """

    def __init__(self, reform_data):
        # Both fields are stored with surrounding whitespace removed.
        self.type = reform_data['tipo'].strip()
        self.year = reform_data['fecha'].strip()

    def to_json(self):
        """Transform an object of this class into a json dict.

        :return: dict with the keys 'type' and 'year'
        """
        return dict(type=self.type, year=self.year)
|
|
@ -1,6 +1,8 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from abc import abstractmethod
|
||||
|
||||
from src.utils.cadastro_logger import CadastroLogger
|
||||
|
||||
'''Logger'''
|
||||
|
@ -8,16 +10,19 @@ logger = CadastroLogger(__name__).logger
|
|||
|
||||
|
||||
class Parser:
|
||||
"""Generic Parser class"""
|
||||
""" Parser signature class that defines common interfaces for ParserHTML and ParserXML
|
||||
classes """
|
||||
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
''' Processing signatures'''
|
||||
''' Signatures'''
|
||||
@classmethod
|
||||
@abstractmethod
|
||||
def process_search_by_coordinates(cls, x, y, pictures=False):
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
@abstractmethod
|
||||
def process_search_by_provinces(cls, prov_list, pictures=False):
|
||||
pass
|
||||
pass
|
||||
|
|
|
@ -15,7 +15,10 @@ logger = CadastroLogger(__name__).logger
|
|||
|
||||
|
||||
class Scrapper:
|
||||
"""Catastro web services parametrized"""
|
||||
"""Scrapper class, from which ScrapperHTML and ScrapperXML inherit, and which
|
||||
implements common scrapping functions for both HTML and XML"""
|
||||
|
||||
'''Catastro web services parametrized'''
|
||||
|
||||
URL_PICTURES = "https://www1.sedecatastro.gob.es/Cartografia/GeneraGraficoParcela.aspx?del={}&mun={}&refcat={}&AnchoPixels={}&AltoPixels={}"
|
||||
URL_LOCATIONS_BASE = "http://ovc.catastro.meh.es/ovcservweb/OVCSWLocalizacionRC{}"
|
||||
|
@ -25,6 +28,8 @@ class Scrapper:
|
|||
|
||||
@classmethod
|
||||
def get_provinces(cls):
|
||||
"""Get all provinces registered by Catastro (call only available from XML but used in both XML and HTML)"""
|
||||
|
||||
url = cls.URL_LOCATIONS_BASE.format("/OVCCallejero.asmx/ConsultaProvincia")
|
||||
response = requests.get(url)
|
||||
xml = response.content
|
||||
|
@ -33,10 +38,16 @@ class Scrapper:
|
|||
return DotMap(xmltodict.parse(xml, process_namespaces=False, xml_attribs=False))
|
||||
|
||||
@classmethod
|
||||
def get_cities(cls, provincia, municipio=None):
|
||||
params = {'Provincia': provincia}
|
||||
if municipio:
|
||||
params['Municipio'] = municipio
|
||||
def get_cities(cls, prov_name, city_name=None):
|
||||
"""
|
||||
Get all cities registered by Catastro (call only available from XML but used in both XML and HTML)
|
||||
:param prov_name: Name of the province (from Cadaster Province List)
|
||||
:param city_name: Optional. Name of the city (from Cadaster City List) in case a specific city is required
|
||||
:return: DotMap (dict with properties accessible by '.') with all the cities
|
||||
"""
|
||||
params = {'Provincia': prov_name}
|
||||
if city_name:
|
||||
params['Municipio'] = city_name
|
||||
else:
|
||||
params['Municipio'] = ''
|
||||
url = cls.URL_LOCATIONS_BASE.format("/OVCCallejero.asmx/ConsultaMunicipio")
|
||||
|
@ -47,15 +58,25 @@ class Scrapper:
|
|||
return DotMap(xmltodict.parse(xml, process_namespaces=False, xml_attribs=False))
|
||||
|
||||
@classmethod
|
||||
def get_addresses(cls, provincia, municipio, tipovia=None, nombrevia=None):
|
||||
params = {'Provincia': provincia,
|
||||
'Municipio': municipio}
|
||||
if tipovia:
|
||||
params['TipoVia'] = tipovia
|
||||
def get_addresses(cls, prov_name, city_name, tv=None, nv=None):
|
||||
"""
|
||||
Get all addresses registered by Catastro (call only available from XML but used in both XML and HTML)
|
||||
|
||||
:param prov_name: Name of the province (from Cadaster Province List)
|
||||
:param city_name: Name of the city (from Cadaster City List)
|
||||
:param tv: Optional. Name of the kind of street (CL, AV ...) in case a specific kind is needed
|
||||
:param nv: Optional. Name of the street in case a specific street is needed
|
||||
:return: DotMap (dict with properties accessible by '.') with all the addresses
|
||||
"""
|
||||
|
||||
params = {'Provincia': prov_name,
|
||||
'Municipio': city_name}
|
||||
if tv:
|
||||
params['TipoVia'] = tv
|
||||
else:
|
||||
params['TipoVia'] = ''
|
||||
if nombrevia:
|
||||
params['NombreVia'] = nombrevia
|
||||
if nv:
|
||||
params['NombreVia'] = nv
|
||||
else:
|
||||
params['NombreVia'] = ''
|
||||
|
||||
|
@ -68,8 +89,14 @@ class Scrapper:
|
|||
|
||||
@classmethod
|
||||
def get_address_iter(cls, prov_list=None, start_from=''):
|
||||
"""Scraps properties by addresses"""
|
||||
"""
|
||||
Function that, instead of returning all the addresses, returns an iterator to all the addresses of a province list
|
||||
to optimize performance.
|
||||
|
||||
:param prov_list: List of province names to get addresses from (from Cadaster Province List)
|
||||
:param start_from: Optional. Name of the city where to start from in a province (from Cadaster City List)
|
||||
:return: iterator to all the addresses of the provinces of the list
|
||||
"""
|
||||
if prov_list is None:
|
||||
prov_list = []
|
||||
|
||||
|
@ -121,6 +148,14 @@ class Scrapper:
|
|||
|
||||
@classmethod
|
||||
def scrap_site_picture(cls, prov_num, city_num, cadaster):
|
||||
"""
|
||||
Gets the house plan picture.
|
||||
|
||||
:param prov_num: Province number.
|
||||
:param city_num: City number.
|
||||
:param cadaster: Cadaster number.
|
||||
:return: an image, coded in base64.
|
||||
"""
|
||||
|
||||
url_pic = cls.URL_PICTURES.format(prov_num, city_num, cadaster, config['width_px'], config['height_px'])
|
||||
|
||||
|
@ -136,16 +171,26 @@ class Scrapper:
|
|||
return b64_image
|
||||
|
||||
@classmethod
|
||||
def get_cadaster_by_address(cls, provincia, municipio, tipovia, nombrevia, numero):
|
||||
params = {'Provincia': provincia,
|
||||
'Municipio': municipio,
|
||||
'TipoVia': tipovia,
|
||||
'NomVia': nombrevia,
|
||||
'Numero': str(numero)}
|
||||
def get_cadaster_by_address(cls, prov_name, city_name, tv, nv, num):
|
||||
"""
|
||||
Gets the cadaster information, based on an address.
|
||||
|
||||
:param prov_name: Name of the province.
|
||||
:param city_name: Name of the city.
|
||||
:param tv: Kind of street (CL, AV...)
|
||||
:param nv: Name of the street
|
||||
:param num: Number of the street
|
||||
:return: DotMap (dict with properties accessible by '.') with the cadaster information
|
||||
"""
|
||||
params = {'Provincia': prov_name,
|
||||
'Municipio': city_name,
|
||||
'TipoVia': tv,
|
||||
'NomVia': nv,
|
||||
'Numero': str(num)}
|
||||
|
||||
url = cls.URL_LOCATIONS_BASE.format("/OVCCallejero.asmx/ConsultaNumero")
|
||||
|
||||
logger.debug("====Dir: {} {} {} {} {}====".format(tipovia, nombrevia, numero, municipio, provincia))
|
||||
logger.debug("====Dir: {} {} {} {} {}====".format(tv, nv, num, city_name, prov_name))
|
||||
logger.debug("URL for address: {}".format(url + '?' + urllib.parse.urlencode(params)))
|
||||
|
||||
response = requests.get(url, params=params)
|
||||
|
@ -155,8 +200,17 @@ class Scrapper:
|
|||
return DotMap(xmltodict.parse(xml, process_namespaces=False, xml_attribs=False))
|
||||
|
||||
@classmethod
|
||||
def get_coords_from_cadaster(cls, provincia, municipio, cadaster):
|
||||
params = {'Provincia': provincia, 'Municipio': municipio, 'SRS': 'EPSG:4326', 'RC': cadaster}
|
||||
def get_coords_from_cadaster(cls, prov_name, city_name, cadaster):
|
||||
"""
|
||||
Returns the lon (xcen) and lat (ycen) of a property, identified by its cadaster number
|
||||
and province and city names.
|
||||
|
||||
:param prov_name: Province name.
|
||||
:param city_name: City name.
|
||||
:param cadaster: Cadaster number.
|
||||
:return: DotMap (dict with properties accessible by '.') with the location information
|
||||
"""
|
||||
params = {'Provincia': prov_name, 'Municipio': city_name, 'SRS': 'EPSG:4326', 'RC': cadaster}
|
||||
url = cls.URL_LOCATIONS_BASE.format("/OVCCoordenadas.asmx/Consulta_CPMRC")
|
||||
|
||||
logger.debug("URL for coordinates: {}".format(url + '?' + urllib.parse.urlencode(params)))
|
||||
|
|
|
@ -1,7 +1,12 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
from abc import abstractmethod
|
||||
|
||||
|
||||
class Searcher:
    """ Just a signature, an abstract class just in case we need to define
    something common for Provinces and Coordinates Searchers """

    # NOTE: the class does not use ABCMeta, so @abstractmethod only marks
    # intent here; Python will not prevent instantiation.
    @abstractmethod
    def __init__(self):
        pass
|
||||
|
|
|
@ -1,8 +1,6 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from dotmap import DotMap
|
||||
|
||||
from src.librecatastro.scrapping.scrapper import Scrapper
|
||||
from src.librecatastro.scrapping.searcher import Searcher
|
||||
from src.utils.cadastro_logger import CadastroLogger
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
|
||||
import os
|
||||
|
||||
""" Dict settings file with config parameters"""
|
||||
|
||||
root_path = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
config = {
|
||||
|
|
Loading…
Reference in New Issue