Adds documentation of most of functions and methods.

This commit is contained in:
josejuanmartinez 2019-09-26 16:52:53 +02:00
parent 5ea0da9449
commit 6c6da34adf
14 changed files with 139 additions and 34 deletions

View File

@ -1,6 +1,9 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
""" Script that initializes 'cadaster' index in ElasticSearch so that
it is also well supported by Kibana Visualization """
from src.utils.elasticsearch_utils import ElasticSearchUtils
if __name__ == "__main__":

View File

@ -11,10 +11,15 @@ from src.librecatastro.scrapping.searchers.provinces_searcher import ProvincesSe
from src.settings import config
from src.tests.servers_health.server_health_tests import ServerHealthTests
""" Main executable file, that processes all the arguments with ArgumentParser
and performs different functionalities, like listing provinces, cities, scrapping from HTML,
from XML, based on coordinates files or a list of provinces, etc """
if __name__ == "__main__":
''' Definition of command line arguments for ArgumentParser '''
parser = argparse.ArgumentParser(description='Runs libreCadastro')
parser.add_argument('--coords', action='store_true', dest='coords', default=False)
parser.add_argument('--filenames', action='store', nargs='+', dest='filenames', default=[])
parser.add_argument('--coords-filenames', action='store', nargs='+', dest='filenames', default=[])
parser.add_argument('--provinces', action='store', nargs='+', dest='provinces', default=[])
parser.add_argument('--sleep', action='store', dest='sleep', type=int, default=5)
parser.add_argument('--html', dest='html', default=False, action='store_true')
@ -25,14 +30,17 @@ if __name__ == "__main__":
parser.add_argument('--listcities', action='store', nargs=1, dest='listcities', default=[])
parser.add_argument('--health', action='store_true', dest='health', default=False)
''' Parsing of arguments from command line'''
args = parser.parse_args(sys.argv[1:])
''' Configuration of parameters to be overwritten '''
if args.sleep:
config['sleep_time'] = args.sleep
if args.scale:
config['scale'] = args.scale
''' Listing functionality '''
if args.listprovinces:
ProvincesSearcher.list_provinces()
exit(0)
@ -41,10 +49,12 @@ if __name__ == "__main__":
ProvincesSearcher.list_cities(args.listcities[0])
exit(0)
''' Cadaster server checking '''
if args.health:
ServerHealthTests.healthcheck()
exit(0)
''' Scrapping / Parsing core functionality'''
parser = ParserHTML if args.html else ParserXML
filenames = args.filenames

View File

@ -12,6 +12,7 @@ logger = CadastroLogger(__name__).logger
class Address:
""" Domain class for storing Address in Catastro parsers"""
def __init__(self, address):
self.full_address = address.strip()
@ -31,7 +32,7 @@ class Address:
self.site = None
self.lot = None
''' NLP searchers '''
''' Parses address and extracts different information '''
self.first_line = self.get_first_line()
self.second_line = self.get_second_line()
@ -48,6 +49,7 @@ class Address:
self.city = self.get_city()
def get_first_line(self):
""" Extracts first line of the address if not yet done"""
if self.first_line is not None:
return self.first_line
second_line = re.search(config['separator'], self.full_address)
@ -64,6 +66,7 @@ class Address:
else self.full_address
def get_second_line(self):
""" Extracts the second line of the address if not yet done """
if self.second_line is not None:
return self.second_line
@ -81,9 +84,11 @@ class Address:
else self.full_address
def get_street(self):
""" Alias to get_first_line() """
return self.get_first_line()
def get_doorway(self):
""" Gets the doorway(escalera) of an address """
if self.doorway is not None:
return self.doorway
@ -96,6 +101,7 @@ class Address:
return doorway_text
def get_door(self):
""" Gets the door (puerta) of an address """
if self.door is not None:
return self.door
@ -108,6 +114,7 @@ class Address:
return door_text
def get_floor(self):
""" Gets the floor (planta) of an address """
if self.floor is not None:
return self.floor
@ -120,6 +127,7 @@ class Address:
return floor_text
def get_site(self):
""" Gets the site (polígono) of an address """
if self.site is not None:
return self.site
@ -132,6 +140,7 @@ class Address:
return site_text
def get_lot(self):
""" Gets the lot (parcela) of an address """
if self.lot is not None:
return self.lot
@ -144,6 +153,7 @@ class Address:
return lot_text
def get_cp(self):
""" Gets the postal code (CP) of an address """
if self.cp is not None:
return self.cp
@ -157,6 +167,7 @@ class Address:
return cp_text
def get_city(self):
""" Gets the city of an address """
if self.city is not None:
return self.city
@ -168,6 +179,7 @@ class Address:
return city_text.strip()
def get_province(self):
""" Gets the province of an address """
if self.province_parentheses is not None and self.province is not None:
return self.province_parentheses, self.province
@ -180,4 +192,5 @@ class Address:
return province_parentheses_text, province_text
def to_json(self):
""" Transforms an object of this class into a json dict """
return dict(full_address=self.full_address, first_line=self.first_line, second_line=self.second_line, street=self.street, cp=self.cp, city=self.city, province_parantheses=self.province_parentheses, province=self.province, doorway=self.doorway, floor=self.floor, door=self.door, site=self.site, lot=self.lot)

View File

@ -16,7 +16,9 @@ logger = CadastroLogger(__name__).logger
class CadasterEntry:
""" Parent class that stores information about an entry in the Cadaster.
It's instantiated from children classes (CadasterEntryHTML and CadasterEntryXML),
not directly"""
@abstractmethod
def __init__(self, cadaster_entry):
self.address = cadaster_entry.address
@ -33,13 +35,16 @@ class CadasterEntry:
logger.debug(self.to_json_recursive())
def to_json(self):
""" Transforms an object of this class into a json dict """
return dict(address=self.address, cadaster=self.cadaster, type=self.type, use=self.use, surface=self.surface, year=self.year, location=self.location, gsurface=self.gsurface, constructions=self.constructions, picture=str(self.picture) if self.picture is not None else None, timestamp=self.timestamp)
def to_json_recursive(self):
""" Transforms recursively this object and all the objects inside that implement to_json() """
return json.dumps(self.to_json(), cls=JSONEncoder, sort_keys=True,
indent=4, separators=(',', ': '))
def to_elasticsearch(self):
""" Gets stored in elastic search """
es = Elasticsearch()
res = None
try:
@ -54,6 +59,7 @@ class CadasterEntry:
return res
def from_elasticsearch(self):
""" Confirms for checking purposes that the entry has been stored in elastic search previously """
res = False
es = Elasticsearch()
try:

View File

@ -13,7 +13,8 @@ logger = CadastroLogger(__name__).logger
class CadasterEntryHTML(CadasterEntry):
"""Cadaster class, that stores all the information about a surface and its properties"""
"""Cadaster class, obtained from parsing HTML, that inherits from CadasterEntry, and
stores all the information about a surface and its properties"""
def __init__(self, description_data):
self.address = Address(description_data[u'Localización'])

View File

@ -16,7 +16,8 @@ logger = CadastroLogger(__name__).logger
class CadasterEntryXML(CadasterEntry):
"""Cadaster class, that stores all the information about a surface and its properties"""
"""Cadaster class, obtained from parsing XML, that inherits from CadasterEntry, and
stores all the information about a surface and its properties"""
def __init__(self, xml, lon=None, lat=None, picture=None):
self.address = None
@ -96,4 +97,4 @@ class CadasterEntryXML(CadasterEntry):
self.picture = picture
self.timestamp = str(datetime.now())
super().__init__(self)
super().__init__(self)

View File

@ -5,6 +5,7 @@ from src.librecatastro.domain.reform import Reform
class Construction:
""" Class that stores constructions / reforms of a property"""
def __init__(self, construction):
self.use = construction[u'uso']
self.doorway = construction[u'escalera']
@ -14,4 +15,5 @@ class Construction:
self.reform = Reform(dict(tipo=construction[u'tipo'], fecha=construction[u'fecha']))
def to_json(self):
""" Transforms an object of this class into a json dict """
return dict(use=self.use, doorway=self.doorway, floor=self.floor, door=self.door, surface=self.surface, reform=self.reform)

View File

@ -7,11 +7,14 @@ logger = CadastroLogger(__name__).logger
class Location:
""" Class that stores longitude and latitude of a property (xcen, ycen) by Cadaster
in a format supported by Kibana (longitude=lon, latitude=lat)"""
def __init__(self, longitude, latitude):
self.lon = float(longitude) if longitude is not None else None
self.lat = float(latitude) if latitude is not None else None
def to_json(self):
""" Transforms an object of this class into a json dict """
if self.lon is None and self.lat is None:
return None
else:

View File

@ -1,11 +1,13 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
class Reform:
    """ Class that stores type of reform (reforma) and year """

    def __init__(self, reform_data):
        """
        Builds a reform from its raw fields.
        :param reform_data: mapping with the keys 'tipo' and 'fecha'. Both values
        must be strings; they are stored stripped of surrounding whitespace.
        """
        self.type = reform_data['tipo'].strip()
        self.year = reform_data['fecha'].strip()

    def to_json(self):
        """ Transforms an object of this class into a json dict """
        return dict(type=self.type, year=self.year)

View File

@ -1,6 +1,8 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from abc import abstractmethod
from src.utils.cadastro_logger import CadastroLogger
'''Logger'''
@ -8,16 +10,19 @@ logger = CadastroLogger(__name__).logger
class Parser:
"""Generic Parser class"""
""" Parser signature class that defines common interfaces for ParserHTML and ParserXML
classes """
def __init__(self):
pass
''' Processing signatures'''
''' Signatures'''
@classmethod
@abstractmethod
def process_search_by_coordinates(cls, x, y, pictures=False):
pass
@classmethod
@abstractmethod
def process_search_by_provinces(cls, prov_list, pictures=False):
pass
pass

View File

@ -15,7 +15,10 @@ logger = CadastroLogger(__name__).logger
class Scrapper:
"""Catastro web services parametrized"""
"""Scrapper class, from which ScrapperHTML and ScrapperXML inherit, and which
implements common scrapping functions for both HTML and XML"""
'''Catastro web services parametrized'''
URL_PICTURES = "https://www1.sedecatastro.gob.es/Cartografia/GeneraGraficoParcela.aspx?del={}&mun={}&refcat={}&AnchoPixels={}&AltoPixels={}"
URL_LOCATIONS_BASE = "http://ovc.catastro.meh.es/ovcservweb/OVCSWLocalizacionRC{}"
@ -25,6 +28,8 @@ class Scrapper:
@classmethod
def get_provinces(cls):
"""Get all provinces registered by Catastro (call only available from XML but used in both XML and HTML)"""
url = cls.URL_LOCATIONS_BASE.format("/OVCCallejero.asmx/ConsultaProvincia")
response = requests.get(url)
xml = response.content
@ -33,10 +38,16 @@ class Scrapper:
return DotMap(xmltodict.parse(xml, process_namespaces=False, xml_attribs=False))
@classmethod
def get_cities(cls, provincia, municipio=None):
params = {'Provincia': provincia}
if municipio:
params['Municipio'] = municipio
def get_cities(cls, prov_name, city_name=None):
"""
Get all cities registered by Catastro (call only available from XML but used in both XML and HTML)
:param prov_name: Name of the province (from Cadaster Province List)
:param city_name: Optional. Name of the city (from Cadaster City List) in case a specific city is required
:return: DotMap (dict with properties accessible by '.') with all the cities
"""
params = {'Provincia': prov_name}
if city_name:
params['Municipio'] = city_name
else:
params['Municipio'] = ''
url = cls.URL_LOCATIONS_BASE.format("/OVCCallejero.asmx/ConsultaMunicipio")
@ -47,15 +58,25 @@ class Scrapper:
return DotMap(xmltodict.parse(xml, process_namespaces=False, xml_attribs=False))
@classmethod
def get_addresses(cls, provincia, municipio, tipovia=None, nombrevia=None):
params = {'Provincia': provincia,
'Municipio': municipio}
if tipovia:
params['TipoVia'] = tipovia
def get_addresses(cls, prov_name, city_name, tv=None, nv=None):
"""
Get all addresses registered by Catastro (call only available from XML but used in both XML and HTML)
:param prov_name: Name of the province (from Cadaster Province List)
:param city_name: Name of the city (from Cadaster City List)
:param tv: Optional. Name of the kind of street (CL, AV ...) in case a specific kind is needed
:param nv: Optional. Name of the street in case a specific street is needed
:return: DotMap (dict with properties accessible by '.') with all the addresses
"""
params = {'Provincia': prov_name,
'Municipio': city_name}
if tv:
params['TipoVia'] = tv
else:
params['TipoVia'] = ''
if nombrevia:
params['NombreVia'] = nombrevia
if nv:
params['NombreVia'] = nv
else:
params['NombreVia'] = ''
@ -68,8 +89,14 @@ class Scrapper:
@classmethod
def get_address_iter(cls, prov_list=None, start_from=''):
"""Scraps properties by addresses"""
"""
Function that, instead of returning all the addresses, returns an iterator to all the addresses of a province list
to optimize performance.
:param prov_list: List of province names to get addresses from (from Cadaster Province List)
:param start_from: Optional. Name of the city where to start from in a province (from Cadaster City List)
:return: iterator to all the addresses of the provinces of the list
"""
if prov_list is None:
prov_list = []
@ -121,6 +148,14 @@ class Scrapper:
@classmethod
def scrap_site_picture(cls, prov_num, city_num, cadaster):
"""
Gets the house plan picture.
:param prov_num: Province number.
:param city_num: City number.
:param cadaster: Cadaster number.
:return: an image, coded in base64.
"""
url_pic = cls.URL_PICTURES.format(prov_num, city_num, cadaster, config['width_px'], config['height_px'])
@ -136,16 +171,26 @@ class Scrapper:
return b64_image
@classmethod
def get_cadaster_by_address(cls, provincia, municipio, tipovia, nombrevia, numero):
params = {'Provincia': provincia,
'Municipio': municipio,
'TipoVia': tipovia,
'NomVia': nombrevia,
'Numero': str(numero)}
def get_cadaster_by_address(cls, prov_name, city_name, tv, nv, num):
"""
Gets the cadaster information, based on an address.
:param prov_name: Name of the province.
:param city_name: Name of the city.
:param tv: Kind of street (CL, AV...)
:param nv: Name of the street
:param num: Number of the street
:return: DotMap (dict with properties accessible by '.') with the cadaster information
"""
params = {'Provincia': prov_name,
'Municipio': city_name,
'TipoVia': tv,
'NomVia': nv,
'Numero': str(num)}
url = cls.URL_LOCATIONS_BASE.format("/OVCCallejero.asmx/ConsultaNumero")
logger.debug("====Dir: {} {} {} {} {}====".format(tipovia, nombrevia, numero, municipio, provincia))
logger.debug("====Dir: {} {} {} {} {}====".format(tv, nv, num, city_name, prov_name))
logger.debug("URL for address: {}".format(url + '?' + urllib.parse.urlencode(params)))
response = requests.get(url, params=params)
@ -155,8 +200,17 @@ class Scrapper:
return DotMap(xmltodict.parse(xml, process_namespaces=False, xml_attribs=False))
@classmethod
def get_coords_from_cadaster(cls, provincia, municipio, cadaster):
params = {'Provincia': provincia, 'Municipio': municipio, 'SRS': 'EPSG:4326', 'RC': cadaster}
def get_coords_from_cadaster(cls, prov_name, city_name, cadaster):
"""
Returns the lon (xcen) and lat (ycen) of a property, identified by its cadaster number
and province and city names.
:param prov_name: Province name.
:param city_name: City name.
:param cadaster: Cadaster number.
:return: DotMap (dict with properties accessible by '.') with the location information
"""
params = {'Provincia': prov_name, 'Municipio': city_name, 'SRS': 'EPSG:4326', 'RC': cadaster}
url = cls.URL_LOCATIONS_BASE.format("/OVCCoordenadas.asmx/Consulta_CPMRC")
logger.debug("URL for coordinates: {}".format(url + '?' + urllib.parse.urlencode(params)))

View File

@ -1,7 +1,12 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from abc import abstractmethod
class Searcher:
""" Just a signature, an abstract class just in case we need to define
something common for Provinces and Coordinates Searchers.
NOTE(review): the class statement shown here declares no ABC base, so
@abstractmethod on __init__ by itself does not block instantiation --
confirm whether that is intended. """
@abstractmethod
def __init__(self):
pass

View File

@ -1,8 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from dotmap import DotMap
from src.librecatastro.scrapping.scrapper import Scrapper
from src.librecatastro.scrapping.searcher import Searcher
from src.utils.cadastro_logger import CadastroLogger

View File

@ -3,6 +3,8 @@
import os
""" Dict settings file with config parameters"""
root_path = os.path.dirname(os.path.abspath(__file__))
config = {