Adds documentation of most of functions and methods.

This commit is contained in:
josejuanmartinez 2019-09-26 16:52:53 +02:00
parent 5ea0da9449
commit 6c6da34adf
14 changed files with 139 additions and 34 deletions

View File

@ -1,6 +1,9 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" Script that initializes the 'cadaster' index in ElasticSearch so that
it is also well supported by Kibana Visualization """
from src.utils.elasticsearch_utils import ElasticSearchUtils from src.utils.elasticsearch_utils import ElasticSearchUtils
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -11,10 +11,15 @@ from src.librecatastro.scrapping.searchers.provinces_searcher import ProvincesSe
from src.settings import config from src.settings import config
from src.tests.servers_health.server_health_tests import ServerHealthTests from src.tests.servers_health.server_health_tests import ServerHealthTests
""" Main executable file that processes all the arguments with ArgumentParser
and performs different functions, like listing provinces or cities, scrapping from HTML
or from XML, based on coordinates files or a list of provinces, etc. """
if __name__ == "__main__": if __name__ == "__main__":
''' Definition of command line arguments for ArgumentParser '''
parser = argparse.ArgumentParser(description='Runs libreCadastro') parser = argparse.ArgumentParser(description='Runs libreCadastro')
parser.add_argument('--coords', action='store_true', dest='coords', default=False) parser.add_argument('--coords', action='store_true', dest='coords', default=False)
parser.add_argument('--filenames', action='store', nargs='+', dest='filenames', default=[]) parser.add_argument('--coords-filenames', action='store', nargs='+', dest='filenames', default=[])
parser.add_argument('--provinces', action='store', nargs='+', dest='provinces', default=[]) parser.add_argument('--provinces', action='store', nargs='+', dest='provinces', default=[])
parser.add_argument('--sleep', action='store', dest='sleep', type=int, default=5) parser.add_argument('--sleep', action='store', dest='sleep', type=int, default=5)
parser.add_argument('--html', dest='html', default=False, action='store_true') parser.add_argument('--html', dest='html', default=False, action='store_true')
@ -25,14 +30,17 @@ if __name__ == "__main__":
parser.add_argument('--listcities', action='store', nargs=1, dest='listcities', default=[]) parser.add_argument('--listcities', action='store', nargs=1, dest='listcities', default=[])
parser.add_argument('--health', action='store_true', dest='health', default=False) parser.add_argument('--health', action='store_true', dest='health', default=False)
''' Parsing of arguments from command line'''
args = parser.parse_args(sys.argv[1:]) args = parser.parse_args(sys.argv[1:])
''' Configuration of parameters to be overwritten '''
if args.sleep: if args.sleep:
config['sleep_time'] = args.sleep config['sleep_time'] = args.sleep
if args.scale: if args.scale:
config['scale'] = args.scale config['scale'] = args.scale
''' Listing functionality '''
if args.listprovinces: if args.listprovinces:
ProvincesSearcher.list_provinces() ProvincesSearcher.list_provinces()
exit(0) exit(0)
@ -41,10 +49,12 @@ if __name__ == "__main__":
ProvincesSearcher.list_cities(args.listcities[0]) ProvincesSearcher.list_cities(args.listcities[0])
exit(0) exit(0)
''' Cadaster server checking '''
if args.health: if args.health:
ServerHealthTests.healthcheck() ServerHealthTests.healthcheck()
exit(0) exit(0)
''' Scrapping / Parsing core functionality'''
parser = ParserHTML if args.html else ParserXML parser = ParserHTML if args.html else ParserXML
filenames = args.filenames filenames = args.filenames

View File

@ -12,6 +12,7 @@ logger = CadastroLogger(__name__).logger
class Address: class Address:
""" Domain class for storing Address in Catastro parsers""" """ Domain class for storing Address in Catastro parsers"""
def __init__(self, address): def __init__(self, address):
self.full_address = address.strip() self.full_address = address.strip()
@ -31,7 +32,7 @@ class Address:
self.site = None self.site = None
self.lot = None self.lot = None
''' NLP searchers ''' ''' Parses address and extracts different information '''
self.first_line = self.get_first_line() self.first_line = self.get_first_line()
self.second_line = self.get_second_line() self.second_line = self.get_second_line()
@ -48,6 +49,7 @@ class Address:
self.city = self.get_city() self.city = self.get_city()
def get_first_line(self): def get_first_line(self):
""" Extracts first line of the address if not yet done"""
if self.first_line is not None: if self.first_line is not None:
return self.first_line return self.first_line
second_line = re.search(config['separator'], self.full_address) second_line = re.search(config['separator'], self.full_address)
@ -64,6 +66,7 @@ class Address:
else self.full_address else self.full_address
def get_second_line(self): def get_second_line(self):
""" Extracts the second line of the address if not yet done """
if self.second_line is not None: if self.second_line is not None:
return self.second_line return self.second_line
@ -81,9 +84,11 @@ class Address:
else self.full_address else self.full_address
def get_street(self): def get_street(self):
""" Alias to get_first_line() """
return self.get_first_line() return self.get_first_line()
def get_doorway(self): def get_doorway(self):
""" Gets the doorway(escalera) of an address """
if self.doorway is not None: if self.doorway is not None:
return self.doorway return self.doorway
@ -96,6 +101,7 @@ class Address:
return doorway_text return doorway_text
def get_door(self): def get_door(self):
""" Gets the door (puerta) of an address """
if self.door is not None: if self.door is not None:
return self.door return self.door
@ -108,6 +114,7 @@ class Address:
return door_text return door_text
def get_floor(self): def get_floor(self):
""" Gets the floor (planta) of an address """
if self.floor is not None: if self.floor is not None:
return self.floor return self.floor
@ -120,6 +127,7 @@ class Address:
return floor_text return floor_text
def get_site(self): def get_site(self):
""" Gets the site (polígono) of an address """
if self.site is not None: if self.site is not None:
return self.site return self.site
@ -132,6 +140,7 @@ class Address:
return site_text return site_text
def get_lot(self): def get_lot(self):
""" Gets the lot (parcela) of an address """
if self.lot is not None: if self.lot is not None:
return self.lot return self.lot
@ -144,6 +153,7 @@ class Address:
return lot_text return lot_text
def get_cp(self): def get_cp(self):
""" Gets the postal code (CP) of an address """
if self.cp is not None: if self.cp is not None:
return self.cp return self.cp
@ -157,6 +167,7 @@ class Address:
return cp_text return cp_text
def get_city(self): def get_city(self):
""" Gets the city of an address """
if self.city is not None: if self.city is not None:
return self.city return self.city
@ -168,6 +179,7 @@ class Address:
return city_text.strip() return city_text.strip()
def get_province(self): def get_province(self):
""" Gets the province of an address """
if self.province_parentheses is not None and self.province is not None: if self.province_parentheses is not None and self.province is not None:
return self.province_parentheses, self.province return self.province_parentheses, self.province
@ -180,4 +192,5 @@ class Address:
return province_parentheses_text, province_text return province_parentheses_text, province_text
def to_json(self): def to_json(self):
""" Transforms an object of this class into a json dict """
return dict(full_address=self.full_address, first_line=self.first_line, second_line=self.second_line, street=self.street, cp=self.cp, city=self.city, province_parantheses=self.province_parentheses, province=self.province, doorway=self.doorway, floor=self.floor, door=self.door, site=self.site, lot=self.lot) return dict(full_address=self.full_address, first_line=self.first_line, second_line=self.second_line, street=self.street, cp=self.cp, city=self.city, province_parantheses=self.province_parentheses, province=self.province, doorway=self.doorway, floor=self.floor, door=self.door, site=self.site, lot=self.lot)

View File

@ -16,7 +16,9 @@ logger = CadastroLogger(__name__).logger
class CadasterEntry: class CadasterEntry:
""" Parent class that stores information about an entry in the Cadaster.
It's instantiated from children classes (CadasterEntryHTML and CadasterEntryXML),
not directly"""
@abstractmethod @abstractmethod
def __init__(self, cadaster_entry): def __init__(self, cadaster_entry):
self.address = cadaster_entry.address self.address = cadaster_entry.address
@ -33,13 +35,16 @@ class CadasterEntry:
logger.debug(self.to_json_recursive()) logger.debug(self.to_json_recursive())
def to_json(self): def to_json(self):
""" Transforms an object of this class into a json dict """
return dict(address=self.address, cadaster=self.cadaster, type=self.type, use=self.use, surface=self.surface, year=self.year, location=self.location, gsurface=self.gsurface, constructions=self.constructions, picture=str(self.picture) if self.picture is not None else None, timestamp=self.timestamp) return dict(address=self.address, cadaster=self.cadaster, type=self.type, use=self.use, surface=self.surface, year=self.year, location=self.location, gsurface=self.gsurface, constructions=self.constructions, picture=str(self.picture) if self.picture is not None else None, timestamp=self.timestamp)
def to_json_recursive(self): def to_json_recursive(self):
""" Transforms recursively this object and all the objects inside that implement to_json() """
return json.dumps(self.to_json(), cls=JSONEncoder, sort_keys=True, return json.dumps(self.to_json(), cls=JSONEncoder, sort_keys=True,
indent=4, separators=(',', ': ')) indent=4, separators=(',', ': '))
def to_elasticsearch(self): def to_elasticsearch(self):
""" Gets stored in elastic search """
es = Elasticsearch() es = Elasticsearch()
res = None res = None
try: try:
@ -54,6 +59,7 @@ class CadasterEntry:
return res return res
def from_elasticsearch(self): def from_elasticsearch(self):
""" Confirms for checking purposes that the entry has been stored in elastic search previously """
res = False res = False
es = Elasticsearch() es = Elasticsearch()
try: try:

View File

@ -13,7 +13,8 @@ logger = CadastroLogger(__name__).logger
class CadasterEntryHTML(CadasterEntry): class CadasterEntryHTML(CadasterEntry):
"""Cadaster class, that stores all the information about a surface and its properties""" """Cadaster class, obtained from parsing HTML, that inherits from CadasterEntry, and
stores all the information about a surface and its properties"""
def __init__(self, description_data): def __init__(self, description_data):
self.address = Address(description_data[u'Localización']) self.address = Address(description_data[u'Localización'])

View File

@ -16,7 +16,8 @@ logger = CadastroLogger(__name__).logger
class CadasterEntryXML(CadasterEntry): class CadasterEntryXML(CadasterEntry):
"""Cadaster class, that stores all the information about a surface and its properties""" """Cadaster class, obtained from parsing XML, that inherits from CadasterEntry, and
stores all the information about a surface and its properties"""
def __init__(self, xml, lon=None, lat=None, picture=None): def __init__(self, xml, lon=None, lat=None, picture=None):
self.address = None self.address = None
@ -96,4 +97,4 @@ class CadasterEntryXML(CadasterEntry):
self.picture = picture self.picture = picture
self.timestamp = str(datetime.now()) self.timestamp = str(datetime.now())
super().__init__(self) super().__init__(self)

View File

@ -5,6 +5,7 @@ from src.librecatastro.domain.reform import Reform
class Construction: class Construction:
""" Class that stores constructions / reforms of a property"""
def __init__(self, construction): def __init__(self, construction):
self.use = construction[u'uso'] self.use = construction[u'uso']
self.doorway = construction[u'escalera'] self.doorway = construction[u'escalera']
@ -14,4 +15,5 @@ class Construction:
self.reform = Reform(dict(tipo=construction[u'tipo'], fecha=construction[u'fecha'])) self.reform = Reform(dict(tipo=construction[u'tipo'], fecha=construction[u'fecha']))
def to_json(self): def to_json(self):
""" Transforms an object of this class into a json dict """
return dict(use=self.use, doorway=self.doorway, floor=self.floor, door=self.door, surface=self.surface, reform=self.reform) return dict(use=self.use, doorway=self.doorway, floor=self.floor, door=self.door, surface=self.surface, reform=self.reform)

View File

@ -7,11 +7,14 @@ logger = CadastroLogger(__name__).logger
class Location: class Location:
""" Class that stores longitude and latitude of a property (xcen, ycen) by Cadaster
in a format supported by Kibana (longitude=lon, latitude=lat)"""
def __init__(self, longitude, latitude): def __init__(self, longitude, latitude):
self.lon = float(longitude) if longitude is not None else None self.lon = float(longitude) if longitude is not None else None
self.lat = float(latitude) if latitude is not None else None self.lat = float(latitude) if latitude is not None else None
def to_json(self): def to_json(self):
""" Transforms an object of this class into a json dict """
if self.lon is None and self.lat is None: if self.lon is None and self.lat is None:
return None return None
else: else:

View File

@ -1,11 +1,13 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
class Reform: class Reform:
""" Class that stores type of reform(reforma) and year """
def __init__(self, reform_data): def __init__(self, reform_data):
self.type = reform_data['tipo'].strip() self.type = reform_data['tipo'].strip()
self.year = reform_data['fecha'].strip() self.year = reform_data['fecha'].strip()
pass
def to_json(self): def to_json(self):
""" Transforms an object of this class into a json dict """
return dict(type=self.type, year=self.year) return dict(type=self.type, year=self.year)

View File

@ -1,6 +1,8 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from abc import abstractmethod
from src.utils.cadastro_logger import CadastroLogger from src.utils.cadastro_logger import CadastroLogger
'''Logger''' '''Logger'''
@ -8,16 +10,19 @@ logger = CadastroLogger(__name__).logger
class Parser: class Parser:
"""Generic Parser class""" """ Parser signature class that defines common interfaces for ParserHTML and ParserXML
classes """
def __init__(self): def __init__(self):
pass pass
''' Processing signatures''' ''' Signatures'''
@classmethod @classmethod
@abstractmethod
def process_search_by_coordinates(cls, x, y, pictures=False): def process_search_by_coordinates(cls, x, y, pictures=False):
pass pass
@classmethod @classmethod
@abstractmethod
def process_search_by_provinces(cls, prov_list, pictures=False): def process_search_by_provinces(cls, prov_list, pictures=False):
pass pass

View File

@ -15,7 +15,10 @@ logger = CadastroLogger(__name__).logger
class Scrapper: class Scrapper:
"""Catastro web services parametrized""" """Scrapper class, from which ScrapperHTML and ScrapperXML inherit, and which
implements common scrapping functions for both HTML and XML"""
'''Catastro web services parametrized'''
URL_PICTURES = "https://www1.sedecatastro.gob.es/Cartografia/GeneraGraficoParcela.aspx?del={}&mun={}&refcat={}&AnchoPixels={}&AltoPixels={}" URL_PICTURES = "https://www1.sedecatastro.gob.es/Cartografia/GeneraGraficoParcela.aspx?del={}&mun={}&refcat={}&AnchoPixels={}&AltoPixels={}"
URL_LOCATIONS_BASE = "http://ovc.catastro.meh.es/ovcservweb/OVCSWLocalizacionRC{}" URL_LOCATIONS_BASE = "http://ovc.catastro.meh.es/ovcservweb/OVCSWLocalizacionRC{}"
@ -25,6 +28,8 @@ class Scrapper:
@classmethod @classmethod
def get_provinces(cls): def get_provinces(cls):
"""Get all provinces registered by Catastro (call only available from XML but used in both XML and HTML)"""
url = cls.URL_LOCATIONS_BASE.format("/OVCCallejero.asmx/ConsultaProvincia") url = cls.URL_LOCATIONS_BASE.format("/OVCCallejero.asmx/ConsultaProvincia")
response = requests.get(url) response = requests.get(url)
xml = response.content xml = response.content
@ -33,10 +38,16 @@ class Scrapper:
return DotMap(xmltodict.parse(xml, process_namespaces=False, xml_attribs=False)) return DotMap(xmltodict.parse(xml, process_namespaces=False, xml_attribs=False))
@classmethod @classmethod
def get_cities(cls, provincia, municipio=None): def get_cities(cls, prov_name, city_name=None):
params = {'Provincia': provincia} """
if municipio: Get all cities registered by Catastro (call only available from XML but used in both XML and HTML)
params['Municipio'] = municipio :param prov_name: Name of the province (from Cadaster Province List)
:param city_name: Optional. Name of the city (from Cadaster City List) in case a specific city is required
:return: DotMap (dict with properties accessible by '.') with all the cities
"""
params = {'Provincia': prov_name}
if city_name:
params['Municipio'] = city_name
else: else:
params['Municipio'] = '' params['Municipio'] = ''
url = cls.URL_LOCATIONS_BASE.format("/OVCCallejero.asmx/ConsultaMunicipio") url = cls.URL_LOCATIONS_BASE.format("/OVCCallejero.asmx/ConsultaMunicipio")
@ -47,15 +58,25 @@ class Scrapper:
return DotMap(xmltodict.parse(xml, process_namespaces=False, xml_attribs=False)) return DotMap(xmltodict.parse(xml, process_namespaces=False, xml_attribs=False))
@classmethod @classmethod
def get_addresses(cls, provincia, municipio, tipovia=None, nombrevia=None): def get_addresses(cls, prov_name, city_name, tv=None, nv=None):
params = {'Provincia': provincia, """
'Municipio': municipio} Get all addresses registered by Catastro (call only available from XML but used in both XML and HTML)
if tipovia:
params['TipoVia'] = tipovia :param prov_name: Name of the province (from Cadaster Province List)
:param city_name: Name of the city (from Cadaster City List)
:param tv: Optional. Name of the kind of street (CL, AV ...) in case a specific kind is needed
:param nv: Optional. Name of the street in case a specific street is needed
:return: DotMap (dict with properties accessible by '.') with all the addresses
"""
params = {'Provincia': prov_name,
'Municipio': city_name}
if tv:
params['TipoVia'] = tv
else: else:
params['TipoVia'] = '' params['TipoVia'] = ''
if nombrevia: if nv:
params['NombreVia'] = nombrevia params['NombreVia'] = nv
else: else:
params['NombreVia'] = '' params['NombreVia'] = ''
@ -68,8 +89,14 @@ class Scrapper:
@classmethod @classmethod
def get_address_iter(cls, prov_list=None, start_from=''): def get_address_iter(cls, prov_list=None, start_from=''):
"""Scraps properties by addresses""" """
Function that, instead of returning all the addresses, returns an iterator to all the addresses of a province list
to optimize performance.
:param prov_list: List of province names to get addresses from (from Cadaster Province List)
:param start_from: Optional. Name of the city where to start from in a province (from Cadaster City List)
:return: iterator to all the addresses of the provinces of the list
"""
if prov_list is None: if prov_list is None:
prov_list = [] prov_list = []
@ -121,6 +148,14 @@ class Scrapper:
@classmethod @classmethod
def scrap_site_picture(cls, prov_num, city_num, cadaster): def scrap_site_picture(cls, prov_num, city_num, cadaster):
"""
Gets the house plan picture.
:param prov_num: Province number.
:param city_num: City number.
:param cadaster: Cadaster number.
:return: an image, coded in base64.
"""
url_pic = cls.URL_PICTURES.format(prov_num, city_num, cadaster, config['width_px'], config['height_px']) url_pic = cls.URL_PICTURES.format(prov_num, city_num, cadaster, config['width_px'], config['height_px'])
@ -136,16 +171,26 @@ class Scrapper:
return b64_image return b64_image
@classmethod @classmethod
def get_cadaster_by_address(cls, provincia, municipio, tipovia, nombrevia, numero): def get_cadaster_by_address(cls, prov_name, city_name, tv, nv, num):
params = {'Provincia': provincia, """
'Municipio': municipio, Gets the cadaster information, based on an address.
'TipoVia': tipovia,
'NomVia': nombrevia, :param prov_name: Name of the province.
'Numero': str(numero)} :param city_name: Name of the city.
:param tv: Kind of street (CL, AV...)
:param nv: Name of the street
:param num: Number of the street
:return: DotMap (dict with properties accessible by '.') with the cadaster information
"""
params = {'Provincia': prov_name,
'Municipio': city_name,
'TipoVia': tv,
'NomVia': nv,
'Numero': str(num)}
url = cls.URL_LOCATIONS_BASE.format("/OVCCallejero.asmx/ConsultaNumero") url = cls.URL_LOCATIONS_BASE.format("/OVCCallejero.asmx/ConsultaNumero")
logger.debug("====Dir: {} {} {} {} {}====".format(tipovia, nombrevia, numero, municipio, provincia)) logger.debug("====Dir: {} {} {} {} {}====".format(tv, nv, num, city_name, prov_name))
logger.debug("URL for address: {}".format(url + '?' + urllib.parse.urlencode(params))) logger.debug("URL for address: {}".format(url + '?' + urllib.parse.urlencode(params)))
response = requests.get(url, params=params) response = requests.get(url, params=params)
@ -155,8 +200,17 @@ class Scrapper:
return DotMap(xmltodict.parse(xml, process_namespaces=False, xml_attribs=False)) return DotMap(xmltodict.parse(xml, process_namespaces=False, xml_attribs=False))
@classmethod @classmethod
def get_coords_from_cadaster(cls, provincia, municipio, cadaster): def get_coords_from_cadaster(cls, prov_name, city_name, cadaster):
params = {'Provincia': provincia, 'Municipio': municipio, 'SRS': 'EPSG:4326', 'RC': cadaster} """
Returns the lon (xcen) and lat (ycen) of a property, identified by its cadaster number
and province and city names.
:param prov_name: Province name.
:param city_name: City name.
:param cadaster: Cadaster number.
:return: DotMap (dict with properties accessible by '.') with the location information
"""
params = {'Provincia': prov_name, 'Municipio': city_name, 'SRS': 'EPSG:4326', 'RC': cadaster}
url = cls.URL_LOCATIONS_BASE.format("/OVCCoordenadas.asmx/Consulta_CPMRC") url = cls.URL_LOCATIONS_BASE.format("/OVCCoordenadas.asmx/Consulta_CPMRC")
logger.debug("URL for coordinates: {}".format(url + '?' + urllib.parse.urlencode(params))) logger.debug("URL for coordinates: {}".format(url + '?' + urllib.parse.urlencode(params)))

View File

@ -1,7 +1,12 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from abc import abstractmethod
class Searcher: class Searcher:
""" Just a signature, an abstract class just in case we need to define
something common for Provinces and Coordinates Searchers """
@abstractmethod
def __init__(self): def __init__(self):
pass pass

View File

@ -1,8 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from dotmap import DotMap
from src.librecatastro.scrapping.scrapper import Scrapper from src.librecatastro.scrapping.scrapper import Scrapper
from src.librecatastro.scrapping.searcher import Searcher from src.librecatastro.scrapping.searcher import Searcher
from src.utils.cadastro_logger import CadastroLogger from src.utils.cadastro_logger import CadastroLogger

View File

@ -3,6 +3,8 @@
import os import os
""" Dict settings file with config parameters"""
root_path = os.path.dirname(os.path.abspath(__file__)) root_path = os.path.dirname(os.path.abspath(__file__))
config = { config = {