mirror of https://github.com/josejuanmartinez/libreCatastro.git (synced 2024-07-06 15:22:28 +02:00)

Adds list provinces and list cities to main

This commit is contained in:
parent d5b280f6eb
commit f186186477
@@ -3,8 +3,8 @@ import argparse
 from src.librecatastro.scrapping.format.scrapper_html import ScrapperHTML
 from src.librecatastro.scrapping.format.scrapper_xml import ScrapperXML
-from src.librecatastro.scrapping.source.coordinates_input import CoordinatesInput
-from src.librecatastro.scrapping.source.provinces_input import ProvincesInput
+from src.librecatastro.scrapping.searchers.coordinates_search import CoordinatesSearch
+from src.librecatastro.scrapping.searchers.provinces_search import ProvincesSearch
 from src.settings import config
 
 if __name__ == "__main__":
@@ -15,7 +15,10 @@ if __name__ == "__main__":
     parser.add_argument('--sleep', action='store', dest='sleep', type=int, default=5)
     parser.add_argument('--html', dest='html', default=False, action='store_true')
     parser.add_argument('--scale', action='store', dest='scale', type=int, default=10000)
-    parser.add_argument('--pictures', action='store_true', dest='pictures', default='False')
+    parser.add_argument('--pictures', action='store_true', dest='pictures', default=False)
+    parser.add_argument('--startcity', action='store', dest='startcity', default='')
+    parser.add_argument('--listprovinces', action='store_true', dest='listprovinces', default=False)
+    parser.add_argument('--listcities', action='store', nargs=1, dest='listcities', default=[])
 
     args = parser.parse_args(sys.argv[1:])
 
@@ -30,8 +33,17 @@ if __name__ == "__main__":
     filenames = args.filenames
     pictures = args.pictures
     provinces = args.provinces
+    startcity = args.startcity
 
+    if args.listprovinces:
+        ProvincesSearch.list_provinces()
+        exit(0)
+
+    if len(args.listcities) == 1:
+        ProvincesSearch.list_cities(args.listcities[0])
+        exit(0)
+
     if args.coords:
-        CoordinatesInput.scrap_coordinates(scrapper, filenames, pictures)
+        CoordinatesSearch.scrap_coordinates(scrapper, filenames, pictures)
     else:
-        ProvincesInput.scrap_provinces(scrapper, provinces, pictures)
+        ProvincesSearch.scrap_provinces(scrapper, provinces, pictures, startcity)
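For orientation, the listing flags wired into main above can be exercised in isolation. The following is a minimal sketch (not part of the commit) that reproduces only the new early-exit branches; it assumes the script is run from the repository root so the src package is importable and that the Cadastro web services are reachable.

import argparse
import sys

from src.librecatastro.scrapping.searchers.provinces_search import ProvincesSearch

parser = argparse.ArgumentParser()
parser.add_argument('--listprovinces', action='store_true', dest='listprovinces', default=False)
parser.add_argument('--listcities', action='store', nargs=1, dest='listcities', default=[])
args = parser.parse_args(sys.argv[1:])

if args.listprovinces:
    # Log every province known to the Cadastro service, then stop before any scraping starts
    ProvincesSearch.list_provinces()
    exit(0)

if len(args.listcities) == 1:
    # Log the cities of the single province passed after --listcities, then stop
    ProvincesSearch.list_cities(args.listcities[0])
    exit(0)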
@@ -28,7 +28,7 @@ class Address:
         self.site = None
         self.lot = None
 
-        ''' NLP search '''
+        ''' NLP searchers '''
         self.first_line = self.get_first_line()
         self.second_line = self.get_second_line()
 
@@ -60,9 +60,9 @@ class ScrapperHTML(Scrapper):
         return results
 
     @classmethod
-    def scrap_provinces(cls, prov_list, pictures=False):
+    def scrap_provinces(cls, prov_list, pictures=False, start_from=''):
 
-        for prov_name, prov_num, city_name, city_num, address, tv, nv in cls.get_address_iter(prov_list):
+        for prov_name, prov_num, city_name, city_num, address, tv, nv in cls.get_address_iter(prov_list, start_from):
 
             if tv == DotMap() or nv == DotMap():
                 continue
@@ -151,7 +151,6 @@ class ScrapperHTML(Scrapper):
         parsed_html = BeautifulSoup(html, features="html.parser")
         return ScrapperHTML.parse_html_parcela(parsed_html, x, y, picture)
 
-
     @classmethod
     def scrap_cadaster(cls, cadaster, delimitacion=None, municipio=None, x=None, y=None, pictures=False):
         rc_1 = cadaster[0:7]
@@ -93,8 +93,8 @@ class ScrapperXML(Scrapper):
         return results
 
     @classmethod
-    def scrap_provinces(cls, prov_list, pictures=False):
-        for prov_name, prov_num, city_name, city_num, address, tv, nv in cls.get_address_iter(prov_list):
+    def scrap_provinces(cls, prov_list, pictures=False, start_from=''):
+        for prov_name, prov_num, city_name, city_num, address, tv, nv in cls.get_address_iter(prov_list, start_from):
             if tv == DotMap() or nv == DotMap():
                 continue
 
@@ -103,7 +103,7 @@ class ScrapperXML(Scrapper):
            while num_scrapping_fails > 0:
                try:
                    cadaster = cls.get_cadaster_by_address(prov_name, city_name, tv, nv, counter)
-                   res = cls.process_xml_by_address(cadaster, prov_name, city_name, tv, nv)
+                   res = cls.process_xml_by_address(cadaster, prov_name, city_name, tv, nv, pictures)
                    if len(res) < 1:
                        num_scrapping_fails -= 1
                    else:
@@ -133,7 +133,7 @@ class ScrapperXML(Scrapper):
                sleep(config['sleep_time'])
 
    @classmethod
-   def process_xml_by_address(cls, numerero_map, prov_name, city_name, tv, nv):
+   def process_xml_by_address(cls, numerero_map, prov_name, city_name, tv, nv, pictures=False):
        results = []
        if numerero_map.consulta_numerero.lerr.err.cod != DotMap():
            return results
@@ -177,8 +177,8 @@ class ScrapperXML(Scrapper):
            prov_num = entry_map.consulta_dnp.bico.bi.dt.loine.cp
            city_num = entry_map.consulta_dnp.bico.bi.dt.loine.cm
 
-           if prov_num != DotMap() and city_num != DotMap():
-               picture = cls.scrap_site_picture(prov_num, city_num, cadaster_num)
+           if pictures and prov_num != DotMap() and city_num != DotMap():
+               picture = cls.scrap_site_picture(prov_num, city_num, cadaster_num)
 
            # Parcela
            cadaster_entry = CadasterEntryXML(entry_map, lon, lat, picture)
@@ -200,7 +200,7 @@ class ScrapperXML(Scrapper):
            prov_num = entry_map.consulta_dnp.lrcdnp.rcdnp.loine.cp
            city_num = entry_map.consulta_dnp.lrcdnp.rcdnp.loine.cm
 
-           if prov_num != DotMap() and city_num != DotMap():
+           if pictures and prov_num != DotMap() and city_num != DotMap():
                picture = cls.scrap_site_picture(prov_num, city_num, cadaster)
 
            cadaster_entry = CadasterEntryXML(sub_entry, lon, lat, picture)
@@ -70,7 +70,7 @@ class Scrapper:
         return DotMap(xmltodict.parse(xml, process_namespaces=False, xml_attribs=False))
 
     @classmethod
-    def get_address_iter(cls, prov_list=None):
+    def get_address_iter(cls, prov_list=None, start_from=''):
         """Scraps properties by addresses"""
 
         if prov_list is None:
@@ -102,6 +102,10 @@ class Scrapper:
             if city_name == DotMap() or city_num == DotMap():
                 continue
 
+            if start_from != '' and city_name != start_from:
+                logger.debug("Skipping {}".format(city_name))
+                continue
+
             addresses = cls.get_addresses(prov_name, city_name).consulta_callejero.callejero.calle
             if addresses == DotMap():
                 logger.error("No addresses available right now (Service is down?)")
@@ -165,6 +169,7 @@ class Scrapper:
 
         response = requests.get(url, params=params)
         xml = response.content
+
         return DotMap(xmltodict.parse(xml, process_namespaces=False, xml_attribs=False))
 
     @classmethod
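As a side note, the city filter added to get_address_iter above behaves like the following standalone sketch (city names here are hypothetical): with a non-empty start_from, every city whose name differs from start_from is logged as skipped, so only the matching city is processed.

cities = ["ALCOBENDAS", "GETAFE", "MADRID"]  # hypothetical city names
start_from = "GETAFE"

for city_name in cities:
    # Same condition as the new branch in Scrapper.get_address_iter
    if start_from != '' and city_name != start_from:
        print("Skipping {}".format(city_name))
        continue
    print("Processing {}".format(city_name))

# Output: Skipping ALCOBENDAS / Processing GETAFE / Skipping MADRID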
@@ -1,3 +1,3 @@
-class Input:
+class Search:
     def __init__(self):
         pass
@@ -5,7 +5,7 @@ import random
 from time import sleep
 
 from src.librecatastro.domain.geometry.geo_polygon import GeoPolygon
-from src.librecatastro.scrapping.input import Input
+from src.librecatastro.scrapping.search import Search
 from src.settings import config
 from src.utils.cadastro_logger import CadastroLogger
 from src.utils.list_utils import ListUtils
@@ -14,12 +14,12 @@ from src.utils.list_utils import ListUtils
 logger = CadastroLogger(__name__).logger
 
 
-class CoordinatesInput(Input):
+class CoordinatesSearch(Search):
     def __init__(self):
         super().__init__()
 
     @classmethod
-    def scrap_coordinates(cls, scrapper, filenames, pictures):
+    def scrap_coordinates(cls, scrapper, filenames, pictures=False):
         for r, d, files in os.walk(config['coordinates_path']):
             for file in files:
 
@@ -31,12 +31,12 @@ class CoordinatesInput(Input):
 
                 try:
                     polygon = GeoPolygon(os.path.join(config['coordinates_path'], file))
-                    CoordinatesInput.scrap_polygon(scrapper, polygon, pictures)
+                    CoordinatesSearch.scrap_polygon(scrapper, polygon, pictures)
                 except:
                     logger.error("{} is not formatted properly. Please take a look at the examples.".format(file))
 
     @classmethod
-    def scrap_polygon(cls, scrapper, polygon, pictures):
+    def scrap_polygon(cls, scrapper, polygon, pictures=False):
         bb = polygon.get_bounding_box()
         lon_min = int(bb[0] * config['scale'])
         lon_max = int(bb[2] * config['scale'])
src/librecatastro/scrapping/searchers/provinces_search.py (new file, 28 lines)
@@ -0,0 +1,28 @@
+import json
+
+from dotmap import DotMap
+
+from src.librecatastro.scrapping.scrapper import Scrapper
+from src.librecatastro.scrapping.search import Search
+from src.utils.cadastro_logger import CadastroLogger
+
+'''Logger'''
+logger = CadastroLogger(__name__).logger
+
+class ProvincesSearch(Search):
+    def __init__(self):
+        super().__init__()
+
+    @classmethod
+    def scrap_provinces(cls, scrapper, prov_list, pictures=False, start_from=''):
+        scrapper.scrap_provinces(prov_list, pictures, start_from)
+
+    @classmethod
+    def list_provinces(cls):
+        logger.debug(DotMap.pprint(Scrapper.get_provinces()))
+        return
+
+    @classmethod
+    def list_cities(cls, prov_name):
+        logger.debug(DotMap.pprint(Scrapper.get_cities(prov_name)))
+        return
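A minimal usage sketch of the new ProvincesSearch helper (not part of the commit), assuming the Cadastro web services are reachable; "MADRID" is a hypothetical province name used only for illustration.

from src.librecatastro.scrapping.format.scrapper_xml import ScrapperXML
from src.librecatastro.scrapping.searchers.provinces_search import ProvincesSearch

ProvincesSearch.list_provinces()        # logs all provinces via Scrapper.get_provinces()
ProvincesSearch.list_cities("MADRID")   # logs the cities of one province

# Delegate the actual scraping to a scrapper class; start_from filters cities by name
# (an empty string means no filtering)
ProvincesSearch.scrap_provinces(ScrapperXML, ["MADRID"], pictures=False, start_from='')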
@ -1,10 +0,0 @@
|
||||
from src.librecatastro.scrapping.input import Input
|
||||
|
||||
|
||||
class ProvincesInput(Input):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
|
||||
@classmethod
|
||||
def scrap_provinces(cls, scrapper, prov_list, pictures=False):
|
||||
scrapper.scrap_provinces(prov_list, pictures)
|
@@ -3,7 +3,7 @@ import unittest
 
 from src.librecatastro.domain.geometry.geo_polygon import GeoPolygon
 from src.librecatastro.scrapping.format.scrapper_html import ScrapperHTML
-from src.librecatastro.scrapping.source.coordinates_input import CoordinatesInput
+from src.librecatastro.scrapping.searchers.coordinates_search import CoordinatesSearch
 from src.settings import config
 from src.utils.elasticsearch_utils import ElasticSearchUtils
 
@@ -89,7 +89,7 @@ class ScrapperHTMLTests(unittest.TestCase):
     def scrap_random_until_x_times_found(self, times):
         polygon = GeoPolygon(os.path.join(config['coordinates_path'], 'spain_polygon.json'))
         coord = polygon.get_bounding_box()
-        cadaster_list = CoordinatesInput.scrap_results_random_x_times(times, int(coord[0]*config['scale']), int(coord[2]*config['scale']), int(coord[1]*config['scale']), int(coord[3]*config['scale']), ScrapperHTML)
+        cadaster_list = CoordinatesSearch.scrap_results_random_x_times(times, int(coord[0] * config['scale']), int(coord[2] * config['scale']), int(coord[1] * config['scale']), int(coord[3] * config['scale']), ScrapperHTML)
         self.assertTrue(len(cadaster_list) >= times)
         return cadaster_list
 