mirror of
https://github.com/josejuanmartinez/libreCatastro.git
synced 2024-07-06 15:22:28 +02:00
Changes the coordinate system: it now uses the Kibana Geo Point JSON format. Separate regions are provided to avoid one big, suboptimal square containing lots of sea points.
This commit is contained in:
parent
c29de7faf2
commit
89b3cb5994
0
src/coordinates/__init__.py
Normal file
0
src/coordinates/__init__.py
Normal file
15
src/coordinates/balear_islands.json
Normal file
15
src/coordinates/balear_islands.json
Normal file
@ -0,0 +1,15 @@
|
||||
{
|
||||
"geo_bounding_box": {
|
||||
"ignore_unmapped": true,
|
||||
"location": {
|
||||
"bottom_right": {
|
||||
"lat": 38.631890929028370,
|
||||
"lon": 4.361572265625001
|
||||
},
|
||||
"top_left": {
|
||||
"lat": 40.101185062587010,
|
||||
"lon": 1.208496093750000
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
15
src/coordinates/canary_islands.json
Normal file
15
src/coordinates/canary_islands.json
Normal file
@ -0,0 +1,15 @@
|
||||
{
|
||||
"geo_bounding_box": {
|
||||
"ignore_unmapped": true,
|
||||
"location": {
|
||||
"bottom_right": {
|
||||
"lat": 27.615406013399590,
|
||||
"lon":-13.403320312500002
|
||||
},
|
||||
"top_left": {
|
||||
"lat": 29.458731185355344,
|
||||
"lon":-18.160400390625004
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
15
src/coordinates/central_peninsulae.json
Normal file
15
src/coordinates/central_peninsulae.json
Normal file
@ -0,0 +1,15 @@
|
||||
{
|
||||
"geo_bounding_box": {
|
||||
"ignore_unmapped": true,
|
||||
"location": {
|
||||
"bottom_right": {
|
||||
"lat": 35.995785386420344,
|
||||
"lon": 1.098632812500000
|
||||
},
|
||||
"top_left": {
|
||||
"lat": 41.162113939396920,
|
||||
"lon": -7.602539062500001
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
15
src/coordinates/ceuta.json
Normal file
15
src/coordinates/ceuta.json
Normal file
@ -0,0 +1,15 @@
|
||||
{
|
||||
"geo_bounding_box": {
|
||||
"ignore_unmapped": true,
|
||||
"location": {
|
||||
"bottom_right": {
|
||||
"lat": 35.869994909901720,
|
||||
"lon": -5.275497436523438
|
||||
},
|
||||
"top_left": {
|
||||
"lat": 35.922281333698294,
|
||||
"lon": -5.383987426757813
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
15
src/coordinates/melilla.json
Normal file
15
src/coordinates/melilla.json
Normal file
@ -0,0 +1,15 @@
|
||||
{
|
||||
"geo_bounding_box": {
|
||||
"ignore_unmapped": true,
|
||||
"location": {
|
||||
"bottom_right": {
|
||||
"lat": 35.264683153268145,
|
||||
"lon": -2.927513122558594
|
||||
},
|
||||
"top_left": {
|
||||
"lat": 35.321008047212080,
|
||||
"lon": -2.972831726074218
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
15
src/coordinates/north_peninsulae.json
Normal file
15
src/coordinates/north_peninsulae.json
Normal file
@ -0,0 +1,15 @@
|
||||
{
|
||||
"geo_bounding_box": {
|
||||
"ignore_unmapped": true,
|
||||
"location": {
|
||||
"bottom_right": {
|
||||
"lat": 41.013065787006300,
|
||||
"lon": 3.669433593750000
|
||||
},
|
||||
"top_left": {
|
||||
"lat": 43.755225053069280,
|
||||
"lon": -9.316406250000002
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,3 +1,4 @@
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
@ -10,24 +11,12 @@ from xml.etree import ElementTree
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from src.librecatastro.domain.cadaster_entry import CadasterEntry
|
||||
from src.librecatastro.domain.kibana_geo_bounding_box import KibanaGeoBoundingBox
|
||||
from src.settings import config
|
||||
|
||||
from src.utils.cadastro_logger import CadastroLogger
|
||||
from src.utils.list_utils import ListUtils
|
||||
|
||||
"""Constants"""
|
||||
|
||||
'''Spain geocoordinates'''
|
||||
LONGITUDE = (42896, -180243) # *1000000
|
||||
LATITUDE = (437692, 277255) # *1000000
|
||||
|
||||
'''Scale for scrapping'''
|
||||
SCALE = 10000
|
||||
|
||||
'''Enumerator for tuple access'''
|
||||
MAX = 0
|
||||
MIN = 1
|
||||
|
||||
'''Catastro web services parametrized'''
|
||||
URL = "http://ovc.catastro.meh.es/ovcservweb/ovcswlocalizacionrc/ovccoordenadas.asmx/Consulta_RCCOOR?SRS=EPSG:4230&Coordenada_X={}&Coordenada_Y={}"
|
||||
URL_REF = "https://www1.sedecatastro.gob.es/CYCBienInmueble/OVCListaBienes.aspx?rc1={}&rc2={}"
|
||||
@ -49,11 +38,25 @@ class CadastroScrapper:
|
||||
""" Scrapping main calls """
|
||||
@staticmethod
def scrap_all():
    """Scrap every region described by a JSON file under ``config['coordinates_path']``.

    The directory tree is walked with ``os.walk``. Each ``*.json`` file is
    parsed as a ``KibanaGeoBoundingBox`` and the resulting coordinates tuple
    is handed to ``CadastroScrapper.scrap_range_of_coordinates``.

    Files that cannot be parsed are reported through ``logger.error`` and
    skipped; a failure while scrapping one region is logged and the next
    region is still processed.
    """
    for root, _dirs, files in os.walk(config['coordinates_path']):
        for file in files:
            # Only real JSON files; a substring test would also accept e.g. "x.json.bak".
            if not file.endswith('.json'):
                continue

            # Join with the directory currently being walked so that files
            # found in subdirectories are opened from the right place, and
            # close the handle as soon as the content has been read.
            with open(os.path.join(root, file), "r") as f:
                content = f.read()

            # Keep the "bad format" handler around the parsing only, so a
            # scrapping failure is not misreported as a formatting problem.
            try:
                coordinates_tuple = KibanaGeoBoundingBox(content).get_coordinates_tuple()
            except Exception:
                logger.error("{} is not formatted properly. Please take a look at the examples.".format(file))
                continue

            try:
                CadastroScrapper.scrap_range_of_coordinates(*coordinates_tuple)
            except Exception as e:
                # Best effort: report the failure and carry on with the next region.
                logger.error("Scrapping the region described in {} failed: {}".format(file, e))
|
||||
|
||||
x_scaled = x / SCALE
|
||||
y_scaled = y / SCALE
|
||||
@staticmethod
|
||||
def scrap_range_of_coordinates(long_min, long_max, lat_min, lat_max):
|
||||
for x in range(long_min, long_max):
|
||||
for y in range(lat_min, lat_max):
|
||||
|
||||
x_scaled = x / config['scale']
|
||||
y_scaled = y / config['scale']
|
||||
|
||||
''' Adding to tracking file'''
|
||||
logger.info('{},{}'.format(x_scaled, y_scaled))
|
||||
@ -81,16 +84,16 @@ class CadastroScrapper:
|
||||
sleep(5)
|
||||
|
||||
@staticmethod
|
||||
def scrap_results_by_time(seconds):
|
||||
def scrap_results_by_time(seconds, lon_min, lon_max, lat_min, lat_max):
|
||||
start_time = time.time()
|
||||
results = []
|
||||
|
||||
finished = False
|
||||
for x in range(LONGITUDE[MIN], LONGITUDE[MAX]):
|
||||
for y in range(LATITUDE[MIN], LATITUDE[MAX]):
|
||||
for x in range(lon_min, lon_max):
|
||||
for y in range(lat_min, lat_max):
|
||||
|
||||
x_scaled = x / SCALE
|
||||
y_scaled = y / SCALE
|
||||
x_scaled = x / config['scale']
|
||||
y_scaled = y / config['scale']
|
||||
|
||||
try:
|
||||
result = CadastroScrapper.scrap_coord(x_scaled, y_scaled)
|
||||
@ -122,16 +125,16 @@ class CadastroScrapper:
|
||||
return ListUtils.flat(results)
|
||||
|
||||
@staticmethod
|
||||
def scrap_results_linear_x_times(times):
|
||||
def scrap_results_linear_x_times(times, lon_min, lon_max, lat_min, lat_max):
|
||||
results = []
|
||||
counter = times
|
||||
|
||||
finished = False
|
||||
for x in range(LONGITUDE[MIN], LONGITUDE[MAX]):
|
||||
for y in range(LATITUDE[MIN], LATITUDE[MAX]):
|
||||
for x in range(lon_min, lon_max):
|
||||
for y in range(lat_min, lat_max):
|
||||
|
||||
x_scaled = x / SCALE
|
||||
y_scaled = y / SCALE
|
||||
x_scaled = x / config['scale']
|
||||
y_scaled = y / config['scale']
|
||||
|
||||
try:
|
||||
|
||||
@ -164,15 +167,15 @@ class CadastroScrapper:
|
||||
return ListUtils.flat(results)
|
||||
|
||||
@staticmethod
|
||||
def scrap_results_random_x_times(times):
|
||||
def scrap_results_random_x_times(times, lon_min, lon_max, lat_min, lat_max):
|
||||
results = []
|
||||
counter = times
|
||||
while counter > 0:
|
||||
x = random.randrange(LONGITUDE[MIN], LONGITUDE[MAX])
|
||||
y = random.randrange(LATITUDE[MIN], LATITUDE[MAX])
|
||||
x = random.randrange(lon_min, lon_max)
|
||||
y = random.randrange(lat_min, lat_max)
|
||||
|
||||
x_scaled = x / SCALE
|
||||
y_scaled = y / SCALE
|
||||
x_scaled = x / config['scale']
|
||||
y_scaled = y / config['scale']
|
||||
|
||||
try:
|
||||
cadaster_entry = CadastroScrapper.scrap_coord(x_scaled, y_scaled)
|
||||
|
@ -7,7 +7,7 @@ from src.librecatastro.domain.address import Address
|
||||
from src.librecatastro.domain.location import Location
|
||||
from src.settings import config
|
||||
from src.utils.cadastro_logger import CadastroLogger
|
||||
from src.utils.json_enconder import JSONEncoder
|
||||
from src.utils.json_encoder import JSONEncoder
|
||||
|
||||
logger = CadastroLogger(__name__).logger
|
||||
|
||||
|
32
src/librecatastro/domain/kibana_geo_bounding_box.py
Normal file
32
src/librecatastro/domain/kibana_geo_bounding_box.py
Normal file
@ -0,0 +1,32 @@
|
||||
import json
|
||||
from collections import namedtuple
|
||||
|
||||
from src.settings import config
|
||||
from src.utils.cadastro_logger import CadastroLogger
|
||||
|
||||
'''Logger'''
|
||||
logger = CadastroLogger(__name__).logger
|
||||
|
||||
|
||||
class KibanaGeoBoundingBox:
    """Bounding box read from a Kibana-style ``geo_bounding_box`` JSON document.

    The JSON is expected to expose
    ``geo_bounding_box.location.top_left.{lat,lon}`` and
    ``geo_bounding_box.location.bottom_right.{lat,lon}``.
    """

    def __init__(self, data):
        """Parse ``data`` (JSON text) and keep the result in ``self.data``.

        Every JSON object becomes a namedtuple, so nested values are reached
        with attribute access. Any error raised while decoding propagates to
        the caller.
        """
        self.data = KibanaGeoBoundingBox._parse(data)

    @staticmethod
    def _parse(text):
        """Decode JSON text, converting each JSON object into a namedtuple."""
        return json.loads(text, object_hook=lambda d: namedtuple('X', d.keys())(*d.values()))

    def get_coordinates_tuple(self):
        """Return the scaled coordinates tuple of this bounding box.

        See ``get_coordinates_tuple_static`` for the layout of the tuple.
        """
        return KibanaGeoBoundingBox.get_coordinates_tuple_static(self.data)

    @staticmethod
    def get_coordinates_tuple_static(data):
        """Convert parsed bounding-box data into a tuple of scaled integers.

        Each coordinate is multiplied by ``config['scale']`` and truncated
        with ``int``.

        :param data: parsed document, as produced by ``__init__``.
        :return: ``(top_left.lon, bottom_right.lon, bottom_right.lat, top_left.lat)``.
        """
        location = data.geo_bounding_box.location
        scale = config['scale']
        return (
            int(location.top_left.lon * scale),
            int(location.bottom_right.lon * scale),
            int(location.bottom_right.lat * scale),
            int(location.top_left.lat * scale),
        )

    @staticmethod
    def get_coordinate_tuple_from_file(file):
        """Read a bounding-box JSON file and return its scaled coordinates tuple.

        :param file: path of the JSON file to read.
        :return: the tuple described in ``get_coordinates_tuple_static``, or
            ``None`` (after logging an error) when the content cannot be
            parsed or converted.

        Errors raised while opening or reading the file propagate to the caller.
        """
        # The context manager guarantees the handle is closed even if read() fails.
        with open(file, "r") as f:
            content = f.read()

        try:
            data = KibanaGeoBoundingBox._parse(content)
            return KibanaGeoBoundingBox.get_coordinates_tuple_static(data)
        except Exception:
            # Exception (not a bare except) so KeyboardInterrupt/SystemExit still propagate.
            logger.error("{} is not formatted properly. Please take a look at the examples.".format(file))

        return None
|
@ -6,5 +6,7 @@ config = {
|
||||
"separator": "####",
|
||||
"elasticsearch-index": "cadaster",
|
||||
"error_log_file": os.path.join(root_path, 'logs', 'log'),
|
||||
"tracking_log_file": os.path.join(root_path, 'logs', 'track')
|
||||
"tracking_log_file": os.path.join(root_path, 'logs', 'track'),
|
||||
"scale": 1000000,
|
||||
"coordinates_path": os.path.join(root_path, 'coordinates')
|
||||
}
|
||||
|
@ -1,6 +1,9 @@
|
||||
import os
|
||||
import unittest
|
||||
|
||||
from src.librecatastro.catastro_scrapper import CadastroScrapper
|
||||
from src.librecatastro.domain.kibana_geo_bounding_box import KibanaGeoBoundingBox
|
||||
from src.settings import config
|
||||
from src.utils.elasticsearch_utils import ElasticSearchUtils
|
||||
|
||||
|
||||
@ -77,7 +80,8 @@ class MyTestCase(unittest.TestCase):
|
||||
self.assertIsNotNone(cadaster.from_elasticsearch())
|
||||
|
||||
def scrap_random_until_x_times_found(self, times):
    """Scrap random points of the central peninsula region until ``times`` results are found.

    The region boundaries are read from ``central_peninsulae.json`` under
    ``config['coordinates_path']``.

    :param times: minimum number of cadaster entries expected.
    :return: the list returned by ``CadastroScrapper.scrap_results_random_x_times``.
    """
    region_file = os.path.join(config['coordinates_path'], 'central_peninsulae.json')
    coord = KibanaGeoBoundingBox.get_coordinate_tuple_from_file(region_file)
    # get_coordinate_tuple_from_file returns None for a malformed file; fail
    # with a clear assertion instead of a TypeError when indexing None.
    self.assertIsNotNone(coord)

    cadaster_list = CadastroScrapper.scrap_results_random_x_times(times, *coord)
    self.assertGreaterEqual(len(cadaster_list), times)
    return cadaster_list
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user