From 2606fc95f081be3d61b872e36c6c1566be734553 Mon Sep 17 00:00:00 2001 From: J Date: Sun, 22 Sep 2019 14:48:41 +0200 Subject: [PATCH] Refactoring of tests. Added health check and some minor changes. --- libreCadastro.py | 24 +-- .../scrapping/parsers/parser_html.py | 19 ++- .../scrapping/parsers/parser_xml.py | 14 +- .../scrapping/scrappers/scrapper_html.py | 2 +- .../scrapping/scrappers/scrapper_xml.py | 2 +- .../searchers/coordinates_searcher.py | 5 +- src/settings.py | 11 +- src/tests/parsers/__init__.py | 0 src/tests/parsers/parser_html_tests.py | 49 ++++++ src/tests/parsers/parser_xml_tests.py | 22 +++ src/tests/scrappers/scrapper_html_tests.py | 141 +++++------------- src/tests/scrappers/scrapper_xml_tests.py | 32 +--- src/tests/searchers/__init__.py | 0 .../searchers/coordinates_searcher_tests.py | 48 ++++++ src/tests/servers_health/__init__.py | 0 .../servers_health/server_health_tests.py | 49 ++++++ 16 files changed, 260 insertions(+), 158 deletions(-) create mode 100644 src/tests/parsers/__init__.py create mode 100644 src/tests/parsers/parser_html_tests.py create mode 100644 src/tests/parsers/parser_xml_tests.py create mode 100644 src/tests/searchers/__init__.py create mode 100644 src/tests/searchers/coordinates_searcher_tests.py create mode 100644 src/tests/servers_health/__init__.py create mode 100644 src/tests/servers_health/server_health_tests.py diff --git a/libreCadastro.py b/libreCadastro.py index 51b4f75..51dc26d 100644 --- a/libreCadastro.py +++ b/libreCadastro.py @@ -4,14 +4,15 @@ import sys import argparse -from src.librecatastro.scrapping.parsers.parser_html import ScrapperHTML, ParserHTML +from src.librecatastro.scrapping.parsers.parser_html import ParserHTML from src.librecatastro.scrapping.parsers.parser_xml import ParserXML from src.librecatastro.scrapping.searchers.coordinates_searcher import CoordinatesSearcher from src.librecatastro.scrapping.searchers.provinces_searcher import ProvincesSearcher from src.settings import config +from src.tests.servers_health.server_health_tests import ServerHealthTests if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Runs the Cadastro Parser') + parser = argparse.ArgumentParser(description='Runs libreCadastro') parser.add_argument('--coords', action='store_true', dest='coords', default=False) parser.add_argument('--filenames', action='store', nargs='+', dest='filenames', default=[]) parser.add_argument('--provinces', action='store', nargs='+', dest='provinces', default=[]) @@ -22,6 +23,7 @@ if __name__ == "__main__": parser.add_argument('--startcity', action='store', dest='startcity', default='') parser.add_argument('--listprovinces', action='store_true', dest='listprovinces', default=False) parser.add_argument('--listcities', action='store', nargs=1, dest='listcities', default=[]) + parser.add_argument('--health', action='store_true', dest='health', default=False) args = parser.parse_args(sys.argv[1:]) @@ -31,13 +33,6 @@ if __name__ == "__main__": if args.scale: config['scale'] = args.scale - parser = ParserHTML if args.html else ParserXML - - filenames = args.filenames - pictures = args.pictures - provinces = args.provinces - startcity = args.startcity - if args.listprovinces: ProvincesSearcher.list_provinces() exit(0) @@ -46,6 +41,17 @@ if __name__ == "__main__": ProvincesSearcher.list_cities(args.listcities[0]) exit(0) + if args.health: + ServerHealthTests.healthcheck() + exit(0) + + parser = ParserHTML if args.html else ParserXML + + filenames = args.filenames + pictures = args.pictures + provinces 
= args.provinces + startcity = args.startcity + if args.coords: CoordinatesSearcher.search_by_coordinates(parser, filenames, pictures) else: diff --git a/src/librecatastro/scrapping/parsers/parser_html.py b/src/librecatastro/scrapping/parsers/parser_html.py index 96702cb..cc6ecb6 100644 --- a/src/librecatastro/scrapping/parsers/parser_html.py +++ b/src/librecatastro/scrapping/parsers/parser_html.py @@ -45,8 +45,13 @@ class ParserHTML(Parser): results = [] if pc1 is not None and pc2 is not None: cadaster = ''.join([pc1.text, pc2.text]) - htmls = ScrapperHTML.scrap_cadaster(cadaster, None, None, pictures) - for html, picture in htmls.items(): + html_picture_tuples = ScrapperHTML.scrap_cadaster(cadaster, None, None, pictures) + + if not isinstance(html_picture_tuples, list): + html_picture_tuples = [html_picture_tuples] + + for html_picture_tuple in html_picture_tuples: + html, picture = html_picture_tuple cadaster_entry = cls.parse_html_parcela(html, x, y, picture) cadaster_entry.to_elasticsearch() results.append(cadaster_entry) @@ -54,7 +59,10 @@ class ParserHTML(Parser): return results @classmethod - def process_search_by_provinces(cls, prov_list, pictures=False, start_from=''): + def process_search_by_provinces(cls, prov_list, pictures=False, start_from='', max_times=None): + + times = 0 + results = [] num = '' for prov_name, prov_num, city_name, city_num, address, tv, nv in Scrapper.get_address_iter(prov_list, start_from): @@ -110,8 +118,13 @@ class ParserHTML(Parser): for html, picture in htmls: cadaster_entry = cls.parse_html_parcela(html, lon, lat, picture) cadaster_entry.to_elasticsearch() + results.append(cadaster_entry) counter += 1 + times += 1 + + if max_times is not None and times >= max_times: + return results except urllib.error.HTTPError as e: logger.error( diff --git a/src/librecatastro/scrapping/parsers/parser_xml.py b/src/librecatastro/scrapping/parsers/parser_xml.py index af79a9e..4594368 100644 --- a/src/librecatastro/scrapping/parsers/parser_xml.py +++ b/src/librecatastro/scrapping/parsers/parser_xml.py @@ -15,6 +15,8 @@ from src.utils.cadastro_logger import CadastroLogger from dotmap import DotMap +from src.utils.list_utils import ListUtils + '''Logger''' logger = CadastroLogger(__name__).logger @@ -124,7 +126,11 @@ class ParserXML(Parser): return results @classmethod - def process_search_by_provinces(cls, prov_list, pictures=False, start_from=''): + def process_search_by_provinces(cls, prov_list, pictures=False, start_from='', max_times=None): + + times = 0 + results = [] + for prov_name, prov_num, city_name, city_num, address, tv, nv in Scrapper.get_address_iter(prov_list, start_from): if tv == DotMap() or nv == DotMap(): continue @@ -139,6 +145,10 @@ class ParserXML(Parser): num_scrapping_fails -= 1 else: num_scrapping_fails = 10 + times += 1 + results.append(res) + if max_times is not None and times >= max_times: + return ListUtils.flat(results) except urllib.error.HTTPError as e: logger.error( @@ -160,6 +170,8 @@ class ParserXML(Parser): counter += 1 + return results + ''' Parsing calls ''' @classmethod diff --git a/src/librecatastro/scrapping/scrappers/scrapper_html.py b/src/librecatastro/scrapping/scrappers/scrapper_html.py index 43d8055..ed71d3d 100644 --- a/src/librecatastro/scrapping/scrappers/scrapper_html.py +++ b/src/librecatastro/scrapping/scrappers/scrapper_html.py @@ -15,7 +15,7 @@ logger = CadastroLogger(__name__).logger class ScrapperHTML(Scrapper): """HTML Catastro Scrapper""" - URL = 
"http://ovc.catastro.meh.es/ovcservweb/ovcswlocalizacionrc/ovccoordenadas.asmx/Consulta_RCCOOR?SRS=EPSG:4226&Coordenada_X={}&Coordenada_Y={}" + URL = "http://ovc.catastro.meh.es/ovcservweb/ovcswlocalizacionrc/ovccoordenadas.asmx/Consulta_RCCOOR?SRS=EPSG:4326&Coordenada_X={}&Coordenada_Y={}" URL_REF = "https://www1.sedecatastro.gob.es/CYCBienInmueble/OVCListaBienes.aspx?rc1={}&rc2={}" URL_REF_FULL = "https://www1.sedecatastro.gob.es/CYCBienInmueble/OVCConCiud.aspx?RefC={}&RCCompleta={}&del={}&mun={}" diff --git a/src/librecatastro/scrapping/scrappers/scrapper_xml.py b/src/librecatastro/scrapping/scrappers/scrapper_xml.py index 4fc66e4..d5178f2 100644 --- a/src/librecatastro/scrapping/scrappers/scrapper_xml.py +++ b/src/librecatastro/scrapping/scrappers/scrapper_xml.py @@ -20,7 +20,7 @@ class ScrapperXML(Scrapper): @classmethod def get_coord(cls,x, y): - params = {'SRS': 'EPSG:4230', 'Coordenada_X': x, 'Coordenada_Y': y} + params = {'SRS': 'EPSG:4326', 'Coordenada_X': x, 'Coordenada_Y': y} url = cls.URL_LOCATIONS_BASE.format("/OVCCoordenadas.asmx/Consulta_RCCOOR") response = requests.get(url, params=params) diff --git a/src/librecatastro/scrapping/searchers/coordinates_searcher.py b/src/librecatastro/scrapping/searchers/coordinates_searcher.py index cf57354..5c84e6c 100644 --- a/src/librecatastro/scrapping/searchers/coordinates_searcher.py +++ b/src/librecatastro/scrapping/searchers/coordinates_searcher.py @@ -159,10 +159,11 @@ class CoordinatesSearcher(Searcher): return ListUtils.flat(results) @staticmethod - def search_by_coordinates_random_max_n_matches(times, lon_min, lon_max, lat_min, lat_max, scrapper): + def search_by_coordinates_random_max_n_matches(times, lon_min, lon_max, lat_min, lat_max, parser): results = [] counter = times while counter > 0: + x = random.randrange(lon_min, lon_max) y = random.randrange(lat_min, lat_max) @@ -170,7 +171,7 @@ class CoordinatesSearcher(Searcher): y_scaled = y / config['scale'] try: - cadaster_entry = scrapper.process_search_by_coordinates(x_scaled, y_scaled) + cadaster_entry = parser.process_search_by_coordinates(x_scaled, y_scaled) if len(cadaster_entry) > 0: results.append(cadaster_entry) diff --git a/src/settings.py b/src/settings.py index b4c660a..28592c4 100644 --- a/src/settings.py +++ b/src/settings.py @@ -17,8 +17,13 @@ config = { "sleep_dos_time": 300, "width_px": 120, "height_px": 120, - "servers_down_message": "Some of the Cadastro servers are down. " - "Maintenance is usually carried out durign the night or the weekends. Please, retry later." - "As an alternative, your IP address may have been banned. Try to change your public IP" + "servers_down_message_001": "Error 001: Cadastro server to get provinces and cities is down.\n" + "Consequence: Search by provinces will fail.\n" + "Maintenance is usually carried out durign the night or the weekends. Please, retry later.\n" + "As an alternative, your IP address may have been banned. Try to change your public IP", + "servers_down_message_002": "Error 002: Cadastro server to query by cadaster number is off.\n" + "Search by Coordinates will fail.\n" + "Maintenance is usually carried out durign the night or the weekends. Please, retry later.\n" + "As an alternative, your IP address may have been banned. 
 }
 
diff --git a/src/tests/parsers/__init__.py b/src/tests/parsers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/tests/parsers/parser_html_tests.py b/src/tests/parsers/parser_html_tests.py
new file mode 100644
index 0000000..3f67768
--- /dev/null
+++ b/src/tests/parsers/parser_html_tests.py
@@ -0,0 +1,49 @@
+import unittest
+
+from src.librecatastro.scrapping.parsers.parser_html import ParserHTML
+from src.librecatastro.scrapping.scrappers.scrapper_html import ScrapperHTML
+
+
+class ParserHTMLTests(unittest.TestCase):
+
+    def test_search_by_coordinates_creates_and_stores_in_elasticsearch(self):
+        cadaster_list = ParserHTML.process_search_by_coordinates(-3.47600944027389, 40.5370635727521)
+        self.assertEqual(len(cadaster_list), 14)
+        for cadaster in cadaster_list:
+            self.assertIsNotNone(cadaster.from_elasticsearch())
+
+    def test_search_by_provinces_creates_and_stores_in_elasticsearch(self):
+        cadaster_list = ParserHTML.process_search_by_provinces(['MADRID'], max_times=1)
+        self.assertEqual(len(cadaster_list), 14)
+        for cadaster in cadaster_list:
+            self.assertIsNotNone(cadaster.from_elasticsearch())
+
+    def test_search_site_lot_is_set(self):
+        cadaster_list = ScrapperHTML.scrap_cadaster('45134A02500003')
+        html, picture = cadaster_list[0]
+        cadaster = ParserHTML.parse_html_parcela(html)
+        self.assertIsNotNone(cadaster.address.site)
+        self.assertIsNotNone(cadaster.address.lot)
+
+    def test_search_constructions_are_set(self):
+        cadaster_list = ScrapperHTML.scrap_cadaster('5036901NH2553N0001HB')
+        html, picture = cadaster_list[0]
+        cadaster = ParserHTML.parse_html_parcela(html)
+        self.assertTrue(len(cadaster.constructions) > 0)
+
+    def test_search_no_cp_is_correctly_set(self):
+        cadaster_list = ScrapperHTML.scrap_cadaster('06145A00500028')
+        html, picture = cadaster_list[0]
+        cadaster = ParserHTML.parse_html_parcela(html)
+        self.assertIsNone(cadaster.address.cp)
+
+    def test_search_multiparcela_2_cadasters_are_set(self):
+        cadaster_list = ScrapperHTML.scrap_cadaster('22282A00900547')
+        for cadaster in cadaster_list:
+            html, picture = cadaster
+            cadaster = ParserHTML.parse_html_parcela(html)
+            self.assertIsNotNone(cadaster.cadaster)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/src/tests/parsers/parser_xml_tests.py b/src/tests/parsers/parser_xml_tests.py
new file mode 100644
index 0000000..3a397fc
--- /dev/null
+++ b/src/tests/parsers/parser_xml_tests.py
@@ -0,0 +1,22 @@
+import unittest
+
+from src.librecatastro.scrapping.parsers.parser_xml import ParserXML
+
+
+class ParserXMLTests(unittest.TestCase):
+
+    def test_search_by_coordinates_creates_and_stores_in_elasticsearch(self):
+        cadaster_list = ParserXML.process_search_by_coordinates(-3.47600944027389, 40.5370635727521)
+        self.assertEqual(len(cadaster_list), 14)
+        for cadaster in cadaster_list:
+            self.assertIsNotNone(cadaster.from_elasticsearch())
+
+    def test_search_by_provinces_creates_and_stores_in_elasticsearch(self):
+        cadaster_list = ParserXML.process_search_by_provinces(['MADRID'], max_times=1)
+        self.assertEqual(len(cadaster_list), 1)
+        for cadaster in cadaster_list:
+            self.assertIsNotNone(cadaster.from_elasticsearch())
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/src/tests/scrappers/scrapper_html_tests.py b/src/tests/scrappers/scrapper_html_tests.py
index 198f7b5..b050af4 100644
--- a/src/tests/scrappers/scrapper_html_tests.py
+++ b/src/tests/scrappers/scrapper_html_tests.py
@@ -1,132 +1,59 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-import os
 import unittest
 
-from src.librecatastro.domain.geometry.geo_polygon import GeoPolygon
-from src.librecatastro.scrapping.parsers.parser_html import ScrapperHTML
-from src.librecatastro.scrapping.searchers.coordinates_searcher import CoordinatesSearcher
-from src.settings import config
-from src.utils.elasticsearch_utils import ElasticSearchUtils
+from src.librecatastro.scrapping.parsers.parser_html import ScrapperHTML, ParserHTML
 
 
 class ScrapperHTMLTests(unittest.TestCase):
-    def test_remove_index_elasticsearch_works(self):
-        ElasticSearchUtils.remove_index()
-        assert True
-
-    def test_create_index_elasticsearch_works(self):
-        ElasticSearchUtils.create_index()
-        assert True
-
-    def test_coordinate_creates_cadaster(self):
-        cadaster_list = ScrapperHTML.parse_coord(-3.68, 40.47)
-        self.assertEqual(len(cadaster_list), 1)
-        cadaster = cadaster_list[0]
-        self.assertEqual(cadaster.cadaster, '2302909VK4820A0001GK')
-
-    def test_coordinate_multiparcela_creates_cadaster(self):
-        cadaster_list = ScrapperHTML.parse_coord(-0.33, 39.47)
-        self.assertTrue(len(cadaster_list) > 1)
-
-    def test_coordinate_creates_cadaster_and_stores_in_elasticsearch(self):
-        cadaster_list = ScrapperHTML.parse_coord(-3.68, 40.47)
-        self.assertEqual(len(cadaster_list), 1)
-        cadaster = cadaster_list[0]
-        cadaster.to_elasticsearch()
-        self.assertIsNotNone(cadaster.from_elasticsearch())
-
-    def test_cadaster_site_lot_creates_cadaster_and_sets_site_lot(self):
-        cadaster_list = ScrapperHTML.scrap_cadaster('45134A02500003')
-        self.assertEqual(len(cadaster_list), 1)
-        cadaster = cadaster_list[0]
-        self.assertEqual(cadaster.address.site, '25')
-        self.assertEqual(cadaster.address.lot, '3')
-
-    def test_cadaster_full_creates_cadaster(self):
+    def test_cadaster_full_returns_html(self):
         cadaster_list = ScrapperHTML.scrap_cadaster('0083101WK2008S0001PD')
         self.assertEqual(len(cadaster_list), 1)
-        cadaster = cadaster_list[0]
-        self.assertEqual(cadaster.address.city, 'ALMONACID DEL MARQUESADO')
-        self.assertEqual(cadaster.address.province, 'CUENCA')
+        html, picture = cadaster_list[0]
+        self.assertIsNotNone(html)
+        self.assertIsNone(picture)
 
-    def test_cadaster_full_creates_cadaster_with_constructions(self):
+    def test_cadaster_full_with_picture_returns_html_and_picture(self):
+        cadaster_list = ScrapperHTML.scrap_cadaster('0083101WK2008S0001PD', pictures=True)
+        self.assertEqual(len(cadaster_list), 1)
+        html, picture = cadaster_list[0]
+        self.assertIsNotNone(html)
+        self.assertIsNotNone(picture)
+
+    def test_cadaster_half_site_lot_returns_html(self):
+        cadaster_list = ScrapperHTML.scrap_cadaster('45134A02500003')
+        self.assertEqual(len(cadaster_list), 1)
+        html, picture = cadaster_list[0]
+        self.assertIsNotNone(html)
+        self.assertIsNone(picture)
+
+    def test_cadaster_half_site_lot_returns_html_and_picture(self):
+        cadaster_list = ScrapperHTML.scrap_cadaster('45134A02500003', pictures=True)
+        self.assertEqual(len(cadaster_list), 1)
+        html, picture = cadaster_list[0]
+        self.assertIsNotNone(html)
+        self.assertIsNotNone(picture)
+
+    def test_cadaster_full_with_constructions_returns_html(self):
         cadaster_list = ScrapperHTML.scrap_cadaster('5036901NH2553N0001HB')
         self.assertEqual(len(cadaster_list), 1)
-        cadaster = cadaster_list[0]
-        self.assertTrue(len(cadaster.constructions) > 0)
+        html, picture = cadaster_list[0]
+        self.assertIsNotNone(html)
+        self.assertIsNone(picture)
 
-    def test_cadaster_half_creates_cadaster(self):
-        cadaster_list = ScrapperHTML.scrap_cadaster('0183001WK2008S')
-        self.assertEqual(len(cadaster_list), 1)
-        
cadaster = cadaster_list[0] - self.assertEqual(cadaster.address.city, 'ALMONACID DEL MARQUESADO') - self.assertEqual(cadaster.address.province, 'CUENCA') - - def test_cadaster_half_creates_cadaster_2(self): - cadaster_list = ScrapperHTML.scrap_cadaster('21012A03100046') - self.assertEqual(len(cadaster_list), 1) - cadaster = cadaster_list[0] - self.assertEqual(cadaster.address.province, 'HUELVA') - - def test_cadaster_no_cp_creates_cadaster(self): + def test_cadaster_no_cp_returns_html(self): cadaster_list = ScrapperHTML.scrap_cadaster('06145A00500028') self.assertEqual(len(cadaster_list), 1) - cadaster = cadaster_list[0] - self.assertIsNone(cadaster.address.cp) - self.assertEqual(cadaster.address.province, 'BADAJOZ') + html, picture = cadaster_list[0] + self.assertIsNotNone(html) + self.assertIsNone(picture) def test_cadaster_multiparcela_returns_list_of_2(self): cadaster_list = ScrapperHTML.scrap_cadaster('22282A00900547') self.assertEqual(len(cadaster_list), 2) - def test_cadaster_is_stored_in_elasticsearch(self): - cadaster_list = ScrapperHTML.scrap_cadaster('0183001WK2008S') - self.assertEqual(len(cadaster_list), 1) - cadaster = cadaster_list[0] - cadaster.to_elasticsearch() - self.assertIsNotNone(cadaster.from_elasticsearch()) - - def scrap_random_until_x_times_found(self, times): - polygon = GeoPolygon(os.path.join(config['coordinates_path'], 'spain_polygon.json')) - coord = polygon.get_bounding_box() - cadaster_list = CoordinatesSearcher.search_by_coordinates_random_max_n_matches(times, int(coord[0] * config['scale']), int(coord[2] * config['scale']), int(coord[1] * config['scale']), int(coord[3] * config['scale']), ScrapperHTML) - self.assertTrue(len(cadaster_list) >= times) - return cadaster_list - - def test_scrap_random_until_5_found(self): - self.scrap_random_until_x_times_found(5) - - def test_scrap_random_until_5_is_stored_in_elasticsearch(self): - cadaster_list = self.scrap_random_until_x_times_found(5) - for cadaster in cadaster_list: - cadaster.to_elasticsearch() - self.assertIsNotNone(cadaster.from_elasticsearch()) - - def test_scrap_random_until_1_is_stored_in_elasticsearch(self): - cadaster_list = self.scrap_random_until_x_times_found(1) - for cadaster in cadaster_list: - cadaster.to_elasticsearch() - self.assertIsNotNone(cadaster.from_elasticsearch()) - - def test_loading_point_is_in_polygon_returns_true(self): - polygon = GeoPolygon(os.path.join(config['coordinates_path'], 'spain_polygon.json')) - self.assertTrue(polygon.is_point_in_polygon(lon=-5.295410156250001, lat=40.069664523297774)) - - def test_loading_point_is_not_in_polygon_returns_false(self): - polygon = GeoPolygon(os.path.join(config['coordinates_path'], 'spain_polygon.json')) - self.assertFalse(polygon.is_point_in_polygon(lon=-1.9335937500000002, lat=48.31242790407178)) - - def test_polygon_has_correct_bounding_box(self): - polygon = GeoPolygon(os.path.join(config['coordinates_path'], 'spain_polygon.json')) - self.assertIsNotNone(polygon.get_bounding_box()) - - def test_if_pictures_enabled_picture_is_set(self): - cadaster_list = ScrapperHTML.scrap_cadaster('06145A00500028', pictures=True) - self.assertIsNotNone(cadaster_list[0].picture) - if __name__ == '__main__': unittest.main() diff --git a/src/tests/scrappers/scrapper_xml_tests.py b/src/tests/scrappers/scrapper_xml_tests.py index 109d74a..f02baaf 100644 --- a/src/tests/scrappers/scrapper_xml_tests.py +++ b/src/tests/scrappers/scrapper_xml_tests.py @@ -6,42 +6,12 @@ import unittest from time import sleep from 
src.librecatastro.domain.cadaster_entry.cadaster_entry_xml import CadasterEntryXML -from src.librecatastro.scrapping.parsers.parser_xml import ScrapperXML, ParserXML +from src.librecatastro.scrapping.parsers.parser_xml import ParserXML from src.librecatastro.scrapping.scrappers.scrapper_xml import ScrapperXML from src.settings import config -from src.utils.elasticsearch_utils import ElasticSearchUtils class ScrapperXMLTests(unittest.TestCase): - def test_scrapper_retrieves_dict_provinces(self): - try: - self.assertEqual(ScrapperXML.get_provinces().consulta_provinciero.control.cuprov, '48') - except: - self.assertFalse(config['servers_down_message']) - exit(-1) - - def test_scrapper_retrieves_dict_cities(self): - try: - self.assertEqual(ScrapperXML.get_cities('ALACANT').consulta_municipiero.control.cumun, '141') - except: - self.assertFalse(config['servers_down_message']) - exit(-1) - - def test_scrapper_retrieves_dict_addresses(self): - try: - self.assertEqual(ScrapperXML.get_addresses('ALACANT', 'AGOST').consulta_callejero.control.cuca, '117') - except: - self.assertFalse(config['servers_down_message']) - exit(-1) - - def test_get_cadaster_entries_by_cadaster_is_up(self): - cadasters = ['2503906VK4820D0001MX'] - try: - for cadaster in cadasters: - ScrapperXML.get_cadaster_entries_by_cadaster('', '', cadaster) - except: - self.assertFalse(config['servers_down_message']) - exit(-1) def test_scrapper_retrieves_dict_addresses_iter(self): iterator = ScrapperXML.get_address_iter() diff --git a/src/tests/searchers/__init__.py b/src/tests/searchers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/tests/searchers/coordinates_searcher_tests.py b/src/tests/searchers/coordinates_searcher_tests.py new file mode 100644 index 0000000..da2c4f3 --- /dev/null +++ b/src/tests/searchers/coordinates_searcher_tests.py @@ -0,0 +1,48 @@ +import os +import unittest + +from src.librecatastro.domain.geometry.geo_polygon import GeoPolygon +from src.librecatastro.scrapping.parsers.parser_html import ParserHTML +from src.librecatastro.scrapping.searchers.coordinates_searcher import CoordinatesSearcher +from src.settings import config + + +class CoordinatesSearcherTests(unittest.TestCase): + + def search_random_until_x_times_found_by_html(self, times): + polygon = GeoPolygon(os.path.join(config['coordinates_path'], 'spain_polygon.json')) + coord = polygon.get_bounding_box() + cadaster_list = CoordinatesSearcher.search_by_coordinates_random_max_n_matches(times, int(coord[0] * config['scale']), int(coord[2] * config['scale']), int(coord[1] * config['scale']), int(coord[3] * config['scale']), ParserHTML) + self.assertTrue(len(cadaster_list) >= times) + return cadaster_list + + def test_search_random_until_5_found(self): + self.search_random_until_x_times_found_by_html(5) + + def test_search_random_until_5_is_stored_in_elasticsearch(self): + cadaster_list = self.search_random_until_x_times_found_by_html(5) + for cadaster in cadaster_list: + cadaster.to_elasticsearch() + self.assertIsNotNone(cadaster.from_elasticsearch()) + + def test_search_random_until_1_is_stored_in_elasticsearch(self): + cadaster_list = self.search_random_until_x_times_found_by_html(1) + for cadaster in cadaster_list: + cadaster.to_elasticsearch() + self.assertIsNotNone(cadaster.from_elasticsearch()) + + def test_loading_point_is_in_polygon_returns_true(self): + polygon = GeoPolygon(os.path.join(config['coordinates_path'], 'spain_polygon.json')) + self.assertTrue(polygon.is_point_in_polygon(lon=-5.295410156250001, 
lat=40.069664523297774)) + + def test_loading_point_is_not_in_polygon_returns_false(self): + polygon = GeoPolygon(os.path.join(config['coordinates_path'], 'spain_polygon.json')) + self.assertFalse(polygon.is_point_in_polygon(lon=-1.9335937500000002, lat=48.31242790407178)) + + def test_polygon_has_correct_bounding_box(self): + polygon = GeoPolygon(os.path.join(config['coordinates_path'], 'spain_polygon.json')) + self.assertIsNotNone(polygon.get_bounding_box()) + + +if __name__ == '__main__': + unittest.main() diff --git a/src/tests/servers_health/__init__.py b/src/tests/servers_health/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/tests/servers_health/server_health_tests.py b/src/tests/servers_health/server_health_tests.py new file mode 100644 index 0000000..74f3589 --- /dev/null +++ b/src/tests/servers_health/server_health_tests.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import unittest + +from src.librecatastro.scrapping.scrapper import Scrapper +from src.librecatastro.scrapping.scrappers.scrapper_xml import ScrapperXML +from src.settings import config +from src.utils.cadastro_logger import CadastroLogger + +'''Logger''' +logger = CadastroLogger(__name__).logger + + +class ServerHealthTests(unittest.TestCase): + def test_scrapper_retrieves_dict_provinces(self): + try: + self.assertEqual(Scrapper.get_provinces().consulta_provinciero.control.cuprov, '48') + except: + logger.debug(config['servers_down_message_001']) + + def test_scrapper_retrieves_dict_cities(self): + try: + self.assertEqual(Scrapper.get_cities('ALACANT').consulta_municipiero.control.cumun, '141') + except: + logger.debug(config['servers_down_message_001']) + + def test_scrapper_retrieves_dict_addresses(self): + try: + self.assertEqual(Scrapper.get_addresses('ALACANT', 'AGOST').consulta_callejero.control.cuca, '117') + except: + logger.debug(config['servers_down_message_001']) + + def test_get_cadaster_entries_by_cadaster_is_up(self): + cadasters = ['2503906VK4820D0001MX'] + try: + for cadaster in cadasters: + ScrapperXML.get_cadaster_entries_by_cadaster('', '', cadaster) + except: + logger.debug(config['servers_down_message_002']) + + @staticmethod + def healthcheck(): + suite = unittest.TestLoader().loadTestsFromTestCase(ServerHealthTests) + unittest.TextTestRunner().run(suite) + + +if __name__ == '__main__': + unittest.main()
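
Usage note (illustrative, not part of the diff): the new --health flag wired into
libreCadastro.py above simply loads ServerHealthTests and runs it with a
TextTestRunner, so the same check can be triggered from the command line or from
Python. A minimal sketch, assuming the repository root is on PYTHONPATH and the
entry script is invoked directly:

    # From the shell (flag added in this patch):
    #   python libreCadastro.py --health
    #
    # Or programmatically, calling the same helper the flag uses:
    from src.tests.servers_health.server_health_tests import ServerHealthTests

    # Runs the ServerHealthTests suite and prints the results to the console.
    ServerHealthTests.healthcheck()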