Fixes error after refactoring in HTML scraping by provinces

This commit is contained in:
J 2019-09-21 15:27:03 +02:00
parent 7cf208a4c2
commit 137ce65ee0
3 changed files with 8 additions and 7 deletions

View File

@ -4,7 +4,7 @@
import sys
import argparse
from src.librecatastro.scrapping.parsers.parser_html import ScrapperHTML
from src.librecatastro.scrapping.parsers.parser_html import ScrapperHTML, ParserHTML
from src.librecatastro.scrapping.parsers.parser_xml import ParserXML
from src.librecatastro.scrapping.searchers.coordinates_searcher import CoordinatesSearcher
from src.librecatastro.scrapping.searchers.provinces_searcher import ProvincesSearcher
@ -31,7 +31,7 @@ if __name__ == "__main__":
if args.scale:
config['scale'] = args.scale
scrapper = ScrapperHTML if args.html else ParserXML
parser = ParserHTML if args.html else ParserXML
filenames = args.filenames
pictures = args.pictures
@ -47,6 +47,6 @@ if __name__ == "__main__":
exit(0)
if args.coords:
CoordinatesSearcher.search_by_coordinates(scrapper, filenames, pictures)
CoordinatesSearcher.search_by_coordinates(parser, filenames, pictures)
else:
ProvincesSearcher.search_by_provinces(scrapper, provinces, pictures, startcity)
ProvincesSearcher.search_by_provinces(parser, provinces, pictures, startcity)

View File

@ -137,6 +137,7 @@ class ParserHTML(Parser):
""" Parsing """
@classmethod
def parse_html_parcela(cls, parsed_html, x=None, y=None, picture=None):
print(parsed_html)
description = parsed_html.find(id='ctl00_Contenido_tblInmueble')
descriptive_data = dict()

View File

@ -88,12 +88,12 @@ class ScrapperHTML(Scrapper):
partial_cadaster_ref = partial_cadaster.find("b")
logger.debug("-->Partial cadaster: {}".format(partial_cadaster_ref.text))
partial_cadaster_text = partial_cadaster_ref.text.strip()
html = ScrapperHTML.scrap_cadaster_full_code(partial_cadaster_text, delimitacion, municipio)
htmls.append((html, picture))
parsed_html = ScrapperHTML.scrap_cadaster_full_code(partial_cadaster_text, delimitacion, municipio)
htmls.append((parsed_html, picture))
sleep(config['sleep_time'])
else:
# Parcela
htmls.append((html, picture))
htmls.append((parsed_html, picture))
return htmls