diff --git a/README.md b/README.md
index e6b9804..3bbf115 100644
--- a/README.md
+++ b/README.md
@@ -79,4 +79,5 @@ Y hacerlo de forma cooperativa, reuniendo esfuerzos y capacidades en torno a un
 - Jaime Gómez Obregón ([@JaimeObregon](https://twitter.com/jaimeobregon)), _scraping_ de los datos y propuesta inicial.
 - JuanMa Cuevas ([@juanmacuevas](https://twitter.com/juanmacuevas)), programador python & android. _hacktivista_ aficionado.
 - Yago F. ([@yaguetoo](https://github.com/yagueto)), programador Python y Java.
+- Pedro J. Molina ([@pjmolina](https://github.com/pjmolina)), programador TypeScript, Docker y dotNet.
 - …
diff --git a/db/mongodb/.gitignore b/db/mongodb/.gitignore
new file mode 100644
index 0000000..3889dc6
--- /dev/null
+++ b/db/mongodb/.gitignore
@@ -0,0 +1,2 @@
+# Ignore data files here
+*.gz
diff --git a/db/mongodb/Dockerfile-seed b/db/mongodb/Dockerfile-seed
new file mode 100644
index 0000000..9fdb48c
--- /dev/null
+++ b/db/mongodb/Dockerfile-seed
@@ -0,0 +1,11 @@
+## Seed database
+
+FROM mongo:5.0.6-focal
+WORKDIR /app/data
+COPY data/* .
+COPY script.sh .
+RUN chmod +x script.sh
+
+ENV HOST=mongodb
+
+CMD ["/app/data/script.sh"]
diff --git a/db/mongodb/README.md b/db/mongodb/README.md
new file mode 100644
index 0000000..baf6894
--- /dev/null
+++ b/db/mongodb/README.md
@@ -0,0 +1,14 @@
+# Scripts para cargar en MongoDB
+
+Scripts para carga de datos desde CSV en una base de datos MongoDB con docker.
+
+## Prerrequisitos
+
+- bash
+- Docker
+
+## Cómo usarlo
+
+```bash
+./docker-build.sh
+```
diff --git a/db/mongodb/README_en.md b/db/mongodb/README_en.md
new file mode 100644
index 0000000..03cc42a
--- /dev/null
+++ b/db/mongodb/README_en.md
@@ -0,0 +1,14 @@
+# Load Scripts for MongoDB
+
+Scripts to load CSV data into a MongoDB database using Docker.
+
+## Prerequisites
+
+- bash
+- Docker
+
+## How to use it
+
+```bash
+./docker-build.sh
+```
diff --git a/db/mongodb/data/convocatorias-headers.csv b/db/mongodb/data/convocatorias-headers.csv
new file mode 100644
index 0000000..de4bc5f
--- /dev/null
+++ b/db/mongodb/data/convocatorias-headers.csv
@@ -0,0 +1 @@
+id, codigo_bdns, MRR, administracion, departamento, organo, fecha_registro, titulo_convocatoria, url_bases_regul, titulo_cooficial, colDesconocida1, colDesconocida2, colDesconocida3
diff --git a/db/mongodb/data/juridicas-headers.csv b/db/mongodb/data/juridicas-headers.csv
new file mode 100644
index 0000000..ee04ab7
--- /dev/null
+++ b/db/mongodb/data/juridicas-headers.csv
@@ -0,0 +1 @@
+id, id_convocatoria, administracion, departamento, organo, titulo_convocatoria, url_bases_regul, apli_presupuestaria, fecha_concesion, beneficiario, importe, instrumento, ayuda_equivalente, detalles, codigo_bdns, valor_desconocido, id_interna
diff --git a/db/mongodb/docker-build.sh b/db/mongodb/docker-build.sh
new file mode 100755
index 0000000..89cea6a
--- /dev/null
+++ b/db/mongodb/docker-build.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+# Create an initial DB in mongodb from CSV files.
+#
+# Builds the mongo-seed image, starts a MongoDB container named mongo-sub
+# and runs the seed container against it.
+#
+# Environment:
+#   PORT  host port published for MongoDB, default 27017
+
+# Abort on the first failing step instead of reporting success regardless.
+set -eu
+
+PORT=${PORT:-27017}
+
+# All paths below are relative to this script, so run from its directory.
+cd "$(dirname "$0")"
+
+cp ../../files/*.csv.gz ./data/
+docker build -t mongo-seed -f Dockerfile-seed .
+
+# Remove containers left over from a previous run. They may not exist yet,
+# so a failure here is expected and ignored.
+docker rm -f mongo-sub mongo-seed || true
+
+docker run --name mongo-sub -d -p "$PORT:27017" mongo:5.0.6-focal
+
+docker run --link mongo-sub:mongodb --name mongo-seed mongo-seed
+
+echo "Done. DB ready listening at $PORT!"
diff --git a/db/mongodb/script.sh b/db/mongodb/script.sh
new file mode 100644
index 0000000..eda9a63
--- /dev/null
+++ b/db/mongodb/script.sh
@@ -0,0 +1,40 @@
+#!/bin/sh
+# Import data: load the gzipped CSV dumps found in the working directory
+# into the "subvenciones" database.
+#
+# Environment:
+#   HOST  MongoDB host to import into, default mongodb
+#
+# Start and end timestamps are written to the file named status.
+
+# Stop at the first failing step so that the final status line is only
+# written after a successful import.
+set -eu
+
+# Keep a HOST provided by the image ENV or by docker run -e.
+HOST=${HOST:-mongodb}
+
+echo "$(date -u) Import started" > status
+
+# Start each working file from its header line, so that --headerline can
+# name the fields.
+cp convocatorias-headers.csv c1.csv
+cp juridicas-headers.csv j1.csv
+cp juridicas-headers.csv j2.csv
+
+# Append the decompressed rows. gzip -dc leaves the .gz files in place,
+# so the script can be run again.
+gzip -dc convocatorias.csv.gz >> c1.csv
+gzip -dc juridicas_1.csv.gz >> j1.csv
+gzip -dc juridicas_2.csv.gz >> j2.csv
+
+# --drop empties each collection before its first file is loaded. The
+# second juridicas file is then appended to the same collection.
+mongoimport --host "$HOST" --type csv -d subvenciones -c convocatorias --headerline --drop c1.csv
+mongoimport --host "$HOST" --type csv -d subvenciones -c juridicas --headerline --drop j1.csv
+mongoimport --host "$HOST" --type csv -d subvenciones -c juridicas --headerline j2.csv
+
+# Remove only the working files. The header files are kept for a re-run.
+rm -f c1.csv j1.csv j2.csv
+
+echo "$(date -u) Import done!" >> status