Merge branch 'main' of github.com:PucelaBits/websegura into main

nukeador 2021-02-08 22:11:43 +01:00
commit 5baf24d9d8
6 changed files with 4393 additions and 12 deletions

.github/workflows/tests.yml (new file, +24 lines)

@@ -0,0 +1,24 @@
name: tests

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  build:
    runs-on: ubuntu-20.04
    steps:
      - uses: actions/checkout@v2
      - uses: actions/setup-node@v2
        with:
          node-version: "14"
      - name: Install dependencies
        run: npm ci
      - name: Run tests
        run: npm test

(scan-result JSON; filename not shown in this view)
@@ -1,6 +1,6 @@
 {
   "algorithm_version": 2,
-  "end_time": "Fri, 05 Feb 2021 19:25:04 GMT",
+  "end_time": "Mon, 08 Feb 2021 19:58:47 GMT",
   "grade": "F",
   "hidden": false,
   "likelihood_indicator": "MEDIUM",
@@ -8,12 +8,12 @@
     "Connection": "close",
     "Content-Length": "315",
     "Content-Type": "text/html; charset=us-ascii",
-    "Date": "Fri, 05 Feb 2021 19:24:58 GMT",
+    "Date": "Mon, 08 Feb 2021 19:58:40 GMT",
     "Server": "Microsoft-HTTPAPI/2.0"
   },
-  "scan_id": 17607164,
+  "scan_id": 17660262,
   "score": 0,
-  "start_time": "Fri, 05 Feb 2021 19:24:55 GMT",
+  "start_time": "Mon, 08 Feb 2021 19:58:38 GMT",
   "state": "FINISHED",
   "status_code": 404,
   "tests_failed": 6,

crawler/sites-parser.js
@@ -8,16 +8,23 @@ const glob = require('fast-glob');
const MAX_TIME_TO_REFRESH_MILLIS = 3 * 24 * 60 * 60 * 1000; // 3 days, can be increased when we have many sites to scan

/**
- * Gets the global, community and province files and returns the URLs
- * of those that have not been refreshed in the last MAX_TIME_TO_REFRESH_MILLIS.
+ * Gets the global, community and province files.
 */
+function getAllUrls() {
+  const files = glob.sync('_data/{comunidades,provincias}/*.json');
+  files.push('_data/general.json')
+  return files
+    .flatMap(file => JSON.parse(fs.readFileSync(file)).webs.map(x => beautify(x.url)))
+}
+
+/**
+ * Returns the URLs of the sites that have not been refreshed
+ * in the last MAX_TIME_TO_REFRESH_MILLIS.
+ *
+ * For the sake of simplicity, this function is sync for now.
+ */
async function parse() {
-  const files = glob.sync(['_data/{comunidades,provincias}/*.json', '_data/general.json']);
-  return files
-    .flatMap(file => JSON.parse(fs.readFileSync(file)).webs.map(x => beautify(x.url)))
-    .filter(outdated);
+  return getAllUrls().filter(outdated);
}

// Mozilla expects a hostname (no trailing / and no "http[s]://" protocol prefix)

@@ -46,5 +53,7 @@ function outdated(site) {
}

module.exports = {
-  parse: parse
+  parse: parse,
+  beautify: beautify,
+  getAllUrls: getAllUrls,
}
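
Exporting beautify and getAllUrls makes the full URL list reusable outside the crawler (the new duplicates test below relies on this). A minimal usage sketch; the body of beautify is not part of this hunk, so the normalization shown in the comment is an assumption based on the hostname note above:

const { getAllUrls, beautify } = require('./crawler/sites-parser');

// beautify presumably reduces a URL to a bare hostname, since Mozilla
// expects no trailing slash and no protocol (assumption; body not shown):
//   beautify('https://example.org/')  ->  'example.org'

const urls = getAllUrls(); // every URL from general.json, comunidades and provincias
console.log(`${urls.length} sites tracked`);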

package-lock.json (generated; 4322 lines changed, diff suppressed because it is too large)

package.json
@@ -10,7 +10,8 @@
     "build": "env NODE_ENV=production npm run build:dev",
     "crawl:analyze": "node crawler/analyze.js",
     "crawl:results": "node crawler/results.js",
-    "crawl": "npm run crawl:analyze && sleep 60 && npm run crawl:results"
+    "crawl": "npm run crawl:analyze && sleep 60 && npm run crawl:results",
+    "test": "ava"
   },
   "repository": {
     "type": "git",
@@ -23,9 +24,21 @@
   "homepage": "https://github.com/PucelaBits/websegura#readme",
   "devDependencies": {
     "@11ty/eleventy": "^0.11.1",
+    "array-find-duplicates": "^2.0.1",
+    "ava": "^3.15.0",
     "axios": "^0.21.1",
     "bottleneck": "^2.19.5",
     "fast-glob": "^3.2.4",
     "sass": "^1.32.5"
   },
+  "ava": {
+    "files": [
+      "tests/**/*.js"
+    ],
+    "cache": true,
+    "concurrency": 5,
+    "failWithoutAssertions": false,
+    "tap": true,
+    "verbose": true
+  }
 }

tests/duplicates.test.js (new file, +13 lines)

@@ -0,0 +1,13 @@
const test = require('ava');
const { getAllUrls } = require('../crawler/sites-parser')
const findDuplicates = require('array-find-duplicates');

test.serial('verify duplicates', (t) => {
  const urls = getAllUrls();
  const duplicates = findDuplicates(urls);
  if (duplicates.length > 0) {
    return t.fail("Found duplicated URLs: " + duplicates.join(", "));
  } else {
    return t.pass()
  }
});
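
array-find-duplicates is assumed here to return the values that occur more than once in the input array; a rough, illustrative equivalent (not the library's actual implementation):

// Sketch of the behavior the test relies on: collect each value seen
// more than once and return the duplicates.
function findDuplicatesSketch(values) {
  const seen = new Set();
  const dupes = new Set();
  for (const value of values) {
    if (seen.has(value)) dupes.add(value); // second sighting marks a duplicate
    seen.add(value);
  }
  return [...dupes];
}

// findDuplicatesSketch(['a.es', 'b.es', 'a.es']) -> ['a.es']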