Skip to content

Commit 3e0571c

Browse files
committed
Download test PDFs with the Fetch API
Using the Fetch API simplifies and shortens the `downloadFile` function considerably since, among other things, it handles redirects[1] by default. Also, the regular expression in `downloadManifestFiles` can now be replaced with a simple string method (`trimEnd`).

---

[1] Implementations of the Fetch API should already prevent e.g. redirect loops and limit the total number of redirects allowed.
1 parent 2643125 commit 3e0571c

1 file changed

Lines changed: 7 additions & 43 deletions

File tree

test/downloadutils.mjs

Lines changed: 7 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,6 @@
1515

1616
import crypto from "crypto";
1717
import fs from "fs";
18-
import http from "http";
19-
import https from "https";
20-
import { resolve as urlResolve } from "url";
2118

2219
function rewriteWebArchiveUrl(url) {
2320
// Web Archive URLs need to be transformed to add `if_` after the ID.
@@ -32,54 +29,21 @@ function rewriteWebArchiveUrl(url) {
3229
return url;
3330
}
3431

35-
function downloadFile(file, url, redirects = 0) {
32+
async function downloadFile(file, url) {
3633
url = rewriteWebArchiveUrl(url);
37-
const protocol = /^https:\/\//.test(url) ? https : http;
3834

39-
return new Promise((resolve, reject) => {
40-
protocol
41-
.get(url, async function (response) {
42-
if ([301, 302, 307, 308].includes(response.statusCode)) {
43-
if (redirects > 10) {
44-
response.resume();
45-
reject(new Error("Too many redirects"));
46-
return;
47-
}
48-
const redirectTo = urlResolve(url, response.headers.location);
49-
try {
50-
await downloadFile(file, redirectTo, ++redirects);
51-
resolve();
52-
} catch (ex) {
53-
response.resume();
54-
reject(ex);
55-
}
56-
return;
57-
}
58-
59-
if (response.statusCode !== 200) {
60-
response.resume();
61-
reject(new Error(`HTTP ${response.statusCode}`));
62-
return;
63-
}
64-
65-
const stream = fs.createWriteStream(file);
66-
stream.on("error", error => reject(error));
67-
stream.on("finish", () => {
68-
stream.end();
69-
resolve();
70-
});
71-
response.pipe(stream);
72-
})
73-
.on("error", error => reject(error));
74-
});
35+
const response = await fetch(url);
36+
if (!response.ok) {
37+
throw new Error(response.statusText);
38+
}
39+
return fs.promises.writeFile(file, response.body);
7540
}
7641

7742
async function downloadManifestFiles(manifest) {
7843
const links = manifest
7944
.filter(item => item.link && !fs.existsSync(item.file))
8045
.map(item => {
81-
let url = fs.readFileSync(`${item.file}.link`).toString();
82-
url = url.replace(/\s+$/, "");
46+
const url = fs.readFileSync(`${item.file}.link`).toString().trimEnd();
8347
return { file: item.file, url };
8448
});
8549

0 commit comments

Comments
 (0)