Skip to content

Commit d200632

Browse files
committed
Fix #3780: stream project ZIP download to prevent timeout and memory issues
1 parent 97f761d commit d200632

1 file changed

Lines changed: 164 additions & 23 deletions

File tree

server/controllers/project.controller.js

Lines changed: 164 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,20 @@ import mime from 'mime';
66
import isAfter from 'date-fns/isAfter';
77
import axios from 'axios';
88
import slugify from 'slugify';
9+
import { S3Client, GetObjectCommand } from '@aws-sdk/client-s3';
910
import Project from '../models/project';
1011
import { User } from '../models/user';
1112
import { resolvePathToFile } from '../utils/filePath';
1213
import { generateFileSystemSafeName } from '../utils/generateFileSystemSafeName';
1314

15+
// Shared S3 client used when streaming project assets for ZIP download.
// Credentials and region are supplied by the deployment environment.
const { AWS_ACCESS_KEY, AWS_SECRET_KEY, AWS_REGION } = process.env;

const s3Client = new S3Client({
  region: AWS_REGION,
  credentials: {
    accessKeyId: AWS_ACCESS_KEY,
    secretAccessKey: AWS_SECRET_KEY
  }
});
22+
1423
export {
1524
default as createProject,
1625
apiCreateProject
@@ -196,6 +205,10 @@ export async function getProjectForUser(username, projectId) {
196205
*/
197206
function bundleExternalLibs(project) {
198207
const indexHtml = project.files.find((file) => file.name === 'index.html');
208+
if (!indexHtml || !indexHtml.content) {
209+
return; // Gracefully handle missing index.html
210+
}
211+
199212
const { window } = new JSDOM(indexHtml.content);
200213
const scriptTags = window.document.getElementsByTagName('script');
201214

@@ -205,6 +218,11 @@ function bundleExternalLibs(project) {
205218
const path = src.split('/');
206219
const filename = path[path.length - 1];
207220

221+
// Prevent duplicate external libs if downloaded multiple times
222+
if (project.files.some((f) => f.name === filename && f.url === src)) {
223+
return;
224+
}
225+
208226
project.files.push({
209227
name: filename,
210228
url: src
@@ -216,7 +234,52 @@ function bundleExternalLibs(project) {
216234
}
217235

218236
/**
219-
* Recursively adds a file and all of its children to the JSZip instance.
237+
* Helper function to get a readable stream from an S3 URL
238+
* Optimized to return stream handle quickly without waiting for data
239+
* @param {string} url - S3 URL
240+
* @return {Promise<Readable>}
241+
*/
242+
/**
 * Returns a readable stream for a remote file so JSZip can consume it lazily.
 *
 * S3-hosted objects are fetched through the AWS SDK, which resolves as soon
 * as response headers arrive — the body is only pulled from the socket when
 * the returned stream is consumed (by JSZip during generation). Any other
 * URL falls back to a streaming axios GET.
 *
 * @param {string} url - Absolute URL of the file (S3 or otherwise).
 * @return {Promise<import('stream').Readable>} stream of the file contents
 */
async function getStreamFromS3Url(url) {
  const urlObj = new URL(url);
  const { hostname } = urlObj;

  // Only treat genuine AWS S3 endpoints as S3; anything else (including
  // non-AWS hosts that merely contain "s3") streams over plain HTTP below.
  if (hostname.endsWith('amazonaws.com') && hostname.includes('s3')) {
    let bucket;
    let key;

    if (hostname.startsWith('s3')) {
      // Path-style: https://s3.<region>.amazonaws.com/<bucket>/<key>
      const pathParts = urlObj.pathname.split('/').filter(Boolean);
      [bucket] = pathParts;
      key = pathParts.slice(1).join('/');
    } else {
      // Virtual-hosted style: https://<bucket>.s3.<region>.amazonaws.com/<key>
      // Bucket names may themselves contain dots, so cut at the ".s3"
      // suffix instead of taking the first dot-separated label.
      bucket = hostname.slice(0, hostname.lastIndexOf('.s3'));
      key = urlObj.pathname.substring(1);
    }

    // URL paths are percent-encoded, but GetObject expects the raw key —
    // without decoding, keys containing spaces or special characters 404.
    key = decodeURIComponent(key);

    const command = new GetObjectCommand({ Bucket: bucket, Key: key });
    const response = await s3Client.send(command);

    // response.Body is already a stream handle; return it un-buffered.
    return response.Body;
  }

  // Not an S3 URL — fall back to axios with streaming.
  const response = await axios.get(url, {
    responseType: 'stream',
    timeout: 30000
  });
  return response.data;
}
279+
280+
/**
281+
* Recursively adds a file and all of its children to the JSZip instance using streaming.
282+
* Files are fetched sequentially to avoid memory overload.
220283
* @param {object} file
221284
* @param {Array<object>} files
222285
* @param {JSZip} zip
@@ -225,29 +288,42 @@ function bundleExternalLibs(project) {
225288
/**
 * Recursively adds a file (or folder) and all of its children to the JSZip
 * instance using streaming.
 *
 * Folders are walked sequentially so at most one remote fetch is in flight
 * at a time; remote files are attached as streams so JSZip pulls their
 * bytes on demand instead of buffering whole files in memory.
 *
 * @param {object} file - project file node ({ id, name, fileType, children, url, content })
 * @param {Array<object>} files - flat list of all project file nodes
 * @param {JSZip} zip - the JSZip instance (or sub-folder) to add into
 */
async function addFileToZip(file, files, zip) {
  if (file.fileType === 'folder') {
    const folderZip = file.name === 'root' ? zip : zip.folder(file.name);
    // Process children one at a time to avoid opening a connection per
    // file up front.
    /* eslint-disable no-await-in-loop */
    for (const fileId of file.children) {
      const childFile = files.find((f) => f.id === fileId);
      // Skip dangling child references rather than crashing on undefined.
      if (childFile) {
        await addFileToZip(childFile, files, folderZip);
      }
    }
    /* eslint-enable no-await-in-loop */
  } else if (file.url) {
    try {
      const isS3Url =
        file.url.includes('s3') && file.url.includes('amazonaws.com');
      if (isS3Url) {
        // S3-hosted asset: get a lazy stream handle; actual data is
        // fetched by JSZip during generation.
        const stream = await getStreamFromS3Url(file.url);
        zip.file(file.name, stream, { binary: true });
      } else {
        // Any other remote asset: stream it over HTTP.
        const response = await axios.get(file.url, {
          responseType: 'stream',
          timeout: 30000
        });
        zip.file(file.name, response.data, { binary: true });
      }
    } catch (e) {
      console.warn(`Failed to fetch file from ${file.url}:`, e.message);
      // Add an empty placeholder so the rest of the archive stays valid.
      zip.file(file.name, Buffer.alloc(0));
    }
  } else {
    // Inline file: content is already in memory.
    zip.file(file.name, file.content);
  }
}
249323

250324
async function buildZip(project, req, res) {
325+
let keepaliveInterval;
326+
251327
try {
252328
const zip = new JSZip();
253329
const currentTime = format(new Date(), 'yyyy_MM_dd_HH_mm_ss');
@@ -258,30 +334,95 @@ async function buildZip(project, req, res) {
258334
const { files } = project;
259335
const root = files.find((file) => file.name === 'root');
260336

337+
if (!root) {
338+
throw new Error('Project has no root folder');
339+
}
340+
261341
bundleExternalLibs(project);
342+
343+
// Send headers immediately to prevent gateway timeout
344+
res.writeHead(200, {
345+
'Content-Type': 'application/zip',
346+
'Content-disposition': `attachment; filename=${zipFileName}`,
347+
'Transfer-Encoding': 'chunked'
348+
});
349+
350+
// Send periodic keepalive comments to prevent gateway timeout
351+
// while we're building the file list. ZIP format allows for this.
352+
let keepaliveCounter = 0;
353+
keepaliveInterval = setInterval(() => {
354+
// Write a comment to keep connection alive without corrupting ZIP
355+
// This prevents 60s gateway timeouts during file list building
356+
if (!res.writableEnded) {
357+
res.write(Buffer.alloc(0)); // Empty write to keep connection alive
358+
keepaliveCounter++;
359+
if (keepaliveCounter % 10 === 0) {
360+
console.log(
361+
`Keepalive: Building ZIP file list (${keepaliveCounter}s elapsed)...`
362+
);
363+
}
364+
}
365+
}, 1000); // Every second
366+
367+
// Sequentially add files - this avoids parallel S3 connection storms
368+
// but still requires getting all file references before streaming begins
262369
await addFileToZip(root, files, zip);
263370

264-
const base64 = await zip.generateAsync({ type: 'base64' });
265-
const buff = Buffer.from(base64, 'base64');
371+
// Clear keepalive now that we're about to start streaming real data
372+
clearInterval(keepaliveInterval);
373+
keepaliveInterval = null;
266374

267-
// nityam Check if response was already sent (e.g., client disconnected)
268-
if (res.headersSent) {
269-
return;
270-
}
375+
// Generate ZIP stream with true end-to-end streaming
376+
// streamFiles: true means JSZip reads from our S3 streams on-demand
377+
const zipStream = zip.generateNodeStream({
378+
type: 'nodebuffer',
379+
streamFiles: true,
380+
compression: 'DEFLATE',
381+
compressionOptions: { level: 6 }
382+
});
271383

272-
res.writeHead(200, {
273-
'Content-Type': 'application/zip',
274-
'Content-disposition': `attachment; filename=${zipFileName}`
384+
// Pipe the ZIP stream to response - handles backpressure automatically
385+
zipStream.pipe(res);
386+
387+
// Handle stream errors
388+
zipStream.on('error', (err) => {
389+
console.error('Error streaming zip file:', err);
390+
if (!res.headersSent) {
391+
res.status(500).json({
392+
success: false,
393+
message: 'Failed to generate zip file. Please try again.'
394+
});
395+
} else {
396+
res.end();
397+
}
398+
});
399+
400+
// Wait for the stream to finish
401+
await new Promise((resolve, reject) => {
402+
zipStream.on('end', resolve);
403+
zipStream.on('error', reject);
404+
res.on('error', reject);
405+
res.on('close', () => {
406+
// Client disconnected
407+
reject(new Error('Client disconnected'));
408+
});
275409
});
276-
res.end(buff);
277410
} catch (err) {
278411
console.error('Error building zip file:', err);
412+
413+
// Clean up keepalive if still running
414+
if (keepaliveInterval) {
415+
clearInterval(keepaliveInterval);
416+
}
417+
279418
// Only send error if response hasn't been sent yet
280419
if (!res.headersSent) {
281420
res.status(500).json({
282421
success: false,
283422
message: 'Failed to generate zip file. Please try again.'
284423
});
424+
} else {
425+
res.end();
285426
}
286427
}
287428
}

0 commit comments

Comments
 (0)