From 888a7a3ea7a0bac20b82102616c5116b44743d91 Mon Sep 17 00:00:00 2001 From: Leif Henriksen Date: Wed, 3 Jun 2026 13:13:40 +0200 Subject: [PATCH 1/8] CLDSRV-908: add getCopyObjectChecksumAlgorithm helper --- .../apiUtils/integrity/validateChecksums.js | 36 ++++++++ .../apiUtils/integrity/validateChecksums.js | 87 +++++++++++++++++++ 2 files changed, 123 insertions(+) diff --git a/lib/api/apiUtils/integrity/validateChecksums.js b/lib/api/apiUtils/integrity/validateChecksums.js index f14f6bd4f9..ee9f13b9c0 100644 --- a/lib/api/apiUtils/integrity/validateChecksums.js +++ b/lib/api/apiUtils/integrity/validateChecksums.js @@ -39,6 +39,13 @@ const errMPUTypeWithoutAlgo = errorInstances.InvalidRequest.customizeDescription 'The x-amz-checksum-type header can only be used ' + 'with the x-amz-checksum-algorithm header.', ); +// TODO(S3C-11278): Update with 'MD5', 'SHA512', 'XXHASH128', 'XXHASH3', 'XXHASH64' when they are introduced. +// https://scality.atlassian.net/browse/S3C-11278 +const errCopyChecksumAlgoNotSupported = errorInstances.InvalidRequest.customizeDescription( + 'Checksum algorithm provided is unsupported. Please try again with any of the valid types: ' + + '[CRC32, CRC32C, CRC64NVME, SHA1, SHA256]', +); + const checksumedMethods = Object.freeze({ completeMultipartUpload: true, multiObjectDelete: true, @@ -81,6 +88,7 @@ const ChecksumError = Object.freeze({ MPUTypeInvalid: 'MPUTypeInvalid', MPUTypeWithoutAlgo: 'MPUTypeWithoutAlgo', MPUInvalidCombination: 'MPUInvalidCombination', + CopyChecksumAlgoNotSupported: 'CopyChecksumAlgoNotSupported', }); const base64Regex = /^[A-Za-z0-9+/]*={0,2}$/; @@ -401,6 +409,8 @@ function arsenalErrorFromChecksumError(err) { `The ${err.details.type} checksum type cannot be used ` + `with the ${err.details.algorithm.toUpperCase()} checksum algorithm.`, ); + case ChecksumError.CopyChecksumAlgoNotSupported: + return errCopyChecksumAlgoNotSupported; default: return ArsenalErrors.BadDigest; } @@ -585,6 +595,31 @@ function computeFullObjectMPUChecksum(algorithm, parts) { return { checksum: combinePartCrcs(parts, params.polyReversed, params.dim), error: null }; } +/** + * Read and validate the x-amz-checksum-algorithm header for CopyObject. + * + * @param {object} headers - request headers + * @returns {{ error: null, algorithm: string|null } + * | { error: { error: string, details: { algorithm: string } }, algorithm: null }} + */ +function getCopyObjectChecksumAlgorithm(headers) { + const requested = headers['x-amz-checksum-algorithm']; + if (requested === undefined) { + return { error: null, algorithm: null }; + } + const algorithm = requested.toLowerCase(); + if (!algorithms[algorithm]) { + return { + error: { + error: ChecksumError.CopyChecksumAlgoNotSupported, + details: { algorithm: requested }, + }, + algorithm: null, + }; + } + return { error: null, algorithm }; +} + module.exports = { ChecksumError, defaultChecksumData, @@ -597,4 +632,5 @@ module.exports = { getChecksumDataFromMPUHeaders, computeCompositeMPUChecksum, computeFullObjectMPUChecksum, + getCopyObjectChecksumAlgorithm, }; diff --git a/tests/unit/api/apiUtils/integrity/validateChecksums.js b/tests/unit/api/apiUtils/integrity/validateChecksums.js index 01bebac4a5..ce773ffcce 100644 --- a/tests/unit/api/apiUtils/integrity/validateChecksums.js +++ b/tests/unit/api/apiUtils/integrity/validateChecksums.js @@ -10,6 +10,7 @@ const { getChecksumDataFromHeaders, arsenalErrorFromChecksumError, getChecksumDataFromMPUHeaders, + getCopyObjectChecksumAlgorithm, } = require('../../../../../lib/api/apiUtils/integrity/validateChecksums'); const { errors: ArsenalErrors } = require('arsenal'); const { config } = require('../../../../../lib/Config'); @@ -930,3 +931,89 @@ describe('getChecksumDataFromMPUHeaders', () => { } }); }); + +describe('getCopyObjectChecksumAlgorithm', () => { + it('should return algorithm null and no error when the header is absent', () => { + const result = getCopyObjectChecksumAlgorithm({}); + assert.strictEqual(result.error, null); + assert.strictEqual(result.algorithm, null); + }); + + it('should ignore unrelated headers', () => { + const result = getCopyObjectChecksumAlgorithm({ + 'content-type': 'application/octet-stream', + host: 'example.com', + }); + assert.strictEqual(result.error, null); + assert.strictEqual(result.algorithm, null); + }); + + const validAlgorithms = [ + ['CRC32', 'crc32'], + ['CRC32C', 'crc32c'], + ['CRC64NVME', 'crc64nvme'], + ['SHA1', 'sha1'], + ['SHA256', 'sha256'], + ]; + + for (const [header, normalized] of validAlgorithms) { + it(`should return algorithm '${normalized}' for header '${header}'`, () => { + const result = getCopyObjectChecksumAlgorithm({ + 'x-amz-checksum-algorithm': header, + }); + assert.strictEqual(result.error, null); + assert.strictEqual(result.algorithm, normalized); + }); + } + + it('should lowercase mixed-case input', () => { + const result = getCopyObjectChecksumAlgorithm({ + 'x-amz-checksum-algorithm': 'Sha256', + }); + assert.strictEqual(result.error, null); + assert.strictEqual(result.algorithm, 'sha256'); + }); + + it('should accept algorithms already in lowercase', () => { + const result = getCopyObjectChecksumAlgorithm({ + 'x-amz-checksum-algorithm': 'crc32', + }); + assert.strictEqual(result.error, null); + assert.strictEqual(result.algorithm, 'crc32'); + }); + + it('should return CopyChecksumAlgoNotSupported for an unknown algorithm', () => { + const result = getCopyObjectChecksumAlgorithm({ + 'x-amz-checksum-algorithm': 'GARBAGE', + }); + assert.strictEqual(result.algorithm, null); + assert(result.error); + assert.strictEqual(result.error.error, + ChecksumError.CopyChecksumAlgoNotSupported); + assert.strictEqual(result.error.details.algorithm, 'GARBAGE'); + }); + + it('should reject algorithms AWS may know about but cloudserver does not support', () => { + // AWS error message lists MD5/SHA512/XXHASH* as known names; we only + // accept the five FULL_OBJECT-capable algorithms. + for (const algo of ['MD5', 'SHA512', 'XXHASH128', 'XXHASH3', 'XXHASH64']) { + const result = getCopyObjectChecksumAlgorithm({ + 'x-amz-checksum-algorithm': algo, + }); + assert.strictEqual(result.algorithm, null, + `expected null algorithm for ${algo}`); + assert.strictEqual(result.error.error, + ChecksumError.CopyChecksumAlgoNotSupported); + } + }); + + it('should produce an arsenal InvalidRequest error via the standard mapping', () => { + const result = getCopyObjectChecksumAlgorithm({ + 'x-amz-checksum-algorithm': 'GARBAGE', + }); + const err = arsenalErrorFromChecksumError(result.error); + assert.strictEqual(err.is.InvalidRequest, true); + assert(err.description.includes('Checksum algorithm provided is unsupported'), + 'expected AWS-shaped error description'); + }); +}); From f3c3ddfd962059a6108bbd5591b4e92e8e3e2233 Mon Sep 17 00:00:00 2001 From: Leif Henriksen Date: Wed, 3 Jun 2026 15:59:47 +0200 Subject: [PATCH 2/8] CLDSRV-908: detect orphans by data-location key in deleteExistingData --- lib/api/objectCopy.js | 36 +++++- tests/unit/api/objectCopy.js | 214 +++++++++++++++++++++++++++++++++++ 2 files changed, 244 insertions(+), 6 deletions(-) diff --git a/lib/api/objectCopy.js b/lib/api/objectCopy.js index 02b0a31d5b..bedff1856d 100644 --- a/lib/api/objectCopy.js +++ b/lib/api/objectCopy.js @@ -35,6 +35,31 @@ const versioningNotImplBackends = constants.versioningNotImplBackends; const externalVersioningErrorMessage = 'We do not currently support putting ' + 'a versioned object to a location-constraint of type AWS or Azure or GCP.'; +/** + * Compute the prior data locations that are orphaned. + * + * @param {Array} dataToDelete - prior data locations from versioningPreprocessing + * @param {Array} newDataGetInfo - new data locations stored in the new metadata + * @returns {Array|null} orphaned entries to batchDelete, or null if none + */ +function _orphanedDataLocations(dataToDelete, newDataGetInfo) { + // Identity is (dataStoreName, key): the same key under a different + // dataStoreName points to a different backend slot, so the old slot + // is an orphan even when the key string collides. dataStoreName is a + // location-constraint name (never contains '|'), so the delimiter is + // unambiguous. + + // We need to normalize to support old legacy location names. + const normalize = loc => (typeof loc === 'string' ? { key: loc } : loc); + const locId = loc => { + const n = normalize(loc); + return `${n.dataStoreName}|${n.key}`; + }; + const newIds = new Set((newDataGetInfo || []).map(locId)); + const orphans = (dataToDelete || []).filter(loc => !newIds.has(locId(loc))); + return orphans.length > 0 ? orphans : null; +} + /** * Preps metadata to be saved (based on copy or replace request header) * @param {object} request - request @@ -547,7 +572,8 @@ function objectCopy(authInfo, request, sourceBucket, if (options.extraMD) { Object.assign(storeMetadataParams, options.extraMD); } - const dataToDelete = options.dataToDelete; + const dataToDelete = _orphanedDataLocations( + options.dataToDelete, destDataGetInfoArr); return next(null, storeMetadataParams, destDataGetInfoArr, destObjMD, serverSideEncryption, destBucketMD, dataToDelete); @@ -589,11 +615,7 @@ function objectCopy(authInfo, request, sourceBucket, function deleteExistingData(dataToDelete, storingNewMdResult, destBucketMD, storeMetadataParams, serverSideEncryption, sourceObjSize, destObjPrevSize, next) { - // Clean up any potential orphans in data if object - // put is an overwrite of already existing - // object with same name, so long as the source is not - // the same as the destination - if (!sourceIsDestination && dataToDelete) { + if (dataToDelete) { const newDataStoreName = storeMetadataParams.dataStoreName; return data.batchDelete(dataToDelete, request.method, newDataStoreName, log, err => { @@ -688,3 +710,5 @@ function objectCopy(authInfo, request, sourceBucket, } module.exports = objectCopy; +// Exposed for unit testing only. +module.exports._orphanedDataLocations = _orphanedDataLocations; diff --git a/tests/unit/api/objectCopy.js b/tests/unit/api/objectCopy.js index 2448f3276f..4600e755fd 100644 --- a/tests/unit/api/objectCopy.js +++ b/tests/unit/api/objectCopy.js @@ -640,3 +640,217 @@ describe('objectCopy with objectKeyByteLimit', () => { }); }); }); + +describe('objectCopy data orphan cleanup on cross-backend copy-to-self', () => { + const prevConfigBackendsData = data.config.backends.data; + const prevConfigLocationConstraint1 = data.config.locationConstraints['us-east-1'].type; + const prevConfigLocationConstraint2 = data.config.locationConstraints['us-east-2'].type; + + before(() => { + data.config.backends.data = 'multiple'; + data.config.locationConstraints['us-east-1'].type = 'aws_s3'; + data.config.locationConstraints['us-east-2'].type = 'aws_s3'; + }); + + after(() => { + data.config.backends.data = prevConfigBackendsData; + data.config.locationConstraints['us-east-1'].type = prevConfigLocationConstraint1; + data.config.locationConstraints['us-east-2'].type = prevConfigLocationConstraint2; + }); + + beforeEach(done => { + cleanup(); + async.series([ + next => bucketPut(authInfo, putSourceBucketRequest, log, next), + next => objectPut(authInfo, versioningTestUtils.createPutObjectRequest( + sourceBucketName, objectKey, objData[0]), undefined, log, next), + ], done); + }); + + afterEach(() => cleanup()); + + it('should reclaim the old location on copy-to-self that lands at a different backend key', done => { + // Copy-to-self where the new metadata points to a different data key + // than the source did (cross-backend rewrite via x-amz-meta-scal- + // location-constraint). The old key is no longer referenced and must + // be batchDeleted — guards against a pre-existing orphan bug. + const batchDeleteSpy = sinon.spy(data, 'batchDelete'); + const copyObjectStub = sinon.stub(data, 'copyObject').callsFake( + (req, srcLoc, sMP, dataLocator, ctx, backendInfo, srcBM, dstBM, sse, l, cb) => + cb(null, [{ key: 'new-backend-key', dataStoreName: 'us-east-2', size: objData[0].length, start: 0 }])); + metadata.getObjectMD(sourceBucketName, objectKey, {}, log, (err, srcMd) => { + assert.ifError(err); + const oldKeys = srcMd.location.map(l => l.key); + const req = new DummyRequest({ + bucketName: sourceBucketName, + namespace, + objectKey, + headers: { + 'x-amz-metadata-directive': 'REPLACE', + 'x-amz-meta-scal-location-constraint': 'us-east-2', + }, + url: `/${sourceBucketName}/${objectKey}`, + socket: {}, + }); + objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, err => { + batchDeleteSpy.restore(); + copyObjectStub.restore(); + assert.ifError(err); + assert(batchDeleteSpy.calledOnce, + 'data.batchDelete should reclaim the old data location'); + const reclaimed = batchDeleteSpy.firstCall.args[0]; + const reclaimedKeys = reclaimed.map(l => l.key); + assert.deepStrictEqual(reclaimedKeys, oldKeys, + 'batchDelete should target the old source key, not the new backend key'); + done(); + }); + }); + }); + + it('should reclaim the old location when the new backend key collides with the old one', done => { + // bucketMatch-style external backends derive the backend key from the + // S3 object key, so a copy-to-self that changes dataStoreName lands at + // the same `key` in a different backend. Identity must include + // dataStoreName or the old slot is silently leaked. + const batchDeleteSpy = sinon.spy(data, 'batchDelete'); + metadata.getObjectMD(sourceBucketName, objectKey, {}, log, (err, srcMd) => { + assert.ifError(err); + const oldLoc = srcMd.location[0]; + const copyObjectStub = sinon.stub(data, 'copyObject').callsFake( + (req, srcLoc, sMP, dataLocator, ctx, backendInfo, srcBM, dstBM, sse, l, cb) => + cb(null, [{ key: oldLoc.key, dataStoreName: 'us-east-2', + size: objData[0].length, start: 0 }])); + const req = new DummyRequest({ + bucketName: sourceBucketName, + namespace, + objectKey, + headers: { + 'x-amz-metadata-directive': 'REPLACE', + 'x-amz-meta-scal-location-constraint': 'us-east-2', + }, + url: `/${sourceBucketName}/${objectKey}`, + socket: {}, + }); + objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, err => { + batchDeleteSpy.restore(); + copyObjectStub.restore(); + assert.ifError(err); + assert(batchDeleteSpy.calledOnce, + 'data.batchDelete should reclaim the old slot even when the backend key matches'); + const reclaimed = batchDeleteSpy.firstCall.args[0]; + assert.strictEqual(reclaimed.length, 1); + assert.strictEqual(reclaimed[0].dataStoreName, oldLoc.dataStoreName, + 'batchDelete must target the old dataStoreName, not the new one'); + assert.strictEqual(reclaimed[0].key, oldLoc.key); + done(); + }); + }); + }); +}); + +describe('objectCopy legacy string-location copy-to-self', () => { + beforeEach(done => { + cleanup(); + async.series([ + next => bucketPut(authInfo, putSourceBucketRequest, log, next), + next => objectPut(authInfo, versioningTestUtils.createPutObjectRequest( + sourceBucketName, objectKey, objData[0]), undefined, log, next), + ], done); + }); + + afterEach(() => cleanup()); + + it('should not delete the reused location on copy-to-self with a legacy string location', done => { + // Pre-md-model-version-2 objects store `location` as a bare string; + // versioningPreprocessing then surfaces dataToDelete as a string array + // while the reused new locator is { key }. The orphan filter must + // normalize both or it deletes the live data on copy-to-self. + const batchDeleteSpy = sinon.spy(data, 'batchDelete'); + metadata.getObjectMD(sourceBucketName, objectKey, {}, log, (err, md) => { + assert.ifError(err); + const legacyKey = String(md.location[0].key); + // eslint-disable-next-line no-param-reassign + md.location = legacyKey; + // FULL_OBJECT checksum + no algo header keeps us on the propagate + // (metadata-only reuse) path, so no data.get on the string key. + // eslint-disable-next-line no-param-reassign + md.checksum = { checksumAlgorithm: 'crc32', checksumValue: 'AAAAAA==', checksumType: 'FULL_OBJECT' }; + metadata.putObjectMD(sourceBucketName, objectKey, md, {}, log, err => { + assert.ifError(err); + const req = new DummyRequest({ + bucketName: sourceBucketName, + namespace, + objectKey, + headers: { 'x-amz-metadata-directive': 'REPLACE' }, + url: `/${sourceBucketName}/${objectKey}`, + socket: {}, + }); + objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, err => { + batchDeleteSpy.restore(); + assert.ifError(err); + assert(batchDeleteSpy.notCalled, + 'the reused legacy string location must not be treated as an orphan'); + done(); + }); + }); + }); + }); +}); + +describe('_orphanedDataLocations', () => { + const orphanedDataLocations = objectCopy._orphanedDataLocations; + + it('should return null when there is nothing to delete', () => { + const newLocs = [{ dataStoreName: 'l1', key: 'a' }]; + assert.strictEqual(orphanedDataLocations(undefined, newLocs), null); + assert.strictEqual(orphanedDataLocations(null, newLocs), null); + assert.strictEqual(orphanedDataLocations([], newLocs), null); + }); + + it('should return null when every prior location is still referenced', () => { + const locs = [ + { dataStoreName: 'l1', key: 'a' }, + { dataStoreName: 'l1', key: 'b' }, + ]; + assert.strictEqual(orphanedDataLocations(locs, locs), null); + }); + + it('should flag a prior location whose key is no longer referenced', () => { + const oldLocs = [{ dataStoreName: 'l1', key: 'a' }]; + const newLocs = [{ dataStoreName: 'l1', key: 'b' }]; + assert.deepStrictEqual(orphanedDataLocations(oldLocs, newLocs), oldLocs); + }); + + it('should treat the same key under a different dataStoreName as an orphan', () => { + const oldLocs = [{ dataStoreName: 'us-east-1', key: 'a' }]; + const newLocs = [{ dataStoreName: 'us-east-2', key: 'a' }]; + assert.deepStrictEqual(orphanedDataLocations(oldLocs, newLocs), oldLocs); + }); + + it('should return only the subset of prior locations that are orphaned', () => { + const reused = { dataStoreName: 'l1', key: 'keep' }; + const orphan = { dataStoreName: 'l1', key: 'gone' }; + assert.deepStrictEqual( + orphanedDataLocations([reused, orphan], [reused]), [orphan]); + }); + + it('should not flag a key referenced by any of several new locations', () => { + const oldLocs = [{ dataStoreName: 'l1', key: 'a' }]; + const newLocs = [ + { dataStoreName: 'l1', key: 'x' }, + { dataStoreName: 'l1', key: 'a' }, + ]; + assert.strictEqual(orphanedDataLocations(oldLocs, newLocs), null); + }); + + it('should normalize a reused legacy string location (not an orphan)', () => { + // pre-md-model-version-2 string location reused as { key } by goGetData + assert.strictEqual( + orphanedDataLocations(['legacyKey'], [{ key: 'legacyKey' }]), null); + }); + + it('should flag a legacy string location that is no longer referenced', () => { + assert.deepStrictEqual( + orphanedDataLocations(['oldKey'], [{ key: 'newKey' }]), ['oldKey']); + }); +}); From 5d7ec7f157213234beb3412fa98dcb58a9585afc Mon Sep 17 00:00:00 2001 From: Leif Henriksen Date: Wed, 27 May 2026 18:43:04 +0200 Subject: [PATCH 3/8] CLDSRV-908: handle checksums on CopyObject --- lib/api/objectCopy.js | 278 ++++++++++++++++-- tests/unit/api/objectCopy.js | 538 +++++++++++++++++++++++++++++++++++ 2 files changed, 793 insertions(+), 23 deletions(-) diff --git a/lib/api/objectCopy.js b/lib/api/objectCopy.js index bedff1856d..e2b074d5af 100644 --- a/lib/api/objectCopy.js +++ b/lib/api/objectCopy.js @@ -1,6 +1,8 @@ const async = require('async'); +const { PassThrough } = require('stream'); -const { errors, errorInstances, versioning, s3middleware, s3routes } = require('arsenal'); +const { errors, errorInstances, jsutil, versioning, s3middleware, s3routes, storage } = require('arsenal'); +const { externalBackendCopy } = storage.data.external.backendUtils; const { validateObjectKeyLength } = s3routes.routesUtils; const getMetaHeaders = s3middleware.userMetadata.getMetaHeaders; const validateHeaders = s3middleware.validateConditionalHeaders; @@ -28,6 +30,10 @@ const { verifyColdObjectAvailable } = require('./apiUtils/object/coldStorage'); const { setSSEHeaders } = require('./apiUtils/object/sseHeaders'); const { updateEncryption } = require('./apiUtils/bucket/updateEncryption'); const { initializeInternalLogRequestQueue, queueInternalLogRequest } = require('../utilities/serverAccessLogger'); +const { algorithms, arsenalErrorFromChecksumError, getCopyObjectChecksumAlgorithm } = + require('./apiUtils/integrity/validateChecksums'); +const ChecksumTransform = require('../auth/streamingV4/ChecksumTransform'); +const kms = require('../kms/wrapper'); const versionIdUtils = versioning.VersionID; const locationHeader = constants.objectLocationConstraintHeader; @@ -60,6 +66,191 @@ function _orphanedDataLocations(dataToDelete, newDataGetInfo) { return orphans.length > 0 ? orphans : null; } +/** + * Concatenate the source object's parts into a single Readable stream by + * reading them sequentially through `data.get`. Returns the PassThrough + * immediately; consumers should pipe it to the next stage and observe + * `error` on this stream for any read failure along the way. + * + * @param {Array} dataLocator - ordered source parts + * @param {object} log - request logger + * @return {PassThrough} + */ +function _pipeSourcePartsThrough(dataLocator, log) { + const passthrough = new PassThrough(); + async.eachSeries(dataLocator, (part, cb) => { + data.get(part, null, log, (err, partStream) => { + if (err) { + return cb(err); + } + const done = jsutil.once(cb); + partStream.once('error', done); + partStream.once('end', () => done()); + partStream.pipe(passthrough, { end: false }); + return undefined; + }); + }, err => { + if (err) { + passthrough.destroy(err); + } else { + passthrough.end(); + } + }); + return passthrough; +} + +/** + * Decide whether the destination's checksum needs to be recomputed by + * streaming the source bytes through a ChecksumTransform. + * + * @param {object} headers - request headers + * @param {object} sourceObjMD - source object metadata + * @returns {boolean} + */ +function _shouldRecomputeChecksum(headers, sourceObjMD) { + const requestedAlgo = headers['x-amz-checksum-algorithm']?.toLowerCase(); + if (sourceObjMD.checksum?.checksumType === 'FULL_OBJECT' + && (!requestedAlgo || requestedAlgo === sourceObjMD.checksum.checksumAlgorithm)) { + return false; + } + return true; +} + +/** + * Pick the checksum algorithm to write on CopyObject. Defaults to CRC64NVME when the source object has no checksum. + * + * @param {string|null} requestedAlgo - validated request algorithm + * @param {object} sourceObjMD - source object metadata + * @returns {string} + */ +function _getCopyObjectChecksumAlgorithm(requestedAlgo, sourceObjMD) { + return requestedAlgo || sourceObjMD.checksum?.checksumAlgorithm || 'crc64nvme'; +} + +/** + * Recompute the destination's checksum by streaming the source bytes + * through a ChecksumTransform. When the bytes don't have to move + * (copy-to-self, same location, no SSE, not versioned) the source is GET'ed + * solely to compute the new digest; the bytes are then discarded and the + * existing data location is reused — only cloudserver's metadata is updated. + * + * @param {object} log - request logger + * @param {object} request - HTTP request + * @param {string|null} requestedAlgo - client-requested algorithm, lowercase + * @param {object} sourceObjMD - source object metadata + * @param {Array} dataLocator - ordered source parts + * @param {boolean} sourceIsDestination - source key == destination key + * @param {boolean} isVersionedObj - destination bucket has versioning enabled + * @param {boolean} needsEncryption - destination requires SSE + * @param {object} storeMetadataParams - mutable carrier for destination metadata + * @param {object} destObjMD - existing destination object metadata + * @param {object} destBucketMD - destination bucket metadata + * @param {object} serverSideEncryption - SSE config for the destination + * @param {object} dataStoreContext - context passed to data.put + * @param {object} backendInfoDest - backend info for the destination + * @param {function} next - waterfall callback + * @return {undefined} + */ +function _recomputeChecksumAndStore(log, request, requestedAlgo, sourceObjMD, dataLocator, + sourceIsDestination, isVersionedObj, needsEncryption, + storeMetadataParams, destObjMD, destBucketMD, serverSideEncryption, + dataStoreContext, backendInfoDest, next) { + const algoName = _getCopyObjectChecksumAlgorithm(requestedAlgo, sourceObjMD); + + // Copy-to-self where the bytes don't have to move (same location, no SSE + // change, not versioned): GET the source through a ChecksumTransform to + // compute the new digest, discard the bytes, and reuse the existing data + // locations. No PUT, no DELETE — only the metadata gets updated. + if (sourceIsDestination && storeMetadataParams.locationMatch + && !needsEncryption && !isVersionedObj) { + log.debug('computing checksum on copy-to-self without rewriting data', + { algorithm: algoName, size: storeMetadataParams.size }); + const sourceStream = _pipeSourcePartsThrough(dataLocator, log); + const checksumStream = new ChecksumTransform(algoName, undefined, false, log); + const finish = jsutil.once(err => { + if (err) { + sourceStream.destroy(err); + checksumStream.destroy(err); + return next(err, destBucketMD); + } + // eslint-disable-next-line no-param-reassign + storeMetadataParams.checksum = { + algorithm: algoName, + value: checksumStream.digest, + type: 'FULL_OBJECT', + }; + return next(null, storeMetadataParams, dataLocator, destObjMD, + serverSideEncryption, destBucketMD); + }); + sourceStream.once('error', finish); + checksumStream.once('error', finish); + checksumStream.on('data', () => {}); + checksumStream.once('end', () => finish()); + sourceStream.pipe(checksumStream); + return undefined; + } + + // Stream source bytes through a ChecksumTransform and write them out as a single put. + log.debug('recomputing checksum on CopyObject', { algorithm: algoName, size: storeMetadataParams.size }); + const sourceStream = _pipeSourcePartsThrough(dataLocator, log); + const checksumStream = new ChecksumTransform(algoName, undefined, false, log); + const done = jsutil.once((err, results) => { + if (err) { + sourceStream.destroy(err); + checksumStream.destroy(err); + if (request.sourceServerAccessLog) { + // eslint-disable-next-line no-param-reassign + request.sourceServerAccessLog.error = err; + } + return next(err, destBucketMD); + } + return next(null, storeMetadataParams, results, destObjMD, serverSideEncryption, destBucketMD); + }); + sourceStream.once('error', done); + checksumStream.once('error', done); + sourceStream.pipe(checksumStream); + const doPut = cipherBundle => data.put( + cipherBundle, checksumStream, storeMetadataParams.size, + dataStoreContext, backendInfoDest, log, + (err, dataRetrievalInfo) => { + if (err) { + return done(err); + } + // eslint-disable-next-line no-param-reassign + storeMetadataParams.checksum = { + algorithm: algoName, + value: checksumStream.digest, + type: 'FULL_OBJECT', + }; + const putResult = { + key: dataRetrievalInfo.key, + size: storeMetadataParams.size, + start: 0, + dataStoreName: dataRetrievalInfo.dataStoreName, + dataStoreType: dataRetrievalInfo.dataStoreType, + dataStoreETag: dataRetrievalInfo.dataStoreETag, + dataStoreVersionId: + dataRetrievalInfo.dataStoreVersionId, + }; + if (cipherBundle) { + putResult.cryptoScheme = cipherBundle.cryptoScheme; + putResult.cipheredDataKey = + cipherBundle.cipheredDataKey; + } + return done(null, [putResult]); + }); + if (serverSideEncryption?.algorithm) { + return kms.createCipherBundle(serverSideEncryption, log, + (err, cipherBundle) => { + if (err) { + return done(err); + } + return doPut(cipherBundle); + }); + } + return doPut(null); +} + /** * Preps metadata to be saved (based on copy or replace request header) * @param {object} request - request @@ -213,6 +404,14 @@ function _prepMetadata(request, sourceObjMD, headers, sourceIsDestination, storeMetadataParams.defaultRetention = defaultRetentionConfig; } + if (sourceObjMD.checksum && !_shouldRecomputeChecksum(headers, sourceObjMD)) { + storeMetadataParams.checksum = { + algorithm: sourceObjMD.checksum.checksumAlgorithm, + value: sourceObjMD.checksum.checksumValue, + type: sourceObjMD.checksum.checksumType, + }; + } + // In case whichMetadata === 'REPLACE' but contentType is undefined in copy // request headers, make sure to keep the original header instead if (!storeMetadataParams.contentType) { @@ -303,6 +502,13 @@ function objectCopy(authInfo, request, sourceBucket, 'PUT', destBucketName, err.code, 'copyObject'); return callback(err); } + const { error: checksumAlgoErr, algorithm: requestedAlgo } = getCopyObjectChecksumAlgorithm(request.headers); + if (checksumAlgoErr) { + const err = arsenalErrorFromChecksumError(checksumAlgoErr); + log.debug('invalid x-amz-checksum-algorithm', { error: checksumAlgoErr }); + monitoring.promMetrics('PUT', destBucketName, err.code, 'copyObject'); + return callback(err); + } const queryContainsVersionId = checkQueryVersionId(request.query); if (queryContainsVersionId instanceof Error) { return callback(queryContainsVersionId); @@ -442,12 +648,12 @@ function objectCopy(authInfo, request, sourceBucket, return next(null, storeMetadataParams, dataLocator, sourceBucketMD, destBucketMD, destObjMD, - sourceLocationConstraintName, backendInfoDest); + sourceLocationConstraintName, backendInfoDest, sourceObjMD); }); }, function getSSEConfiguration(storeMetadataParams, dataLocator, sourceBucketMD, destBucketMD, destObjMD, sourceLocationConstraintName, - backendInfoDest, next) { + backendInfoDest, sourceObjMD, next) { getObjectSSEConfiguration( request.headers, destBucketMD, @@ -455,22 +661,36 @@ function objectCopy(authInfo, request, sourceBucket, (err, sseConfig) => next(err, storeMetadataParams, dataLocator, sourceBucketMD, destBucketMD, destObjMD, sourceLocationConstraintName, - backendInfoDest, sseConfig)); + backendInfoDest, sseConfig, sourceObjMD)); }, function goGetData(storeMetadataParams, dataLocator, sourceBucketMD, destBucketMD, destObjMD, sourceLocationConstraintName, - backendInfoDest, serverSideEncryption, next) { + backendInfoDest, serverSideEncryption, sourceObjMD, next) { const vcfg = destBucketMD.getVersioningConfiguration(); const isVersionedObj = vcfg && vcfg.Status === 'Enabled'; const destLocationConstraintName = storeMetadataParams.dataStoreName; const needsEncryption = serverSideEncryption && !!serverSideEncryption.algo; + const shouldRecomputeChecksum = _shouldRecomputeChecksum(request.headers, sourceObjMD); + let destLocationConstraintType; + if (config.backends.data === 'multiple') { + destLocationConstraintType = config.getLocationConstraintType(destLocationConstraintName); + } + // Skip recompute whenever the copy is a native server-side copy. The + // bytes never transit CloudServer, so we keep the native copy and + // write no checksum rather than forcing a GET + PUT just to hash. + const willUseNativeServerSideCopy = config.backends.data === 'multiple' + && !serverSideEncryption + && externalBackendCopy(config, sourceLocationConstraintName, + destLocationConstraintName, sourceBucketMD, destBucketMD); + const skipExternalBackendRecompute = shouldRecomputeChecksum + && willUseNativeServerSideCopy; // skip if source and dest and location constraint the same and - // versioning is not enabled - // still send along serverSideEncryption info so algo - // and masterKeyId stored properly in metadata + // versioning is not enabled, unless we need to recompute a + // checksum (which requires streaming the bytes through us). if (sourceIsDestination && storeMetadataParams.locationMatch - && !isVersionedObj && !needsEncryption) { + && !isVersionedObj && !needsEncryption + && (!shouldRecomputeChecksum || skipExternalBackendRecompute)) { return next(null, storeMetadataParams, dataLocator, destObjMD, serverSideEncryption, destBucketMD); } @@ -478,11 +698,6 @@ function objectCopy(authInfo, request, sourceBucket, // also skip if 0 byte object, unless location constraint is an // external backend and differs from source, in which case put // metadata to backend - let destLocationConstraintType; - if (config.backends.data === 'multiple') { - destLocationConstraintType = - config.getLocationConstraintType(destLocationConstraintName); - } if (destLocationConstraintType && versioningNotImplBackends[destLocationConstraintType] && isVersionedObj) { @@ -493,9 +708,8 @@ function objectCopy(authInfo, request, sourceBucket, externalVersioningErrorMessage), destBucketMD); } if (dataLocator.length === 0) { - if (!storeMetadataParams.locationMatch && - destLocationConstraintType && - constants.externalBackends[destLocationConstraintType]) { + if (!storeMetadataParams.locationMatch && destLocationConstraintType + && constants.externalBackends[destLocationConstraintType]) { return data.put(null, null, storeMetadataParams.size, dataStoreContext, backendInfoDest, log, (error, objectRetrievalInfo) => { @@ -504,20 +718,27 @@ function objectCopy(authInfo, request, sourceBucket, } const putResult = { key: objectRetrievalInfo.key, - dataStoreName: objectRetrievalInfo. - dataStoreName, - dataStoreType: objectRetrievalInfo. - dataStoreType, + dataStoreName: objectRetrievalInfo.dataStoreName, + dataStoreType: objectRetrievalInfo.dataStoreType, size: storeMetadataParams.size, }; - const putResultArr = [putResult]; - return next(null, storeMetadataParams, putResultArr, + return next(null, storeMetadataParams, [putResult], destObjMD, serverSideEncryption, destBucketMD); }); } return next(null, storeMetadataParams, dataLocator, destObjMD, serverSideEncryption, destBucketMD); } + if (shouldRecomputeChecksum && skipExternalBackendRecompute) { + log.debug('skipping checksum recompute on same external backend', + { source: sourceLocationConstraintName, dest: destLocationConstraintName }); + } + if (shouldRecomputeChecksum && !skipExternalBackendRecompute) { + return _recomputeChecksumAndStore(log, request, requestedAlgo, sourceObjMD, dataLocator, + sourceIsDestination, isVersionedObj, needsEncryption, + storeMetadataParams, destObjMD, destBucketMD, serverSideEncryption, + dataStoreContext, backendInfoDest, next); + } const originalIdentityImpDenies = request.actionImplicitDenies; // eslint-disable-next-line no-param-reassign delete request.actionImplicitDenies; @@ -661,12 +882,23 @@ function objectCopy(authInfo, request, sourceBucket, 'PUT', destBucketName, err.code, 'copyObject'); return callback(err, null, corsHeaders); } + let checksumXml = ''; + const checksum = storeMetadataParams?.checksum; + const checksumAlgo = checksum && algorithms[checksum.algorithm]; + if (checksum && checksumAlgo) { + checksumXml = + `<${checksumAlgo.xmlTag}>${checksum.value}` + + `${checksum.type}`; + } else if (checksum) { + log.error('unknown checksum algorithm in source object metadata', { algorithm: checksum.algorithm }); + } const xml = [ '', '', '', new Date(storeMetadataParams.lastModifiedDate) .toISOString(), '', '"', storeMetadataParams.contentMD5, '"', + checksumXml, '', ].join(''); const additionalHeaders = corsHeaders || {}; diff --git a/tests/unit/api/objectCopy.js b/tests/unit/api/objectCopy.js index 4600e755fd..0f5a9eaaaf 100644 --- a/tests/unit/api/objectCopy.js +++ b/tests/unit/api/objectCopy.js @@ -1,5 +1,6 @@ const assert = require('assert'); const async = require('async'); +const { Readable } = require('stream'); const { storage, versioning } = require('arsenal'); const sinon = require('sinon'); @@ -14,7 +15,10 @@ const { cleanup, DummyRequestLogger, makeAuthInfo, versioningTestUtils } const mpuUtils = require('../utils/mpuUtils'); const metadata = require('../metadataswitch'); const { data } = require('../../../lib/data/wrapper'); +const kms = require('../../../lib/kms/wrapper'); const { objectLocationConstraintHeader } = require('../../../constants'); +const { algorithms } = + require('../../../lib/api/apiUtils/integrity/validateChecksums'); const { fakeMetadataArchive } = require('../../functional/aws-node-sdk/test/utils/init'); const { config } = require('../../../lib/Config'); @@ -641,6 +645,540 @@ describe('objectCopy with objectKeyByteLimit', () => { }); }); +// Set or clear the source object's stored checksum directly in metadata. +function setSourceChecksum(checksum, cb) { + metadata.getObjectMD(sourceBucketName, objectKey, {}, log, (err, md) => { + if (err) { + return cb(err); + } + if (checksum) { + // eslint-disable-next-line no-param-reassign + md.checksum = checksum; + } else { + // eslint-disable-next-line no-param-reassign + delete md.checksum; + } + return metadata.putObjectMD(sourceBucketName, objectKey, md, {}, log, cb); + }); +} + +describe('objectCopy checksum propagation', () => { + beforeEach(done => { + cleanup(); + async.series([ + next => bucketPut(authInfo, putDestBucketRequest, log, next), + next => bucketPut(authInfo, putSourceBucketRequest, log, next), + next => objectPut(authInfo, versioningTestUtils.createPutObjectRequest( + sourceBucketName, objectKey, objData[0]), undefined, log, next), + ], done); + }); + + afterEach(() => cleanup()); + + const algorithmFixtures = [ + { algo: 'crc32', header: 'CRC32', xmlTag: 'ChecksumCRC32', value: 'AAAAAA==' }, + { algo: 'crc32c', header: 'CRC32C', xmlTag: 'ChecksumCRC32C', value: 'AAAAAA==' }, + { algo: 'crc64nvme', header: 'CRC64NVME', xmlTag: 'ChecksumCRC64NVME', value: 'AAAAAAAAAAA=' }, + { algo: 'sha1', header: 'SHA1', xmlTag: 'ChecksumSHA1', value: 'AAAAAAAAAAAAAAAAAAAAAAAAAAA=' }, + { + algo: 'sha256', header: 'SHA256', xmlTag: 'ChecksumSHA256', + value: 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=' + }, + ]; + + algorithmFixtures.forEach(({ algo, header, xmlTag, value }) => { + const sourceChecksum = { + checksumAlgorithm: algo, + checksumValue: value, + checksumType: 'FULL_OBJECT', + }; + + function assertPropagated(xml, cb) { + assert(xml.includes(`<${xmlTag}>${value}`), `XML should contain ${xmlTag}`); + assert(xml.includes('FULL_OBJECT'), 'XML should contain ChecksumType'); + metadata.getObjectMD(destBucketName, objectKey, {}, log, + (err, md) => { + assert.ifError(err); + assert(md.checksum, 'destination should have a checksum'); + assert.strictEqual(md.checksum.checksumAlgorithm, algo); + assert.strictEqual(md.checksum.checksumValue, value); + assert.strictEqual(md.checksum.checksumType, 'FULL_OBJECT'); + cb(); + }); + } + + it(`should propagate a FULL_OBJECT ${algo} checksum from source to destination`, + done => { + setSourceChecksum(sourceChecksum, err => { + assert.ifError(err); + const req = _createObjectCopyRequest(destBucketName); + objectCopy(authInfo, req, sourceBucketName, objectKey, + undefined, log, (err, xml) => { + assert.ifError(err); + assertPropagated(xml, done); + }); + }); + }); + + it(`should propagate when x-amz-checksum-algorithm matches source ${algo} algorithm`, + done => { + setSourceChecksum(sourceChecksum, err => { + assert.ifError(err); + const req = _createObjectCopyRequest(destBucketName, { + 'x-amz-checksum-algorithm': header, + }); + objectCopy(authInfo, req, sourceBucketName, objectKey, + undefined, log, (err, xml) => { + assert.ifError(err); + assertPropagated(xml, done); + }); + }); + }); + }); + + it('should default to CRC64NVME when source has no checksum and no algorithm is requested', done => { + setSourceChecksum(null, err => { + if (err) { + return done(err); + } + return Promise.resolve(algorithms.crc64nvme.digest(objData[0])).then(expectedDigest => { + const req = _createObjectCopyRequest(destBucketName); + objectCopy(authInfo, req, sourceBucketName, objectKey, + undefined, log, + assertRecomputed('crc64nvme', 'ChecksumCRC64NVME', expectedDigest, done)); + }, done); + }); + }); +}); + +describe('objectCopy checksum recompute', () => { + beforeEach(done => { + cleanup(); + async.series([ + next => bucketPut(authInfo, putDestBucketRequest, log, next), + next => bucketPut(authInfo, putSourceBucketRequest, log, next), + next => objectPut(authInfo, versioningTestUtils.createPutObjectRequest( + sourceBucketName, objectKey, objData[0]), undefined, log, next), + ], done); + }); + + afterEach(() => cleanup()); + + // Builds the objectCopy callback that asserts a successful FULL_OBJECT recompute: + // response XML and destination metadata both carry the expected algo and digest. + function assertRecomputed(algo, xmlTag, expectedDigest, done) { + return (err, xml) => { + if (err) { + return done(err); + } + try { + assert(xml.includes(`<${xmlTag}>${expectedDigest}`), + `XML should contain ${xmlTag}=${expectedDigest}`); + assert(xml.includes('FULL_OBJECT'), + 'XML should contain ChecksumType FULL_OBJECT'); + } catch (e) { + return done(e); + } + return metadata.getObjectMD(destBucketName, objectKey, {}, log, (err, md) => { + if (err) { + return done(err); + } + try { + assert(md.checksum, 'destination should have a checksum'); + assert.strictEqual(md.checksum.checksumAlgorithm, algo); + assert.strictEqual(md.checksum.checksumValue, expectedDigest); + assert.strictEqual(md.checksum.checksumType, 'FULL_OBJECT'); + return done(); + } catch (e) { + return done(e); + } + }); + }; + } + + const recomputeFixtures = [ + { algo: 'crc32', header: 'CRC32', xmlTag: 'ChecksumCRC32' }, + { algo: 'crc32c', header: 'CRC32C', xmlTag: 'ChecksumCRC32C' }, + { algo: 'crc64nvme', header: 'CRC64NVME', xmlTag: 'ChecksumCRC64NVME' }, + { algo: 'sha1', header: 'SHA1', xmlTag: 'ChecksumSHA1' }, + { algo: 'sha256', header: 'SHA256', xmlTag: 'ChecksumSHA256' }, + ]; + + recomputeFixtures.forEach(({ algo, header, xmlTag }) => { + it(`should recompute ${algo} when x-amz-checksum-algorithm differs from source`, done => { + // Seed source with a different algorithm so the request forces a recompute. + // crc32 ↔ sha256 swap covers both pivots. + const sourceAlgo = algo === 'crc32' ? 'sha256' : 'crc32'; + const sourceValue = sourceAlgo === 'crc32' + ? 'AAAAAA==' + : '47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU='; + setSourceChecksum({ + checksumAlgorithm: sourceAlgo, + checksumValue: sourceValue, + checksumType: 'FULL_OBJECT', + }, err => { + if (err) { + return done(err); + } + return Promise.resolve(algorithms[algo].digest(objData[0])).then(expectedDigest => { + const req = _createObjectCopyRequest(destBucketName, { + 'x-amz-checksum-algorithm': header, + }); + objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, + assertRecomputed(algo, xmlTag, expectedDigest, done)); + }, done); + }); + }); + + // CRC64NVME cannot be used as COMPOSITE (AWS rejects it at MPU init); only test + // the COMPOSITE-source path for the four other algorithms. + if (algo !== 'crc64nvme') { + it(`should recompute ${algo} when source is COMPOSITE and no algorithm requested`, done => { + setSourceChecksum({ + checksumAlgorithm: algo, + // valid-shape placeholder; the test does not depend on the source value + // matching the body — only on the destination digest being recomputed. + checksumValue: 'BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBA=', + checksumType: 'COMPOSITE', + }, err => { + if (err) { + return done(err); + } + return Promise.resolve(algorithms[algo].digest(objData[0])).then(expectedDigest => { + const req = _createObjectCopyRequest(destBucketName); + objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, + assertRecomputed(algo, xmlTag, expectedDigest, done)); + }, done); + }); + }); + + it(`should recompute ${algo} when source is COMPOSITE and different algorithm requested`, done => { + // Force a recompute by both source-type (COMPOSITE) and algo mismatch. + // Seed source with sha256 COMPOSITE so the requested algo always differs. + const sourceAlgo = algo === 'sha256' ? 'sha1' : 'sha256'; + const sourceValue = sourceAlgo === 'sha1' + ? 'AAAAAAAAAAAAAAAAAAAAAAAAAAA=' + : '47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU='; + setSourceChecksum({ + checksumAlgorithm: sourceAlgo, + checksumValue: sourceValue, + checksumType: 'COMPOSITE', + }, err => { + if (err) { + return done(err); + } + return Promise.resolve(algorithms[algo].digest(objData[0])).then(expectedDigest => { + const req = _createObjectCopyRequest(destBucketName, { + 'x-amz-checksum-algorithm': header, + }); + objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, + assertRecomputed(algo, xmlTag, expectedDigest, done)); + }, done); + }); + }); + } + + it(`should compute ${algo} when source has no checksum and algorithm is requested`, done => { + setSourceChecksum(null, err => { + if (err) { + return done(err); + } + return Promise.resolve(algorithms[algo].digest(objData[0])).then(expectedDigest => { + const req = _createObjectCopyRequest(destBucketName, { + 'x-amz-checksum-algorithm': header, + }); + objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, + assertRecomputed(algo, xmlTag, expectedDigest, done)); + }, done); + }); + }); + }); + + it('should recompute the checksum across all parts of a multi-part source', done => { + const partA = Buffer.from('part-a-bytes', 'utf8'); + const partB = Buffer.from('part-b-bytes-longer', 'utf8'); + const fullBody = Buffer.concat([partA, partB]); + const parts = { 'mpart-a': partA, 'mpart-b': partB }; + const dataGetStub = sinon.stub(data, 'get').callsFake((info, _response, _l, cb) => { + const buf = parts[info.key]; + return cb(null, Readable.from(buf)); + }); + metadata.getObjectMD(sourceBucketName, objectKey, {}, log, (err, md) => { + assert.ifError(err); + // eslint-disable-next-line no-param-reassign + md.location = [ + { key: 'mpart-a', dataStoreName: 'us-east-1', size: partA.length, start: 0 }, + { key: 'mpart-b', dataStoreName: 'us-east-1', size: partB.length, start: partA.length }, + ]; + // eslint-disable-next-line no-param-reassign + md['content-length'] = fullBody.length; + metadata.putObjectMD(sourceBucketName, objectKey, md, {}, log, err => { + assert.ifError(err); + Promise.resolve(algorithms.sha256.digest(fullBody)).then(expectedDigest => { + const req = _createObjectCopyRequest(destBucketName, { + 'x-amz-checksum-algorithm': 'SHA256', + }); + objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, + (err, xml) => { + dataGetStub.restore(); + assertRecomputed('sha256', 'ChecksumSHA256', expectedDigest, done)(err, xml); + }); + }, err => { + dataGetStub.restore(); + done(err); + }); + }); + }); + }); + + it('should recompute the checksum and encrypt the destination when SSE is requested', done => { + // Recompute path must create a cipher bundle via kms when SSE is configured. + // Confirm the cipher bundle is forwarded to data.put and the checksum is + // still over the cleartext bytes. + const cipherBundleSpy = sinon.spy(kms, 'createCipherBundle'); + const dataPutSpy = sinon.spy(data, 'put'); + Promise.resolve(algorithms.sha256.digest(objData[0])).then(expectedDigest => { + const req = _createObjectCopyRequest(destBucketName, { + 'x-amz-checksum-algorithm': 'SHA256', + 'x-amz-server-side-encryption': 'AES256', + }); + objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, + assertRecomputed('sha256', 'ChecksumSHA256', expectedDigest, err => { + cipherBundleSpy.restore(); + dataPutSpy.restore(); + if (err) { + return done(err); + } + try { + assert(cipherBundleSpy.calledOnce, + 'kms.createCipherBundle should be called once'); + const sseConfig = cipherBundleSpy.firstCall.args[0]; + assert.strictEqual(sseConfig.algorithm, 'AES256'); + // The data.put call that consumed the checksum stream + // (size > 0) should have received a non-null cipherBundle. + const recomputePut = dataPutSpy.getCalls() + .find(call => call.args[1] !== null); + assert(recomputePut, + 'expected at least one data.put with a real stream'); + assert(recomputePut.args[0], + 'cipherBundle should be non-null for SSE recompute'); + return done(); + } catch (e) { + return done(e); + } + })); + }, done); + }); + + it('should reject an unknown x-amz-checksum-algorithm value with InvalidRequest', done => { + const req = _createObjectCopyRequest(destBucketName, { + 'x-amz-checksum-algorithm': 'GARBAGE', + }); + objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, err => { + assert(err); + assert.strictEqual(err.is.InvalidRequest, true); + done(); + }); + }); + const copyToSelfFixtures = [ + { algo: 'crc32', header: 'CRC32', xmlTag: 'ChecksumCRC32' }, + { algo: 'crc32c', header: 'CRC32C', xmlTag: 'ChecksumCRC32C' }, + { algo: 'crc64nvme', header: 'CRC64NVME', xmlTag: 'ChecksumCRC64NVME' }, + { algo: 'sha1', header: 'SHA1', xmlTag: 'ChecksumSHA1' }, + { algo: 'sha256', header: 'SHA256', xmlTag: 'ChecksumSHA256' }, + ]; + + copyToSelfFixtures.forEach(({ algo, header, xmlTag }) => { + it(`should compute the ${algo} checksum on copy-to-self without rewriting data`, done => { + const dataPutSpy = sinon.spy(data, 'put'); + const batchDeleteSpy = sinon.spy(data, 'batchDelete'); + metadata.getObjectMD(sourceBucketName, objectKey, {}, log, (err, srcMd) => { + assert.ifError(err); + const sourceLocation = srcMd.location; + Promise.resolve(algorithms[algo].digest(objData[0])).then(expectedDigest => { + const req = new DummyRequest({ + bucketName: sourceBucketName, + namespace, + objectKey, + headers: { + 'x-amz-checksum-algorithm': header, + 'x-amz-metadata-directive': 'REPLACE', + }, + url: `/${sourceBucketName}/${objectKey}`, + socket: {}, + }); + objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, (err, xml) => { + dataPutSpy.restore(); + batchDeleteSpy.restore(); + assert.ifError(err); + assert(!dataPutSpy.called, 'data.put should NOT be called'); + assert(!batchDeleteSpy.called, 'data.batchDelete should NOT be called'); + assert(xml.includes(`<${xmlTag}>${expectedDigest}`), + 'response XML should carry the new checksum'); + metadata.getObjectMD(sourceBucketName, objectKey, {}, log, (err, md) => { + assert.ifError(err); + assert.deepStrictEqual( + md.location.map(l => l.key), + sourceLocation.map(l => l.key), + 'data location keys should be reused'); + assert.strictEqual(md.checksum.checksumAlgorithm, algo); + assert.strictEqual(md.checksum.checksumValue, expectedDigest); + assert.strictEqual(md.checksum.checksumType, 'FULL_OBJECT'); + done(); + }); + }); + }, done); + }); + }); + }); + + it('should still PUT on copy-to-self when versioning is enabled (no metadata-only shortcut)', done => { + // Versioned copies produce a new version-id; the metadata-only path + // is skipped so the new version gets its own data write. + const enableVersioning = versioningTestUtils + .createBucketPutVersioningReq(sourceBucketName, 'Enabled'); + bucketPutVersioning(authInfo, enableVersioning, log, err => { + assert.ifError(err); + const dataPutSpy = sinon.spy(data, 'put'); + const req = new DummyRequest({ + bucketName: sourceBucketName, + namespace, + objectKey, + headers: { + 'x-amz-checksum-algorithm': 'SHA256', + 'x-amz-metadata-directive': 'REPLACE', + }, + url: `/${sourceBucketName}/${objectKey}`, + socket: {}, + }); + objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, err => { + dataPutSpy.restore(); + assert.ifError(err); + assert(dataPutSpy.called, + 'data.put should be called because versioning forces a new version write'); + done(); + }); + }); + }); +}); +describe('objectCopy checksum recompute on same external backend', () => { + const prevConfigBackendsData = data.config.backends.data; + const prevConfigLocationConstraint = data.config.locationConstraints['us-east-1'].type; + const prevConfigLocationConstraint2 = data.config.locationConstraints['us-east-2'].type; + + before(() => { + data.config.backends.data = 'multiple'; + data.config.locationConstraints['us-east-1'].type = 'aws_s3'; + data.config.locationConstraints['us-east-2'].type = 'aws_s3'; + }); + + after(() => { + data.config.backends.data = prevConfigBackendsData; + data.config.locationConstraints['us-east-1'].type = prevConfigLocationConstraint; + data.config.locationConstraints['us-east-2'].type = prevConfigLocationConstraint2; + }); + + beforeEach(done => { + cleanup(); + async.series([ + next => bucketPut(authInfo, putDestBucketRequest, log, next), + next => bucketPut(authInfo, putSourceBucketRequest, log, next), + next => objectPut(authInfo, versioningTestUtils.createPutObjectRequest( + sourceBucketName, objectKey, objData[0]), undefined, log, next), + ], done); + }); + + afterEach(() => cleanup()); + + it('should skip recompute on cross-key copy on external backend and not store a checksum', done => { + // Cross-key copy + recompute on same external backend should take the + // native server-side copy path with no checksum stored on the destination. + const copyObjectStub = sinon.stub(data, 'copyObject').callsFake( + (req, srcLoc, sMP, dataLocator, ctx, backendInfo, srcBM, dstBM, sse, l, cb) => + cb(null, dataLocator)); + const dataPutSpy = sinon.spy(data, 'put'); + const req = _createObjectCopyRequest(destBucketName, { + 'x-amz-checksum-algorithm': 'SHA256', + }); + objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, (err, xml) => { + copyObjectStub.restore(); + dataPutSpy.restore(); + assert.ifError(err); + assert(copyObjectStub.calledOnce, 'data.copyObject should be called (native path)'); + assert(!dataPutSpy.called, 'data.put should NOT be called (no stream-through)'); + assert(!xml.includes(''), + 'response XML should not carry a checksum'); + metadata.getObjectMD(destBucketName, objectKey, {}, log, (err, md) => { + assert.ifError(err); + assert(!md.checksum, 'destination metadata should not carry a checksum'); + done(); + }); + }); + }); + + it('should skip recompute on a cross-location copy within the same external backend type', done => { + // us-east-1 -> us-east-2, both aws_s3: even though locations differ, a + // native server-side copy is available (externalBackendCopy), so we keep + // the native path — no GET + PUT through CloudServer — and store no checksum. + const copyObjectStub = sinon.stub(data, 'copyObject').callsFake( + (req, srcLoc, sMP, dataLocator, ctx, backendInfo, srcBM, dstBM, sse, l, cb) => + cb(null, [{ key: 'new-key', dataStoreName: 'us-east-2', size: objData[0].length, start: 0 }])); + const dataPutSpy = sinon.spy(data, 'put'); + const req = _createObjectCopyRequest(destBucketName, { + 'x-amz-checksum-algorithm': 'SHA256', + 'x-amz-metadata-directive': 'REPLACE', + 'x-amz-meta-scal-location-constraint': 'us-east-2', + }); + objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, (err, xml) => { + copyObjectStub.restore(); + dataPutSpy.restore(); + assert.ifError(err); + assert(copyObjectStub.calledOnce, + 'data.copyObject should be called (native cross-location path)'); + assert(!dataPutSpy.called, + 'data.put should NOT be called (no stream-through GET + PUT)'); + assert(!xml.includes(''), + 'response XML should not carry a checksum'); + metadata.getObjectMD(destBucketName, objectKey, {}, log, (err, md) => { + assert.ifError(err); + assert(!md.checksum, 'destination metadata should not carry a checksum'); + done(); + }); + }); + }); + + it('should early-return on copy-to-self and not call data.copyObject or data.put', done => { + // Copy-to-self + recompute on same external backend should be a pure metadata-only operation. + const copyObjectSpy = sinon.spy(data, 'copyObject'); + const dataPutSpy = sinon.spy(data, 'put'); + const req = new DummyRequest({ + bucketName: sourceBucketName, + namespace, + objectKey, + headers: { + 'x-amz-checksum-algorithm': 'SHA256', + 'x-amz-metadata-directive': 'REPLACE', + }, + url: `/${sourceBucketName}/${objectKey}`, + socket: {}, + }); + objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, (err, xml) => { + copyObjectSpy.restore(); + dataPutSpy.restore(); + assert.ifError(err); + assert(!copyObjectSpy.called, 'data.copyObject should NOT be called'); + assert(!dataPutSpy.called, 'data.put should NOT be called'); + assert(!xml.includes(''), + 'response XML should not carry a checksum'); + metadata.getObjectMD(sourceBucketName, objectKey, {}, log, (err, md) => { + assert.ifError(err); + assert(!md.checksum, 'metadata should not carry a checksum'); + done(); + }); + }); + }); +}); + describe('objectCopy data orphan cleanup on cross-backend copy-to-self', () => { const prevConfigBackendsData = data.config.backends.data; const prevConfigLocationConstraint1 = data.config.locationConstraints['us-east-1'].type; From 3fd4c9d98e1be4e80ff804b27df6c38ded9fd4db Mon Sep 17 00:00:00 2001 From: Leif Henriksen Date: Wed, 27 May 2026 21:10:01 +0200 Subject: [PATCH 4/8] CLDSRV-908: handle 0-byte source recompute on CopyObject --- lib/api/objectCopy.js | 55 ++++++++---- tests/unit/api/objectCopy.js | 164 ++++++++++++++++++++++++++++------- 2 files changed, 170 insertions(+), 49 deletions(-) diff --git a/lib/api/objectCopy.js b/lib/api/objectCopy.js index e2b074d5af..463d805487 100644 --- a/lib/api/objectCopy.js +++ b/lib/api/objectCopy.js @@ -708,26 +708,47 @@ function objectCopy(authInfo, request, sourceBucket, externalVersioningErrorMessage), destBucketMD); } if (dataLocator.length === 0) { - if (!storeMetadataParams.locationMatch && destLocationConstraintType - && constants.externalBackends[destLocationConstraintType]) { - return data.put(null, null, storeMetadataParams.size, - dataStoreContext, backendInfoDest, - log, (error, objectRetrievalInfo) => { - if (error) { - return next(error, destBucketMD); - } - const putResult = { - key: objectRetrievalInfo.key, - dataStoreName: objectRetrievalInfo.dataStoreName, - dataStoreType: objectRetrievalInfo.dataStoreType, - size: storeMetadataParams.size, + const finishZeroByte = () => { + if (!storeMetadataParams.locationMatch && destLocationConstraintType + && constants.externalBackends[destLocationConstraintType]) { + return data.put(null, null, storeMetadataParams.size, + dataStoreContext, backendInfoDest, + log, (error, objectRetrievalInfo) => { + if (error) { + return next(error, destBucketMD); + } + const putResult = { + key: objectRetrievalInfo.key, + dataStoreName: objectRetrievalInfo.dataStoreName, + dataStoreType: objectRetrievalInfo.dataStoreType, + size: storeMetadataParams.size, + }; + return next(null, storeMetadataParams, [putResult], + destObjMD, serverSideEncryption, destBucketMD); + }); + } + return next(null, storeMetadataParams, dataLocator, destObjMD, + serverSideEncryption, destBucketMD); + }; + if (shouldRecomputeChecksum) { + // No bytes to stream, but AWS still writes the empty-bytes digest of the chosen algorithm. + const algoName = _getCopyObjectChecksumAlgorithm(requestedAlgo, sourceObjMD); + return Promise.resolve(algorithms[algoName].digest(Buffer.alloc(0))) + .then(digest => { + // eslint-disable-next-line no-param-reassign + storeMetadataParams.checksum = { + algorithm: algoName, + value: digest, + type: 'FULL_OBJECT', }; - return next(null, storeMetadataParams, [putResult], - destObjMD, serverSideEncryption, destBucketMD); + return finishZeroByte(); + }, err => { + log.error('failed to compute empty checksum digest', + { algorithm: algoName, error: err }); + return next(errors.InternalError, destBucketMD); }); } - return next(null, storeMetadataParams, dataLocator, destObjMD, - serverSideEncryption, destBucketMD); + return finishZeroByte(); } if (shouldRecomputeChecksum && skipExternalBackendRecompute) { log.debug('skipping checksum recompute on same external backend', diff --git a/tests/unit/api/objectCopy.js b/tests/unit/api/objectCopy.js index 0f5a9eaaaf..bdd1c73a4b 100644 --- a/tests/unit/api/objectCopy.js +++ b/tests/unit/api/objectCopy.js @@ -662,6 +662,42 @@ function setSourceChecksum(checksum, cb) { }); } +// Truncate the source object to 0 bytes by clearing its data location and +// content-length. Lets a single beforeEach setup serve both non-empty and +// empty-source tests without rebuilding the bucket each time. +function setSourceEmptyBody(cb) { + metadata.getObjectMD(sourceBucketName, objectKey, {}, log, (err, md) => { + if (err) { + return cb(err); + } + // eslint-disable-next-line no-param-reassign + md.location = null; + // eslint-disable-next-line no-param-reassign + md['content-length'] = 0; + return metadata.putObjectMD(sourceBucketName, objectKey, md, {}, log, cb); + }); +} + +// Builds the objectCopy callback that asserts a successful FULL_OBJECT recompute: +// response XML and destination metadata both carry the expected algo and digest. +function assertRecomputed(algo, xmlTag, expectedDigest, done) { + return (err, xml) => { + assert.ifError(err); + assert(xml.includes(`<${xmlTag}>${expectedDigest}`), + `XML should contain ${xmlTag}=${expectedDigest}`); + assert(xml.includes('FULL_OBJECT'), + 'XML should contain ChecksumType FULL_OBJECT'); + metadata.getObjectMD(destBucketName, objectKey, {}, log, (err, md) => { + assert.ifError(err); + assert(md.checksum, 'destination should have a checksum'); + assert.strictEqual(md.checksum.checksumAlgorithm, algo); + assert.strictEqual(md.checksum.checksumValue, expectedDigest); + assert.strictEqual(md.checksum.checksumType, 'FULL_OBJECT'); + done(); + }); + }; +} + describe('objectCopy checksum propagation', () => { beforeEach(done => { cleanup(); @@ -764,38 +800,6 @@ describe('objectCopy checksum recompute', () => { afterEach(() => cleanup()); - // Builds the objectCopy callback that asserts a successful FULL_OBJECT recompute: - // response XML and destination metadata both carry the expected algo and digest. - function assertRecomputed(algo, xmlTag, expectedDigest, done) { - return (err, xml) => { - if (err) { - return done(err); - } - try { - assert(xml.includes(`<${xmlTag}>${expectedDigest}`), - `XML should contain ${xmlTag}=${expectedDigest}`); - assert(xml.includes('FULL_OBJECT'), - 'XML should contain ChecksumType FULL_OBJECT'); - } catch (e) { - return done(e); - } - return metadata.getObjectMD(destBucketName, objectKey, {}, log, (err, md) => { - if (err) { - return done(err); - } - try { - assert(md.checksum, 'destination should have a checksum'); - assert.strictEqual(md.checksum.checksumAlgorithm, algo); - assert.strictEqual(md.checksum.checksumValue, expectedDigest); - assert.strictEqual(md.checksum.checksumType, 'FULL_OBJECT'); - return done(); - } catch (e) { - return done(e); - } - }); - }; - } - const recomputeFixtures = [ { algo: 'crc32', header: 'CRC32', xmlTag: 'ChecksumCRC32' }, { algo: 'crc32c', header: 'CRC32C', xmlTag: 'ChecksumCRC32C' }, @@ -1179,6 +1183,102 @@ describe('objectCopy checksum recompute on same external backend', () => { }); }); +describe('objectCopy checksum recompute on 0-byte source', () => { + beforeEach(done => { + cleanup(); + async.series([ + next => bucketPut(authInfo, putDestBucketRequest, log, next), + next => bucketPut(authInfo, putSourceBucketRequest, log, next), + next => objectPut(authInfo, versioningTestUtils.createPutObjectRequest( + sourceBucketName, objectKey, objData[0]), undefined, log, next), + // Truncate the source to 0 bytes (no data location, content-length 0). + // Matches the AWS behavior we're exercising: empty source + recompute. + next => setSourceEmptyBody(next), + ], done); + }); + + afterEach(() => cleanup()); + + const recomputeFixtures = [ + { algo: 'crc32', header: 'CRC32', xmlTag: 'ChecksumCRC32' }, + { algo: 'crc32c', header: 'CRC32C', xmlTag: 'ChecksumCRC32C' }, + { algo: 'crc64nvme', header: 'CRC64NVME', xmlTag: 'ChecksumCRC64NVME' }, + { algo: 'sha1', header: 'SHA1', xmlTag: 'ChecksumSHA1' }, + { algo: 'sha256', header: 'SHA256', xmlTag: 'ChecksumSHA256' }, + ]; + + recomputeFixtures.forEach(({ algo, header, xmlTag }) => { + it(`should compute empty-bytes ${algo} digest when source has no checksum`, done => { + setSourceChecksum(null, err => { + if (err) { + return done(err); + } + return Promise.resolve(algorithms[algo].digest(Buffer.alloc(0))).then(expectedDigest => { + const req = _createObjectCopyRequest(destBucketName, { + 'x-amz-checksum-algorithm': header, + }); + objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, + assertRecomputed(algo, xmlTag, expectedDigest, done)); + }, done); + }); + }); + }); + + it('should recompute empty-bytes digest on COMPOSITE 0-byte source (no algo header)', done => { + // COMPOSITE source forces recompute even with no algorithm header. + // Use sha256 placeholder; the dest digest will be the empty-bytes sha256. + setSourceChecksum({ + checksumAlgorithm: 'sha256', + checksumValue: '47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=', + checksumType: 'COMPOSITE', + }, err => { + assert.ifError(err); + Promise.resolve(algorithms.sha256.digest(Buffer.alloc(0))).then(expectedDigest => { + const req = _createObjectCopyRequest(destBucketName); + objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, + assertRecomputed('sha256', 'ChecksumSHA256', expectedDigest, done)); + }, done); + }); + }); + + it('should propagate FULL_OBJECT checksum on 0-byte source (no algo header)', done => { + // The 0-byte recompute path must not override propagation set by _prepMetadata. + setSourceChecksum({ + checksumAlgorithm: 'crc32', + checksumValue: 'AAAAAA==', + checksumType: 'FULL_OBJECT', + }, err => { + assert.ifError(err); + const req = _createObjectCopyRequest(destBucketName); + objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, (err, xml) => { + assert.ifError(err); + assert(xml.includes('AAAAAA==')); + assert(xml.includes('FULL_OBJECT')); + metadata.getObjectMD(destBucketName, objectKey, {}, log, (err, md) => { + assert.ifError(err); + assert.strictEqual(md.checksum.checksumAlgorithm, 'crc32'); + assert.strictEqual(md.checksum.checksumValue, 'AAAAAA=='); + assert.strictEqual(md.checksum.checksumType, 'FULL_OBJECT'); + done(); + }); + }); + }); + }); + + it('should compute empty-bytes CRC64NVME on 0-byte source with no source checksum and no algo header', done => { + setSourceChecksum(null, err => { + if (err) { + return done(err); + } + return Promise.resolve(algorithms.crc64nvme.digest(Buffer.alloc(0))).then(expectedDigest => { + const req = _createObjectCopyRequest(destBucketName); + objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, + assertRecomputed('crc64nvme', 'ChecksumCRC64NVME', expectedDigest, done)); + }, done); + }); + }); +}); + describe('objectCopy data orphan cleanup on cross-backend copy-to-self', () => { const prevConfigBackendsData = data.config.backends.data; const prevConfigLocationConstraint1 = data.config.locationConstraints['us-east-1'].type; From 79584e005a0d0fe640ab99bbe020d2a86871e4e9 Mon Sep 17 00:00:00 2001 From: Leif Henriksen Date: Wed, 27 May 2026 21:31:53 +0200 Subject: [PATCH 5/8] CLDSRV-908: support Azure-source recompute on CopyObject --- lib/api/objectCopy.js | 21 +++++++++++++++--- tests/unit/api/objectCopy.js | 42 ++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 3 deletions(-) diff --git a/lib/api/objectCopy.js b/lib/api/objectCopy.js index 463d805487..e178f42ee5 100644 --- a/lib/api/objectCopy.js +++ b/lib/api/objectCopy.js @@ -79,11 +79,26 @@ function _orphanedDataLocations(dataToDelete, newDataGetInfo) { function _pipeSourcePartsThrough(dataLocator, log) { const passthrough = new PassThrough(); async.eachSeries(dataLocator, (part, cb) => { - data.get(part, null, log, (err, partStream) => { + const done = jsutil.once(cb); + if (part.dataStoreType === 'azure') { + // Azure's data.get writes part bytes into the provided writable + // instead of returning a Readable. Pipe a per-part PassThrough + // into the master passthrough and use its 'end' as the completion + // signal — same pattern arsenal's data.copyObject uses. + const perPart = new PassThrough(); + perPart.once('error', done); + perPart.once('end', () => done()); + perPart.pipe(passthrough, { end: false }); + return data.get(part, perPart, log, err => { + if (err) { + done(err); + } + }); + } + return data.get(part, null, log, (err, partStream) => { if (err) { - return cb(err); + return done(err); } - const done = jsutil.once(cb); partStream.once('error', done); partStream.once('end', () => done()); partStream.pipe(passthrough, { end: false }); diff --git a/tests/unit/api/objectCopy.js b/tests/unit/api/objectCopy.js index bdd1c73a4b..35d894758b 100644 --- a/tests/unit/api/objectCopy.js +++ b/tests/unit/api/objectCopy.js @@ -984,6 +984,48 @@ describe('objectCopy checksum recompute', () => { done(); }); }); + it('should recompute checksum on an Azure source via the per-part PassThrough path', done => { + // The mem backend's data.get returns a Readable; Azure backend's data.get + // instead writes part bytes into the response writable. Stub data.get for + // Azure-flagged parts so the test exercises the Azure branch end-to-end + // without a real Azure backend. + const originalGet = data.get; + const dataGetStub = sinon.stub(data, 'get').callsFake((info, response, l, cb) => { + if (info?.dataStoreType === 'azure') { + // Simulate Azure: write the (already-known) source bytes to the + // provided writable, end it, and signal completion. + setImmediate(() => { + response.write(objData[0]); + response.end(); + cb(null); + }); + return; + } + originalGet.call(data, info, response, l, cb); + }); + metadata.getObjectMD(sourceBucketName, objectKey, {}, log, (err, md) => { + assert.ifError(err); + // eslint-disable-next-line no-param-reassign + md.location = [{ ...md.location[0], dataStoreType: 'azure' }]; + metadata.putObjectMD(sourceBucketName, objectKey, md, {}, log, err => { + assert.ifError(err); + Promise.resolve(algorithms.sha256.digest(objData[0])).then(expectedDigest => { + const req = _createObjectCopyRequest(destBucketName, { + 'x-amz-checksum-algorithm': 'SHA256', + }); + objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, + (err, xml) => { + dataGetStub.restore(); + assertRecomputed('sha256', 'ChecksumSHA256', expectedDigest, done)(err, xml); + }); + }, err => { + dataGetStub.restore(); + done(err); + }); + }); + }); + }); + const copyToSelfFixtures = [ { algo: 'crc32', header: 'CRC32', xmlTag: 'ChecksumCRC32' }, { algo: 'crc32c', header: 'CRC32C', xmlTag: 'ChecksumCRC32C' }, From 78c824694b5f57deef926f88eefffcdb44e88176 Mon Sep 17 00:00:00 2001 From: Leif Henriksen Date: Wed, 27 May 2026 21:48:30 +0200 Subject: [PATCH 6/8] CLDSRV-908: add functional tests for CopyObject checksum behavior --- .../aws-node-sdk/test/object/objectCopy.js | 166 ++++++++++++++++++ 1 file changed, 166 insertions(+) diff --git a/tests/functional/aws-node-sdk/test/object/objectCopy.js b/tests/functional/aws-node-sdk/test/object/objectCopy.js index c7b6ceedcc..871ea39015 100644 --- a/tests/functional/aws-node-sdk/test/object/objectCopy.js +++ b/tests/functional/aws-node-sdk/test/object/objectCopy.js @@ -18,6 +18,8 @@ const { taggingTests } = require('../../lib/utility/tagging'); const genMaxSizeMetaHeaders = require('../../lib/utility/genMaxSizeMetaHeaders'); const constants = require('../../../../../constants'); +const { algorithms } = + require('../../../../../lib/api/apiUtils/integrity/validateChecksums'); const sourceBucketName = 'supersourcebucket8102016'; const sourceObjName = 'supersourceobject'; @@ -1535,3 +1537,167 @@ describe('Object Copy with object lock enabled on both destination ' + }); }); + +describe('Object Copy checksum behavior', () => { + withV4(sigCfg => { + let bucketUtil; + let s3; + + before(async () => { + try { + bucketUtil = new BucketUtility('default', sigCfg); + s3 = bucketUtil.s3; + await bucketUtil.empty(sourceBucketName); + await bucketUtil.empty(destBucketName); + await bucketUtil.deleteMany([sourceBucketName, destBucketName]); + } catch (err) { + if (err.name !== 'NoSuchBucket') { + throw err; + } + } + await bucketUtil.createOne(sourceBucketName); + await bucketUtil.createOne(destBucketName); + }); + + afterEach(async () => { + await bucketUtil.empty(sourceBucketName, true); + await bucketUtil.empty(destBucketName, true); + }); + + after(async () => bucketUtil.deleteMany([sourceBucketName, destBucketName])); + + const checksumFixtures = [ + { algo: 'crc32', header: 'CRC32', key: 'ChecksumCRC32' }, + { algo: 'crc32c', header: 'CRC32C', key: 'ChecksumCRC32C' }, + { algo: 'crc64nvme', header: 'CRC64NVME', key: 'ChecksumCRC64NVME' }, + { algo: 'sha1', header: 'SHA1', key: 'ChecksumSHA1' }, + { algo: 'sha256', header: 'SHA256', key: 'ChecksumSHA256' }, + ]; + + const propagateBody = 'checksum-propagate-body'; + const recomputeBody = 'recompute-different-algo-body'; + + checksumFixtures.forEach(({ algo, header, key }) => { + it(`should propagate a FULL_OBJECT ${algo} checksum on CopyObject`, async () => { + const putRes = await s3.send(new PutObjectCommand({ + Bucket: sourceBucketName, + Key: sourceObjName, + Body: propagateBody, + ChecksumAlgorithm: header, + })); + const sourceValue = putRes[key]; + assert(sourceValue, `PutObject response should carry ${key}`); + + const copyRes = await s3.send(new CopyObjectCommand({ + Bucket: destBucketName, + Key: destObjName, + CopySource: `${sourceBucketName}/${sourceObjName}`, + })); + assert.strictEqual(copyRes.CopyObjectResult[key], sourceValue); + assert.strictEqual(copyRes.CopyObjectResult.ChecksumType, 'FULL_OBJECT'); + + const headRes = await s3.send(new HeadObjectCommand({ + Bucket: destBucketName, + Key: destObjName, + ChecksumMode: 'ENABLED', + })); + assert.strictEqual(headRes[key], sourceValue); + assert.strictEqual(headRes.ChecksumType, 'FULL_OBJECT'); + }); + }); + + // Recompute path: every (source algo, requested algo) pair where the + // two differ should produce the requested algo's digest of the body. + checksumFixtures.forEach(({ algo: sourceAlgo, header: sourceHeader }) => { + checksumFixtures.forEach(({ algo, header, key }) => { + if (sourceAlgo === algo) { + return; // same-algo is the propagation case, covered above + } + it(`should recompute ${algo} when source is ${sourceAlgo} and ${header} requested`, async () => { + await s3.send(new PutObjectCommand({ + Bucket: sourceBucketName, + Key: sourceObjName, + Body: recomputeBody, + ChecksumAlgorithm: sourceHeader, + })); + const expectedDigest = await Promise.resolve( + algorithms[algo].digest(Buffer.from(recomputeBody))); + + const copyRes = await s3.send(new CopyObjectCommand({ + Bucket: destBucketName, + Key: destObjName, + CopySource: `${sourceBucketName}/${sourceObjName}`, + ChecksumAlgorithm: header, + })); + assert.strictEqual(copyRes.CopyObjectResult[key], expectedDigest); + assert.strictEqual(copyRes.CopyObjectResult.ChecksumType, 'FULL_OBJECT'); + + const headRes = await s3.send(new HeadObjectCommand({ + Bucket: destBucketName, + Key: destObjName, + ChecksumMode: 'ENABLED', + })); + assert.strictEqual(headRes[key], expectedDigest); + assert.strictEqual(headRes.ChecksumType, 'FULL_OBJECT'); + }); + }); + }); + + // 0-byte source: each requested algo produces the empty-bytes digest. + // Source algo doesn't matter (empty body has the same content + // regardless), so we pivot on CRC32 ↔ SHA256 to force recompute. + checksumFixtures.forEach(({ algo, header, key }) => { + const sourceHeader = header === 'CRC32' ? 'SHA256' : 'CRC32'; + it(`should compute empty-bytes ${algo} digest on a 0-byte source recompute`, async () => { + await s3.send(new PutObjectCommand({ + Bucket: sourceBucketName, + Key: sourceObjName, + Body: '', + ChecksumAlgorithm: sourceHeader, + })); + const expectedDigest = await Promise.resolve( + algorithms[algo].digest(Buffer.alloc(0))); + + const copyRes = await s3.send(new CopyObjectCommand({ + Bucket: destBucketName, + Key: destObjName, + CopySource: `${sourceBucketName}/${sourceObjName}`, + ChecksumAlgorithm: header, + })); + assert.strictEqual(copyRes.CopyObjectResult[key], expectedDigest); + assert.strictEqual(copyRes.CopyObjectResult.ChecksumType, 'FULL_OBJECT'); + + const headRes = await s3.send(new HeadObjectCommand({ + Bucket: destBucketName, + Key: destObjName, + ChecksumMode: 'ENABLED', + })); + assert.strictEqual(headRes[key], expectedDigest); + assert.strictEqual(headRes.ChecksumType, 'FULL_OBJECT'); + }); + }); + + it('should reject an unknown ChecksumAlgorithm with InvalidRequest', async () => { + await s3.send(new PutObjectCommand({ + Bucket: sourceBucketName, + Key: sourceObjName, + Body: 'whatever', + })); + try { + await s3.send(new CopyObjectCommand({ + Bucket: destBucketName, + Key: destObjName, + CopySource: `${sourceBucketName}/${sourceObjName}`, + ChecksumAlgorithm: 'GARBAGE', + })); + throw new Error('Expected 400 InvalidRequest'); + } catch (err) { + checkError(err, 'InvalidRequest', 400); + const expected = 'Checksum algorithm provided is unsupported. ' + + 'Please try again with any of the valid types: ' + + '[CRC32, CRC32C, CRC64NVME, SHA1, SHA256]'; + assert.strictEqual(err.message, expected); + } + }); + }); +}); From f7d71070bd7c6359fed0a1fb7864d056a3bed0e4 Mon Sep 17 00:00:00 2001 From: Leif Henriksen Date: Tue, 2 Jun 2026 23:42:05 +0200 Subject: [PATCH 7/8] CLDSRV-908: improve Azure-source read error handling on CopyObject --- lib/api/objectCopy.js | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/api/objectCopy.js b/lib/api/objectCopy.js index e178f42ee5..ca15493556 100644 --- a/lib/api/objectCopy.js +++ b/lib/api/objectCopy.js @@ -86,12 +86,16 @@ function _pipeSourcePartsThrough(dataLocator, log) { // into the master passthrough and use its 'end' as the completion // signal — same pattern arsenal's data.copyObject uses. const perPart = new PassThrough(); - perPart.once('error', done); + const wrapErr = err => Object.assign(err, { + copyPart: { key: part.key, dataStoreName: part.dataStoreName, dataStoreType: part.dataStoreType }, + }); + perPart.once('error', err => done(wrapErr(err))); perPart.once('end', () => done()); perPart.pipe(passthrough, { end: false }); return data.get(part, perPart, log, err => { if (err) { - done(err); + perPart.destroy(err); + done(wrapErr(err)); } }); } From 22f8acf10ef6e9bb24b993850e5255c2d9fb5fff Mon Sep 17 00:00:00 2001 From: Leif Henriksen Date: Wed, 3 Jun 2026 21:45:54 +0200 Subject: [PATCH 8/8] CLDSRV-908: skip copy-to-self recompute GET on PFS and TLP Copy-to-self reuses the existing data location, so the bytes never move. On an external backend a checksum recompute would mean a GET purely to hash. Skip it for any external backend (constants.externalBackends, incl. pfs/tlp which have no native server-side copy); local backends still recompute in place. The byte-moving path keeps using externalBackendCopy. --- lib/api/objectCopy.js | 9 ++++++++- tests/unit/api/objectCopy.js | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/lib/api/objectCopy.js b/lib/api/objectCopy.js index ca15493556..532b2b4f4e 100644 --- a/lib/api/objectCopy.js +++ b/lib/api/objectCopy.js @@ -704,12 +704,19 @@ function objectCopy(authInfo, request, sourceBucket, destLocationConstraintName, sourceBucketMD, destBucketMD); const skipExternalBackendRecompute = shouldRecomputeChecksum && willUseNativeServerSideCopy; + // Copy-to-self reuses the existing data location — the bytes never + // move — so on any external backend a recompute would be a GET + // purely to hash. Skip it for all external backends (incl. pfs/tlp, + // which have no native copy); local backends recompute in place. + const destIsExternalBackend = config.backends.data === 'multiple' + && !!constants.externalBackends[destLocationConstraintType]; // skip if source and dest and location constraint the same and // versioning is not enabled, unless we need to recompute a // checksum (which requires streaming the bytes through us). if (sourceIsDestination && storeMetadataParams.locationMatch && !isVersionedObj && !needsEncryption - && (!shouldRecomputeChecksum || skipExternalBackendRecompute)) { + && (!shouldRecomputeChecksum || skipExternalBackendRecompute + || destIsExternalBackend)) { return next(null, storeMetadataParams, dataLocator, destObjMD, serverSideEncryption, destBucketMD); } diff --git a/tests/unit/api/objectCopy.js b/tests/unit/api/objectCopy.js index 35d894758b..99ce0954ad 100644 --- a/tests/unit/api/objectCopy.js +++ b/tests/unit/api/objectCopy.js @@ -1223,6 +1223,42 @@ describe('objectCopy checksum recompute on same external backend', () => { }); }); }); + + ['pfs', 'tlp'].forEach(backendType => { + it(`should not GET on copy-to-self recompute for a ${backendType} external backend`, done => { + // pfs/tlp are external but have no native server-side copy. On copy-to-self + // the location is reused (no byte movement), so recompute must not trigger + // a GET purely to hash. + const prevType = data.config.locationConstraints['us-east-1'].type; + data.config.locationConstraints['us-east-1'].type = backendType; + const dataGetSpy = sinon.spy(data, 'get'); + const copyObjectSpy = sinon.spy(data, 'copyObject'); + const dataPutSpy = sinon.spy(data, 'put'); + const req = new DummyRequest({ + bucketName: sourceBucketName, + namespace, + objectKey, + headers: { + 'x-amz-checksum-algorithm': 'SHA256', + 'x-amz-metadata-directive': 'REPLACE', + }, + url: `/${sourceBucketName}/${objectKey}`, + socket: {}, + }); + objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, (err, xml) => { + data.config.locationConstraints['us-east-1'].type = prevType; + dataGetSpy.restore(); + copyObjectSpy.restore(); + dataPutSpy.restore(); + assert.ifError(err); + assert(!dataGetSpy.called, 'data.get should NOT be called (no GET to hash on copy-to-self)'); + assert(!copyObjectSpy.called, 'data.copyObject should NOT be called'); + assert(!dataPutSpy.called, 'data.put should NOT be called'); + assert(!xml.includes(''), 'response XML should not carry a checksum'); + done(); + }); + }); + }); }); describe('objectCopy checksum recompute on 0-byte source', () => {