diff --git a/lib/api/apiUtils/integrity/validateChecksums.js b/lib/api/apiUtils/integrity/validateChecksums.js
index f14f6bd4f9..ee9f13b9c0 100644
--- a/lib/api/apiUtils/integrity/validateChecksums.js
+++ b/lib/api/apiUtils/integrity/validateChecksums.js
@@ -39,6 +39,13 @@ const errMPUTypeWithoutAlgo = errorInstances.InvalidRequest.customizeDescription
'The x-amz-checksum-type header can only be used ' + 'with the x-amz-checksum-algorithm header.',
);
+// TODO(S3C-11278): Update with 'MD5', 'SHA512', 'XXHASH128', 'XXHASH3', 'XXHASH64' when they are introduced.
+// https://scality.atlassian.net/browse/S3C-11278
+const errCopyChecksumAlgoNotSupported = errorInstances.InvalidRequest.customizeDescription(
+ 'Checksum algorithm provided is unsupported. Please try again with any of the valid types: ' +
+ '[CRC32, CRC32C, CRC64NVME, SHA1, SHA256]',
+);
+
const checksumedMethods = Object.freeze({
completeMultipartUpload: true,
multiObjectDelete: true,
@@ -81,6 +88,7 @@ const ChecksumError = Object.freeze({
MPUTypeInvalid: 'MPUTypeInvalid',
MPUTypeWithoutAlgo: 'MPUTypeWithoutAlgo',
MPUInvalidCombination: 'MPUInvalidCombination',
+ CopyChecksumAlgoNotSupported: 'CopyChecksumAlgoNotSupported',
});
const base64Regex = /^[A-Za-z0-9+/]*={0,2}$/;
@@ -401,6 +409,8 @@ function arsenalErrorFromChecksumError(err) {
`The ${err.details.type} checksum type cannot be used ` +
`with the ${err.details.algorithm.toUpperCase()} checksum algorithm.`,
);
+ case ChecksumError.CopyChecksumAlgoNotSupported:
+ return errCopyChecksumAlgoNotSupported;
default:
return ArsenalErrors.BadDigest;
}
@@ -585,6 +595,31 @@ function computeFullObjectMPUChecksum(algorithm, parts) {
return { checksum: combinePartCrcs(parts, params.polyReversed, params.dim), error: null };
}
+/**
+ * Read and validate the x-amz-checksum-algorithm header for CopyObject.
+ *
+ * @param {object} headers - request headers
+ * @returns {{ error: null, algorithm: string|null }
+ * | { error: { error: string, details: { algorithm: string } }, algorithm: null }}
+ */
+function getCopyObjectChecksumAlgorithm(headers) {
+ const requested = headers['x-amz-checksum-algorithm'];
+ if (requested === undefined) {
+ return { error: null, algorithm: null };
+ }
+ const algorithm = requested.toLowerCase();
+ if (!algorithms[algorithm]) {
+ return {
+ error: {
+ error: ChecksumError.CopyChecksumAlgoNotSupported,
+ details: { algorithm: requested },
+ },
+ algorithm: null,
+ };
+ }
+ return { error: null, algorithm };
+}
+
module.exports = {
ChecksumError,
defaultChecksumData,
@@ -597,4 +632,5 @@ module.exports = {
getChecksumDataFromMPUHeaders,
computeCompositeMPUChecksum,
computeFullObjectMPUChecksum,
+ getCopyObjectChecksumAlgorithm,
};
diff --git a/lib/api/objectCopy.js b/lib/api/objectCopy.js
index 02b0a31d5b..532b2b4f4e 100644
--- a/lib/api/objectCopy.js
+++ b/lib/api/objectCopy.js
@@ -1,6 +1,8 @@
const async = require('async');
+const { PassThrough } = require('stream');
-const { errors, errorInstances, versioning, s3middleware, s3routes } = require('arsenal');
+const { errors, errorInstances, jsutil, versioning, s3middleware, s3routes, storage } = require('arsenal');
+const { externalBackendCopy } = storage.data.external.backendUtils;
const { validateObjectKeyLength } = s3routes.routesUtils;
const getMetaHeaders = s3middleware.userMetadata.getMetaHeaders;
const validateHeaders = s3middleware.validateConditionalHeaders;
@@ -28,6 +30,10 @@ const { verifyColdObjectAvailable } = require('./apiUtils/object/coldStorage');
const { setSSEHeaders } = require('./apiUtils/object/sseHeaders');
const { updateEncryption } = require('./apiUtils/bucket/updateEncryption');
const { initializeInternalLogRequestQueue, queueInternalLogRequest } = require('../utilities/serverAccessLogger');
+const { algorithms, arsenalErrorFromChecksumError, getCopyObjectChecksumAlgorithm } =
+ require('./apiUtils/integrity/validateChecksums');
+const ChecksumTransform = require('../auth/streamingV4/ChecksumTransform');
+const kms = require('../kms/wrapper');
const versionIdUtils = versioning.VersionID;
const locationHeader = constants.objectLocationConstraintHeader;
@@ -35,6 +41,235 @@ const versioningNotImplBackends = constants.versioningNotImplBackends;
const externalVersioningErrorMessage = 'We do not currently support putting ' +
'a versioned object to a location-constraint of type AWS or Azure or GCP.';
+/**
+ * Compute the prior data locations that are orphaned.
+ *
+ * @param {Array} dataToDelete - prior data locations from versioningPreprocessing
+ * @param {Array} newDataGetInfo - new data locations stored in the new metadata
+ * @returns {Array|null} orphaned entries to batchDelete, or null if none
+ */
+function _orphanedDataLocations(dataToDelete, newDataGetInfo) {
+ // Identity is (dataStoreName, key): the same key under a different
+ // dataStoreName points to a different backend slot, so the old slot
+ // is an orphan even when the key string collides. dataStoreName is a
+ // location-constraint name (never contains '|'), so the delimiter is
+ // unambiguous.
+
+ // We need to normalize to support old legacy location names.
+ const normalize = loc => (typeof loc === 'string' ? { key: loc } : loc);
+ const locId = loc => {
+ const n = normalize(loc);
+ return `${n.dataStoreName}|${n.key}`;
+ };
+ const newIds = new Set((newDataGetInfo || []).map(locId));
+ const orphans = (dataToDelete || []).filter(loc => !newIds.has(locId(loc)));
+ return orphans.length > 0 ? orphans : null;
+}
+
+/**
+ * Concatenate the source object's parts into a single Readable stream by
+ * reading them sequentially through `data.get`. Returns the PassThrough
+ * immediately; consumers should pipe it to the next stage and observe
+ * `error` on this stream for any read failure along the way.
+ *
+ * @param {Array} dataLocator - ordered source parts
+ * @param {object} log - request logger
+ * @return {PassThrough}
+ */
+function _pipeSourcePartsThrough(dataLocator, log) {
+ const passthrough = new PassThrough();
+ async.eachSeries(dataLocator, (part, cb) => {
+ const done = jsutil.once(cb);
+ if (part.dataStoreType === 'azure') {
+ // Azure's data.get writes part bytes into the provided writable
+ // instead of returning a Readable. Pipe a per-part PassThrough
+ // into the master passthrough and use its 'end' as the completion
+ // signal — same pattern arsenal's data.copyObject uses.
+ const perPart = new PassThrough();
+ const wrapErr = err => Object.assign(err, {
+ copyPart: { key: part.key, dataStoreName: part.dataStoreName, dataStoreType: part.dataStoreType },
+ });
+ perPart.once('error', err => done(wrapErr(err)));
+ perPart.once('end', () => done());
+ perPart.pipe(passthrough, { end: false });
+ return data.get(part, perPart, log, err => {
+ if (err) {
+ perPart.destroy(err);
+ done(wrapErr(err));
+ }
+ });
+ }
+ return data.get(part, null, log, (err, partStream) => {
+ if (err) {
+ return done(err);
+ }
+ partStream.once('error', done);
+ partStream.once('end', () => done());
+ partStream.pipe(passthrough, { end: false });
+ return undefined;
+ });
+ }, err => {
+ if (err) {
+ passthrough.destroy(err);
+ } else {
+ passthrough.end();
+ }
+ });
+ return passthrough;
+}
+
+/**
+ * Decide whether the destination's checksum needs to be recomputed by
+ * streaming the source bytes through a ChecksumTransform.
+ *
+ * @param {object} headers - request headers
+ * @param {object} sourceObjMD - source object metadata
+ * @returns {boolean}
+ */
+function _shouldRecomputeChecksum(headers, sourceObjMD) {
+ const requestedAlgo = headers['x-amz-checksum-algorithm']?.toLowerCase();
+ if (sourceObjMD.checksum?.checksumType === 'FULL_OBJECT'
+ && (!requestedAlgo || requestedAlgo === sourceObjMD.checksum.checksumAlgorithm)) {
+ return false;
+ }
+ return true;
+}
+
+/**
+ * Pick the checksum algorithm to write on CopyObject. Defaults to CRC64NVME when the source object has no checksum.
+ *
+ * @param {string|null} requestedAlgo - validated request algorithm
+ * @param {object} sourceObjMD - source object metadata
+ * @returns {string}
+ */
+function _getCopyObjectChecksumAlgorithm(requestedAlgo, sourceObjMD) {
+ return requestedAlgo || sourceObjMD.checksum?.checksumAlgorithm || 'crc64nvme';
+}
+
+/**
+ * Recompute the destination's checksum by streaming the source bytes
+ * through a ChecksumTransform. When the bytes don't have to move
+ * (copy-to-self, same location, no SSE, not versioned) the source is GET'ed
+ * solely to compute the new digest; the bytes are then discarded and the
+ * existing data location is reused — only cloudserver's metadata is updated.
+ *
+ * @param {object} log - request logger
+ * @param {object} request - HTTP request
+ * @param {string|null} requestedAlgo - client-requested algorithm, lowercase
+ * @param {object} sourceObjMD - source object metadata
+ * @param {Array} dataLocator - ordered source parts
+ * @param {boolean} sourceIsDestination - source key == destination key
+ * @param {boolean} isVersionedObj - destination bucket has versioning enabled
+ * @param {boolean} needsEncryption - destination requires SSE
+ * @param {object} storeMetadataParams - mutable carrier for destination metadata
+ * @param {object} destObjMD - existing destination object metadata
+ * @param {object} destBucketMD - destination bucket metadata
+ * @param {object} serverSideEncryption - SSE config for the destination
+ * @param {object} dataStoreContext - context passed to data.put
+ * @param {object} backendInfoDest - backend info for the destination
+ * @param {function} next - waterfall callback
+ * @return {undefined}
+ */
+function _recomputeChecksumAndStore(log, request, requestedAlgo, sourceObjMD, dataLocator,
+ sourceIsDestination, isVersionedObj, needsEncryption,
+ storeMetadataParams, destObjMD, destBucketMD, serverSideEncryption,
+ dataStoreContext, backendInfoDest, next) {
+ const algoName = _getCopyObjectChecksumAlgorithm(requestedAlgo, sourceObjMD);
+
+ // Copy-to-self where the bytes don't have to move (same location, no SSE
+ // change, not versioned): GET the source through a ChecksumTransform to
+ // compute the new digest, discard the bytes, and reuse the existing data
+ // locations. No PUT, no DELETE — only the metadata gets updated.
+ if (sourceIsDestination && storeMetadataParams.locationMatch
+ && !needsEncryption && !isVersionedObj) {
+ log.debug('computing checksum on copy-to-self without rewriting data',
+ { algorithm: algoName, size: storeMetadataParams.size });
+ const sourceStream = _pipeSourcePartsThrough(dataLocator, log);
+ const checksumStream = new ChecksumTransform(algoName, undefined, false, log);
+ const finish = jsutil.once(err => {
+ if (err) {
+ sourceStream.destroy(err);
+ checksumStream.destroy(err);
+ return next(err, destBucketMD);
+ }
+ // eslint-disable-next-line no-param-reassign
+ storeMetadataParams.checksum = {
+ algorithm: algoName,
+ value: checksumStream.digest,
+ type: 'FULL_OBJECT',
+ };
+ return next(null, storeMetadataParams, dataLocator, destObjMD,
+ serverSideEncryption, destBucketMD);
+ });
+ sourceStream.once('error', finish);
+ checksumStream.once('error', finish);
+ checksumStream.on('data', () => {});
+ checksumStream.once('end', () => finish());
+ sourceStream.pipe(checksumStream);
+ return undefined;
+ }
+
+ // Stream source bytes through a ChecksumTransform and write them out as a single put.
+ log.debug('recomputing checksum on CopyObject', { algorithm: algoName, size: storeMetadataParams.size });
+ const sourceStream = _pipeSourcePartsThrough(dataLocator, log);
+ const checksumStream = new ChecksumTransform(algoName, undefined, false, log);
+ const done = jsutil.once((err, results) => {
+ if (err) {
+ sourceStream.destroy(err);
+ checksumStream.destroy(err);
+ if (request.sourceServerAccessLog) {
+ // eslint-disable-next-line no-param-reassign
+ request.sourceServerAccessLog.error = err;
+ }
+ return next(err, destBucketMD);
+ }
+ return next(null, storeMetadataParams, results, destObjMD, serverSideEncryption, destBucketMD);
+ });
+ sourceStream.once('error', done);
+ checksumStream.once('error', done);
+ sourceStream.pipe(checksumStream);
+ const doPut = cipherBundle => data.put(
+ cipherBundle, checksumStream, storeMetadataParams.size,
+ dataStoreContext, backendInfoDest, log,
+ (err, dataRetrievalInfo) => {
+ if (err) {
+ return done(err);
+ }
+ // eslint-disable-next-line no-param-reassign
+ storeMetadataParams.checksum = {
+ algorithm: algoName,
+ value: checksumStream.digest,
+ type: 'FULL_OBJECT',
+ };
+ const putResult = {
+ key: dataRetrievalInfo.key,
+ size: storeMetadataParams.size,
+ start: 0,
+ dataStoreName: dataRetrievalInfo.dataStoreName,
+ dataStoreType: dataRetrievalInfo.dataStoreType,
+ dataStoreETag: dataRetrievalInfo.dataStoreETag,
+ dataStoreVersionId:
+ dataRetrievalInfo.dataStoreVersionId,
+ };
+ if (cipherBundle) {
+ putResult.cryptoScheme = cipherBundle.cryptoScheme;
+ putResult.cipheredDataKey =
+ cipherBundle.cipheredDataKey;
+ }
+ return done(null, [putResult]);
+ });
+ if (serverSideEncryption?.algorithm) {
+ return kms.createCipherBundle(serverSideEncryption, log,
+ (err, cipherBundle) => {
+ if (err) {
+ return done(err);
+ }
+ return doPut(cipherBundle);
+ });
+ }
+ return doPut(null);
+}
+
/**
* Preps metadata to be saved (based on copy or replace request header)
* @param {object} request - request
@@ -188,6 +423,14 @@ function _prepMetadata(request, sourceObjMD, headers, sourceIsDestination,
storeMetadataParams.defaultRetention = defaultRetentionConfig;
}
+ if (sourceObjMD.checksum && !_shouldRecomputeChecksum(headers, sourceObjMD)) {
+ storeMetadataParams.checksum = {
+ algorithm: sourceObjMD.checksum.checksumAlgorithm,
+ value: sourceObjMD.checksum.checksumValue,
+ type: sourceObjMD.checksum.checksumType,
+ };
+ }
+
// In case whichMetadata === 'REPLACE' but contentType is undefined in copy
// request headers, make sure to keep the original header instead
if (!storeMetadataParams.contentType) {
@@ -278,6 +521,13 @@ function objectCopy(authInfo, request, sourceBucket,
'PUT', destBucketName, err.code, 'copyObject');
return callback(err);
}
+ const { error: checksumAlgoErr, algorithm: requestedAlgo } = getCopyObjectChecksumAlgorithm(request.headers);
+ if (checksumAlgoErr) {
+ const err = arsenalErrorFromChecksumError(checksumAlgoErr);
+ log.debug('invalid x-amz-checksum-algorithm', { error: checksumAlgoErr });
+ monitoring.promMetrics('PUT', destBucketName, err.code, 'copyObject');
+ return callback(err);
+ }
const queryContainsVersionId = checkQueryVersionId(request.query);
if (queryContainsVersionId instanceof Error) {
return callback(queryContainsVersionId);
@@ -417,12 +667,12 @@ function objectCopy(authInfo, request, sourceBucket,
return next(null, storeMetadataParams, dataLocator,
sourceBucketMD, destBucketMD, destObjMD,
- sourceLocationConstraintName, backendInfoDest);
+ sourceLocationConstraintName, backendInfoDest, sourceObjMD);
});
},
function getSSEConfiguration(storeMetadataParams, dataLocator, sourceBucketMD,
destBucketMD, destObjMD, sourceLocationConstraintName,
- backendInfoDest, next) {
+ backendInfoDest, sourceObjMD, next) {
getObjectSSEConfiguration(
request.headers,
destBucketMD,
@@ -430,22 +680,43 @@ function objectCopy(authInfo, request, sourceBucket,
(err, sseConfig) =>
next(err, storeMetadataParams, dataLocator, sourceBucketMD,
destBucketMD, destObjMD, sourceLocationConstraintName,
- backendInfoDest, sseConfig));
+ backendInfoDest, sseConfig, sourceObjMD));
},
function goGetData(storeMetadataParams, dataLocator, sourceBucketMD,
destBucketMD, destObjMD, sourceLocationConstraintName,
- backendInfoDest, serverSideEncryption, next) {
+ backendInfoDest, serverSideEncryption, sourceObjMD, next) {
const vcfg = destBucketMD.getVersioningConfiguration();
const isVersionedObj = vcfg && vcfg.Status === 'Enabled';
const destLocationConstraintName =
storeMetadataParams.dataStoreName;
const needsEncryption = serverSideEncryption && !!serverSideEncryption.algo;
+ const shouldRecomputeChecksum = _shouldRecomputeChecksum(request.headers, sourceObjMD);
+ let destLocationConstraintType;
+ if (config.backends.data === 'multiple') {
+ destLocationConstraintType = config.getLocationConstraintType(destLocationConstraintName);
+ }
+ // Skip recompute whenever the copy is a native server-side copy. The
+ // bytes never transit CloudServer, so we keep the native copy and
+ // write no checksum rather than forcing a GET + PUT just to hash.
+ const willUseNativeServerSideCopy = config.backends.data === 'multiple'
+ && !serverSideEncryption
+ && externalBackendCopy(config, sourceLocationConstraintName,
+ destLocationConstraintName, sourceBucketMD, destBucketMD);
+ const skipExternalBackendRecompute = shouldRecomputeChecksum
+ && willUseNativeServerSideCopy;
+ // Copy-to-self reuses the existing data location — the bytes never
+ // move — so on any external backend a recompute would be a GET
+ // purely to hash. Skip it for all external backends (incl. pfs/tlp,
+ // which have no native copy); local backends recompute in place.
+ const destIsExternalBackend = config.backends.data === 'multiple'
+ && !!constants.externalBackends[destLocationConstraintType];
// skip if source and dest and location constraint the same and
- // versioning is not enabled
- // still send along serverSideEncryption info so algo
- // and masterKeyId stored properly in metadata
+ // versioning is not enabled, unless we need to recompute a
+ // checksum (which requires streaming the bytes through us).
if (sourceIsDestination && storeMetadataParams.locationMatch
- && !isVersionedObj && !needsEncryption) {
+ && !isVersionedObj && !needsEncryption
+ && (!shouldRecomputeChecksum || skipExternalBackendRecompute
+ || destIsExternalBackend)) {
return next(null, storeMetadataParams, dataLocator, destObjMD,
serverSideEncryption, destBucketMD);
}
@@ -453,11 +724,6 @@ function objectCopy(authInfo, request, sourceBucket,
// also skip if 0 byte object, unless location constraint is an
// external backend and differs from source, in which case put
// metadata to backend
- let destLocationConstraintType;
- if (config.backends.data === 'multiple') {
- destLocationConstraintType =
- config.getLocationConstraintType(destLocationConstraintName);
- }
if (destLocationConstraintType &&
versioningNotImplBackends[destLocationConstraintType]
&& isVersionedObj) {
@@ -468,30 +734,57 @@ function objectCopy(authInfo, request, sourceBucket,
externalVersioningErrorMessage), destBucketMD);
}
if (dataLocator.length === 0) {
- if (!storeMetadataParams.locationMatch &&
- destLocationConstraintType &&
- constants.externalBackends[destLocationConstraintType]) {
- return data.put(null, null, storeMetadataParams.size,
- dataStoreContext, backendInfoDest,
- log, (error, objectRetrievalInfo) => {
- if (error) {
- return next(error, destBucketMD);
- }
- const putResult = {
- key: objectRetrievalInfo.key,
- dataStoreName: objectRetrievalInfo.
- dataStoreName,
- dataStoreType: objectRetrievalInfo.
- dataStoreType,
- size: storeMetadataParams.size,
+ const finishZeroByte = () => {
+ if (!storeMetadataParams.locationMatch && destLocationConstraintType
+ && constants.externalBackends[destLocationConstraintType]) {
+ return data.put(null, null, storeMetadataParams.size,
+ dataStoreContext, backendInfoDest,
+ log, (error, objectRetrievalInfo) => {
+ if (error) {
+ return next(error, destBucketMD);
+ }
+ const putResult = {
+ key: objectRetrievalInfo.key,
+ dataStoreName: objectRetrievalInfo.dataStoreName,
+ dataStoreType: objectRetrievalInfo.dataStoreType,
+ size: storeMetadataParams.size,
+ };
+ return next(null, storeMetadataParams, [putResult],
+ destObjMD, serverSideEncryption, destBucketMD);
+ });
+ }
+ return next(null, storeMetadataParams, dataLocator, destObjMD,
+ serverSideEncryption, destBucketMD);
+ };
+ if (shouldRecomputeChecksum) {
+ // No bytes to stream, but AWS still writes the empty-bytes digest of the chosen algorithm.
+ const algoName = _getCopyObjectChecksumAlgorithm(requestedAlgo, sourceObjMD);
+ return Promise.resolve(algorithms[algoName].digest(Buffer.alloc(0)))
+ .then(digest => {
+ // eslint-disable-next-line no-param-reassign
+ storeMetadataParams.checksum = {
+ algorithm: algoName,
+ value: digest,
+ type: 'FULL_OBJECT',
};
- const putResultArr = [putResult];
- return next(null, storeMetadataParams, putResultArr,
- destObjMD, serverSideEncryption, destBucketMD);
+ return finishZeroByte();
+ }, err => {
+ log.error('failed to compute empty checksum digest',
+ { algorithm: algoName, error: err });
+ return next(errors.InternalError, destBucketMD);
});
}
- return next(null, storeMetadataParams, dataLocator, destObjMD,
- serverSideEncryption, destBucketMD);
+ return finishZeroByte();
+ }
+ if (shouldRecomputeChecksum && skipExternalBackendRecompute) {
+ log.debug('skipping checksum recompute on same external backend',
+ { source: sourceLocationConstraintName, dest: destLocationConstraintName });
+ }
+ if (shouldRecomputeChecksum && !skipExternalBackendRecompute) {
+ return _recomputeChecksumAndStore(log, request, requestedAlgo, sourceObjMD, dataLocator,
+ sourceIsDestination, isVersionedObj, needsEncryption,
+ storeMetadataParams, destObjMD, destBucketMD, serverSideEncryption,
+ dataStoreContext, backendInfoDest, next);
}
const originalIdentityImpDenies = request.actionImplicitDenies;
// eslint-disable-next-line no-param-reassign
@@ -547,7 +840,8 @@ function objectCopy(authInfo, request, sourceBucket,
if (options.extraMD) {
Object.assign(storeMetadataParams, options.extraMD);
}
- const dataToDelete = options.dataToDelete;
+ const dataToDelete = _orphanedDataLocations(
+ options.dataToDelete, destDataGetInfoArr);
return next(null, storeMetadataParams, destDataGetInfoArr,
destObjMD, serverSideEncryption, destBucketMD,
dataToDelete);
@@ -589,11 +883,7 @@ function objectCopy(authInfo, request, sourceBucket,
function deleteExistingData(dataToDelete, storingNewMdResult,
destBucketMD, storeMetadataParams, serverSideEncryption,
sourceObjSize, destObjPrevSize, next) {
- // Clean up any potential orphans in data if object
- // put is an overwrite of already existing
- // object with same name, so long as the source is not
- // the same as the destination
- if (!sourceIsDestination && dataToDelete) {
+ if (dataToDelete) {
const newDataStoreName = storeMetadataParams.dataStoreName;
return data.batchDelete(dataToDelete, request.method,
newDataStoreName, log, err => {
@@ -639,12 +929,23 @@ function objectCopy(authInfo, request, sourceBucket,
'PUT', destBucketName, err.code, 'copyObject');
return callback(err, null, corsHeaders);
}
+ let checksumXml = '';
+ const checksum = storeMetadataParams?.checksum;
+ const checksumAlgo = checksum && algorithms[checksum.algorithm];
+ if (checksum && checksumAlgo) {
+ checksumXml =
+ `<${checksumAlgo.xmlTag}>${checksum.value}${checksumAlgo.xmlTag}>` +
+ `${checksum.type}`;
+ } else if (checksum) {
+ log.error('unknown checksum algorithm in source object metadata', { algorithm: checksum.algorithm });
+ }
const xml = [
'',
'',
'', new Date(storeMetadataParams.lastModifiedDate)
.toISOString(), '',
'"', storeMetadataParams.contentMD5, '"',
+ checksumXml,
'',
].join('');
const additionalHeaders = corsHeaders || {};
@@ -688,3 +989,5 @@ function objectCopy(authInfo, request, sourceBucket,
}
module.exports = objectCopy;
+// Exposed for unit testing only.
+module.exports._orphanedDataLocations = _orphanedDataLocations;
diff --git a/tests/functional/aws-node-sdk/test/object/objectCopy.js b/tests/functional/aws-node-sdk/test/object/objectCopy.js
index c7b6ceedcc..871ea39015 100644
--- a/tests/functional/aws-node-sdk/test/object/objectCopy.js
+++ b/tests/functional/aws-node-sdk/test/object/objectCopy.js
@@ -18,6 +18,8 @@ const { taggingTests } = require('../../lib/utility/tagging');
const genMaxSizeMetaHeaders
= require('../../lib/utility/genMaxSizeMetaHeaders');
const constants = require('../../../../../constants');
+const { algorithms } =
+ require('../../../../../lib/api/apiUtils/integrity/validateChecksums');
const sourceBucketName = 'supersourcebucket8102016';
const sourceObjName = 'supersourceobject';
@@ -1535,3 +1537,167 @@ describe('Object Copy with object lock enabled on both destination ' +
});
});
+
+describe('Object Copy checksum behavior', () => {
+ withV4(sigCfg => {
+ let bucketUtil;
+ let s3;
+
+ before(async () => {
+ try {
+ bucketUtil = new BucketUtility('default', sigCfg);
+ s3 = bucketUtil.s3;
+ await bucketUtil.empty(sourceBucketName);
+ await bucketUtil.empty(destBucketName);
+ await bucketUtil.deleteMany([sourceBucketName, destBucketName]);
+ } catch (err) {
+ if (err.name !== 'NoSuchBucket') {
+ throw err;
+ }
+ }
+ await bucketUtil.createOne(sourceBucketName);
+ await bucketUtil.createOne(destBucketName);
+ });
+
+ afterEach(async () => {
+ await bucketUtil.empty(sourceBucketName, true);
+ await bucketUtil.empty(destBucketName, true);
+ });
+
+ after(async () => bucketUtil.deleteMany([sourceBucketName, destBucketName]));
+
+ const checksumFixtures = [
+ { algo: 'crc32', header: 'CRC32', key: 'ChecksumCRC32' },
+ { algo: 'crc32c', header: 'CRC32C', key: 'ChecksumCRC32C' },
+ { algo: 'crc64nvme', header: 'CRC64NVME', key: 'ChecksumCRC64NVME' },
+ { algo: 'sha1', header: 'SHA1', key: 'ChecksumSHA1' },
+ { algo: 'sha256', header: 'SHA256', key: 'ChecksumSHA256' },
+ ];
+
+ const propagateBody = 'checksum-propagate-body';
+ const recomputeBody = 'recompute-different-algo-body';
+
+ checksumFixtures.forEach(({ algo, header, key }) => {
+ it(`should propagate a FULL_OBJECT ${algo} checksum on CopyObject`, async () => {
+ const putRes = await s3.send(new PutObjectCommand({
+ Bucket: sourceBucketName,
+ Key: sourceObjName,
+ Body: propagateBody,
+ ChecksumAlgorithm: header,
+ }));
+ const sourceValue = putRes[key];
+ assert(sourceValue, `PutObject response should carry ${key}`);
+
+ const copyRes = await s3.send(new CopyObjectCommand({
+ Bucket: destBucketName,
+ Key: destObjName,
+ CopySource: `${sourceBucketName}/${sourceObjName}`,
+ }));
+ assert.strictEqual(copyRes.CopyObjectResult[key], sourceValue);
+ assert.strictEqual(copyRes.CopyObjectResult.ChecksumType, 'FULL_OBJECT');
+
+ const headRes = await s3.send(new HeadObjectCommand({
+ Bucket: destBucketName,
+ Key: destObjName,
+ ChecksumMode: 'ENABLED',
+ }));
+ assert.strictEqual(headRes[key], sourceValue);
+ assert.strictEqual(headRes.ChecksumType, 'FULL_OBJECT');
+ });
+ });
+
+ // Recompute path: every (source algo, requested algo) pair where the
+ // two differ should produce the requested algo's digest of the body.
+ checksumFixtures.forEach(({ algo: sourceAlgo, header: sourceHeader }) => {
+ checksumFixtures.forEach(({ algo, header, key }) => {
+ if (sourceAlgo === algo) {
+ return; // same-algo is the propagation case, covered above
+ }
+ it(`should recompute ${algo} when source is ${sourceAlgo} and ${header} requested`, async () => {
+ await s3.send(new PutObjectCommand({
+ Bucket: sourceBucketName,
+ Key: sourceObjName,
+ Body: recomputeBody,
+ ChecksumAlgorithm: sourceHeader,
+ }));
+ const expectedDigest = await Promise.resolve(
+ algorithms[algo].digest(Buffer.from(recomputeBody)));
+
+ const copyRes = await s3.send(new CopyObjectCommand({
+ Bucket: destBucketName,
+ Key: destObjName,
+ CopySource: `${sourceBucketName}/${sourceObjName}`,
+ ChecksumAlgorithm: header,
+ }));
+ assert.strictEqual(copyRes.CopyObjectResult[key], expectedDigest);
+ assert.strictEqual(copyRes.CopyObjectResult.ChecksumType, 'FULL_OBJECT');
+
+ const headRes = await s3.send(new HeadObjectCommand({
+ Bucket: destBucketName,
+ Key: destObjName,
+ ChecksumMode: 'ENABLED',
+ }));
+ assert.strictEqual(headRes[key], expectedDigest);
+ assert.strictEqual(headRes.ChecksumType, 'FULL_OBJECT');
+ });
+ });
+ });
+
+ // 0-byte source: each requested algo produces the empty-bytes digest.
+ // Source algo doesn't matter (empty body has the same content
+ // regardless), so we pivot on CRC32 ↔ SHA256 to force recompute.
+ checksumFixtures.forEach(({ algo, header, key }) => {
+ const sourceHeader = header === 'CRC32' ? 'SHA256' : 'CRC32';
+ it(`should compute empty-bytes ${algo} digest on a 0-byte source recompute`, async () => {
+ await s3.send(new PutObjectCommand({
+ Bucket: sourceBucketName,
+ Key: sourceObjName,
+ Body: '',
+ ChecksumAlgorithm: sourceHeader,
+ }));
+ const expectedDigest = await Promise.resolve(
+ algorithms[algo].digest(Buffer.alloc(0)));
+
+ const copyRes = await s3.send(new CopyObjectCommand({
+ Bucket: destBucketName,
+ Key: destObjName,
+ CopySource: `${sourceBucketName}/${sourceObjName}`,
+ ChecksumAlgorithm: header,
+ }));
+ assert.strictEqual(copyRes.CopyObjectResult[key], expectedDigest);
+ assert.strictEqual(copyRes.CopyObjectResult.ChecksumType, 'FULL_OBJECT');
+
+ const headRes = await s3.send(new HeadObjectCommand({
+ Bucket: destBucketName,
+ Key: destObjName,
+ ChecksumMode: 'ENABLED',
+ }));
+ assert.strictEqual(headRes[key], expectedDigest);
+ assert.strictEqual(headRes.ChecksumType, 'FULL_OBJECT');
+ });
+ });
+
+ it('should reject an unknown ChecksumAlgorithm with InvalidRequest', async () => {
+ await s3.send(new PutObjectCommand({
+ Bucket: sourceBucketName,
+ Key: sourceObjName,
+ Body: 'whatever',
+ }));
+ try {
+ await s3.send(new CopyObjectCommand({
+ Bucket: destBucketName,
+ Key: destObjName,
+ CopySource: `${sourceBucketName}/${sourceObjName}`,
+ ChecksumAlgorithm: 'GARBAGE',
+ }));
+ throw new Error('Expected 400 InvalidRequest');
+ } catch (err) {
+ checkError(err, 'InvalidRequest', 400);
+ const expected = 'Checksum algorithm provided is unsupported. '
+ + 'Please try again with any of the valid types: '
+ + '[CRC32, CRC32C, CRC64NVME, SHA1, SHA256]';
+ assert.strictEqual(err.message, expected);
+ }
+ });
+ });
+});
diff --git a/tests/unit/api/apiUtils/integrity/validateChecksums.js b/tests/unit/api/apiUtils/integrity/validateChecksums.js
index 01bebac4a5..ce773ffcce 100644
--- a/tests/unit/api/apiUtils/integrity/validateChecksums.js
+++ b/tests/unit/api/apiUtils/integrity/validateChecksums.js
@@ -10,6 +10,7 @@ const {
getChecksumDataFromHeaders,
arsenalErrorFromChecksumError,
getChecksumDataFromMPUHeaders,
+ getCopyObjectChecksumAlgorithm,
} = require('../../../../../lib/api/apiUtils/integrity/validateChecksums');
const { errors: ArsenalErrors } = require('arsenal');
const { config } = require('../../../../../lib/Config');
@@ -930,3 +931,89 @@ describe('getChecksumDataFromMPUHeaders', () => {
}
});
});
+
+describe('getCopyObjectChecksumAlgorithm', () => {
+ it('should return algorithm null and no error when the header is absent', () => {
+ const result = getCopyObjectChecksumAlgorithm({});
+ assert.strictEqual(result.error, null);
+ assert.strictEqual(result.algorithm, null);
+ });
+
+ it('should ignore unrelated headers', () => {
+ const result = getCopyObjectChecksumAlgorithm({
+ 'content-type': 'application/octet-stream',
+ host: 'example.com',
+ });
+ assert.strictEqual(result.error, null);
+ assert.strictEqual(result.algorithm, null);
+ });
+
+ const validAlgorithms = [
+ ['CRC32', 'crc32'],
+ ['CRC32C', 'crc32c'],
+ ['CRC64NVME', 'crc64nvme'],
+ ['SHA1', 'sha1'],
+ ['SHA256', 'sha256'],
+ ];
+
+ for (const [header, normalized] of validAlgorithms) {
+ it(`should return algorithm '${normalized}' for header '${header}'`, () => {
+ const result = getCopyObjectChecksumAlgorithm({
+ 'x-amz-checksum-algorithm': header,
+ });
+ assert.strictEqual(result.error, null);
+ assert.strictEqual(result.algorithm, normalized);
+ });
+ }
+
+ it('should lowercase mixed-case input', () => {
+ const result = getCopyObjectChecksumAlgorithm({
+ 'x-amz-checksum-algorithm': 'Sha256',
+ });
+ assert.strictEqual(result.error, null);
+ assert.strictEqual(result.algorithm, 'sha256');
+ });
+
+ it('should accept algorithms already in lowercase', () => {
+ const result = getCopyObjectChecksumAlgorithm({
+ 'x-amz-checksum-algorithm': 'crc32',
+ });
+ assert.strictEqual(result.error, null);
+ assert.strictEqual(result.algorithm, 'crc32');
+ });
+
+ it('should return CopyChecksumAlgoNotSupported for an unknown algorithm', () => {
+ const result = getCopyObjectChecksumAlgorithm({
+ 'x-amz-checksum-algorithm': 'GARBAGE',
+ });
+ assert.strictEqual(result.algorithm, null);
+ assert(result.error);
+ assert.strictEqual(result.error.error,
+ ChecksumError.CopyChecksumAlgoNotSupported);
+ assert.strictEqual(result.error.details.algorithm, 'GARBAGE');
+ });
+
+ it('should reject algorithms AWS may know about but cloudserver does not support', () => {
+ // AWS error message lists MD5/SHA512/XXHASH* as known names; we only
+ // accept the five FULL_OBJECT-capable algorithms.
+ for (const algo of ['MD5', 'SHA512', 'XXHASH128', 'XXHASH3', 'XXHASH64']) {
+ const result = getCopyObjectChecksumAlgorithm({
+ 'x-amz-checksum-algorithm': algo,
+ });
+ assert.strictEqual(result.algorithm, null,
+ `expected null algorithm for ${algo}`);
+ assert.strictEqual(result.error.error,
+ ChecksumError.CopyChecksumAlgoNotSupported);
+ }
+ });
+
+ it('should produce an arsenal InvalidRequest error via the standard mapping', () => {
+ const result = getCopyObjectChecksumAlgorithm({
+ 'x-amz-checksum-algorithm': 'GARBAGE',
+ });
+ const err = arsenalErrorFromChecksumError(result.error);
+ assert.strictEqual(err.is.InvalidRequest, true);
+ assert(err.description.includes('Checksum algorithm provided is unsupported'),
+ 'expected AWS-shaped error description');
+ });
+});
diff --git a/tests/unit/api/objectCopy.js b/tests/unit/api/objectCopy.js
index 2448f3276f..99ce0954ad 100644
--- a/tests/unit/api/objectCopy.js
+++ b/tests/unit/api/objectCopy.js
@@ -1,5 +1,6 @@
const assert = require('assert');
const async = require('async');
+const { Readable } = require('stream');
const { storage, versioning } = require('arsenal');
const sinon = require('sinon');
@@ -14,7 +15,10 @@ const { cleanup, DummyRequestLogger, makeAuthInfo, versioningTestUtils }
const mpuUtils = require('../utils/mpuUtils');
const metadata = require('../metadataswitch');
const { data } = require('../../../lib/data/wrapper');
+const kms = require('../../../lib/kms/wrapper');
const { objectLocationConstraintHeader } = require('../../../constants');
+const { algorithms } =
+ require('../../../lib/api/apiUtils/integrity/validateChecksums');
const { fakeMetadataArchive } = require('../../functional/aws-node-sdk/test/utils/init');
const { config } = require('../../../lib/Config');
@@ -640,3 +644,929 @@ describe('objectCopy with objectKeyByteLimit', () => {
});
});
});
+
+// Set or clear the source object's stored checksum directly in metadata.
+function setSourceChecksum(checksum, cb) {
+ metadata.getObjectMD(sourceBucketName, objectKey, {}, log, (err, md) => {
+ if (err) {
+ return cb(err);
+ }
+ if (checksum) {
+ // eslint-disable-next-line no-param-reassign
+ md.checksum = checksum;
+ } else {
+ // eslint-disable-next-line no-param-reassign
+ delete md.checksum;
+ }
+ return metadata.putObjectMD(sourceBucketName, objectKey, md, {}, log, cb);
+ });
+}
+
+// Truncate the source object to 0 bytes by clearing its data location and
+// content-length. Lets a single beforeEach setup serve both non-empty and
+// empty-source tests without rebuilding the bucket each time.
+function setSourceEmptyBody(cb) {
+ metadata.getObjectMD(sourceBucketName, objectKey, {}, log, (err, md) => {
+ if (err) {
+ return cb(err);
+ }
+ // eslint-disable-next-line no-param-reassign
+ md.location = null;
+ // eslint-disable-next-line no-param-reassign
+ md['content-length'] = 0;
+ return metadata.putObjectMD(sourceBucketName, objectKey, md, {}, log, cb);
+ });
+}
+
+// Builds the objectCopy callback that asserts a successful FULL_OBJECT recompute:
+// response XML and destination metadata both carry the expected algo and digest.
+function assertRecomputed(algo, xmlTag, expectedDigest, done) {
+ return (err, xml) => {
+ assert.ifError(err);
+ assert(xml.includes(`<${xmlTag}>${expectedDigest}${xmlTag}>`),
+ `XML should contain ${xmlTag}=${expectedDigest}`);
+ assert(xml.includes('FULL_OBJECT'),
+ 'XML should contain ChecksumType FULL_OBJECT');
+ metadata.getObjectMD(destBucketName, objectKey, {}, log, (err, md) => {
+ assert.ifError(err);
+ assert(md.checksum, 'destination should have a checksum');
+ assert.strictEqual(md.checksum.checksumAlgorithm, algo);
+ assert.strictEqual(md.checksum.checksumValue, expectedDigest);
+ assert.strictEqual(md.checksum.checksumType, 'FULL_OBJECT');
+ done();
+ });
+ };
+}
+
+describe('objectCopy checksum propagation', () => {
+ beforeEach(done => {
+ cleanup();
+ async.series([
+ next => bucketPut(authInfo, putDestBucketRequest, log, next),
+ next => bucketPut(authInfo, putSourceBucketRequest, log, next),
+ next => objectPut(authInfo, versioningTestUtils.createPutObjectRequest(
+ sourceBucketName, objectKey, objData[0]), undefined, log, next),
+ ], done);
+ });
+
+ afterEach(() => cleanup());
+
+ const algorithmFixtures = [
+ { algo: 'crc32', header: 'CRC32', xmlTag: 'ChecksumCRC32', value: 'AAAAAA==' },
+ { algo: 'crc32c', header: 'CRC32C', xmlTag: 'ChecksumCRC32C', value: 'AAAAAA==' },
+ { algo: 'crc64nvme', header: 'CRC64NVME', xmlTag: 'ChecksumCRC64NVME', value: 'AAAAAAAAAAA=' },
+ { algo: 'sha1', header: 'SHA1', xmlTag: 'ChecksumSHA1', value: 'AAAAAAAAAAAAAAAAAAAAAAAAAAA=' },
+ {
+ algo: 'sha256', header: 'SHA256', xmlTag: 'ChecksumSHA256',
+ value: 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA='
+ },
+ ];
+
+ algorithmFixtures.forEach(({ algo, header, xmlTag, value }) => {
+ const sourceChecksum = {
+ checksumAlgorithm: algo,
+ checksumValue: value,
+ checksumType: 'FULL_OBJECT',
+ };
+
+ function assertPropagated(xml, cb) {
+ assert(xml.includes(`<${xmlTag}>${value}${xmlTag}>`), `XML should contain ${xmlTag}`);
+ assert(xml.includes('FULL_OBJECT'), 'XML should contain ChecksumType');
+ metadata.getObjectMD(destBucketName, objectKey, {}, log,
+ (err, md) => {
+ assert.ifError(err);
+ assert(md.checksum, 'destination should have a checksum');
+ assert.strictEqual(md.checksum.checksumAlgorithm, algo);
+ assert.strictEqual(md.checksum.checksumValue, value);
+ assert.strictEqual(md.checksum.checksumType, 'FULL_OBJECT');
+ cb();
+ });
+ }
+
+ it(`should propagate a FULL_OBJECT ${algo} checksum from source to destination`,
+ done => {
+ setSourceChecksum(sourceChecksum, err => {
+ assert.ifError(err);
+ const req = _createObjectCopyRequest(destBucketName);
+ objectCopy(authInfo, req, sourceBucketName, objectKey,
+ undefined, log, (err, xml) => {
+ assert.ifError(err);
+ assertPropagated(xml, done);
+ });
+ });
+ });
+
+ it(`should propagate when x-amz-checksum-algorithm matches source ${algo} algorithm`,
+ done => {
+ setSourceChecksum(sourceChecksum, err => {
+ assert.ifError(err);
+ const req = _createObjectCopyRequest(destBucketName, {
+ 'x-amz-checksum-algorithm': header,
+ });
+ objectCopy(authInfo, req, sourceBucketName, objectKey,
+ undefined, log, (err, xml) => {
+ assert.ifError(err);
+ assertPropagated(xml, done);
+ });
+ });
+ });
+ });
+
+ it('should default to CRC64NVME when source has no checksum and no algorithm is requested', done => {
+ setSourceChecksum(null, err => {
+ if (err) {
+ return done(err);
+ }
+ return Promise.resolve(algorithms.crc64nvme.digest(objData[0])).then(expectedDigest => {
+ const req = _createObjectCopyRequest(destBucketName);
+ objectCopy(authInfo, req, sourceBucketName, objectKey,
+ undefined, log,
+ assertRecomputed('crc64nvme', 'ChecksumCRC64NVME', expectedDigest, done));
+ }, done);
+ });
+ });
+});
+
+describe('objectCopy checksum recompute', () => {
+ beforeEach(done => {
+ cleanup();
+ async.series([
+ next => bucketPut(authInfo, putDestBucketRequest, log, next),
+ next => bucketPut(authInfo, putSourceBucketRequest, log, next),
+ next => objectPut(authInfo, versioningTestUtils.createPutObjectRequest(
+ sourceBucketName, objectKey, objData[0]), undefined, log, next),
+ ], done);
+ });
+
+ afterEach(() => cleanup());
+
+ const recomputeFixtures = [
+ { algo: 'crc32', header: 'CRC32', xmlTag: 'ChecksumCRC32' },
+ { algo: 'crc32c', header: 'CRC32C', xmlTag: 'ChecksumCRC32C' },
+ { algo: 'crc64nvme', header: 'CRC64NVME', xmlTag: 'ChecksumCRC64NVME' },
+ { algo: 'sha1', header: 'SHA1', xmlTag: 'ChecksumSHA1' },
+ { algo: 'sha256', header: 'SHA256', xmlTag: 'ChecksumSHA256' },
+ ];
+
+ recomputeFixtures.forEach(({ algo, header, xmlTag }) => {
+ it(`should recompute ${algo} when x-amz-checksum-algorithm differs from source`, done => {
+ // Seed source with a different algorithm so the request forces a recompute.
+ // crc32 ↔ sha256 swap covers both pivots.
+ const sourceAlgo = algo === 'crc32' ? 'sha256' : 'crc32';
+ const sourceValue = sourceAlgo === 'crc32'
+ ? 'AAAAAA=='
+ : '47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=';
+ setSourceChecksum({
+ checksumAlgorithm: sourceAlgo,
+ checksumValue: sourceValue,
+ checksumType: 'FULL_OBJECT',
+ }, err => {
+ if (err) {
+ return done(err);
+ }
+ return Promise.resolve(algorithms[algo].digest(objData[0])).then(expectedDigest => {
+ const req = _createObjectCopyRequest(destBucketName, {
+ 'x-amz-checksum-algorithm': header,
+ });
+ objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log,
+ assertRecomputed(algo, xmlTag, expectedDigest, done));
+ }, done);
+ });
+ });
+
+ // CRC64NVME cannot be used as COMPOSITE (AWS rejects it at MPU init); only test
+ // the COMPOSITE-source path for the four other algorithms.
+ if (algo !== 'crc64nvme') {
+ it(`should recompute ${algo} when source is COMPOSITE and no algorithm requested`, done => {
+ setSourceChecksum({
+ checksumAlgorithm: algo,
+ // valid-shape placeholder; the test does not depend on the source value
+ // matching the body — only on the destination digest being recomputed.
+ checksumValue: 'BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBA=',
+ checksumType: 'COMPOSITE',
+ }, err => {
+ if (err) {
+ return done(err);
+ }
+ return Promise.resolve(algorithms[algo].digest(objData[0])).then(expectedDigest => {
+ const req = _createObjectCopyRequest(destBucketName);
+ objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log,
+ assertRecomputed(algo, xmlTag, expectedDigest, done));
+ }, done);
+ });
+ });
+
+ it(`should recompute ${algo} when source is COMPOSITE and different algorithm requested`, done => {
+ // Force a recompute by both source-type (COMPOSITE) and algo mismatch.
+ // Seed source with sha256 COMPOSITE so the requested algo always differs.
+ const sourceAlgo = algo === 'sha256' ? 'sha1' : 'sha256';
+ const sourceValue = sourceAlgo === 'sha1'
+ ? 'AAAAAAAAAAAAAAAAAAAAAAAAAAA='
+ : '47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=';
+ setSourceChecksum({
+ checksumAlgorithm: sourceAlgo,
+ checksumValue: sourceValue,
+ checksumType: 'COMPOSITE',
+ }, err => {
+ if (err) {
+ return done(err);
+ }
+ return Promise.resolve(algorithms[algo].digest(objData[0])).then(expectedDigest => {
+ const req = _createObjectCopyRequest(destBucketName, {
+ 'x-amz-checksum-algorithm': header,
+ });
+ objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log,
+ assertRecomputed(algo, xmlTag, expectedDigest, done));
+ }, done);
+ });
+ });
+ }
+
+ it(`should compute ${algo} when source has no checksum and algorithm is requested`, done => {
+ setSourceChecksum(null, err => {
+ if (err) {
+ return done(err);
+ }
+ return Promise.resolve(algorithms[algo].digest(objData[0])).then(expectedDigest => {
+ const req = _createObjectCopyRequest(destBucketName, {
+ 'x-amz-checksum-algorithm': header,
+ });
+ objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log,
+ assertRecomputed(algo, xmlTag, expectedDigest, done));
+ }, done);
+ });
+ });
+ });
+
+ it('should recompute the checksum across all parts of a multi-part source', done => {
+ const partA = Buffer.from('part-a-bytes', 'utf8');
+ const partB = Buffer.from('part-b-bytes-longer', 'utf8');
+ const fullBody = Buffer.concat([partA, partB]);
+ const parts = { 'mpart-a': partA, 'mpart-b': partB };
+ const dataGetStub = sinon.stub(data, 'get').callsFake((info, _response, _l, cb) => {
+ const buf = parts[info.key];
+ return cb(null, Readable.from(buf));
+ });
+ metadata.getObjectMD(sourceBucketName, objectKey, {}, log, (err, md) => {
+ assert.ifError(err);
+ // eslint-disable-next-line no-param-reassign
+ md.location = [
+ { key: 'mpart-a', dataStoreName: 'us-east-1', size: partA.length, start: 0 },
+ { key: 'mpart-b', dataStoreName: 'us-east-1', size: partB.length, start: partA.length },
+ ];
+ // eslint-disable-next-line no-param-reassign
+ md['content-length'] = fullBody.length;
+ metadata.putObjectMD(sourceBucketName, objectKey, md, {}, log, err => {
+ assert.ifError(err);
+ Promise.resolve(algorithms.sha256.digest(fullBody)).then(expectedDigest => {
+ const req = _createObjectCopyRequest(destBucketName, {
+ 'x-amz-checksum-algorithm': 'SHA256',
+ });
+ objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log,
+ (err, xml) => {
+ dataGetStub.restore();
+ assertRecomputed('sha256', 'ChecksumSHA256', expectedDigest, done)(err, xml);
+ });
+ }, err => {
+ dataGetStub.restore();
+ done(err);
+ });
+ });
+ });
+ });
+
+ it('should recompute the checksum and encrypt the destination when SSE is requested', done => {
+ // Recompute path must create a cipher bundle via kms when SSE is configured.
+ // Confirm the cipher bundle is forwarded to data.put and the checksum is
+ // still over the cleartext bytes.
+ const cipherBundleSpy = sinon.spy(kms, 'createCipherBundle');
+ const dataPutSpy = sinon.spy(data, 'put');
+ Promise.resolve(algorithms.sha256.digest(objData[0])).then(expectedDigest => {
+ const req = _createObjectCopyRequest(destBucketName, {
+ 'x-amz-checksum-algorithm': 'SHA256',
+ 'x-amz-server-side-encryption': 'AES256',
+ });
+ objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log,
+ assertRecomputed('sha256', 'ChecksumSHA256', expectedDigest, err => {
+ cipherBundleSpy.restore();
+ dataPutSpy.restore();
+ if (err) {
+ return done(err);
+ }
+ try {
+ assert(cipherBundleSpy.calledOnce,
+ 'kms.createCipherBundle should be called once');
+ const sseConfig = cipherBundleSpy.firstCall.args[0];
+ assert.strictEqual(sseConfig.algorithm, 'AES256');
+ // The data.put call that consumed the checksum stream
+ // (size > 0) should have received a non-null cipherBundle.
+ const recomputePut = dataPutSpy.getCalls()
+ .find(call => call.args[1] !== null);
+ assert(recomputePut,
+ 'expected at least one data.put with a real stream');
+ assert(recomputePut.args[0],
+ 'cipherBundle should be non-null for SSE recompute');
+ return done();
+ } catch (e) {
+ return done(e);
+ }
+ }));
+ }, done);
+ });
+
+ it('should reject an unknown x-amz-checksum-algorithm value with InvalidRequest', done => {
+ const req = _createObjectCopyRequest(destBucketName, {
+ 'x-amz-checksum-algorithm': 'GARBAGE',
+ });
+ objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, err => {
+ assert(err);
+ assert.strictEqual(err.is.InvalidRequest, true);
+ done();
+ });
+ });
+ it('should recompute checksum on an Azure source via the per-part PassThrough path', done => {
+ // The mem backend's data.get returns a Readable; Azure backend's data.get
+ // instead writes part bytes into the response writable. Stub data.get for
+ // Azure-flagged parts so the test exercises the Azure branch end-to-end
+ // without a real Azure backend.
+ const originalGet = data.get;
+ const dataGetStub = sinon.stub(data, 'get').callsFake((info, response, l, cb) => {
+ if (info?.dataStoreType === 'azure') {
+ // Simulate Azure: write the (already-known) source bytes to the
+ // provided writable, end it, and signal completion.
+ setImmediate(() => {
+ response.write(objData[0]);
+ response.end();
+ cb(null);
+ });
+ return;
+ }
+ originalGet.call(data, info, response, l, cb);
+ });
+ metadata.getObjectMD(sourceBucketName, objectKey, {}, log, (err, md) => {
+ assert.ifError(err);
+ // eslint-disable-next-line no-param-reassign
+ md.location = [{ ...md.location[0], dataStoreType: 'azure' }];
+ metadata.putObjectMD(sourceBucketName, objectKey, md, {}, log, err => {
+ assert.ifError(err);
+ Promise.resolve(algorithms.sha256.digest(objData[0])).then(expectedDigest => {
+ const req = _createObjectCopyRequest(destBucketName, {
+ 'x-amz-checksum-algorithm': 'SHA256',
+ });
+ objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log,
+ (err, xml) => {
+ dataGetStub.restore();
+ assertRecomputed('sha256', 'ChecksumSHA256', expectedDigest, done)(err, xml);
+ });
+ }, err => {
+ dataGetStub.restore();
+ done(err);
+ });
+ });
+ });
+ });
+
+ const copyToSelfFixtures = [
+ { algo: 'crc32', header: 'CRC32', xmlTag: 'ChecksumCRC32' },
+ { algo: 'crc32c', header: 'CRC32C', xmlTag: 'ChecksumCRC32C' },
+ { algo: 'crc64nvme', header: 'CRC64NVME', xmlTag: 'ChecksumCRC64NVME' },
+ { algo: 'sha1', header: 'SHA1', xmlTag: 'ChecksumSHA1' },
+ { algo: 'sha256', header: 'SHA256', xmlTag: 'ChecksumSHA256' },
+ ];
+
+ copyToSelfFixtures.forEach(({ algo, header, xmlTag }) => {
+ it(`should compute the ${algo} checksum on copy-to-self without rewriting data`, done => {
+ const dataPutSpy = sinon.spy(data, 'put');
+ const batchDeleteSpy = sinon.spy(data, 'batchDelete');
+ metadata.getObjectMD(sourceBucketName, objectKey, {}, log, (err, srcMd) => {
+ assert.ifError(err);
+ const sourceLocation = srcMd.location;
+ Promise.resolve(algorithms[algo].digest(objData[0])).then(expectedDigest => {
+ const req = new DummyRequest({
+ bucketName: sourceBucketName,
+ namespace,
+ objectKey,
+ headers: {
+ 'x-amz-checksum-algorithm': header,
+ 'x-amz-metadata-directive': 'REPLACE',
+ },
+ url: `/${sourceBucketName}/${objectKey}`,
+ socket: {},
+ });
+ objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, (err, xml) => {
+ dataPutSpy.restore();
+ batchDeleteSpy.restore();
+ assert.ifError(err);
+ assert(!dataPutSpy.called, 'data.put should NOT be called');
+ assert(!batchDeleteSpy.called, 'data.batchDelete should NOT be called');
+ assert(xml.includes(`<${xmlTag}>${expectedDigest}${xmlTag}>`),
+ 'response XML should carry the new checksum');
+ metadata.getObjectMD(sourceBucketName, objectKey, {}, log, (err, md) => {
+ assert.ifError(err);
+ assert.deepStrictEqual(
+ md.location.map(l => l.key),
+ sourceLocation.map(l => l.key),
+ 'data location keys should be reused');
+ assert.strictEqual(md.checksum.checksumAlgorithm, algo);
+ assert.strictEqual(md.checksum.checksumValue, expectedDigest);
+ assert.strictEqual(md.checksum.checksumType, 'FULL_OBJECT');
+ done();
+ });
+ });
+ }, done);
+ });
+ });
+ });
+
+ it('should still PUT on copy-to-self when versioning is enabled (no metadata-only shortcut)', done => {
+ // Versioned copies produce a new version-id; the metadata-only path
+ // is skipped so the new version gets its own data write.
+ const enableVersioning = versioningTestUtils
+ .createBucketPutVersioningReq(sourceBucketName, 'Enabled');
+ bucketPutVersioning(authInfo, enableVersioning, log, err => {
+ assert.ifError(err);
+ const dataPutSpy = sinon.spy(data, 'put');
+ const req = new DummyRequest({
+ bucketName: sourceBucketName,
+ namespace,
+ objectKey,
+ headers: {
+ 'x-amz-checksum-algorithm': 'SHA256',
+ 'x-amz-metadata-directive': 'REPLACE',
+ },
+ url: `/${sourceBucketName}/${objectKey}`,
+ socket: {},
+ });
+ objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, err => {
+ dataPutSpy.restore();
+ assert.ifError(err);
+ assert(dataPutSpy.called,
+ 'data.put should be called because versioning forces a new version write');
+ done();
+ });
+ });
+ });
+});
+describe('objectCopy checksum recompute on same external backend', () => {
+ const prevConfigBackendsData = data.config.backends.data;
+ const prevConfigLocationConstraint = data.config.locationConstraints['us-east-1'].type;
+ const prevConfigLocationConstraint2 = data.config.locationConstraints['us-east-2'].type;
+
+ before(() => {
+ data.config.backends.data = 'multiple';
+ data.config.locationConstraints['us-east-1'].type = 'aws_s3';
+ data.config.locationConstraints['us-east-2'].type = 'aws_s3';
+ });
+
+ after(() => {
+ data.config.backends.data = prevConfigBackendsData;
+ data.config.locationConstraints['us-east-1'].type = prevConfigLocationConstraint;
+ data.config.locationConstraints['us-east-2'].type = prevConfigLocationConstraint2;
+ });
+
+ beforeEach(done => {
+ cleanup();
+ async.series([
+ next => bucketPut(authInfo, putDestBucketRequest, log, next),
+ next => bucketPut(authInfo, putSourceBucketRequest, log, next),
+ next => objectPut(authInfo, versioningTestUtils.createPutObjectRequest(
+ sourceBucketName, objectKey, objData[0]), undefined, log, next),
+ ], done);
+ });
+
+ afterEach(() => cleanup());
+
+ it('should skip recompute on cross-key copy on external backend and not store a checksum', done => {
+ // Cross-key copy + recompute on same external backend should take the
+ // native server-side copy path with no checksum stored on the destination.
+ const copyObjectStub = sinon.stub(data, 'copyObject').callsFake(
+ (req, srcLoc, sMP, dataLocator, ctx, backendInfo, srcBM, dstBM, sse, l, cb) =>
+ cb(null, dataLocator));
+ const dataPutSpy = sinon.spy(data, 'put');
+ const req = _createObjectCopyRequest(destBucketName, {
+ 'x-amz-checksum-algorithm': 'SHA256',
+ });
+ objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, (err, xml) => {
+ copyObjectStub.restore();
+ dataPutSpy.restore();
+ assert.ifError(err);
+ assert(copyObjectStub.calledOnce, 'data.copyObject should be called (native path)');
+ assert(!dataPutSpy.called, 'data.put should NOT be called (no stream-through)');
+ assert(!xml.includes(''),
+ 'response XML should not carry a checksum');
+ metadata.getObjectMD(destBucketName, objectKey, {}, log, (err, md) => {
+ assert.ifError(err);
+ assert(!md.checksum, 'destination metadata should not carry a checksum');
+ done();
+ });
+ });
+ });
+
+ it('should skip recompute on a cross-location copy within the same external backend type', done => {
+ // us-east-1 -> us-east-2, both aws_s3: even though locations differ, a
+ // native server-side copy is available (externalBackendCopy), so we keep
+ // the native path — no GET + PUT through CloudServer — and store no checksum.
+ const copyObjectStub = sinon.stub(data, 'copyObject').callsFake(
+ (req, srcLoc, sMP, dataLocator, ctx, backendInfo, srcBM, dstBM, sse, l, cb) =>
+ cb(null, [{ key: 'new-key', dataStoreName: 'us-east-2', size: objData[0].length, start: 0 }]));
+ const dataPutSpy = sinon.spy(data, 'put');
+ const req = _createObjectCopyRequest(destBucketName, {
+ 'x-amz-checksum-algorithm': 'SHA256',
+ 'x-amz-metadata-directive': 'REPLACE',
+ 'x-amz-meta-scal-location-constraint': 'us-east-2',
+ });
+ objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, (err, xml) => {
+ copyObjectStub.restore();
+ dataPutSpy.restore();
+ assert.ifError(err);
+ assert(copyObjectStub.calledOnce,
+ 'data.copyObject should be called (native cross-location path)');
+ assert(!dataPutSpy.called,
+ 'data.put should NOT be called (no stream-through GET + PUT)');
+ assert(!xml.includes(''),
+ 'response XML should not carry a checksum');
+ metadata.getObjectMD(destBucketName, objectKey, {}, log, (err, md) => {
+ assert.ifError(err);
+ assert(!md.checksum, 'destination metadata should not carry a checksum');
+ done();
+ });
+ });
+ });
+
+ it('should early-return on copy-to-self and not call data.copyObject or data.put', done => {
+ // Copy-to-self + recompute on same external backend should be a pure metadata-only operation.
+ const copyObjectSpy = sinon.spy(data, 'copyObject');
+ const dataPutSpy = sinon.spy(data, 'put');
+ const req = new DummyRequest({
+ bucketName: sourceBucketName,
+ namespace,
+ objectKey,
+ headers: {
+ 'x-amz-checksum-algorithm': 'SHA256',
+ 'x-amz-metadata-directive': 'REPLACE',
+ },
+ url: `/${sourceBucketName}/${objectKey}`,
+ socket: {},
+ });
+ objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, (err, xml) => {
+ copyObjectSpy.restore();
+ dataPutSpy.restore();
+ assert.ifError(err);
+ assert(!copyObjectSpy.called, 'data.copyObject should NOT be called');
+ assert(!dataPutSpy.called, 'data.put should NOT be called');
+ assert(!xml.includes(''),
+ 'response XML should not carry a checksum');
+ metadata.getObjectMD(sourceBucketName, objectKey, {}, log, (err, md) => {
+ assert.ifError(err);
+ assert(!md.checksum, 'metadata should not carry a checksum');
+ done();
+ });
+ });
+ });
+
+ ['pfs', 'tlp'].forEach(backendType => {
+ it(`should not GET on copy-to-self recompute for a ${backendType} external backend`, done => {
+ // pfs/tlp are external but have no native server-side copy. On copy-to-self
+ // the location is reused (no byte movement), so recompute must not trigger
+ // a GET purely to hash.
+ const prevType = data.config.locationConstraints['us-east-1'].type;
+ data.config.locationConstraints['us-east-1'].type = backendType;
+ const dataGetSpy = sinon.spy(data, 'get');
+ const copyObjectSpy = sinon.spy(data, 'copyObject');
+ const dataPutSpy = sinon.spy(data, 'put');
+ const req = new DummyRequest({
+ bucketName: sourceBucketName,
+ namespace,
+ objectKey,
+ headers: {
+ 'x-amz-checksum-algorithm': 'SHA256',
+ 'x-amz-metadata-directive': 'REPLACE',
+ },
+ url: `/${sourceBucketName}/${objectKey}`,
+ socket: {},
+ });
+ objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, (err, xml) => {
+ data.config.locationConstraints['us-east-1'].type = prevType;
+ dataGetSpy.restore();
+ copyObjectSpy.restore();
+ dataPutSpy.restore();
+ assert.ifError(err);
+ assert(!dataGetSpy.called, 'data.get should NOT be called (no GET to hash on copy-to-self)');
+ assert(!copyObjectSpy.called, 'data.copyObject should NOT be called');
+ assert(!dataPutSpy.called, 'data.put should NOT be called');
+ assert(!xml.includes(''), 'response XML should not carry a checksum');
+ done();
+ });
+ });
+ });
+});
+
+describe('objectCopy checksum recompute on 0-byte source', () => {
+ beforeEach(done => {
+ cleanup();
+ async.series([
+ next => bucketPut(authInfo, putDestBucketRequest, log, next),
+ next => bucketPut(authInfo, putSourceBucketRequest, log, next),
+ next => objectPut(authInfo, versioningTestUtils.createPutObjectRequest(
+ sourceBucketName, objectKey, objData[0]), undefined, log, next),
+ // Truncate the source to 0 bytes (no data location, content-length 0).
+ // Matches the AWS behavior we're exercising: empty source + recompute.
+ next => setSourceEmptyBody(next),
+ ], done);
+ });
+
+ afterEach(() => cleanup());
+
+ const recomputeFixtures = [
+ { algo: 'crc32', header: 'CRC32', xmlTag: 'ChecksumCRC32' },
+ { algo: 'crc32c', header: 'CRC32C', xmlTag: 'ChecksumCRC32C' },
+ { algo: 'crc64nvme', header: 'CRC64NVME', xmlTag: 'ChecksumCRC64NVME' },
+ { algo: 'sha1', header: 'SHA1', xmlTag: 'ChecksumSHA1' },
+ { algo: 'sha256', header: 'SHA256', xmlTag: 'ChecksumSHA256' },
+ ];
+
+ recomputeFixtures.forEach(({ algo, header, xmlTag }) => {
+ it(`should compute empty-bytes ${algo} digest when source has no checksum`, done => {
+ setSourceChecksum(null, err => {
+ if (err) {
+ return done(err);
+ }
+ return Promise.resolve(algorithms[algo].digest(Buffer.alloc(0))).then(expectedDigest => {
+ const req = _createObjectCopyRequest(destBucketName, {
+ 'x-amz-checksum-algorithm': header,
+ });
+ objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log,
+ assertRecomputed(algo, xmlTag, expectedDigest, done));
+ }, done);
+ });
+ });
+ });
+
+ it('should recompute empty-bytes digest on COMPOSITE 0-byte source (no algo header)', done => {
+ // COMPOSITE source forces recompute even with no algorithm header.
+ // Use sha256 placeholder; the dest digest will be the empty-bytes sha256.
+ setSourceChecksum({
+ checksumAlgorithm: 'sha256',
+ checksumValue: '47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=',
+ checksumType: 'COMPOSITE',
+ }, err => {
+ assert.ifError(err);
+ Promise.resolve(algorithms.sha256.digest(Buffer.alloc(0))).then(expectedDigest => {
+ const req = _createObjectCopyRequest(destBucketName);
+ objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log,
+ assertRecomputed('sha256', 'ChecksumSHA256', expectedDigest, done));
+ }, done);
+ });
+ });
+
+ it('should propagate FULL_OBJECT checksum on 0-byte source (no algo header)', done => {
+ // The 0-byte recompute path must not override propagation set by _prepMetadata.
+ setSourceChecksum({
+ checksumAlgorithm: 'crc32',
+ checksumValue: 'AAAAAA==',
+ checksumType: 'FULL_OBJECT',
+ }, err => {
+ assert.ifError(err);
+ const req = _createObjectCopyRequest(destBucketName);
+ objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, (err, xml) => {
+ assert.ifError(err);
+ assert(xml.includes('AAAAAA=='));
+ assert(xml.includes('FULL_OBJECT'));
+ metadata.getObjectMD(destBucketName, objectKey, {}, log, (err, md) => {
+ assert.ifError(err);
+ assert.strictEqual(md.checksum.checksumAlgorithm, 'crc32');
+ assert.strictEqual(md.checksum.checksumValue, 'AAAAAA==');
+ assert.strictEqual(md.checksum.checksumType, 'FULL_OBJECT');
+ done();
+ });
+ });
+ });
+ });
+
+ it('should compute empty-bytes CRC64NVME on 0-byte source with no source checksum and no algo header', done => {
+ setSourceChecksum(null, err => {
+ if (err) {
+ return done(err);
+ }
+ return Promise.resolve(algorithms.crc64nvme.digest(Buffer.alloc(0))).then(expectedDigest => {
+ const req = _createObjectCopyRequest(destBucketName);
+ objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log,
+ assertRecomputed('crc64nvme', 'ChecksumCRC64NVME', expectedDigest, done));
+ }, done);
+ });
+ });
+});
+
+describe('objectCopy data orphan cleanup on cross-backend copy-to-self', () => {
+ const prevConfigBackendsData = data.config.backends.data;
+ const prevConfigLocationConstraint1 = data.config.locationConstraints['us-east-1'].type;
+ const prevConfigLocationConstraint2 = data.config.locationConstraints['us-east-2'].type;
+
+ before(() => {
+ data.config.backends.data = 'multiple';
+ data.config.locationConstraints['us-east-1'].type = 'aws_s3';
+ data.config.locationConstraints['us-east-2'].type = 'aws_s3';
+ });
+
+ after(() => {
+ data.config.backends.data = prevConfigBackendsData;
+ data.config.locationConstraints['us-east-1'].type = prevConfigLocationConstraint1;
+ data.config.locationConstraints['us-east-2'].type = prevConfigLocationConstraint2;
+ });
+
+ beforeEach(done => {
+ cleanup();
+ async.series([
+ next => bucketPut(authInfo, putSourceBucketRequest, log, next),
+ next => objectPut(authInfo, versioningTestUtils.createPutObjectRequest(
+ sourceBucketName, objectKey, objData[0]), undefined, log, next),
+ ], done);
+ });
+
+ afterEach(() => cleanup());
+
+ it('should reclaim the old location on copy-to-self that lands at a different backend key', done => {
+ // Copy-to-self where the new metadata points to a different data key
+ // than the source did (cross-backend rewrite via x-amz-meta-scal-
+ // location-constraint). The old key is no longer referenced and must
+ // be batchDeleted — guards against a pre-existing orphan bug.
+ const batchDeleteSpy = sinon.spy(data, 'batchDelete');
+ const copyObjectStub = sinon.stub(data, 'copyObject').callsFake(
+ (req, srcLoc, sMP, dataLocator, ctx, backendInfo, srcBM, dstBM, sse, l, cb) =>
+ cb(null, [{ key: 'new-backend-key', dataStoreName: 'us-east-2', size: objData[0].length, start: 0 }]));
+ metadata.getObjectMD(sourceBucketName, objectKey, {}, log, (err, srcMd) => {
+ assert.ifError(err);
+ const oldKeys = srcMd.location.map(l => l.key);
+ const req = new DummyRequest({
+ bucketName: sourceBucketName,
+ namespace,
+ objectKey,
+ headers: {
+ 'x-amz-metadata-directive': 'REPLACE',
+ 'x-amz-meta-scal-location-constraint': 'us-east-2',
+ },
+ url: `/${sourceBucketName}/${objectKey}`,
+ socket: {},
+ });
+ objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, err => {
+ batchDeleteSpy.restore();
+ copyObjectStub.restore();
+ assert.ifError(err);
+ assert(batchDeleteSpy.calledOnce,
+ 'data.batchDelete should reclaim the old data location');
+ const reclaimed = batchDeleteSpy.firstCall.args[0];
+ const reclaimedKeys = reclaimed.map(l => l.key);
+ assert.deepStrictEqual(reclaimedKeys, oldKeys,
+ 'batchDelete should target the old source key, not the new backend key');
+ done();
+ });
+ });
+ });
+
+ it('should reclaim the old location when the new backend key collides with the old one', done => {
+ // bucketMatch-style external backends derive the backend key from the
+ // S3 object key, so a copy-to-self that changes dataStoreName lands at
+ // the same `key` in a different backend. Identity must include
+ // dataStoreName or the old slot is silently leaked.
+ const batchDeleteSpy = sinon.spy(data, 'batchDelete');
+ metadata.getObjectMD(sourceBucketName, objectKey, {}, log, (err, srcMd) => {
+ assert.ifError(err);
+ const oldLoc = srcMd.location[0];
+ const copyObjectStub = sinon.stub(data, 'copyObject').callsFake(
+ (req, srcLoc, sMP, dataLocator, ctx, backendInfo, srcBM, dstBM, sse, l, cb) =>
+ cb(null, [{ key: oldLoc.key, dataStoreName: 'us-east-2',
+ size: objData[0].length, start: 0 }]));
+ const req = new DummyRequest({
+ bucketName: sourceBucketName,
+ namespace,
+ objectKey,
+ headers: {
+ 'x-amz-metadata-directive': 'REPLACE',
+ 'x-amz-meta-scal-location-constraint': 'us-east-2',
+ },
+ url: `/${sourceBucketName}/${objectKey}`,
+ socket: {},
+ });
+ objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, err => {
+ batchDeleteSpy.restore();
+ copyObjectStub.restore();
+ assert.ifError(err);
+ assert(batchDeleteSpy.calledOnce,
+ 'data.batchDelete should reclaim the old slot even when the backend key matches');
+ const reclaimed = batchDeleteSpy.firstCall.args[0];
+ assert.strictEqual(reclaimed.length, 1);
+ assert.strictEqual(reclaimed[0].dataStoreName, oldLoc.dataStoreName,
+ 'batchDelete must target the old dataStoreName, not the new one');
+ assert.strictEqual(reclaimed[0].key, oldLoc.key);
+ done();
+ });
+ });
+ });
+});
+
+describe('objectCopy legacy string-location copy-to-self', () => {
+ beforeEach(done => {
+ cleanup();
+ async.series([
+ next => bucketPut(authInfo, putSourceBucketRequest, log, next),
+ next => objectPut(authInfo, versioningTestUtils.createPutObjectRequest(
+ sourceBucketName, objectKey, objData[0]), undefined, log, next),
+ ], done);
+ });
+
+ afterEach(() => cleanup());
+
+ it('should not delete the reused location on copy-to-self with a legacy string location', done => {
+ // Pre-md-model-version-2 objects store `location` as a bare string;
+ // versioningPreprocessing then surfaces dataToDelete as a string array
+ // while the reused new locator is { key }. The orphan filter must
+ // normalize both or it deletes the live data on copy-to-self.
+ const batchDeleteSpy = sinon.spy(data, 'batchDelete');
+ metadata.getObjectMD(sourceBucketName, objectKey, {}, log, (err, md) => {
+ assert.ifError(err);
+ const legacyKey = String(md.location[0].key);
+ // eslint-disable-next-line no-param-reassign
+ md.location = legacyKey;
+ // FULL_OBJECT checksum + no algo header keeps us on the propagate
+ // (metadata-only reuse) path, so no data.get on the string key.
+ // eslint-disable-next-line no-param-reassign
+ md.checksum = { checksumAlgorithm: 'crc32', checksumValue: 'AAAAAA==', checksumType: 'FULL_OBJECT' };
+ metadata.putObjectMD(sourceBucketName, objectKey, md, {}, log, err => {
+ assert.ifError(err);
+ const req = new DummyRequest({
+ bucketName: sourceBucketName,
+ namespace,
+ objectKey,
+ headers: { 'x-amz-metadata-directive': 'REPLACE' },
+ url: `/${sourceBucketName}/${objectKey}`,
+ socket: {},
+ });
+ objectCopy(authInfo, req, sourceBucketName, objectKey, undefined, log, err => {
+ batchDeleteSpy.restore();
+ assert.ifError(err);
+ assert(batchDeleteSpy.notCalled,
+ 'the reused legacy string location must not be treated as an orphan');
+ done();
+ });
+ });
+ });
+ });
+});
+
+describe('_orphanedDataLocations', () => {
+ const orphanedDataLocations = objectCopy._orphanedDataLocations;
+
+ it('should return null when there is nothing to delete', () => {
+ const newLocs = [{ dataStoreName: 'l1', key: 'a' }];
+ assert.strictEqual(orphanedDataLocations(undefined, newLocs), null);
+ assert.strictEqual(orphanedDataLocations(null, newLocs), null);
+ assert.strictEqual(orphanedDataLocations([], newLocs), null);
+ });
+
+ it('should return null when every prior location is still referenced', () => {
+ const locs = [
+ { dataStoreName: 'l1', key: 'a' },
+ { dataStoreName: 'l1', key: 'b' },
+ ];
+ assert.strictEqual(orphanedDataLocations(locs, locs), null);
+ });
+
+ it('should flag a prior location whose key is no longer referenced', () => {
+ const oldLocs = [{ dataStoreName: 'l1', key: 'a' }];
+ const newLocs = [{ dataStoreName: 'l1', key: 'b' }];
+ assert.deepStrictEqual(orphanedDataLocations(oldLocs, newLocs), oldLocs);
+ });
+
+ it('should treat the same key under a different dataStoreName as an orphan', () => {
+ const oldLocs = [{ dataStoreName: 'us-east-1', key: 'a' }];
+ const newLocs = [{ dataStoreName: 'us-east-2', key: 'a' }];
+ assert.deepStrictEqual(orphanedDataLocations(oldLocs, newLocs), oldLocs);
+ });
+
+ it('should return only the subset of prior locations that are orphaned', () => {
+ const reused = { dataStoreName: 'l1', key: 'keep' };
+ const orphan = { dataStoreName: 'l1', key: 'gone' };
+ assert.deepStrictEqual(
+ orphanedDataLocations([reused, orphan], [reused]), [orphan]);
+ });
+
+ it('should not flag a key referenced by any of several new locations', () => {
+ const oldLocs = [{ dataStoreName: 'l1', key: 'a' }];
+ const newLocs = [
+ { dataStoreName: 'l1', key: 'x' },
+ { dataStoreName: 'l1', key: 'a' },
+ ];
+ assert.strictEqual(orphanedDataLocations(oldLocs, newLocs), null);
+ });
+
+ it('should normalize a reused legacy string location (not an orphan)', () => {
+ // pre-md-model-version-2 string location reused as { key } by goGetData
+ assert.strictEqual(
+ orphanedDataLocations(['legacyKey'], [{ key: 'legacyKey' }]), null);
+ });
+
+ it('should flag a legacy string location that is no longer referenced', () => {
+ assert.deepStrictEqual(
+ orphanedDataLocations(['oldKey'], [{ key: 'newKey' }]), ['oldKey']);
+ });
+});