s3api: add compat test sending too much body with checksum

Adds a test that verifies extra body content beyond the content-length
is ignored provided that the checksum value matches that of the
content-length bytes.

Add comment to explain why this is the case.

Drive-by: add clarifying comment to unit test.

Change-Id: I8f198298a817be47223e2f45fbc48a6f393b3bef
Signed-off-by: Alistair Coles <alistairncoles@gmail.com>
This commit is contained in:
Alistair Coles
2025-07-10 14:51:34 +01:00
parent be56c1e258
commit 351ee72790
3 changed files with 71 additions and 35 deletions

View File

@@ -239,6 +239,8 @@ class ChecksummingInput(InputProxy):
# says nothing about whether the underlying stream uses aws-chunked
self._checksum_hasher.update(chunk)
if self.bytes_received < self._expected_length:
# wrapped input is likely to have timed out before this clause is
# reached with eof==True, but just in case...
error = eof
elif self.bytes_received == self._expected_length:
# Lazy fetch checksum value because it may have come in trailers
@@ -256,6 +258,8 @@ class ChecksummingInput(InputProxy):
raise S3InputChecksumTrailerInvalid(self._checksum_key)
error = self._checksum_hasher.digest() != expected_raw_checksum
else:
# the underlying wsgi.Input stops reading at content-length so we
# don't expect to reach this clause, but just in case...
error = True
if error:

View File

@@ -58,6 +58,24 @@ def _crc32(payload=b''):
).decode('ascii')
def get_raw_conn(request):
# requests is going to try *real hard* to either send a "Content-Length" or
# "Transfer-encoding: chunked" header so dip down to our bufferedhttp to do
# the sending/parsing when we want to override those headers
host, port = parse_socket_string(request['host'], None)
if port:
port = int(port)
return bufferedhttp.http_connect_raw(
host,
port,
request['method'],
request['path'],
request['headers'],
'&'.join('%s=%s' % (k, v) for k, v in request['query'].items()),
request['https']
)
EMPTY_SHA256 = _sha256()
EPOCH = datetime.datetime.fromtimestamp(0, datetime.timezone.utc)
@@ -771,23 +789,9 @@ class InputErrorsMixin(object):
method='PUT',
)
self.conn.sign_request(request)
# requests is not our friend here; it's going to try *real hard* to
# either send a "Content-Length" or "Transfer-encoding: chunked" header
# so dip down to our bufferedhttp to do the sending/parsing
host, port = parse_socket_string(request['host'], None)
if port:
port = int(port)
conn = bufferedhttp.http_connect_raw(
host,
port,
request['method'],
request['path'],
request['headers'],
'&'.join('%s=%s' % (k, v) for k, v in request['query'].items()),
request['https']
)
conn.send(TEST_BODY)
return conn.getresponse()
raw_conn = get_raw_conn(request)
raw_conn.send(TEST_BODY)
return raw_conn.getresponse()
def test_no_md5_no_sha_no_content_length(self):
resp = self.get_response_put_object_no_md5_no_sha_no_content_length()
@@ -1180,23 +1184,9 @@ class InputErrorsMixin(object):
headers={'x-amz-content-sha256': _sha256(TEST_BODY)},
)
self.conn.sign_request(request)
# requests is not our friend here; it's going to try *real hard* to
# either send a "Content-Length" or "Transfer-encoding: chunked" header
# so dip down to our bufferedhttp to do the sending/parsing
host, port = parse_socket_string(request['host'], None)
if port:
port = int(port)
conn = bufferedhttp.http_connect_raw(
host,
port,
request['method'],
request['path'],
request['headers'],
'&'.join('%s=%s' % (k, v) for k, v in request['query'].items()),
request['https']
)
conn.send(TEST_BODY)
resp = conn.getresponse()
raw_conn = get_raw_conn(request)
raw_conn.send(TEST_BODY)
resp = raw_conn.getresponse()
body = resp.read()
self.assertEqual(resp.status, 411, body)
self.assertIn(b'<Code>MissingContentLength</Code>', body)
@@ -3578,6 +3568,47 @@ class NotV4AuthHeadersMixin:
self.assertSHA256Mismatch(resp, 'STREAMING-AWS4-HMAC-SHA256-PAYLOAD',
_sha256(chunked_body))
def test_no_md5_no_sha_good_crc_header_body_too_long_ok(self):
request = self.conn.build_request(
self.bucket_name,
'test-obj',
method='PUT',
headers={
'x-amz-checksum-crc32': _crc32(TEST_BODY),
'content-length': str(len(TEST_BODY)),
},
)
self.conn.sign_request(request)
raw_conn = get_raw_conn(request)
# send more than expected body; as long as the crc is correct for the
# content-length part of the body that is actually read then it's ok
raw_conn.send(TEST_BODY + b'0123456789')
resp = raw_conn.getresponse()
body = resp.read()
self.assertEqual(resp.status, 200, body)
def test_no_md5_no_sha_bad_crc_header_body_too_long(self):
request = self.conn.build_request(
self.bucket_name,
'test-obj',
method='PUT',
headers={
'x-amz-checksum-crc32': _crc32(TEST_BODY),
'content-length': str(len(TEST_BODY[:-10])),
},
)
self.conn.sign_request(request)
raw_conn = get_raw_conn(request)
# content-length is less than sent body; not all the body is read so
# the calculated crc will mismatch the body crc
raw_conn.send(TEST_BODY)
resp = raw_conn.getresponse()
body = resp.read().decode('utf-8')
self.assertEqual(resp.status, 400, body)
self.assertIn('<Code>BadDigest</Code>', body)
self.assertIn('<Message>The CRC32 you specified did not match the '
'calculated checksum.</Message>', body)
class TestV4AuthQuery(InputErrorsMixin,
NotV4AuthHeadersMixin,

View File

@@ -1651,9 +1651,10 @@ class TestRequest(S3ApiTestCase):
req.environ['wsgi.input'].read()
def test_check_sig_v4_streaming_aws_hmac_sha256_payload_trailer_bad(self):
# second chunk has bad signature
body = 'a;chunk-signature=c9dd07703599d3d0bd51c96193110756d4f7091d5a' \
'4408314a53a802e635b1ad\r\nabcdefghij\r\n' \
'a;chunk-signature=000000000000000000000000000000000000000000' \
'a;chunk-signature=badbadbadbad000000000000000000000000000000' \
'0000000000000000000000\r\nklmnopqrst\r\n' \
'7;chunk-signature=b63f141c2012de9ac60b961795ef31ad3202b125aa' \
'873b4142cf9d815360abc0\r\nuvwxyz\n\r\n' \