Move S3 objects to another bucket/prefix based on condition(s)
For one reason or another, you may want to move, or perform some other operation on, a list of S3 objects. The following Python snippet moves objects from one prefix to another within the same bucket, after a certain period of time (if you trigger it on a schedule with EventBridge, like a cron job) and/or when certain conditions are met. Feel free to adapt it to your needs. It uses Boto3 to interact with S3.
Environment variables that need to be added to the Lambda function:
- S3_BUCKET : the bucket where to operate
- S3_PREFIX_DESTINATION : the prefix destination (ending with `/`)
- S3_PREFIX_SOURCE : the prefix source (ending with `/`)
import boto3, os
# Configuration read from the process environment when the module is imported.
# A missing variable raises KeyError here, before the handler is ever called.
S3_BUCKET = os.environ['S3_BUCKET']  # bucket to operate on
S3_PREFIX_DESTINATION = os.environ['S3_PREFIX_DESTINATION']  # prefix objects are moved to
S3_PREFIX_SOURCE = os.environ['S3_PREFIX_SOURCE']  # prefix objects are moved from
# Single module-level S3 client, used by the functions below.
s3_client = boto3.client("s3")
def lambda_handler(event, context):
    """Move every eligible object from the source prefix to the destination.

    Lists all objects under ``S3_PREFIX_SOURCE`` in ``S3_BUCKET``, keeps the
    ones for which ``is_file_need_to_be_archive`` returns a true value, and
    hands each of them to ``archive_file``.

    Args:
        event: Lambda invocation event (not used).
        context: Lambda runtime context (not used).

    Returns:
        The number of objects selected for archiving, as a string.
    """
    # A single list_objects_v2 call returns at most 1,000 keys, so walk every
    # page of the listing instead of reading only the first response.
    paginator = s3_client.get_paginator("list_objects_v2")
    pages = paginator.paginate(Bucket=S3_BUCKET, Prefix=S3_PREFIX_SOURCE)

    s3_objects = []
    for page in pages:
        # "Contents" is absent when nothing matches the prefix.
        s3_objects.extend(
            s3_object
            for s3_object in page.get("Contents", [])
            # Skip only the placeholder entry whose key is the prefix itself.
            # Dropping the first listed entry unconditionally would lose a
            # real object whenever no such placeholder exists.
            if s3_object["Key"] != S3_PREFIX_SOURCE
        )
    print(str(len(s3_objects)) + " objects to check")

    # Create a list of S3 objects to archive
    s3_objects_to_archive = [
        s3_object
        for s3_object in s3_objects
        if is_file_need_to_be_archive(s3_object)
    ]
    total_archived_objects = str(len(s3_objects_to_archive))
    print(total_archived_objects + " objects to archive")

    # Archive/move objects
    for s3_object in s3_objects_to_archive:
        archive_file(s3_object)

    return total_archived_objects
def is_file_need_to_be_archive(s3_object):
    """Decide whether an S3 object should be archived.

    This is the customization point of the snippet: replace the placeholder
    below with your own rule.

    Args:
        s3_object: One entry of a list_objects_v2 "Contents" list (a dict
            that includes at least the object's "Key").

    Returns:
        True if the object must be archived, False otherwise.
    """
    # Your logic code here...
    # Placeholder condition: it is False so that nothing is moved until a real
    # rule is written (a bare `if` with no condition is a syntax error and
    # would prevent the whole module from loading).
    should_archive = False
    return bool(should_archive)
def archive_file(s3_object):
    """Move one object from the source prefix to the destination prefix.

    The object is copied to its new key inside ``S3_BUCKET`` and the original
    is then deleted.

    Args:
        s3_object: One entry of a list_objects_v2 "Contents" list; only its
            "Key" is read.
    """
    source_key = s3_object["Key"]

    # Determine new object prefix (you could put it in deeper prefixes, with
    # the date for example). Only the first occurrence is replaced: for keys
    # listed under the source prefix that is the leading prefix, and a
    # count-less replace() would also rewrite the same text deeper in the key
    # (e.g. "in/data/in/file").
    s3_object_new_key = source_key.replace(
        S3_PREFIX_SOURCE, S3_PREFIX_DESTINATION, 1
    )
    print("Archiving " + source_key + " to " + s3_object_new_key)

    # Copy object to its new prefix & delete original object. The delete comes
    # second, so an exception raised by the copy leaves the original in place.
    # NOTE: copy_object is a single-request copy, which S3 limits to 5 GB;
    # larger objects need a multipart copy.
    s3_client.copy_object(
        Bucket=S3_BUCKET,
        Key=s3_object_new_key,
        CopySource={'Bucket': S3_BUCKET, 'Key': source_key},
    )
    s3_client.delete_object(Bucket=S3_BUCKET, Key=source_key)