Friday, December 12, 2014

Playing with the Python library boto and S3

Here is a simple example of a Python script that plays with the boto API:

 import requests, json
import datetime
import hashlib
import boto
import boto.s3.connection
from boto.s3.key import Key
import os,math 
from filechunkio import FileChunkIO

# Placeholder credentials: replace with real values before running.
access_key = 'you access key'
secret_key = 'your secret key'


def chunk_ranges(total_size, chunk_size):
    """Yield ``(part_num, offset, length)`` for each part of a multipart upload.

    Part numbers start at 1. Every part is ``chunk_size`` bytes long except
    the last one, which holds the remainder. A ``total_size`` of 0 yields a
    single zero-length part, so there is always at least one part.

    Raises:
        ValueError: if ``chunk_size`` is not positive or ``total_size`` is
            negative.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    if total_size < 0:
        raise ValueError("total_size must not be negative")
    # Integer ceiling division: gives the same result under Python 2 and 3,
    # and avoids the extra empty part that "floor + 1" produces when the
    # size is an exact multiple of chunk_size.
    part_count = max(1, (total_size + chunk_size - 1) // chunk_size)
    for index in range(part_count):
        offset = chunk_size * index
        yield index + 1, offset, min(chunk_size, total_size - offset)


if __name__ == "__main__":
    conn = boto.connect_s3(access_key, secret_key)

    # List all buckets: first the whole result set, then one per line.
    # The listing is fetched once and reused for both outputs.
    buckets = conn.get_all_buckets()
    print(buckets)
    for existing_bucket in buckets:
        print(existing_bucket)

    # To create the bucket first, use: conn.create_bucket('hsn-bucket')
    # Select an existing bucket.
    bucket = conn.get_bucket('hsn-bucket')

    # Create a small text object directly from a string.
    k = Key(bucket)
    k.key = 'dir/foobar'
    k.set_contents_from_string('This is a test of S3')

    # Local file used by both upload examples below.
    source_path = r'D:\file\test1.pdf'
    source_size = os.stat(source_path).st_size

    # Show every key currently in the bucket.
    print(bucket.get_all_keys())

    # Upload the local file in one request; the file name is used as the key.
    k.key = os.path.basename(source_path)
    k.set_contents_from_filename(source_path)

    # Upload the same file again, this time split into multiple parts.
    mp = bucket.initiate_multipart_upload(os.path.basename(source_path))
    # S3 requires every part except the last to be at least 5 MiB.
    chunk_size = 5 * 1024 * 1024
    try:
        # FileChunkIO exposes one byte range of the file as a file-like
        # object, so each part is read straight from disk.
        for part_num, offset, length in chunk_ranges(source_size, chunk_size):
            with FileChunkIO(source_path, 'r', offset=offset,
                             bytes=length) as fp:
                mp.upload_part_from_file(fp, part_num=part_num)
    except Exception:
        # Abort so the parts already uploaded are not left in the bucket.
        mp.cancel_upload()
        raise
    else:
        # Finish the upload.
        mp.complete_upload()

3 comments:

  1. 5MB = 5000000 bytes

    I'm assuming you were looking at the boto documentation for filechunkio. The example they use splits files up into chunks of size 50MiB (fifty mebibytes), not 50MB.

    ReplyDelete
    Replies
    1. However, the minimum may actually be 5MiB and you may have been correct all along.

      Delete