阿里云OSS修改大文件数据优化方案

来源:互联网 发布:软件使用费增值税税率 编辑:程序博客网 时间:2024/06/08 10:49
一、问题描述
阿里云OSS的程序开发中会遇到修改一个很大对象(object)中一段数据的情况。而OSS的特性决定了,在整个生命周期内,OSS对象的数据不可变更。因此,OSS不支持修改文件的部分内容等操作。重复上传同名的对象会覆盖之前的对象。如果需要修改一个文件的部分内容,通常的方案是将整个文件下载到本地,修改后再将文件上传到OSS,覆盖原始文件。对于大文件而言,这样做会带来不必要的网络开销,浪费大量时间。利用OSS的一些数据接口的特性,可以对该问题的处理流程做一定的优化。
 
二、优化原理
1)拷贝对象:OSS提供了CopyObject的功能来实现OSS的内部拷贝,这样在用户和OSS之间就无需传输大量的数据。
2)分片上传:将要上传的文件分成多个数据块(OSS里又称之为Part)来分别上传,上传完成之后再调用OSS的接口将这些Part组合成一个Object
3)分片拷贝:OSS提供了类似断点续传上传的功能来完成大文件的拷贝。基本操作步骤和断点续传上传基本一致,只是数据源从HTTP请求直接上传改成从源Object中获取。分片上传和分片拷贝操作流程基本一致,因此可以考虑结合两种方式优化修改大文件部分数据的方案
4)创建分片上传任务时,分片任务对应的Object名字可以和已经存在的Object相同,在分片上传任务尚未完成时,已经存在的Object不会受到影响。上传任务完成时,新的Object才会覆盖原来的Object。
 
三、优化方案
 
文件修改目标

实现方案示意
 

流程如下
  1. 将要修改的文件按照修改部分和未修改部分分片。
  2. 初始化一个分片上传任务(InitiateMultipartUpload)。
  3. 未修改部分采用分片拷贝方式创建数据(UploadPartCopy)。
  4. 修改部分采用本地分片上传方式创建数据(UploadPart)。
  5. 完成上传(CompleteMultipartUpload)。
注意的问题
       大小限制:在这种上传方式下,Object的大小是由Part来决定的。最多支持10000个Part,每个Part最小100KB(最后一个Part可以小于100KB),最大5GB
       对象名字问题:object无需重命名,执行CompleteMultipartUpload后新上传的Object才会取代原来的Object。
 
四、测试代码
测试包含两个步骤代码:创建原始Object -> 组合使用分片上传和分片拷贝生成新的Object
 
以下代码片段基于阿里云官方OSS C SDK库开发,演示了本方法的主要实现过程。
 
创建一个文件A,大小1MB,数据全部为0xAA
void put_object_from_buffer(){aos_pool_t *p = NULL;aos_string_t bucket;aos_string_t object;int is_cname = 0;aos_table_t *headers = NULL;aos_table_t *resp_headers = NULL;oss_request_options_t *options = NULL;aos_list_t buffer;aos_buf_t *content = NULL;aos_status_t *s = NULL;char* pbuf = NULL;aos_pool_create(&p, NULL);options = oss_request_options_create(p);init_sample_request_options(options, is_cname);headers = aos_table_make(p, 1);apr_table_set(headers, "x-oss-meta-author", "oss");aos_str_set(&bucket, BUCKET_NAME);aos_str_set(&object, "testfile1");aos_list_init(&buffer);pbuf = aos_palloc(p, 1024*1024);memset(pbuf, 0xaa, 1024*1024);content = aos_buf_pack(options->pool, pbuf, 1024*1024);aos_list_add_tail(&content->node, &buffer);s = oss_put_object_from_buffer(options, &bucket, &object, &buffer, headers, &resp_headers);if (aos_status_is_ok(s)) {printf("put object from buffer succeeded\n");} else {printf("put object from buffer failed\n"); } aos_pool_destroy(p);}

通过分片上传和分片拷贝创建同名文件A’,文件内容由文件A的部分数据修改为0xBB得到。
void multipart_upload_file_copy(){aos_pool_t *p;oss_request_options_t *options;char *dest_object_name = "testfile1";aos_string_t dest_bucket;aos_string_t dest_object;aos_string_t upload_id;aos_table_t *init_headers;aos_table_t *copy_headers;aos_table_t *list_part_resp_headers;aos_table_t *complete_resp_headers;aos_table_t *resp_headers;aos_status_t *s;oss_list_upload_part_params_t *list_upload_part_params;oss_upload_part_copy_params_t *upload_part_copy_params1;aos_list_t complete_part_list;oss_list_part_content_t *part_content;oss_complete_part_content_t *complete_content;//int max_ret = 1000;int is_cname = 0;// int part1 = 1;int64_t range_start1 = 0;int64_t range_end1 = 200*1024-1; // 0~200*1024-1//int part2 = 2;aos_list_t buffer;//200*1024~500*1024-1 -> 300*1024char* pbuf = NULL;aos_buf_t *content = NULL;//int part3 = 3;int64_t range_start3 = 500*1024; int64_t range_end3 = 1024*1024-1;// aos_pool_create(&p, NULL);options = oss_request_options_create(p);init_sample_request_options(options, is_cname);aos_str_set(&dest_bucket, BUCKET_NAME);aos_str_set(&dest_object, dest_object_name);init_headers = aos_table_make(p, 0);s = oss_init_multipart_upload(options, &dest_bucket, &dest_object, &upload_id, init_headers, &resp_headers);if (aos_status_is_ok(s)) {printf("Init multipart upload succeeded, upload_id:%.*s\n", upload_id.len, upload_id.data);} else {printf("Init multipart upload failed\n");aos_pool_destroy(p);return;} /* 拷贝第一个分片数据*/upload_part_copy_params1 = oss_create_upload_part_copy_params(p);aos_str_set(&upload_part_copy_params1->source_bucket, BUCKET_NAME);aos_str_set(&upload_part_copy_params1->source_object, OBJECT_NAME);aos_str_set(&upload_part_copy_params1->dest_bucket, BUCKET_NAME);aos_str_set(&upload_part_copy_params1->dest_object, dest_object_name);aos_str_set(&upload_part_copy_params1->upload_id, upload_id.data);upload_part_copy_params1->part_num = part1;upload_part_copy_params1->range_start = range_start1;upload_part_copy_params1->range_end = 
range_end1;copy_headers = aos_table_make(p, 0);s = oss_upload_part_copy(options, upload_part_copy_params1, copy_headers, &resp_headers);if (aos_status_is_ok(s)) {printf("upload part copy succeeded\n");} else {printf("upload part copy failed\n");}/* 本地上传第二个分片数据*/aos_list_init(&buffer);pbuf = aos_palloc(p, 300*1024);memset(pbuf, 0xbb, 300*1024);content = aos_buf_pack(options->pool, pbuf, 300*1024);aos_list_add_tail(&content->node, &buffer);s = oss_upload_part_from_buffer(options, &dest_bucket, &dest_object, &upload_id,part2, &buffer, &resp_headers);if (aos_status_is_ok(s)) {printf("Upload multipart part succeeded\n");} else {printf("Upload multipart part failed\n");aos_pool_destroy(p);return;} /* 拷贝第三个分片数据*/upload_part_copy_params1 = oss_create_upload_part_copy_params(p);aos_str_set(&upload_part_copy_params1->source_bucket, BUCKET_NAME);aos_str_set(&upload_part_copy_params1->source_object, OBJECT_NAME);aos_str_set(&upload_part_copy_params1->dest_bucket, BUCKET_NAME);aos_str_set(&upload_part_copy_params1->dest_object, dest_object_name);aos_str_set(&upload_part_copy_params1->upload_id, upload_id.data);upload_part_copy_params1->part_num = part3;upload_part_copy_params1->range_start = range_start3;upload_part_copy_params1->range_end = range_end3;copy_headers = aos_table_make(p, 0);s = oss_upload_part_copy(options, upload_part_copy_params1, copy_headers, &resp_headers);if (aos_status_is_ok(s)) {printf("upload part copy succeeded\n");} else {printf("upload part copy failed\n");}/* 列出分片*/list_upload_part_params = oss_create_list_upload_part_params(p);list_upload_part_params->max_ret = max_ret;aos_list_init(&complete_part_list);s = oss_list_upload_part(options, &dest_bucket, &dest_object, &upload_id,list_upload_part_params, &list_part_resp_headers);aos_list_for_each_entry(oss_list_part_content_t, part_content, &list_upload_part_params->part_list, node) {complete_content = oss_create_complete_part_content(p);aos_str_set(&complete_content->part_number, 
part_content->part_number.data);aos_str_set(&complete_content->etag, part_content->etag.data);printf("part_number:%.*s, etag:%.*s\n",complete_content->part_number.len, complete_content->part_number.data, complete_content->etag.len, complete_content->etag.data);aos_list_add_tail(&complete_content->node, &complete_part_list);}/* 完成分片拷贝*/complete_resp_headers = aos_table_make(p, 1); apr_table_add(complete_resp_headers, OSS_CONTENT_TYPE, "video/MP2T");s = oss_complete_multipart_upload(options, &dest_bucket, &dest_object, &upload_id, &complete_part_list, complete_resp_headers, &resp_headers);if (aos_status_is_ok(s)) {printf("complete multipart upload succeeded\n");} else {printf("complete multipart upload failed, %d, %s, %s, %s\n", s->code, s->error_code, s->error_msg, s->req_id);}aos_pool_destroy(p);}