Optimizing Large File Uploads
I previously worked on an AI medical image segmentation project built on a Vue 3 + Django stack. Medical imaging files are fairly large, ranging from roughly 20 MB to 50 MB. At the time we simply uploaded them through a form with no optimization at all; when an upload failed, we retried it by hand. Since file upload is such a common scenario, I have since looked into some of the optimization approaches commonly used in industry.
The simplest approach: direct form upload
Upload the file directly through a form, sending it as a byte stream. This is what my earlier project did. It becomes inefficient once files get larger, and uploads fail easily.
<el-upload
  ref="uploadRef"
  drag
  class="upload-demo"
  action="api/upload/"
  :limit="100"
  :on-exceed="handleExceed"
  :auto-upload="false"
  multiple
  accept=".dcm,.nii,.nii.gz,.raw"
>
  <el-icon class="el-icon--upload"><upload-filled /></el-icon>
  <div class="el-upload__text">
    <em>Click or drag files here</em>
  </div>
  <template #tip>
    <div class="el-upload__tip text-red">
      To reduce server load, at most 100 files can be uploaded at a time
    </div>
  </template>
</el-upload>
Chunked upload
Split the large file into a number of small pieces (called parts), upload them one after another, and reassemble them into the original file on the server. This suits large files, and resumable upload can be built on top of it.
Implementation flow
- The front end computes the MD5 of the large file (a sketch follows this list)
- The front end splits the large file into N small parts and numbers each one
- The back end receives each uploaded part and stores it in a staging directory
- If a part fails to upload, the front end sends a failure request so the back end can delete that part
- Once all parts have been sent, the front end sends a merge request to signal that every part has been uploaded
- The back end reads all the parts and merges them in numbered order
- The back end computes the MD5 of the merged file and compares it with the front end's value, to detect corruption or tampering in transit
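Hashing a file of tens of megabytes in a single pass can be memory-hungry in the browser, so the MD5 in step 1 is usually computed incrementally, slice by slice. A minimal sketch in TypeScript, assuming the spark-md5 library; the slice size and the computeFileMD5 name are illustrative:

import SparkMD5 from 'spark-md5';

const HASH_CHUNK_SIZE = 5 * 1024 * 1024; // hash 5 MB at a time

// Incrementally compute the MD5 of a File without reading it all into memory.
async function computeFileMD5(file: File): Promise<string> {
  const spark = new SparkMD5.ArrayBuffer();
  for (let start = 0; start < file.size; start += HASH_CHUNK_SIZE) {
    const slice = file.slice(start, Math.min(start + HASH_CHUNK_SIZE, file.size));
    spark.append(await slice.arrayBuffer()); // Blob.arrayBuffer() reads one slice
  }
  return spark.end(); // hex digest
}

The digest travels with the upload so the server can recompute it over the merged file in the final step.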
Controlling the concurrency of asynchronous uploads
When uploading chunks, too many chunks means too many asynchronous requests fired at once. Opening too many TCP connections at the same time can also make the browser stutter, so the number of concurrent requests needs to be capped.
The idea: put the requests into a queue. With a concurrency limit of 4, for example, start four requests at once; whenever one finishes, dequeue and start the next. A sketch of this idea follows.
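A minimal sketch of such a pool using plain promises; the runWithConcurrency name and the limit of 4 in the usage comment are illustrative:

// Run async task factories with at most `limit` of them in flight at once.
async function runWithConcurrency<T>(
  tasks: Array<() => Promise<T>>,
  limit: number
): Promise<T[]> {
  const results: T[] = new Array(tasks.length);
  let next = 0; // index of the next task to start

  // Each worker pulls the next unstarted task until none remain. JavaScript is
  // single-threaded, so `next++` cannot race between workers.
  async function worker(): Promise<void> {
    while (next < tasks.length) {
      const index = next++;
      results[index] = await tasks[index]();
    }
  }

  // Starting `limit` workers keeps at most `limit` requests in flight.
  await Promise.all(Array.from({ length: Math.min(limit, tasks.length) }, worker));
  return results;
}

// Usage: wrap each chunk upload in a thunk, then allow at most 4 at a time.
// await runWithConcurrency(chunks.map((c, i) => () => uploadChunk(c, i, chunks.length)), 4);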
An encapsulated implementation
Because this is such a common requirement, many mature libraries already package it up for direct use (resumable.js and tus-based uploaders are well-known examples), so there is no need to reinvent the wheel. A hand-rolled reference implementation follows.
Front-end implementation
import React, { useState } from 'react';
import axios from 'axios';
import { useDropzone } from 'react-dropzone';

const CHUNK_SIZE = 5 * 1024 * 1024; // 5 MB per chunk

const FileUpload: React.FC = () => {
  const [selectedFile, setSelectedFile] = useState<File | null>(null);
  const [uploadProgress, setUploadProgress] = useState<number>(0);
  const [errorMessage, setErrorMessage] = useState<string | null>(null);
  const [uploadedChunks, setUploadedChunks] = useState<Set<number>>(new Set());

  const onDrop = (acceptedFiles: File[]) => {
    setSelectedFile(acceptedFiles[0]);
    setErrorMessage(null);
  };

  const { getRootProps, getInputProps } = useDropzone({ onDrop });

  // Ask the server which chunks it already has, so the upload can resume.
  // The status endpoint returns a bare JSON array of chunk indexes.
  const queryUploadedChunks = async (fileName: string): Promise<Set<number>> => {
    try {
      const response = await axios.get(
        `http://localhost:8080/upload/status?fileName=${encodeURIComponent(fileName)}`
      );
      const chunks = new Set<number>(response.data);
      setUploadedChunks(chunks);
      return chunks;
    } catch (error) {
      setErrorMessage('Failed to query uploaded chunks');
      return new Set();
    }
  };

  const uploadChunk = async (chunk: Blob, chunkIndex: number, totalChunks: number) => {
    const formData = new FormData();
    formData.append('file', chunk);
    formData.append('chunkIndex', chunkIndex.toString());
    formData.append('totalChunks', totalChunks.toString());
    formData.append('fileName', selectedFile!.name);
    try {
      await axios.post('http://localhost:8080/upload', formData, {
        headers: {
          'Content-Type': 'multipart/form-data'
        },
        onUploadProgress: (event) => {
          const progress = (event.loaded / event.total!) * 100;
          setUploadProgress(((chunkIndex / totalChunks) * 100) + (progress / totalChunks));
        }
      });
    } catch (error) {
      throw new Error(`Failed to upload chunk ${chunkIndex + 1}: ${(error as Error).message}`);
    }
  };

  const handleFileUpload = async () => {
    if (!selectedFile) return;
    // Track progress in a local set: React state updates are asynchronous, so
    // reading `uploadedChunks` inside this loop would see stale values.
    const uploaded = await queryUploadedChunks(selectedFile.name);
    const totalChunks = Math.ceil(selectedFile.size / CHUNK_SIZE);
    for (let chunkIndex = 0; chunkIndex < totalChunks; chunkIndex++) {
      if (uploaded.has(chunkIndex)) continue; // resume: skip chunks the server already has
      const start = chunkIndex * CHUNK_SIZE;
      const end = Math.min(start + CHUNK_SIZE, selectedFile.size);
      const chunk = selectedFile.slice(start, end);
      try {
        // Chunks are sent one at a time here; for more throughput they could be
        // uploaded concurrently with a cap (e.g. 4), as sketched above.
        await uploadChunk(chunk, chunkIndex, totalChunks);
        uploaded.add(chunkIndex);
        setUploadedChunks(new Set(uploaded));
      } catch (error) {
        setErrorMessage((error as Error).message);
        break;
      }
    }
    if (uploaded.size === totalChunks) {
      console.log('File uploaded successfully');
    }
  };

  return (
    <div>
      <div {...getRootProps()} style={{ border: '2px dashed #000', padding: '20px', cursor: 'pointer' }}>
        <input {...getInputProps()} />
        <p>Drag 'n' drop some files here, or click to select files</p>
      </div>
      {selectedFile && (
        <div>
          <button onClick={handleFileUpload}>Upload</button>
          <progress value={uploadProgress} max="100">{uploadProgress}%</progress>
          {errorMessage && <div style={{ color: 'red' }}>{errorMessage}</div>}
        </div>
      )}
    </div>
  );
};

export default FileUpload;
Back-end implementation
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;

import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashSet;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

@RestController
@RequestMapping("/upload")
public class FileUploadController {

    private static final String TEMP_UPLOAD_DIR = "temp-uploads/";
    private static final String FINAL_UPLOAD_DIR = "uploads/";

    @PostMapping
    public ResponseEntity<String> handleChunkedFileUpload(
            @RequestParam("file") MultipartFile file,
            @RequestParam("chunkIndex") int chunkIndex,
            @RequestParam("totalChunks") int totalChunks,
            @RequestParam("fileName") String fileName) {
        fileName = sanitizeFileName(fileName);
        Path tempFilePath = Paths.get(TEMP_UPLOAD_DIR + fileName + ".part" + chunkIndex);
        try {
            Files.createDirectories(tempFilePath.getParent());
            Files.write(tempFilePath, file.getBytes());
            // Merging when the last index arrives assumes chunks are uploaded in
            // order; with concurrent uploads, trigger the merge from an explicit
            // merge request by the client instead.
            if (chunkIndex == totalChunks - 1) {
                mergeChunks(fileName, totalChunks);
                return ResponseEntity.status(HttpStatus.OK).body("File uploaded successfully: " + fileName);
            }
            return ResponseEntity.status(HttpStatus.OK).body("Chunk " + (chunkIndex + 1) + " uploaded successfully");
        } catch (IOException e) {
            return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body("Error uploading chunk: " + e.getMessage());
        }
    }

    // Report which chunk indexes are already in the staging directory (for resuming).
    @GetMapping("/status")
    public ResponseEntity<Set<Integer>> getUploadedChunks(@RequestParam("fileName") String fileName) {
        String sanitized = sanitizeFileName(fileName);
        Path tempDir = Paths.get(TEMP_UPLOAD_DIR);
        if (!Files.isDirectory(tempDir)) {
            return ResponseEntity.status(HttpStatus.OK).body(new HashSet<>());
        }
        try (Stream<Path> files = Files.list(tempDir)) {
            Set<Integer> uploadedChunks = files
                    .filter(f -> f.getFileName().toString().startsWith(sanitized + ".part"))
                    .map(f -> {
                        String name = f.getFileName().toString();
                        return Integer.parseInt(name.substring(name.lastIndexOf(".part") + 5));
                    })
                    .collect(Collectors.toSet());
            return ResponseEntity.status(HttpStatus.OK).body(uploadedChunks);
        } catch (IOException e) {
            return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(new HashSet<>());
        }
    }

    // Concatenate the parts in index order into the final file.
    private void mergeChunks(String fileName, int totalChunks) throws IOException {
        Path finalFilePath = Paths.get(FINAL_UPLOAD_DIR + fileName);
        Files.createDirectories(finalFilePath.getParent());
        try (FileOutputStream outputStream = new FileOutputStream(finalFilePath.toFile(), true)) {
            for (int i = 0; i < totalChunks; i++) {
                Path chunkPath = Paths.get(TEMP_UPLOAD_DIR + fileName + ".part" + i);
                Files.copy(chunkPath, outputStream);
                Files.delete(chunkPath); // free the staging space
            }
        } catch (IOException e) {
            throw new IOException("Error merging chunks: " + e.getMessage(), e);
        }
    }

    // Keep only safe characters so the client cannot traverse directories.
    private String sanitizeFileName(String fileName) {
        return fileName.replaceAll("[^a-zA-Z0-9\\.\\-]", "_");
    }
}