Use js-spark-md5 in the browser to calculate the MD5 of the file

Use js-spark-md5 in the browser to calculate the MD5 of the file

Recently, a video system was developed in which users upload files to a server, which then forwards them to a transcoding server that converts them to m3u8 format. I thought the client could instead upload directly to the transcoding server. Once the transcoding server was configured to allow cross-origin requests, the client could upload video files directly — but the upload interface requires the MD5 value of the file. We found an MD5 library, js-spark-md5 (reputedly the best-performing one).

https://github.com/satazor/js-spark-md5

Official Demo

// Official demo: hash a user-selected file chunk by chunk with SparkMD5.
document.getElementById('file').addEventListener('change', function () {
    var slice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice;
    var file = this.files[0];
    var chunkSize = 2097152;                         // Read in chunks of 2MB
    var totalChunks = Math.ceil(file.size / chunkSize);
    var chunkIndex = 0;
    var hasher = new SparkMD5.ArrayBuffer();
    var reader = new FileReader();

    // Kick off an async read of the next chunk; reader.onload continues the loop.
    function readChunk() {
        var begin = chunkIndex * chunkSize;
        var finish = Math.min(begin + chunkSize, file.size);
        reader.readAsArrayBuffer(slice.call(file, begin, finish));
    }

    reader.onload = function (e) {
        console.log('read chunk nr', chunkIndex + 1, 'of', totalChunks);
        hasher.append(e.target.result);              // Append array buffer
        chunkIndex++;

        if (chunkIndex < totalChunks) {
            readChunk();
        } else {
            console.log('finished loading');
            console.info('computed hash', hasher.end());  // Compute hash
        }
    };

    reader.onerror = function () {
        console.warn('oops, something went wrong.');
    };

    readChunk();
});

It seems a little troublesome. The main complication is that after slicing the file, a FileReader is used to read each slice into an ArrayBuffer. This read operation is asynchronous, and its result can only be obtained by listening for the FileReader's onload event. This asynchronous behavior adds a little complexity.

I think an API designed like the following would be more pleasant to use (pseudo code):

// Slice size, 1Mb
const CHUNK_SIZE = 1024 * 1024;
// file
const file = ....;
// file size
const size = file.size;
// Total number of slices
let totalChunk = Math.ceil(size / CHUNK_SIZE);
// spark is used to calculate md5
const spark = new SparkMD5.ArrayBuffer(),

for (let i = 0; i < totalChunk; i ++){
	let start = i * CHUNK_SIZE;
    let end = ((start + CHUNK_SIZE) >= size) ? size : start + CHUNK_SIZE;
    // Fragmented file
    let chunk = file.slice(start, end);
    // Add to spark
    spark.append(chunk);
}
// Calculate md5
const md5 = spark.end();

I encapsulated a helper method that is easier to use.

This md5 method takes a file object and a chunkSize parameter and returns a Promise. In the then callback, the computed MD5 is passed as the argument.

/**
 * @param file file
 * @param chunkSize Slice size
 * @returns Promise
 */
/**
 * Compute the MD5 of a File by reading it chunk by chunk with a FileReader
 * and accumulating each chunk into a SparkMD5.ArrayBuffer hasher.
 * @param {File} file - the file to hash
 * @param {number} chunkSize - slice size in bytes; must be a positive number
 * @returns {Promise<string>} resolves with the hex MD5 digest; rejects with
 *          the underlying read error (a DOMException) or a validation Error
 */
function md5(file, chunkSize) {
	return new Promise((resolve, reject) => {
		// Guard: a zero or negative chunk size would make loadNext() re-read
		// the same empty slice forever (chunks becomes Infinity).
		if (!(chunkSize > 0)) {
			reject(new Error('chunkSize must be a positive number'));
			return;
		}
		let blobSlice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice;
		let chunks = Math.ceil(file.size / chunkSize);
		let currentChunk = 0;
		let spark = new SparkMD5.ArrayBuffer();
		let fileReader = new FileReader();

		fileReader.onload = function(e) {
			// Fold this chunk's ArrayBuffer into the running digest.
			spark.append(e.target.result);
			currentChunk++;
			if (currentChunk < chunks) {
				loadNext();
			} else {
				// All chunks read: finalize and resolve with the hex digest.
				resolve(spark.end());
			}
		};

		fileReader.onerror = function() {
			// Reject with the actual DOMException, not the ProgressEvent the
			// handler receives — callers get a real error object to inspect.
			reject(fileReader.error);
		};

		function loadNext() {
			let start = currentChunk * chunkSize;
			// Clamp the last slice to the end of the file.
			let end = Math.min(start + chunkSize, file.size);
			fileReader.readAsArrayBuffer(blobSlice.call(file, start, end));
		}
		loadNext();
	});
}

Test it

<!DOCTYPE html>
<!-- Minimal test page: pick a file, compute its MD5 in the browser, log it. -->
<html>
	<head>
		<meta charset="UTF-8">
		<title>Upload</title>
	</head>
	<body>
		<input type="file" onchange="change(this);"/>
	<!-- spark-md5.js library-->
	<script type="text/javascript" src="./spark-md5.js"></script>
	<script type="text/javascript">
		
		// Change handler for the file input: hash the selected file.
		// NOTE(review): 1024 bytes is a very small chunk size (one read per KB);
		// a value like 2MB would mean far fewer FileReader round-trips.
		function change(node){
			md5(node.files[0], 1024).then(e => {
				// Get md5 of file
				console.log("md5=" + e);
			}).catch(e => {
				// Handling exceptions
				console.error(e);
			});
		}
		
		/**
		 * Compute the MD5 of a file by reading it chunk by chunk.
		 * @param file file to hash
		 * @param chunkSize Slice size in bytes
		 * @returns Promise resolving with the hex MD5 digest
		 */
		function md5(file, chunkSize) {
			return new Promise((resolve, reject) => {
				let blobSlice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice;
				let chunks = Math.ceil(file.size / chunkSize);
				let currentChunk = 0;
				let spark = new SparkMD5.ArrayBuffer();
				let fileReader = new FileReader();
			
				fileReader.onload = function(e) {
					// Fold this chunk's ArrayBuffer into the running digest.
					spark.append(e.target.result); 
					currentChunk++;
					if (currentChunk < chunks) {
						loadNext();
					} else {
						// All chunks read: finalize and resolve with the digest.
						let md5 = spark.end();
						resolve(md5);
					}
				};
			
				fileReader.onerror = function(e) {
					// NOTE(review): e is the ProgressEvent, not an Error;
					// fileReader.error would carry the actual DOMException.
					reject(e);
				};
			
				// Read the next chunk; the last slice is clamped to file.size.
				function loadNext() {
					let start = currentChunk * chunkSize;
					let end = start + chunkSize;
					if (end > file.size){
						end = file.size;
					}
					fileReader.readAsArrayBuffer(blobSlice.call(file, start, end));
				}
				loadNext();
			});
		}
	</script>
	</body>
</html>

The final calculation result is correct

(Tip: on Windows, you can verify the result by computing the MD5 of the file on disk with the built-in certutil tool.)

Original address: https://springboot.io/t/topic/2171

Tags: Javascript

Posted by Nat on Mon, 30 May 2022 15:41:33 +0530