Multipart Parsing in Node.js Express 4

Just how, exactly, do you use the multiparty module?

If you’ve migrated to Express 4 you may have noticed that with the removal of Connect middleware, there is no bundled solution for multipart form parsing.  We’ve been playing with the commonly recommended multiparty module and it seems to be a reasonable alternative, with one major problem: the documentation isn’t very helpful.  I consider it a fundamental part of the Node.js experience to go look at a package’s npm page, and within minutes know exactly how to use it based on the docs and helpful examples, and here I was let down.  Hopefully the examples below help to fill in the gaps.

Here’s an example showing how to work with both fields and files from a multipart/form-data post.

var multiparty = require("multiparty");

// Skeleton of a request handler that parses a multipart/form-data body with
// multiparty. It registers "part", "field" and "close" listeners on a new
// Form and then parses req. The `// ...` markers stand for application code
// that has been left out of the example.
function(req, res) {

	// ...

	// NOTE(review): current multiparty documentation spells the size option
	// `maxFieldsSize` -- confirm `maxFieldSize` against the installed version.
	var form = new multiparty.Form({maxFieldSize: 8192, maxFields: 10, autoFiles: false});
	form.on("part", function(part) {
		// A part without a filename is not treated as a file: return without
		// reading or otherwise touching it. Field values are handled by the
		// "field" listener registered below.
		if (!part.filename)
		{
			return;
		}

		// if we got this far, the part represents a file
		// Placeholder: assign the writable destination for the file's bytes
		// here before the pipe() call below.
		var stream; // = (whatever)

		// ...

		// Stream the file part into the destination chosen above.
		part.pipe(stream);
	});
	form.on("field", function(name, value) {
		// do something with the field
	});
	form.on("close", function() {
		// continue with the rest of your handler
	});
	// Start parsing the request body using the listeners registered above.
	form.parse(req);

	// ...
}

The key here is that you deal with form field values in the “field” handler, and files in the “part” handler. However, you must not touch a field part in the “part” handler (by calling part.resume(), etc.); otherwise the part doesn’t come through properly downstream. In the example above, if we see that the part has no filename, we return immediately and never look back.

Here’s a kitchen-sink example, showing how to (blindly) save all form data to MongoDB while streaming file attachments into GridFS using gridfs-stream. The asynchronous bits between multiparty and gridfs-stream can be a little tricky, so hopefully the example is helpful to someone:

var express = require('express');
var multiparty = require("multiparty");
var async = require("async");
var mongo = require("mongodb");
var MongoClient = mongo.MongoClient;
var Grid = require("gridfs-stream");

/**
 * Saves a multipart/form-data request to MongoDB: every form field is copied
 * onto a single document inserted into the "blargh" collection, and every
 * file part is streamed into GridFS through gridfs-stream.
 *
 * The steps run in order via async.waterfall: connect, wrap the connection
 * with gridfs-stream, parse the form (waiting for all file streams to
 * finish), then insert the document. The database connection, if one was
 * opened, is closed before `callback` is invoked.
 *
 * @param {Object} req - Incoming request; handed to multiparty's Form#parse.
 * @param {Object} options - Settings for the save.
 * @param {string} options.connString - Connection string passed to MongoClient.connect.
 * @param {function(*)} callback - Invoked once with an error (connection,
 *     parse, GridFS write or insert failure) or with a falsy value on success.
 */
function saveRequest(req, options, callback) {
    var context = {
        db: null,
        gfs: null,
        gfsOps: 0,      // number of GridFS write streams still open
        request: {
            time: new Date()
            // ...
        }
    };
    async.waterfall([
        function(callback) {
            MongoClient.connect(options.connString, callback);
        },
        function(db, callback) {
            context.db = db;
            context.gfs = Grid(db, mongo);
            callback(null);
        },
        function(callback) {
            var formClosed = false; // set once multiparty has emitted "close"
            var settled = false;    // set once this step has called back

            // Calls this step's callback at most once, no matter how many
            // handlers report completion or failure.
            function settle(err) {
                if (settled)
                {
                    return;
                }
                settled = true;
                callback(err);
            }

            // Advances only when the whole form has been parsed AND every
            // GridFS stream has closed. Checking the counter alone is not
            // enough: it can drop to zero between two file parts, which would
            // advance (and close the db) while later files are still arriving.
            function advanceIfDone() {
                if (formClosed && context.gfsOps === 0)
                {
                    settle(null);
                }
            }

            // NOTE(review): current multiparty documentation spells the size
            // option `maxFieldsSize` -- confirm against the installed version.
            var form = new multiparty.Form({maxFieldSize:8192, maxFields:10, autoFiles:false});
            form.on("part", function(part) {
                // Parts without a filename are not files; leave them untouched.
                if (!part.filename)
                {
                    return;
                }

                context.gfsOps++;
                var writeStream = context.gfs.createWriteStream({
                    mode: "w",
                    filename: part.filename,
                    content_type: part.headers["content-type"]
                });
                writeStream.on("close", function() {
                    context.gfsOps--;
                    advanceIfDone();
                });
                // A failed GridFS write must fail the request instead of
                // leaving the waterfall waiting forever.
                writeStream.on("error", settle);

                part.pipe(writeStream);
            });
            form.on("field", function(name, value) {
                // Field names come straight from the request, so a field
                // called "time" replaces the timestamp set above.
                context.request[name] = value;
            });
            // Without this listener a malformed or oversized body would never
            // invoke the callback, and the db connection would stay open.
            form.on("error", settle);
            form.on("close", function() {
                formClosed = true;
                advanceIfDone();
            });
            form.parse(req);
        },
        function(callback)
        {
            var collection = context.db.collection("blargh");
            collection.insert(context.request, callback);
        }
    ],
    function(err) {
        // Runs on success and on failure alike, so the connection is always
        // released when one was opened.
        if (context.db)
        {
            context.db.close();
        }
        callback(err);
    });
}

/**
 * Builds an Express router exposing POST /blargh, which stores the incoming
 * multipart request via saveRequest.
 *
 * @param {Object} options - Passed through unchanged to saveRequest.
 * @returns {Object} The router created by express.Router().
 */
module.exports.router = function(options) {
    // Responds once saveRequest has finished: redirect to "/" when it
    // succeeded, otherwise an empty 500 response.
    function handleBlarghPost(req, res) {
        saveRequest(req, options, function(saveErr) {
            if (!saveErr)
            {
                res.redirect("/");
                res.end();
                return;
            }

            res.writeHead(500);
            res.end();
        });
    }

    var blarghRouter = express.Router();
    blarghRouter.post('/blargh', handleBlarghPost);

    return blarghRouter;
}

The issue here is that you need to know when everything is done before you close the db connection. So, we increment a ‘gfs Operations’ counter every time we start streaming a new part into GridFS, and decrement it when the stream closes off. The first handler (either stream-close or form-close) to see that there are no pending operations will invoke the callback to advance. This may not be the best way to handle the problem, but it’s the first thing that occurred to me.

That’s all for now.  If you have any comments, please leave them below.

Posted in Development.

Leave a Reply

Your email address will not be published. Required fields are marked *