The better way to do natural sort in JavaScript

One thing about JavaScript that I wouldn’t put in The Good Parts is how it handles array sorting.

The default sort order is built upon converting the elements into strings, then comparing their sequences of UTF-16 code units values. - MDN

What does that do?

1
2
3
4
5
// Default sort with no comparator: elements are converted to strings and
// compared by their UTF-16 code units, which is fine for plain words.
const months = ['March', 'Jan', 'Feb', 'Dec']
months.sort()
console.log(months)

// ["Dec", "Feb", "Jan", "March"]

Nice. How about this?

1
2
3
4
5
// Default sort on numbers: each number is converted to a string before
// comparing, so the result follows the digit characters, not the values.
const array1 = [1, 30, 4, 21, 100000]
array1.sort()
console.log(array1)

// [1, 100000, 21, 30, 4]

Well…that’s not what I wanted. If you know you’re getting an array of numbers ahead of time, you can make a sort function accordingly.

1
2
3
4
5
// Numeric comparator: negative when x < y, positive when x > y, zero when equal.
const ascending = (x, y) => x - y

const numbers = [4, 2, 5, 1, 3]
numbers.sort(ascending)
console.log(numbers)

// [1, 2, 3, 4, 5]

OK, not bad. We can sort strings and numbers differently. Unless your array has numbers or strings containing numbers in it.

1
2
3
4
5
// Default sort on strings that contain numbers: digits are compared one
// character at a time, so "10000" and "101" land before "24th" and "3rd".
const uhoh = ['3rd', 'Apple', '24th', '99 in the shade', 'Dec', '10000', '101']
uhoh.sort()
console.log(uhoh)

// [ "10000", "101", "24th", "3rd", "99 in the shade", "Apple", "Dec" ]

The traditional way around this problem was to create a natural sort function of such profound and startling ugliness I advise the faint of heart to look away.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
/**
 * Comparator for Array.prototype.sort that orders values "naturally":
 * digit runs inside strings are compared by numeric value rather than
 * character by character, and whole-string hex codes or date-like strings
 * are compared by their parsed value first.
 *
 * Set `naturalSort.insensitive = true` to lower-case both inputs before
 * comparing.
 *
 * @param {*} a - first value; coerced to a string
 * @param {*} b - second value; coerced to a string
 * @returns {number} -1 if a sorts before b, 1 if a sorts after b, 0 if no
 *   difference was found
 */
function naturalSort (a, b) {
  var re = /(^([+\-]?\d+(?:\.\d*)?(?:[eE][+\-]?\d+)?(?=\D|\s|$))|^0x[\da-fA-F]+$|\d+)/g,
    sre = /^\s+|\s+$/g, // trim pre-post whitespace
    snre = /\s+/g, // normalize all whitespace to single ' ' character
    dre = /(^([\w ]+,?[\w ]+)?[\w ]+,?[\w ]+\d+:\d+(:\d+)?[\w ]?|^\d{1,4}[\/\-]\d{1,4}[\/\-]\d{1,4}|^\w+, \w+ \d+, \d{4})/,
    hre = /^0x[0-9a-f]+$/i,
    ore = /^0/,
    // stringify, optionally lower-case, and trim the input
    i = function(s) {
      return (naturalSort.insensitive && ('' + s).toLowerCase() || '' + s).replace(sre, '');
    },
    // convert all to strings strip whitespace
    x = i(a),
    y = i(b),
    // chunk/tokenize: wrap every numeric run in \0 markers, then split on them
    xN = x.replace(re, '\0$1\0').replace(/\0$/,'').replace(/^\0/,'').split('\0'),
    yN = y.replace(re, '\0$1\0').replace(/\0$/,'').replace(/^\0/,'').split('\0'),
    // numeric, hex or date detection; y is only parsed as a date when x
    // already produced a value and y matches the date pattern
    xD = parseInt(x.match(hre), 16) || (xN.length !== 1 && Date.parse(x)),
    yD = parseInt(y.match(hre), 16) || xD && y.match(dre) && Date.parse(y) || null,
    normChunk = function(s, l) {
      // normalize spaces; find floats not starting with '0', string or 0 if not defined (Clint Priest)
      return (!s.match(ore) || l == 1) && parseFloat(s) || s.replace(snre, ' ').replace(sre, '') || 0;
    },
    oFxNcL, oFyNcL;
  // first try and sort Hex codes or Dates
  if (yD) {
    if (xD < yD) { return -1; }
    else if (xD > yD) { return 1; }
  }
  // natural sorting through split numeric strings and default strings
  for(var cLoc = 0, xNl = xN.length, yNl = yN.length, numS = Math.max(xNl, yNl); cLoc < numS; cLoc++) {
    oFxNcL = normChunk(xN[cLoc] || '', xNl);
    oFyNcL = normChunk(yN[cLoc] || '', yNl);
    // handle numeric vs string comparison - number < string - (Kyle Adams)
    if (isNaN(oFxNcL) !== isNaN(oFyNcL)) {
      return isNaN(oFxNcL) ? 1 : -1;
    }
    // if unicode use locale comparison
    if (/[^\x00-\x80]/.test(oFxNcL + oFyNcL) && oFxNcL.localeCompare) {
      var comp = oFxNcL.localeCompare(oFyNcL);
      // Locale-equal chunks must not end the comparison: dividing 0 by 0
      // would return NaN and skip the remaining chunks.
      if (comp !== 0) { return comp / Math.abs(comp); }
      continue;
    }
    if (oFxNcL < oFyNcL) { return -1; }
    else if (oFxNcL > oFyNcL) { return 1; }
  }
  // no chunk differed: report equality explicitly instead of undefined
  return 0;
}

I warned you. Good luck with your therapist.

I needed a natural sort on a new project, and before I copied Big Ugly, I stumbled across this thing of beauty.

1
2
3
4
5
const items = ['3rd', 'Apple', '24th', '99 in the shade', 'Dec', '10000', '101', '$1.23']
// navigator.languages is missing in IE11, and navigator itself is missing
// outside browsers, so guard both lookups. An undefined locale makes
// localeCompare fall back to the runtime's default locale.
const locale = typeof navigator !== 'undefined'
  ? (navigator.languages && navigator.languages[0]) || navigator.language
  : undefined
// numeric: compare digit runs by value; ignorePunctuation: skip punctuation
items.sort((a, b) => a.localeCompare(b, locale, {numeric: true, ignorePunctuation: true}))
console.log(items)

// [ "$1.23", "3rd", "24th", "99 in the shade", "101", "10000", "Apple", "Dec" ]

Now we’re talking.

The localeCompare() method returns a number indicating whether a reference string comes before or after or is the same as the given string in sort order. - MDN

The localeCompare() string method has arguments for the locale, which sets the sort algorithm (here I get the locale from the browser), and additional options, which I use to turn on numeric collation and ignore punctuation. You can read all about the options here.

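50 incomprehensible lines of JavaScript down to one. I think we can stick Array sorting in The Good Parts now. The locale and options arguments of localeCompare() work in IE11 too, though IE11 has no navigator.languages (fall back to navigator.language there) and needs the arrow function transpiled.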