Code style fixes for RadixSort.

This commit is contained in:
Oleksii Trekhleb 2018-05-29 09:01:16 +03:00
parent 71985337b5
commit afd5617144
4 changed files with 157 additions and 97 deletions

View File

@ -2,8 +2,6 @@ export const sortedArr = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
export const reverseArr = [20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1];
export const notSortedArr = [15, 8, 5, 12, 10, 1, 16, 9, 11, 7, 20, 3, 2, 6, 17, 18, 4, 13, 14, 19];
export const equalArr = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
export const stringArr = ['zzz', 'bb', 'a', 'rr', 'rrb', 'rrba'];
export const intArr = [3, 1, 75, 32, 884, 523, 4343456, 232, 123, 656, 343];
export class SortTester {
static testSort(SortingClass) {

View File

@ -1,13 +1,29 @@
# Radix Sort
In computer science, radix sort is a non-comparative integer sorting algorithm that sorts data with integer keys by grouping keys by the individual digits which share the same significant position and value. A positional notation is required, but because integers can represent strings of characters (e.g., names or dates) and specially formatted floating point numbers, radix sort is not limited to integers. Radix sort dates back as far as 1887 to the work of Herman Hollerith on tabulating machines.
In computer science, **radix sort** is a non-comparative integer sorting
algorithm that sorts data with integer keys by grouping keys by the individual
digits which share the same significant position and value. A positional notation
is required, but because integers can represent strings of characters
(e.g., names or dates) and specially formatted floating point numbers, radix
sort is not limited to integers.
## Efficiency
The topic of the efficiency of radix sort compared to other sorting algorithms is somewhat tricky and subject to quite a lot of misunderstandings. Whether radix sort is equally efficient, less efficient or more efficient than the best comparison-based algorithms depends on the details of the assumptions made. Radix sort complexity is O(wn) for n keys which are integers of word size w. Sometimes w is presented as a constant, which would make radix sort better (for sufficiently large n) than the best comparison-based sorting algorithms, which all perform O(n log n) comparisons to sort n keys. However, in general w cannot be considered a constant: if all n keys are distinct, then w has to be at least log n for a random-access machine to be able to store them in memory, which gives at best a time complexity O(n log n). That would seem to make radix sort at most as efficient as the best comparison-based sorts (and worse if keys are much longer than log n).
The counterargument is that comparison-based algorithms are measured in number of comparisons, not actual time complexity. Under some assumptions the comparisons will be constant time on average, under others they will not. Comparisons of randomly generated keys take constant time on average, as keys differ on the very first bit in half the cases, and differ on the second bit in half of the remaining half, and so on, resulting in an average of two bits that need to be compared. In a sorting algorithm the first comparisons made satisfy the randomness condition, but as the sort progresses the keys compared are clearly not randomly chosen anymore. For example, consider a bottom-up merge sort. The first pass will compare pairs of random keys, but the last pass will compare keys that are very close in the sorting order. This makes merge sort, on this class of inputs, take O(n (log n)^2) time. That assumes all memory accesses cost the same, which is not a physically reasonable assumption as we scale n to infinity, and not, in practice, how real computers work.
The topic of the efficiency of radix sort compared to other sorting algorithms is
somewhat tricky and subject to quite a lot of misunderstandings. Whether radix
sort is equally efficient, less efficient or more efficient than the best
comparison-based algorithms depends on the details of the assumptions made.
Radix sort complexity is `O(wn)` for `n` keys which are integers of word size `w`.
Sometimes `w` is presented as a constant, which would make radix sort better
(for sufficiently large `n`) than the best comparison-based sorting algorithms,
which all perform `O(n log n)` comparisons to sort `n` keys. However, in
general `w` cannot be considered a constant: if all `n` keys are distinct,
then `w` has to be at least `log n` for a random-access machine to be able to
store them in memory, which gives at best a time complexity `O(n log n)`. That
would seem to make radix sort at most as efficient as the best
comparison-based sorts (and worse if keys are much longer than `log n`).
## References
- [Wikipedia](https://en.wikipedia.org/wiki/Radix_sort)
- [YouTube](https://www.youtube.com/watch?v=XiuSW_mEn7g&index=62&t=0s&list=PLLXdhg_r2hKA7DPDsunoDZ-Z769jWn4R8)

View File

@ -1,96 +1,27 @@
import Sort from '../Sort';
// Using charCode (a = 97, b = 98, etc), we can map characters to buckets from 0 - 25
const BASE_CHAR_CODE = 97;
const NUMBER_OF_DIGITS = 10;
const ENGLISH_ALPHABET_LENGTH = 26;
export default class RadixSort extends Sort {
/**
* @param {*[]} originalArray
* @return {*[]}
*/
sort(originalArray) {
const isNumber = (element) => {
return Number.isInteger(element);
};
const createBuckets = (numBuckets) => {
/**
* Mapping buckets to an array instead of filling them with
* an array prevents each bucket from containing a reference to the same array
*/
return new Array(numBuckets).fill(null).map(() => []);
};
const placeElementsInNumberBuckets = (array, index) => {
// See below. These are used to determine which digit to use for bucket allocation
const modded = 10 ** (index + 1);
const divided = 10 ** index;
const buckets = createBuckets(10);
array.forEach((element) => {
this.callbacks.visitingCallback(element);
if (element < divided) {
buckets[0].push(element);
} else {
/**
* Say we have element of 1,052 and are currently on index 1 (starting from 0). This means
* we want to use '5' as the bucket. `modded` would be 10 ** (1 + 1), which
* is 100. So we take 1,052 % 100 (52) and divide it by 10 (5.2) and floor it (5).
*/
const currentDigit = Math.floor((element % modded) / divided);
buckets[currentDigit].push(element);
}
});
return buckets;
};
const placeElementsInCharacterBuckets = (array, index, numPasses) => {
const getCharCodeOfElementAtIndex = (element) => {
// Place element in last bucket if not ready to organize
if ((numPasses - index) > element.length) return 25;
// Using charCode (a = 97, b = 98, etc), we can map characters to buckets from 0 - 25
const BASE_CHAR_CODE = 97;
/**
* If each character has been organized, use first character to determine bucket,
* otherwise iterate backwards through element
*/
const charPos = index > element.length - 1 ? 0 : element.length - index - 1;
return element.toLowerCase().charCodeAt(charPos) - BASE_CHAR_CODE;
};
const buckets = createBuckets(26);
array.forEach((element) => {
this.callbacks.visitingCallback(element);
const currentBucket = getCharCodeOfElementAtIndex(element);
buckets[currentBucket].push(element);
});
return buckets;
};
// Assumes all elements of array are of the same type
const isArrayOfNumbers = isNumber(originalArray[0]);
/** Number of passes is determined by the length of the longest element in the array.
* For integers, this is floor(log10(num)) + 1, and for strings, this is the length of the string.
*/
const determineNumPasses = () => {
const getLengthOfLongestElement = () => {
if (isArrayOfNumbers) {
return Math.floor(Math.log10(Math.max(...originalArray))) + 1;
}
return originalArray.reduce((acc, val) => {
return val.length > acc ? val.length : acc;
}, -Infinity);
};
return getLengthOfLongestElement(originalArray);
};
const isArrayOfNumbers = this.isArrayOfNumbers(originalArray);
let sortedArray = [...originalArray];
const numPasses = determineNumPasses();
const numPasses = this.determineNumPasses(sortedArray);
for (let currentIndex = 0; currentIndex < numPasses; currentIndex += 1) {
const buckets = isArrayOfNumbers ?
placeElementsInNumberBuckets(sortedArray, currentIndex) :
placeElementsInCharacterBuckets(sortedArray, currentIndex, numPasses);
this.placeElementsInNumberBuckets(sortedArray, currentIndex) :
this.placeElementsInCharacterBuckets(sortedArray, currentIndex, numPasses);
// Flatten buckets into sortedArray, and repeat at next index
sortedArray = buckets.reduce((acc, val) => {
return [...acc, ...val];
@ -99,4 +30,123 @@ export default class RadixSort extends Sort {
return sortedArray;
}
/**
* @param {*[]} array
* @param {number} index
* @return {*[]}
*/
placeElementsInNumberBuckets(array, index) {
// See below. These are used to determine which digit to use for bucket allocation
const modded = 10 ** (index + 1);
const divided = 10 ** index;
const buckets = this.createBuckets(NUMBER_OF_DIGITS);
array.forEach((element) => {
this.callbacks.visitingCallback(element);
if (element < divided) {
buckets[0].push(element);
} else {
/**
* Say we have element of 1,052 and are currently on index 1 (starting from 0). This means
* we want to use '5' as the bucket. `modded` would be 10 ** (1 + 1), which
* is 100. So we take 1,052 % 100 (52) and divide it by 10 (5.2) and floor it (5).
*/
const currentDigit = Math.floor((element % modded) / divided);
buckets[currentDigit].push(element);
}
});
return buckets;
}
/**
* @param {*[]} array
* @param {number} index
* @param {number} numPasses
* @return {*[]}
*/
placeElementsInCharacterBuckets(array, index, numPasses) {
const buckets = this.createBuckets(ENGLISH_ALPHABET_LENGTH);
array.forEach((element) => {
this.callbacks.visitingCallback(element);
const currentBucket = this.getCharCodeOfElementAtIndex(element, index, numPasses);
buckets[currentBucket].push(element);
});
return buckets;
}
/**
* @param {string} element
* @param {number} index
* @param {number} numPasses
* @return {number}
*/
getCharCodeOfElementAtIndex(element, index, numPasses) {
// Place element in last bucket if not ready to organize
if ((numPasses - index) > element.length) {
return ENGLISH_ALPHABET_LENGTH - 1;
}
/**
* If each character has been organized, use first character to determine bucket,
* otherwise iterate backwards through element
*/
const charPos = index > element.length - 1 ? 0 : element.length - index - 1;
return element.toLowerCase().charCodeAt(charPos) - BASE_CHAR_CODE;
}
/**
* Number of passes is determined by the length of the longest element in the array.
* For integers, this log10(num), and for strings, this would be the length of the string.
*/
determineNumPasses(array) {
return this.getLengthOfLongestElement(array);
}
/**
* @param {*[]} array
* @return {number}
*/
getLengthOfLongestElement(array) {
if (this.isArrayOfNumbers(array)) {
return Math.floor(Math.log10(Math.max(...array))) + 1;
}
return array.reduce((acc, val) => {
return val.length > acc ? val.length : acc;
}, -Infinity);
}
/**
* @param {*[]} array
* @return {boolean}
*/
isArrayOfNumbers(array) {
// Assumes all elements of array are of the same type
return this.isNumber(array[0]);
}
/**
* @param {number} numBuckets
* @return {*[]}
*/
createBuckets(numBuckets) {
/**
* Mapping buckets to an array instead of filling them with
* an array prevents each bucket from containing a reference to the same array
*/
return new Array(numBuckets).fill(null).map(() => []);
}
/**
* @param {*} element
* @return {boolean}
*/
isNumber(element) {
return Number.isInteger(element);
}
}

View File

@ -1,9 +1,5 @@
import RadixSort from '../RadixSort';
import {
stringArr,
intArr,
SortTester,
} from '../../SortTester';
import { SortTester } from '../../SortTester';
// Complexity constants.
const ARRAY_OF_STRINGS_VISIT_COUNT = 24;
@ -16,7 +12,7 @@ describe('RadixSort', () => {
it('should visit array of strings n (number of strings) x m (length of longest element) times', () => {
SortTester.testAlgorithmTimeComplexity(
RadixSort,
stringArr,
['zzz', 'bb', 'a', 'rr', 'rrb', 'rrba'],
ARRAY_OF_STRINGS_VISIT_COUNT,
);
});
@ -24,7 +20,7 @@ describe('RadixSort', () => {
it('should visit array of integers n (number of elements) x m (length of longest integer) times', () => {
SortTester.testAlgorithmTimeComplexity(
RadixSort,
intArr,
[3, 1, 75, 32, 884, 523, 4343456, 232, 123, 656, 343],
ARRAY_OF_INTEGERS_VISIT_COUNT,
);
});