Add SimplePolynomialHash function.

This commit is contained in:
Oleksii Trekhleb 2018-08-09 15:46:22 +03:00
parent d5be477bd8
commit 51f496ce2e
3 changed files with 115 additions and 1 deletion

View File

@ -23,7 +23,6 @@ export default class PolynomialHash {
const charCodes = Array.from(word).map(char => this.charToNumber(char));
let hash = 0;
for (let charIndex = 0; charIndex < charCodes.length; charIndex += 1) {
hash *= this.base;
hash %= this.modulus;

View File

@ -0,0 +1,63 @@
const DEFAULT_BASE = 17;

/**
 * Polynomial (rolling) hash without a modulus.
 *
 * The hash of a word is sum(word.charCodeAt(i) * base ** i) over all
 * UTF-16 code units of the word. Because no modulo is applied, values
 * can exceed Number.MAX_SAFE_INTEGER for long words or large bases and
 * lose precision. The class exists for simplicity and LEARNING reasons.
 */
export default class SimplePolynomialHash {
  /**
   * @param {number} [base] - Base number that is used to create the polynomial.
   */
  constructor(base = DEFAULT_BASE) {
    this.base = base;
  }

  /**
   * Creates the hash representation of the word.
   *
   * Time complexity: O(word.length).
   *
   * @assumption: This version of the function doesn't use modulo operator.
   * Thus it may produce number overflows by generating numbers that are
   * bigger than Number.MAX_SAFE_INTEGER. This function is mentioned here
   * for simplicity and LEARNING reasons.
   *
   * @param {string} word - String that needs to be hashed.
   * @return {number} Hash of the word; 0 for an empty string.
   */
  hash(word) {
    let hash = 0;
    for (let charIndex = 0; charIndex < word.length; charIndex += 1) {
      // The character at position i contributes code * base^i.
      hash += word.charCodeAt(charIndex) * (this.base ** charIndex);
    }
    return hash;
  }

  /**
   * Creates the hash representation of the word based on the hash of the
   * previous word (the same window shifted by one character to the left).
   *
   * Recalculates the hash so that it isn't necessary to traverse the
   * whole word again. Both words are expected to have the same length.
   *
   * Time complexity: O(1) for non-empty words.
   *
   * @assumption: This function doesn't use modulo operator and thus is not safe since
   * it may deal with numbers that are bigger than Number.MAX_SAFE_INTEGER. This
   * function is mentioned here for simplicity and LEARNING reasons.
   *
   * @param {number} prevHash - Hash of prevWord.
   * @param {string} prevWord - Previous window.
   * @param {string} newWord - Current window.
   * @return {number} Hash of newWord.
   */
  roll(prevHash, prevWord, newWord) {
    // An empty word has no character to drop or append: charCodeAt() would
    // yield NaN and poison the result. Hash the new word directly instead,
    // which keeps roll() consistent with hash('') === 0.
    if (prevWord.length === 0 || newWord.length === 0) {
      return this.hash(newWord);
    }

    let hash = prevHash;
    const prevValue = prevWord.charCodeAt(0);
    const newValue = newWord.charCodeAt(newWord.length - 1);

    // Drop the leading character (its coefficient is base^0 = 1) ...
    hash -= prevValue;
    // ... lower the power of every remaining term by one ...
    hash /= this.base;
    // ... and append the new trailing character at the highest power.
    hash += newValue * (this.base ** (newWord.length - 1));

    return hash;
  }
}

View File

@ -0,0 +1,52 @@
import SimplePolynomialHash from '../SimplePolynomialHash';
// Spec for SimplePolynomialHash: rolling hashes must match directly
// computed hashes, and known inputs must produce known numeric values.
describe('SimplePolynomialHash', () => {
  it('should calculate new hash based on previous one', () => {
    const bases = [3, 5];
    const frameSizes = [5, 10];
    const text = 'Lorem Ipsum is simply dummy text of the printing and '
      + 'typesetting industry. Lorem Ipsum has been the industry\'s standard '
      + 'galley of type and \u{ffff} scrambled it to make a type specimen book. It '
      + 'electronic 耀 typesetting, remaining essentially unchanged. It was '
      + 'popularised in the 1960s with the release of Letraset sheets '
      + 'publishing software like Aldus 耀 PageMaker including versions of Lorem.';

    // Check hashing for different prime base.
    bases.forEach((base) => {
      const polynomialHash = new SimplePolynomialHash(base);

      // Check hashing for different word lengths.
      frameSizes.forEach((frameSize) => {
        let previousWord = text.slice(0, frameSize);
        let previousHash = polynomialHash.hash(previousWord);

        // Shift frame through the whole text. The bound is inclusive so the
        // last full-size frame (ending at the final character) is checked too.
        for (let frameShift = 1; frameShift <= (text.length - frameSize); frameShift += 1) {
          const currentWord = text.slice(frameShift, frameShift + frameSize);
          const currentHash = polynomialHash.hash(currentWord);
          const currentRollingHash = polynomialHash.roll(previousHash, previousWord, currentWord);

          // Check that rolling hash is the same as directly calculated hash.
          expect(currentRollingHash).toBe(currentHash);

          previousWord = currentWord;
          previousHash = currentHash;
        }
      });
    });
  });

  it('should generate numeric hashes', () => {
    const polynomialHash = new SimplePolynomialHash();

    expect(polynomialHash.hash('Test')).toBe(604944);
    expect(polynomialHash.hash('a')).toBe(97);
    expect(polynomialHash.hash('b')).toBe(98);
    expect(polynomialHash.hash('c')).toBe(99);
    expect(polynomialHash.hash('d')).toBe(100);
    expect(polynomialHash.hash('e')).toBe(101);
    expect(polynomialHash.hash('ab')).toBe(1763);
    expect(polynomialHash.hash('abc')).toBe(30374);
  });
});