Added Polynomial Hash Function.

This commit is contained in:
Oleksii Trekhleb 2018-08-09 07:24:31 +03:00
parent b3a503aa6d
commit 98a44ea832
4 changed files with 217 additions and 0 deletions

View File

@ -123,6 +123,8 @@ a set of rules that precisely define a sequence of operations.
* `A` [Hamiltonian Cycle](src/algorithms/graph/hamiltonian-cycle) - Visit every vertex exactly once
* `A` [Strongly Connected Components](src/algorithms/graph/strongly-connected-components) - Kosaraju's algorithm
* `A` [Travelling Salesman Problem](src/algorithms/graph/travelling-salesman) - shortest possible route that visits each city and returns to the origin city
* **Cryptography**
* `B` [Polynomial Hash](src/algorithms/cryptography/polynomial-hash) - rolling hash function based on polynomial
* **Uncategorized**
* `B` [Tower of Hanoi](src/algorithms/uncategorized/hanoi-tower)
* `B` [Square Matrix Rotation](src/algorithms/uncategorized/square-matrix-rotation) - in-place algorithm

View File

@ -0,0 +1,53 @@
// Base of the polynomial used when the caller does not supply one.
const DEFAULT_PRIME = 37;

/**
 * Polynomial rolling hash over the UTF-16 code units of a string:
 *
 *   hash(s) = s[0] * prime^0 + s[1] * prime^1 + ... + s[k] * prime^k
 *
 * No modulus is applied to the sum, so results are only exact while every
 * intermediate value stays within Number.MAX_SAFE_INTEGER. Keep the base
 * and the word length small enough for that to hold.
 */
export default class PolynomialHash {
  /**
   * @param {number} [prime] - A prime number used to create the hash representation of a word.
   */
  constructor(prime = DEFAULT_PRIME) {
    this.prime = prime;
    // Not read by hash() or roll(); kept so that code reading this property
    // keeps working.
    this.primeModulus = 101;
  }

  /**
   * Function that creates hash representation of the word.
   *
   * Time complexity: O(word.length).
   *
   * @param {string} word - String that needs to be hashed.
   * @return {number} - Hash of the word; 0 for an empty string.
   */
  hash(word) {
    let hash = 0;

    for (let charIndex = 0; charIndex < word.length; charIndex += 1) {
      hash += word.charCodeAt(charIndex) * (this.prime ** charIndex);
    }

    return hash;
  }

  /**
   * Function that creates hash representation of the word
   * based on previous word (shifted by one character left) hash value.
   *
   * Recalculates the hash representation of a word so that it isn't
   * necessary to traverse the whole word again.
   *
   * The rolling formula is only valid when both words are non-empty and have
   * the same length. For any other input the word is hashed directly, so the
   * result always matches `hash(newWord)` instead of being NaN or a value
   * computed for the wrong window size.
   *
   * Time complexity: O(1) when rolling, O(newWord.length) on the fallback.
   *
   * @param {number} prevHash - Hash of `prevWord`.
   * @param {string} prevWord - Previous window.
   * @param {string} newWord - Previous window shifted by one character.
   * @return {number}
   */
  roll(prevHash, prevWord, newWord) {
    if (newWord.length === 0 || prevWord.length !== newWord.length) {
      return this.hash(newWord);
    }

    const newWordLastIndex = newWord.length - 1;

    // Drop the leading character: it was the prime^0 term of the sum.
    let hash = prevHash - prevWord.charCodeAt(0);

    // Every remaining term moves one position left, i.e. loses one power.
    hash /= this.prime;

    // Append the incoming character as the highest-order term.
    hash += newWord.charCodeAt(newWordLastIndex) * (this.prime ** newWordLastIndex);

    return hash;
  }
}

View File

@ -0,0 +1,59 @@
# Polynomial Rolling Hash
## Hash Function
**Hash functions** are used to map large data sets of elements of an arbitrary
length (*the keys*) to smaller data sets of elements of a fixed length
(*the fingerprints*).
The basic application of hashing is efficient testing of equality of keys by
comparing their fingerprints.
A *collision* happens when two different keys have the same fingerprint. The way
in which collisions are handled is crucial in most applications of hashing.
Hashing is particularly useful in construction of efficient practical algorithms.
## Rolling Hash
A **rolling hash** (also known as recursive hashing or rolling checksum) is a hash
function where the input is hashed in a window that moves through the input.
A few hash functions allow a rolling hash to be computed very quickly — the new
hash value is rapidly calculated given only the following data:
- old hash value,
- the old value removed from the window,
- and the new value added to the window.
## Polynomial String Hashing
An ideal hash function for strings should obviously depend both on the *multiset* of
the symbols present in the key and on the *order* of the symbols. The most common
family of such hash functions treats the symbols of a string as coefficients of
a *polynomial* with an integer variable `p` and computes its value modulo an
integer constant `M`.
The *Rabin-Karp string search algorithm* is often explained using a very simple
rolling hash function that only uses multiplications and
additions - **polynomial rolling hash**:
> H(s<sub>0</sub>, s<sub>1</sub>, ..., s<sub>k</sub>) = (s<sub>0</sub> * p<sup>0</sup> + s<sub>1</sub> * p<sup>1</sup> + ... + s<sub>k</sub> * p<sup>k</sup>) mod M
where `p` is a constant, and *(s<sub>0</sub>, s<sub>1</sub>, ... , s<sub>k</sub>)* are the input
characters.
A careful choice of the parameters `M`, `p` is important to obtain “good”
properties of the hash function, i.e., low collision rate.
In order to avoid manipulating huge `H` values, all math is done modulo `M`.
Removing and adding characters simply involves adding or subtracting the first or
last term. Shifting all characters by one position to the right requires multiplying
the entire sum `H` by `p`. Shifting all characters by one position to the left
requires dividing the entire sum `H` by `p`.
## References
- [Where to Use Polynomial String Hashing](https://www.mii.lt/olympiads_in_informatics/pdf/INFOL119.pdf)
- [Hash Function on Wikipedia](https://en.wikipedia.org/wiki/Hash_function)
- [Rolling Hash on Wikipedia](https://en.wikipedia.org/wiki/Rolling_hash)

View File

@ -0,0 +1,103 @@
import PolynomialHash from '../PolynomialHash';
describe('PolynomialHash', () => {
  /**
   * Hashes consecutive 3-character windows of 'Hello World!' directly and by
   * rolling, then checks both against the expected values.
   *
   * @param {PolynomialHash} polynomialHash - Hasher under test.
   * @param {number[]} expectedHashes - Expected hash of each window, in order.
   */
  const expectKnownHashes = (polynomialHash, expectedHashes) => {
    const wordLength = 3;
    const string = 'Hello World!';

    const words = expectedHashes.map(
      (expectedHash, wordIndex) => string.slice(wordIndex, wordIndex + wordLength),
    );
    const directHashes = words.map((word) => polynomialHash.hash(word));

    expect(directHashes).toEqual(expectedHashes);

    // Every window after the first must also be reachable by rolling.
    for (let wordIndex = 1; wordIndex < words.length; wordIndex += 1) {
      const rollingHash = polynomialHash.roll(
        directHashes[wordIndex - 1],
        words[wordIndex - 1],
        words[wordIndex],
      );

      expect(rollingHash).toBe(directHashes[wordIndex]);
    }
  };

  it('should calculate new hash based on previous one', () => {
    // Candidate values from the original draft:
    //   primes: [3, 79, 101, 3251, 13229, 122743, 3583213], frameSizes: [5, 20].
    // Only the combination below is enabled. The hash applies no modulus, so
    // e.g. 79 ** 19 is far beyond Number.MAX_SAFE_INTEGER and the rolling and
    // direct hashes could no longer be compared exactly - presumably the
    // reason the other values were disabled.
    const primes = [3];
    const frameSizes = [20];

    const text = 'Lorem Ipsum is simply dummy text of the printing and '
      + 'typesetting industry. Lorem Ipsum has been the industry\'s standard '
      + 'galley of type and \u{ffff} scrambled it to make a type specimen book. It '
      + 'electronic 耀 typesetting, remaining essentially unchanged. It was '
      + 'popularised in the \u{20005} \u{20000}1960s with the release of Letraset sheets '
      + 'publishing software like Aldus PageMaker 耀 including versions of Lorem.';

    // Check hashing for different prime base.
    primes.forEach((prime) => {
      const polynomialHash = new PolynomialHash(prime);

      // Check hashing for different word lengths.
      frameSizes.forEach((frameSize) => {
        let previousWord = text.slice(0, frameSize);
        let previousHash = polynomialHash.hash(previousWord);

        // Shift frame through the whole text, including the final full frame.
        const lastFrameShift = text.length - frameSize;
        for (let frameShift = 1; frameShift <= lastFrameShift; frameShift += 1) {
          const currentWord = text.slice(frameShift, frameShift + frameSize);
          const currentHash = polynomialHash.hash(currentWord);
          const currentRollingHash = polynomialHash.roll(previousHash, previousWord, currentWord);

          // Check that rolling hash is the same as directly calculated hash.
          expect(currentRollingHash).toBe(currentHash);

          previousWord = currentWord;
          previousHash = currentHash;
        }
      });
    });
  });

  it('should produce known hash values with the default prime', () => {
    expectKnownHashes(new PolynomialHash(), [151661, 151949, 156063, 48023]);
  });

  it('should produce known hash values with 3 as a prime', () => {
    const PRIME = 3;

    expectKnownHashes(new PolynomialHash(PRIME), [1347, 1397, 1431, 729]);
  });
});