mirror of
https://github.moeyy.xyz/https://github.com/trekhleb/javascript-algorithms.git
synced 2024-12-25 22:46:20 +08:00
Added Polynomial Hash Function.
This commit is contained in:
parent
b3a503aa6d
commit
98a44ea832
@ -123,6 +123,8 @@ a set of rules that precisely define a sequence of operations.
|
||||
* `A` [Hamiltonian Cycle](src/algorithms/graph/hamiltonian-cycle) - Visit every vertex exactly once
|
||||
* `A` [Strongly Connected Components](src/algorithms/graph/strongly-connected-components) - Kosaraju's algorithm
|
||||
* `A` [Travelling Salesman Problem](src/algorithms/graph/travelling-salesman) - shortest possible route that visits each city and returns to the origin city
|
||||
* **Cryptography**
|
||||
* `B` [Polynomial Hash](src/algorithms/cryptography/polynomial-hash) - rolling hash function based on polynomial
|
||||
* **Uncategorized**
|
||||
* `B` [Tower of Hanoi](src/algorithms/uncategorized/hanoi-tower)
|
||||
* `B` [Square Matrix Rotation](src/algorithms/uncategorized/square-matrix-rotation) - in-place algorithm
|
||||
|
@ -0,0 +1,53 @@
|
||||
const DEFAULT_PRIME = 37;

/**
 * Polynomial rolling hash over the UTF-16 code units of a string:
 *
 *   H(s0, s1, ..., sk) = s0 * p^0 + s1 * p^1 + ... + sk * p^k
 *
 * where `p` is the configured prime base.
 */
export default class PolynomialHash {
  /**
   * @param {number} [prime] - A prime number used as the base of the polynomial.
   */
  constructor(prime = DEFAULT_PRIME) {
    this.prime = prime;
    // Stored for backward compatibility only: neither hash() nor roll()
    // currently reduces values by this modulus.
    this.primeModulus = 101;
  }

  /**
   * Function that creates hash representation of the word.
   *
   * Time complexity: O(word.length).
   *
   * @param {string} word - String that needs to be hashed.
   * @return {number} - Polynomial hash of the word (0 for an empty string).
   */
  hash(word) {
    let hash = 0;

    for (let charIndex = 0; charIndex < word.length; charIndex += 1) {
      hash += word.charCodeAt(charIndex) * (this.prime ** charIndex);
    }

    return hash;
  }

  /**
   * Function that creates hash representation of the word
   * based on previous word (shifted by one character left) hash value.
   *
   * Recalculates the hash representation of a word so that it isn't
   * necessary to traverse the whole word again.
   *
   * The incremental formula is only trustworthy while every value involved is
   * an exactly representable integer and both words have the same non-zero
   * length. Outside of that range the method falls back to hashing `newWord`
   * directly, so the result always equals `hash(newWord)`.
   *
   * Time complexity: O(1) on the incremental path,
   * O(newWord.length) when falling back to direct hashing.
   *
   * @param {number} prevHash - Hash of `prevWord` as returned by `hash()`.
   * @param {string} prevWord - Word the previous hash was calculated for.
   * @param {string} newWord - `prevWord` without its first character and
   *   with one new character appended.
   * @return {number} - Hash of `newWord`.
   */
  roll(prevHash, prevWord, newWord) {
    // Rolling by one character only makes sense for equally sized, non-empty
    // windows, and only when the previous hash has not lost precision.
    const canRoll = prevWord.length > 0
      && prevWord.length === newWord.length
      && Number.isSafeInteger(prevHash);

    if (canRoll) {
      const newWordLastIndex = newWord.length - 1;

      // Drop the leading character (its factor is prime^0), shift the
      // remaining terms down by one power, then add the new trailing term.
      let hash = prevHash - prevWord.charCodeAt(0);
      hash /= this.prime;
      hash += newWord.charCodeAt(newWordLastIndex) * (this.prime ** newWordLastIndex);

      // A non-integer or out-of-range result means floating point rounding
      // (or an inconsistent prevHash) got in the way; do not return it.
      if (Number.isSafeInteger(hash)) {
        return hash;
      }
    }

    return this.hash(newWord);
  }
}
|
59
src/algorithms/cryptography/polynomial-hash/README.md
Normal file
59
src/algorithms/cryptography/polynomial-hash/README.md
Normal file
@ -0,0 +1,59 @@
|
||||
# Polynomial Rolling Hash
|
||||
|
||||
## Hash Function
|
||||
|
||||
**Hash functions** are used to map large data sets of elements of an arbitrary
|
||||
length (*the keys*) to smaller data sets of elements of a fixed length
|
||||
(*the fingerprints*).
|
||||
|
||||
The basic application of hashing is efficient testing of equality of keys by
|
||||
comparing their fingerprints.
|
||||
|
||||
A *collision* happens when two different keys have the same fingerprint. The way
|
||||
in which collisions are handled is crucial in most applications of hashing.
|
||||
Hashing is particularly useful in construction of efficient practical algorithms.
|
||||
|
||||
## Rolling Hash
|
||||
|
||||
A **rolling hash** (also known as recursive hashing or rolling checksum) is a hash
|
||||
function where the input is hashed in a window that moves through the input.
|
||||
|
||||
A few hash functions allow a rolling hash to be computed very quickly — the new
|
||||
hash value is rapidly calculated given only the following data:
|
||||
|
||||
- old hash value,
|
||||
- the old value removed from the window,
|
||||
- and the new value added to the window.
|
||||
|
||||
## Polynomial String Hashing
|
||||
|
||||
An ideal hash function for strings should obviously depend both on the *multiset* of
|
||||
the symbols present in the key and on the *order* of the symbols. The most common
|
||||
family of such hash functions treats the symbols of a string as coefficients of
|
||||
a *polynomial* with an integer variable `p` and computes its value modulo an
|
||||
integer constant `M`.
|
||||
|
||||
The *Rabin–Karp string search algorithm* is often explained using a very simple
|
||||
rolling hash function that only uses multiplications and
|
||||
additions - **polynomial rolling hash**:
|
||||
|
||||
> H(s<sub>0</sub>, s<sub>1</sub>, ..., s<sub>k</sub>) = (s<sub>0</sub> * p<sup>0</sup> + s<sub>1</sub> * p<sup>1</sup> + ... + s<sub>k</sub> * p<sup>k</sup>) mod M
|
||||
|
||||
where `p` is a constant, and *(s<sub>0</sub>, ... , s<sub>k</sub>)* are the input
|
||||
characters.
|
||||
|
||||
A careful choice of the parameters `M`, `p` is important to obtain “good”
|
||||
properties of the hash function, i.e., low collision rate.
|
||||
|
||||
In order to avoid manipulating huge `H` values, all math is done modulo `M`.
|
||||
|
||||
Removing and adding characters simply involves adding or subtracting the first or
last term. Shifting all characters by one position to the right requires multiplying
the entire sum `H` by `p`. Shifting all characters by one position to the left
requires dividing the entire sum `H` by `p`.
|
||||
|
||||
## References
|
||||
|
||||
- [Where to Use Polynomial String Hashing](https://www.mii.lt/olympiads_in_informatics/pdf/INFOL119.pdf)
|
||||
- [Hash Function on Wikipedia](https://en.wikipedia.org/wiki/Hash_function)
|
||||
- [Rolling Hash on Wikipedia](https://en.wikipedia.org/wiki/Rolling_hash)
|
@ -0,0 +1,103 @@
|
||||
import PolynomialHash from '../PolynomialHash';
|
||||
|
||||
describe('PolynomialHash', () => {
  it('should calculate new hash based on previous one', () => {
    // Base and window sizes are kept small so that every hash stays within
    // Number.MAX_SAFE_INTEGER and can be compared exactly.
    const primes = [3];
    const frameSizes = [5, 20];

    const text = 'Lorem Ipsum is simply dummy text of the printing and '
      + 'typesetting industry. Lorem Ipsum has been the industry\'s standard '
      + 'galley of type and \u{ffff} scrambled it to make a type specimen book. It '
      + 'electronic 耀 typesetting, remaining essentially unchanged. It was '
      + 'popularised in the \u{20005} \u{20000}1960s with the release of Letraset sheets '
      + 'publishing software like Aldus PageMaker 耀 including versions of Lorem.';

    // Check hashing for different prime base.
    primes.forEach((prime) => {
      const polynomialHash = new PolynomialHash(prime);

      // Check hashing for different word lengths.
      frameSizes.forEach((frameSize) => {
        let previousWord = text.slice(0, frameSize);
        let previousHash = polynomialHash.hash(previousWord);

        // Shift frame through the whole text. The last valid frame starts at
        // (text.length - frameSize), hence the inclusive bound.
        for (let frameShift = 1; frameShift <= (text.length - frameSize); frameShift += 1) {
          const currentWord = text.slice(frameShift, frameShift + frameSize);
          const currentHash = polynomialHash.hash(currentWord);
          const currentRollingHash = polynomialHash.roll(previousHash, previousWord, currentWord);

          // Check that rolling hash is the same as directly calculated hash.
          expect(currentRollingHash).toBe(currentHash);

          previousWord = currentWord;
          previousHash = currentHash;
        }
      });
    });
  });

  it('should calculate expected hash values with the default prime', () => {
    const polynomialHash = new PolynomialHash();

    const wordLength = 3;
    const string = 'Hello World!';

    const word1 = string.slice(0, wordLength);
    const word2 = string.slice(1, 1 + wordLength);
    const word3 = string.slice(2, 2 + wordLength);
    const word4 = string.slice(3, 3 + wordLength);

    const directHash1 = polynomialHash.hash(word1);
    const directHash2 = polynomialHash.hash(word2);
    const directHash3 = polynomialHash.hash(word3);
    const directHash4 = polynomialHash.hash(word4);

    const rollingHash2 = polynomialHash.roll(directHash1, word1, word2);
    const rollingHash3 = polynomialHash.roll(directHash2, word2, word3);
    const rollingHash4 = polynomialHash.roll(directHash3, word3, word4);

    expect(directHash1).toBe(151661);
    expect(directHash2).toBe(151949);
    expect(directHash3).toBe(156063);
    expect(directHash4).toBe(48023);

    expect(rollingHash2).toBe(directHash2);
    expect(rollingHash3).toBe(directHash3);
    expect(rollingHash4).toBe(directHash4);
  });

  it('should calculate expected hash values with 3 as a prime', () => {
    const PRIME = 3;
    const polynomialHash = new PolynomialHash(PRIME);

    const wordLength = 3;
    const string = 'Hello World!';

    const word1 = string.slice(0, wordLength);
    const word2 = string.slice(1, 1 + wordLength);
    const word3 = string.slice(2, 2 + wordLength);
    const word4 = string.slice(3, 3 + wordLength);

    const directHash1 = polynomialHash.hash(word1);
    const directHash2 = polynomialHash.hash(word2);
    const directHash3 = polynomialHash.hash(word3);
    const directHash4 = polynomialHash.hash(word4);

    const rollingHash2 = polynomialHash.roll(directHash1, word1, word2);
    const rollingHash3 = polynomialHash.roll(directHash2, word2, word3);
    const rollingHash4 = polynomialHash.roll(directHash3, word3, word4);

    expect(directHash1).toBe(1347);
    expect(directHash2).toBe(1397);
    expect(directHash3).toBe(1431);
    expect(directHash4).toBe(729);

    expect(rollingHash2).toBe(directHash2);
    expect(rollingHash3).toBe(directHash3);
    expect(rollingHash4).toBe(directHash4);
  });
});
|
Loading…
Reference in New Issue
Block a user