const DEFAULT_PRIME = 37;
const DEFAULT_PRIME_MODULUS = 101;

// Largest modulus M for which the product of two residues (both below M)
// is still an exactly representable integer in a double.
const MAX_EXACT_MODULUS = Math.floor(Math.sqrt(Number.MAX_SAFE_INTEGER));

/**
 * Maps an integer onto its residue in the range [0, modulus).
 *
 * @param {number} value
 * @param {number} modulus
 * @return {number}
 */
function toResidue(value, modulus) {
  // The `%` operator keeps the sign of the dividend, hence the second pass.
  return ((value % modulus) + modulus) % modulus;
}

/**
 * Calculates (base ** exponent) % modulus by square-and-multiply so that
 * no intermediate value grows beyond modulus squared.
 *
 * @param {number} base - Residue in the range [0, modulus).
 * @param {number} exponent - Non-negative integer.
 * @param {number} modulus
 * @return {number}
 */
function modularPower(base, exponent, modulus) {
  let result = 1 % modulus;
  let factor = base % modulus;
  let remainingExponent = exponent;

  while (remainingExponent > 0) {
    if (remainingExponent % 2 === 1) {
      result = (result * factor) % modulus;
    }
    factor = (factor * factor) % modulus;
    remainingExponent = Math.floor(remainingExponent / 2);
  }

  return result;
}

/**
 * Finds x such that (value * x) % modulus === 1 using the extended
 * Euclidean algorithm.
 *
 * @param {number} value - Residue in the range [0, modulus).
 * @param {number} modulus
 * @return {number}
 * @throws {RangeError} When value and modulus share a common divisor.
 */
function modularInverse(value, modulus) {
  // Invariant: previousCoefficient * value === previousRemainder (mod modulus).
  let previousRemainder = value;
  let remainder = modulus;
  let previousCoefficient = 1;
  let coefficient = 0;

  while (remainder !== 0) {
    const quotient = Math.floor(previousRemainder / remainder);
    const nextRemainder = previousRemainder - (quotient * remainder);
    const nextCoefficient = previousCoefficient - (quotient * coefficient);

    previousRemainder = remainder;
    remainder = nextRemainder;
    previousCoefficient = coefficient;
    coefficient = nextCoefficient;
  }

  if (previousRemainder !== 1) {
    throw new RangeError('prime and modulus must be coprime so that rolling can divide by prime');
  }

  return toResidue(previousCoefficient, modulus);
}

/**
 * Polynomial rolling hash: H(s) = s[0] * p^0 + s[1] * p^1 + ... + s[k] * p^k,
 * where s[i] are UTF-16 char codes and p is the configured prime.
 *
 * Two modes are supported:
 * - Without `modulus` (legacy): the plain sum is returned. It is only exact
 *   while it stays below Number.MAX_SAFE_INTEGER; beyond that `roll()` and
 *   `hash()` may disagree because of floating point rounding.
 * - With `modulus`: every operation is reduced modulo `modulus`, so results
 *   stay exact for any word length and `roll()` always matches `hash()`.
 */
export default class PolynomialHash {
  /**
   * @param {number} [prime] - A prime number used to create the hash representation of a word.
   * @param {number} [modulus] - Optional integer in [2, floor(sqrt(Number.MAX_SAFE_INTEGER))]
   *   that must be coprime with `prime`. When omitted no reduction is applied.
   * @throws {TypeError} When `modulus` is given and `prime` is not an integer.
   * @throws {RangeError} When `modulus` is out of range or not coprime with `prime`.
   */
  constructor(prime = DEFAULT_PRIME, modulus) {
    this.prime = prime;
    this.useModulus = modulus !== undefined;
    // Kept at 101 when no modulus is passed so the property stays as it always was.
    this.primeModulus = this.useModulus ? modulus : DEFAULT_PRIME_MODULUS;

    if (this.useModulus) {
      if (!Number.isInteger(prime)) {
        throw new TypeError('prime must be an integer when a modulus is used');
      }
      if (!Number.isInteger(modulus) || modulus < 2 || modulus > MAX_EXACT_MODULUS) {
        throw new RangeError(`modulus must be an integer between 2 and ${MAX_EXACT_MODULUS}`);
      }

      this.primeResidue = toResidue(prime, modulus);
      // Rolling "divides" by prime, which modulo M means multiplying by its inverse.
      this.primeInverse = modularInverse(this.primeResidue, modulus);
    }
  }

  /**
   * Function that creates hash representation of the word.
   *
   * Time complexity: O(word.length).
   *
   * @param {string} word - String that needs to be hashed.
   * @return {number}
   */
  hash(word) {
    let hash = 0;

    if (!this.useModulus) {
      for (let charIndex = 0; charIndex < word.length; charIndex += 1) {
        hash += word.charCodeAt(charIndex) * (this.prime ** charIndex);
      }

      return hash;
    }

    const modulus = this.primeModulus;
    let primePower = 1 % modulus;

    for (let charIndex = 0; charIndex < word.length; charIndex += 1) {
      const charResidue = word.charCodeAt(charIndex) % modulus;
      hash = (hash + (charResidue * primePower)) % modulus;
      primePower = (primePower * this.primeResidue) % modulus;
    }

    return hash;
  }

  /**
   * Function that creates hash representation of the word
   * based on previous word (shifted by one character left) hash value.
   *
   * Recalculates the hash representation of a word so that it isn't
   * necessary to traverse the whole word again.
   *
   * Time complexity: O(1) without a modulus, O(log(newWord.length)) with one.
   *
   * @param {number} prevHash - Hash of `prevWord` produced by this instance.
   * @param {string} prevWord
   * @param {string} newWord - `prevWord` without its first character plus one new last character.
   * @return {number}
   */
  roll(prevHash, prevWord, newWord) {
    // Without a character to drop or to append there is nothing to roll
    // (the formula would yield NaN), so hash the new word directly.
    if (prevWord.length === 0 || newWord.length === 0) {
      return this.hash(newWord);
    }

    const newWordLastIndex = newWord.length - 1;

    if (!this.useModulus) {
      let hash = prevHash - prevWord.charCodeAt(0);
      hash /= this.prime;
      hash += newWord.charCodeAt(newWordLastIndex) * (this.prime ** newWordLastIndex);

      return hash;
    }

    const modulus = this.primeModulus;
    const droppedResidue = prevWord.charCodeAt(0) % modulus;
    const addedResidue = newWord.charCodeAt(newWordLastIndex) % modulus;
    const lastPrimePower = modularPower(this.primeResidue, newWordLastIndex, modulus);

    // Remove the first term, shift every remaining term one power down,
    // then append the new highest-order term.
    let hash = toResidue(prevHash - droppedResidue, modulus);
    hash = (hash * this.primeInverse) % modulus;
    hash = (hash + (addedResidue * lastPrimePower)) % modulus;

    return hash;
  }
}
The most common +family of such hash functions treats the symbols of a string as coefficients of +a *polynomial* with an integer variable `p` and computes its value modulo an +integer constant `M`. + +The *Rabin–Karp string search algorithm* is often explained using a very simple +rolling hash function that only uses multiplications and +additions - **polynomial rolling hash**: + +> H(s0, s1, ..., sk) = (s0 * p^0 + s1 * p^1 + ... + sk * p^k) mod M + +where `p` is a constant, and *(s0, s1, ..., sk)* are the input +characters. + +A careful choice of the parameters `M`, `p` is important to obtain “good” +properties of the hash function, i.e., low collision rate. + +In order to avoid manipulating huge `H` values, all math is done modulo `M`. + +Removing and adding characters simply involves adding or subtracting the first or +last term. Shifting all characters by one position to the right requires multiplying +the entire sum `H` by `p`. Shifting all characters by one position to the left +requires dividing the entire sum `H` by `p`. 
import PolynomialHash from '../PolynomialHash';

describe('PolynomialHash', () => {
  it('should produce the same hash when rolling as when hashing directly', () => {
    // Only a small prime is used here: constructed with a single argument the
    // hash is an unreduced polynomial sum, so bigger primes (79, 101, 3251, ...)
    // combined with 20-character frames exceed Number.MAX_SAFE_INTEGER and the
    // rolling hash can no longer be compared for exact equality.
    const primes = [3];
    const frameSizes = [5, 20];

    // The text deliberately contains code units at the top of the UTF-16
    // range and surrogate pairs.
    const text = 'Lorem Ipsum is simply dummy text of the printing and '
      + 'typesetting industry. Lorem Ipsum has been the industry\'s standard '
      + 'galley of type and \u{ffff} scrambled it to make a type specimen book. It '
      + 'electronic 耀 typesetting, remaining essentially unchanged. It was '
      + 'popularised in the \u{20005} \u{20000}1960s with the release of Letraset sheets '
      + 'publishing software like Aldus PageMaker 耀 including versions of Lorem.';

    // Check hashing for different prime base.
    primes.forEach((prime) => {
      const polynomialHash = new PolynomialHash(prime);

      // Check hashing for different word lengths.
      frameSizes.forEach((frameSize) => {
        let previousWord = text.slice(0, frameSize);
        let previousHash = polynomialHash.hash(previousWord);

        // Shift frame through the whole text, including the very last frame.
        for (let frameShift = 1; frameShift <= (text.length - frameSize); frameShift += 1) {
          const currentWord = text.slice(frameShift, frameShift + frameSize);
          const currentHash = polynomialHash.hash(currentWord);
          const currentRollingHash = polynomialHash.roll(previousHash, previousWord, currentWord);

          // Check that rolling hash is the same as directly calculated hash.
          expect(currentRollingHash).toBe(currentHash);

          previousWord = currentWord;
          previousHash = currentHash;
        }
      });
    });
  });

  it('should calculate new hash based on previous one with the default prime', () => {
    const polynomialHash = new PolynomialHash();

    const wordLength = 3;
    const string = 'Hello World!';

    const word1 = string.slice(0, wordLength);
    const word2 = string.slice(1, 1 + wordLength);
    const word3 = string.slice(2, 2 + wordLength);
    const word4 = string.slice(3, 3 + wordLength);

    const directHash1 = polynomialHash.hash(word1);
    const directHash2 = polynomialHash.hash(word2);
    const directHash3 = polynomialHash.hash(word3);
    const directHash4 = polynomialHash.hash(word4);

    const rollingHash2 = polynomialHash.roll(directHash1, word1, word2);
    const rollingHash3 = polynomialHash.roll(directHash2, word2, word3);
    const rollingHash4 = polynomialHash.roll(directHash3, word3, word4);

    expect(directHash1).toBe(151661);
    expect(directHash2).toBe(151949);
    expect(directHash3).toBe(156063);
    expect(directHash4).toBe(48023);

    expect(rollingHash2).toBe(directHash2);
    expect(rollingHash3).toBe(directHash3);
    expect(rollingHash4).toBe(directHash4);
  });

  it('should calculate new hash based on previous one with 3 as a prime', () => {
    const PRIME = 3;
    const polynomialHash = new PolynomialHash(PRIME);

    const wordLength = 3;
    const string = 'Hello World!';

    const word1 = string.slice(0, wordLength);
    const word2 = string.slice(1, 1 + wordLength);
    const word3 = string.slice(2, 2 + wordLength);
    const word4 = string.slice(3, 3 + wordLength);

    const directHash1 = polynomialHash.hash(word1);
    const directHash2 = polynomialHash.hash(word2);
    const directHash3 = polynomialHash.hash(word3);
    const directHash4 = polynomialHash.hash(word4);

    const rollingHash2 = polynomialHash.roll(directHash1, word1, word2);
    const rollingHash3 = polynomialHash.roll(directHash2, word2, word3);
    const rollingHash4 = polynomialHash.roll(directHash3, word3, word4);

    expect(directHash1).toBe(1347);
    expect(directHash2).toBe(1397);
    expect(directHash3).toBe(1431);
    expect(directHash4).toBe(729);

    expect(rollingHash2).toBe(directHash2);
    expect(rollingHash3).toBe(directHash3);
    expect(rollingHash4).toBe(directHash4);
  });
});