mirror of
https://github.moeyy.xyz/https://github.com/trekhleb/javascript-algorithms.git
synced 2024-12-26 23:21:18 +08:00
Refactor Rabin-Karp (#110)
* Simplify Rabin-Karp functionality
* Created Rabin Fingerprinting module within util directory
* Updated Rabin-Karp search to use rolling hash module

Incorporate tests from @dubzzz
This commit is contained in:
parent
f32172e3db
commit
c4605ea13d
@ -1,24 +1,20 @@
|
|||||||
import { rabinKarp, hashWord, reHashWord } from '../rabinKarp';
|
import rabinKarp from '../rabinKarp';
|
||||||
|
|
||||||
describe('rabinKarp', () => {
|
describe('rabinKarp', () => {
|
||||||
it('should correctly calculates hash and re-hash', () => {
|
|
||||||
expect(hashWord('a')).toBe(97);
|
|
||||||
expect(hashWord('b')).toBe(98);
|
|
||||||
expect(hashWord('abc')).toBe(941094);
|
|
||||||
expect(hashWord('bcd')).toBe(950601);
|
|
||||||
expect(reHashWord(hashWord('abc'), 'abc', 'bcd')).toBe(950601);
|
|
||||||
expect(reHashWord(hashWord('abc'), 'abc', 'bcd')).toBe(hashWord('bcd'));
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should find substring in a string', () => {
|
it('should find substring in a string', () => {
|
||||||
expect(rabinKarp('', '')).toBe(0);
|
expect(rabinKarp('', '')).toBe(0);
|
||||||
expect(rabinKarp('a', '')).toBe(0);
|
expect(rabinKarp('a', '')).toBe(0);
|
||||||
expect(rabinKarp('a', 'a')).toBe(0);
|
expect(rabinKarp('a', 'a')).toBe(0);
|
||||||
|
expect(rabinKarp('ab', 'b')).toBe(1);
|
||||||
expect(rabinKarp('abcbcglx', 'abca')).toBe(-1);
|
expect(rabinKarp('abcbcglx', 'abca')).toBe(-1);
|
||||||
expect(rabinKarp('abcbcglx', 'bcgl')).toBe(3);
|
expect(rabinKarp('abcbcglx', 'bcgl')).toBe(3);
|
||||||
expect(rabinKarp('abcxabcdabxabcdabcdabcy', 'abcdabcy')).toBe(15);
|
expect(rabinKarp('abcxabcdabxabcdabcdabcy', 'abcdabcy')).toBe(15);
|
||||||
expect(rabinKarp('abcxabcdabxabcdabcdabcy', 'abcdabca')).toBe(-1);
|
expect(rabinKarp('abcxabcdabxabcdabcdabcy', 'abcdabca')).toBe(-1);
|
||||||
expect(rabinKarp('abcxabcdabxaabcdabcabcdabcdabcy', 'abcdabca')).toBe(12);
|
expect(rabinKarp('abcxabcdabxaabcdabcabcdabcdabcy', 'abcdabca')).toBe(12);
|
||||||
expect(rabinKarp('abcxabcdabxaabaabaaaabcdabcdabcy', 'aabaabaaa')).toBe(11);
|
expect(rabinKarp('abcxabcdabxaabaabaaaabcdabcdabcy', 'aabaabaaa')).toBe(11);
|
||||||
|
expect(rabinKarp('^ !/\'#\'pp', ' !/\'#\'pp')).toBe(1);
|
||||||
|
expect(rabinKarp('a\u{ffff}', '\u{ffff}')).toBe(1);
|
||||||
|
expect(rabinKarp('a\u{10000}', '\u{10000}')).toBe(1);
|
||||||
|
expect(rabinKarp('\u0000耀\u0000', '耀\u0000')).toBe(1);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
@ -1,88 +1,33 @@
|
|||||||
/**
|
import RabinFingerprint from '../../../utils/hash/rolling/Rabin_Fingerprint';
|
||||||
* A prime number used to create
|
|
||||||
* the hash representation of a word
|
|
||||||
*
|
|
||||||
* Bigger the prime number,
|
|
||||||
* bigger the hash value
|
|
||||||
*/
|
|
||||||
const PRIME = 97;
|
|
||||||
|
|
||||||
/**
 * Computes the polynomial hash of a word.
 *
 * Every UTF-16 code unit contributes its char code multiplied by PRIME
 * raised to the unit's index, and the contributions are summed left to right.
 *
 * @param {string} word
 * @return {number}
 */
export function hashWord(word) {
  return word
    .split('')
    .reduce((sum, unit, position) => sum + (unit.charCodeAt(0) * (PRIME ** position)), 0);
}
|
|
||||||
|
|
||||||
/**
 * Derives the hash of a word from the hash of the previous word, where the
 * new word is the previous one shifted left by one character.
 *
 * Only the dropped first character and the appended last character are
 * touched, so the whole word does not have to be traversed again.
 *
 * @param {number} prevHash
 * @param {string} prevWord
 * @param {string} newWord
 * @return {number}
 */
export function reHashWord(prevHash, prevWord, newWord) {
  const tailIndex = newWord.length - 1;

  // Char code leaving the window (it was weighted by PRIME ** 0).
  const droppedCode = prevWord[0].charCodeAt(0);

  // Char code entering the window at the highest position.
  const addedCode = newWord[tailIndex].charCodeAt(0);

  // Remove the dropped term, lower every remaining power by one, add the new term.
  return ((prevHash - droppedCode) / PRIME) + (addedCode * (PRIME ** tailIndex));
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param {string} text
|
* @param {string} text
|
||||||
* @param {string} word
|
* @param {string} word
|
||||||
* @return {number}
|
* @return {number}
|
||||||
*/
|
*/
|
||||||
export function rabinKarp(text, word) {
|
export default function rabinKarp(text, word) {
|
||||||
// Calculate word hash that we will use for comparison with other substring hashes.
|
const toNum = function toNum(character) {
|
||||||
const wordHash = hashWord(word);
|
const surrogate = character.codePointAt(1);
|
||||||
|
return ((surrogate === undefined) ? 0 : surrogate) + (character.codePointAt(0) * (2 ** 16));
|
||||||
|
};
|
||||||
|
const arrEq = (a1, a2) => ((a1.length === a2.length) && a1.every((val, idx) => val === a2[idx]));
|
||||||
|
|
||||||
let prevSegment = null;
|
const wordArr = [...word].map(toNum);
|
||||||
let currentSegmentHash = null;
|
const textArr = [...text].map(toNum);
|
||||||
|
|
||||||
// Go through all substring of the text that may match
|
// The prime generation function could depend on the inputs for collision guarantees.
|
||||||
for (let charIndex = 0; charIndex <= text.length - word.length; charIndex += 1) {
|
const hasher = new RabinFingerprint(() => 229);
|
||||||
const currentSegment = text.substring(charIndex, charIndex + word.length);
|
const cmpVal = hasher.init(wordArr);
|
||||||
|
|
||||||
// Calculate the hash of current substring.
|
let currHash = hasher.init(textArr.slice(0, wordArr.length));
|
||||||
if (currentSegmentHash === null) {
|
if ((currHash === cmpVal) && arrEq(wordArr, textArr.slice(0, wordArr.length))) {
|
||||||
currentSegmentHash = hashWord(currentSegment);
|
return 0;
|
||||||
} else {
|
|
||||||
currentSegmentHash = reHashWord(currentSegmentHash, prevSegment, currentSegment);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
prevSegment = currentSegment;
|
for (let i = 0; i < (textArr.length - wordArr.length); i += 1) {
|
||||||
|
currHash = hasher.roll(textArr[i], textArr[i + wordArr.length]);
|
||||||
// Compare the hash of current substring and seeking string.
|
if ((currHash === cmpVal) && arrEq(wordArr, textArr.slice(i + 1, i + wordArr.length + 1))) {
|
||||||
if (wordHash === currentSegmentHash) {
|
return i + 1;
|
||||||
// In case if hashes match let's check substring char by char.
|
|
||||||
let numberOfMatches = 0;
|
|
||||||
|
|
||||||
for (let deepCharIndex = 0; deepCharIndex < word.length; deepCharIndex += 1) {
|
|
||||||
if (word[deepCharIndex] === text[charIndex + deepCharIndex]) {
|
|
||||||
numberOfMatches += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (numberOfMatches === word.length) {
|
|
||||||
return charIndex;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
51
src/utils/hash/rolling/Rabin_Fingerprint.js
Normal file
51
src/utils/hash/rolling/Rabin_Fingerprint.js
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
/**
 * Multiplies two residues modulo `modulus` without leaving the safe-integer range.
 *
 * @param {number} a - Non-negative integer already reduced modulo `modulus`.
 * @param {number} b - Non-negative integer already reduced modulo `modulus`.
 * @param {number} modulus - Positive integer less than Number.MAX_SAFE_INTEGER / 2.
 * @returns {number} - The value (a * b) mod modulus.
 */
function mulMod(a, b, modulus) {
  const direct = a * b;
  if (direct <= Number.MAX_SAFE_INTEGER) {
    // The product is exactly representable, so one multiplication is enough.
    return direct % modulus;
  }

  // Double-and-add: every intermediate sum stays below 2 * modulus, which the
  // bound on `modulus` keeps inside the safe-integer range.
  let result = 0;
  let addend = a;
  let remaining = b;
  while (remaining > 0) {
    if (remaining % 2 === 1) {
      result = (result + addend) % modulus;
    }
    addend = (addend * 2) % modulus;
    remaining = Math.floor(remaining / 2);
  }

  return result;
}

/**
 * Generates fingerprints using Rabin scheme with x = 2 (for potential compiler optimizations).
 * Guaranteed not to over or underflow if function assumptions are met.
 *
 * The fingerprint of a window [v0, v1, ..., vk] (v0 being the oldest value) is
 * (v0 * 2^k + v1 * 2^(k-1) + ... + vk) modulo the prime.
 */
export default class RabinFingerprint {
  /**
   * @param {function(): number} primeGenerator - Called once; its result is used as the modulus.
   * @assumes Output from any function call is prime less than Number.MAX_SAFE_INTEGER / 2.
   */
  constructor(primeGenerator) {
    this.prime = primeGenerator();

    // Start in the state init([]) produces so that roll() never works on undefined fields.
    this.val = 0;
    this.len = 0;
    this.leadFactor = 1;
  }

  /**
   * Resets the hasher and digests a whole window of values.
   *
   * @param {number[]} values
   * @returns {number} - The hash value after digesting input.
   * @assumes All array elements are non-negative.
   * @note First element in array is considered to be oldest value.
   */
  init(values) {
    this.val = 0;
    this.len = values.length;
    // Weight of the oldest value in the window: 2^(len - 1) modulo the prime.
    // Cached here so that roll() does not have to rebuild it on every call.
    this.leadFactor = 1;

    for (let i = 0; i < values.length; i += 1) {
      this.val = (((this.val * 2) % this.prime) + (values[i] % this.prime)) % this.prime;
      if (i > 0) {
        this.leadFactor = (this.leadFactor * 2) % this.prime;
      }
    }

    return this.val;
  }

  /**
   * Slides the window by one position.
   *
   * @param {number} oldValue
   * @param {number} newValue
   * @returns {number} - The hash value after removing the oldest value & inserting the newest.
   * @assumes Instance has already been initialized.
   * @assumes oldValue is the oldest value still processed by the hash.
   * @assumes newValue is non-negative.
   */
  roll(oldValue, newValue) {
    // Contribution of the value leaving the window.
    const outgoing = mulMod(oldValue % this.prime, this.leadFactor, this.prime);

    // Adding the prime before subtracting keeps the intermediate value non-negative.
    this.val = (this.val + this.prime - outgoing) % this.prime;

    // Shift the remaining values one power up and append the incoming value.
    this.val = (((this.val * 2) % this.prime) + (newValue % this.prime)) % this.prime;

    return this.val;
  }
}
|
59
src/utils/hash/rolling/__test__/Rabin_Fingerprint.test.js
Normal file
59
src/utils/hash/rolling/__test__/Rabin_Fingerprint.test.js
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
import RabinFingerprint from '../Rabin_Fingerprint';
|
||||||
|
|
||||||
|
describe('Rabin fingerprint Hash Family', () => {
  it('should hash deterministically', () => {
    [3, 5, 19, 53, 97, 401, 7039, 193939].forEach((modulus) => {
      const fingerprint = new RabinFingerprint(() => modulus);
      const hugeValue = Number.MAX_SAFE_INTEGER;

      // Trivial windows.
      expect(fingerprint.init([])).toEqual(0);
      expect(fingerprint.init([1])).toEqual(1);

      // Values at or above the modulus must be reduced rather than overflow.
      expect(fingerprint.init([modulus])).toEqual(0);
      expect(fingerprint.init([hugeValue])).toEqual(hugeValue % modulus);

      // A window of `repeatCount` equal values v hashes to v * (2^repeatCount - 1);
      // repeatCount is small enough for that factor to fit in a javascript number.
      const repeatCount = 2;
      const repeated = new Array(repeatCount).fill(hugeValue);
      const expectedRepeated = ((hugeValue % modulus) * ((2 ** repeatCount) - 1)) % modulus;
      expect(fingerprint.init(repeated)).toEqual(expectedRepeated);

      // Fermat's little theorem: with `modulus` entries that are all multiples of the
      // modulus except the first, only the first entry survives, weighted by
      // 2^(modulus - 1), which is congruent to 1.
      const fermatWindow = new Array(modulus).fill(modulus);
      const fermatRounds = 100;
      for (let round = 0; round < fermatRounds; round += 1) {
        const sample = Math.floor(Math.random() * hugeValue);
        fermatWindow[0] = sample;
        expect(fingerprint.init(fermatWindow)).toEqual(sample % modulus);
      }
    });
  });

  it('should roll appropriately', () => {
    [3, 5, 19, 53, 97, 401, 7039, 193939].forEach((modulus) => {
      const fingerprint = new RabinFingerprint(() => modulus);
      const hugeValue = Number.MAX_SAFE_INTEGER;

      // With a single-element window every roll fully replaces the content.
      expect(fingerprint.init([0])).toEqual(0);
      expect(fingerprint.roll(0, 1)).toEqual(1);
      expect(fingerprint.roll(1, modulus)).toEqual(0);
      expect(fingerprint.roll(modulus, hugeValue)).toEqual(hugeValue % modulus);

      // Keep rolling random values through the one-element window.
      const rollRounds = 100;
      let outgoing = hugeValue;
      for (let round = 0; round < rollRounds; round += 1) {
        const incoming = Math.floor(Math.random() * hugeValue);
        expect(fingerprint.roll(outgoing, incoming)).toEqual(incoming % modulus);
        outgoing = incoming;
      }
    });
  });
});
|
Loading…
Reference in New Issue
Block a user