From 4afc8c759a4e9cab3344033a088a53bdd2a0effa Mon Sep 17 00:00:00 2001 From: Oleksii Trekhleb Date: Tue, 24 Apr 2018 17:53:29 +0300 Subject: [PATCH] Add knuth. --- README.md | 4 +- .../string/knuth-morris-pratt/README.md | 19 +++++++ .../__test__/knuthMorrisPratt.test.js | 12 ++++ .../knuth-morris-pratt/knuthMorrisPratt.js | 55 +++++++++++++++++++ 4 files changed, 88 insertions(+), 2 deletions(-) create mode 100644 src/algorithms/string/knuth-morris-pratt/README.md create mode 100644 src/algorithms/string/knuth-morris-pratt/__test__/knuthMorrisPratt.test.js create mode 100644 src/algorithms/string/knuth-morris-pratt/knuthMorrisPratt.js diff --git a/README.md b/README.md index b53e85b9..40860e0f 100644 --- a/README.md +++ b/README.md @@ -41,10 +41,10 @@ * **String** * [Levenshtein Distance](https://github.com/trekhleb/javascript-algorithms/tree/master/src/algorithms/string/levenshtein-distance) - minimum edit distance between two sequences * [Hamming Distance](https://github.com/trekhleb/javascript-algorithms/tree/master/src/algorithms/string/hamming-distance) - number of positions at which the symbols are different - * Knuth Morris Pratt + * [Knuth–Morris–Pratt algorithm](https://github.com/trekhleb/javascript-algorithms/tree/master/src/algorithms/string/knuth-morris-pratt) - substring search + * Rabin Karp * Longest common subsequence * longest common substring - * Rabin Karp * **Search** * [Binary Search](https://github.com/trekhleb/javascript-algorithms/tree/master/src/algorithms/search/binary-search) * **Sorting** diff --git a/src/algorithms/string/knuth-morris-pratt/README.md b/src/algorithms/string/knuth-morris-pratt/README.md new file mode 100644 index 00000000..8f17c9e9 --- /dev/null +++ b/src/algorithms/string/knuth-morris-pratt/README.md @@ -0,0 +1,19 @@ +# Knuth–Morris–Pratt Algorithm + +The Knuth–Morris–Pratt string searching algorithm (or +KMP algorithm) searches for occurrences of a "word" `W` +within a main "text string" `T` by employing the 
/**
 * Builds the KMP failure (prefix) table for the given word.
 *
 * Entry `i` holds the length of the longest proper prefix of
 * `word.slice(0, i + 1)` that is also a suffix of that same slice.
 *
 * @see https://www.youtube.com/watch?v=GTJr8OvyEVQ
 * @param {string} word - The pattern to pre-process.
 * @return {number[]} The failure table; `[0]` for an empty or one-character word.
 */
function buildPatternTable(word) {
  const patternTable = [0];
  let prefixIndex = 0;
  let suffixIndex = 1;

  while (suffixIndex < word.length) {
    if (word[prefixIndex] === word[suffixIndex]) {
      // The current prefix/suffix match grows by one character.
      patternTable[suffixIndex] = prefixIndex + 1;
      suffixIndex += 1;
      prefixIndex += 1;
    } else if (prefixIndex === 0) {
      // No shorter prefix is left to try: nothing matches at this position.
      patternTable[suffixIndex] = 0;
      suffixIndex += 1;
    } else {
      // Fall back to the next-longest prefix that is also a suffix and retry
      // the comparison without advancing suffixIndex.
      prefixIndex = patternTable[prefixIndex - 1];
    }
  }

  return patternTable;
}

/**
 * Finds the first occurrence of `word` inside `text` using the
 * Knuth–Morris–Pratt algorithm.
 *
 * @param {string} text - The string to search in.
 * @param {string} word - The string to search for.
 * @return {number} Zero-based index of the first occurrence of `word` in
 *   `text`, `0` when `word` is empty, or `-1` when `word` does not occur.
 */
export default function knuthMorrisPratt(text, word) {
  // The empty string occurs at the start of every string (same convention as
  // String.prototype.indexOf). Without this guard word[0] is undefined, never
  // equals a text character, and the search would wrongly report -1.
  if (word.length === 0) {
    return 0;
  }

  let textIndex = 0;
  let wordIndex = 0;

  const patternTable = buildPatternTable(word);

  while (textIndex < text.length) {
    if (text[textIndex] === word[wordIndex]) {
      // The last character of the word matched: we've found an occurrence.
      if (wordIndex === word.length - 1) {
        return (textIndex - word.length) + 1;
      }
      wordIndex += 1;
      textIndex += 1;
    } else if (wordIndex > 0) {
      // Mismatch after a partial match: reuse the longest prefix of the word
      // that is also a suffix of what was matched, keeping textIndex in place.
      wordIndex = patternTable[wordIndex - 1];
    } else {
      // Mismatch on the first character of the word: move on in the text.
      textIndex += 1;
    }
  }

  return -1;
}