Add regular expression matching algorithm.

This commit is contained in:
Oleksii Trekhleb 2018-06-25 20:02:24 +03:00
parent c96bbdf00e
commit d69199e658
4 changed files with 244 additions and 0 deletions

View File

@ -76,6 +76,7 @@ a set of rules that precisely define a sequence of operations.
* `A` [Z Algorithm](src/algorithms/string/z-algorithm) - substring search (pattern matching)
* `A` [Rabin Karp Algorithm](src/algorithms/string/rabin-karp) - substring search
* `A` [Longest Common Substring](src/algorithms/string/longest-common-substring)
* `A` [Regular Expression Matching](src/algorithms/string/regular-expression-matching)
* **Searches**
* `B` [Linear Search](src/algorithms/search/linear-search)
* `B` [Binary Search](src/algorithms/search/binary-search)
@ -147,6 +148,7 @@ algorithm is an abstraction higher than a computer program.
* `A` [Integer Partition](src/algorithms/math/integer-partition)
* `A` [Maximum Subarray](src/algorithms/sets/maximum-subarray)
* `A` [Bellman-Ford Algorithm](src/algorithms/graph/bellman-ford) - finding shortest path to all graph vertices
* `A` [Regular Expression Matching](src/algorithms/string/regular-expression-matching)
* **Backtracking** - similarly to brute force, try to generate all possible solutions, but each time you generate next solution you test
if it satisfies all conditions, and only then continue generating subsequent solutions. Otherwise, backtrack, and go on a
different path of finding a solution. Normally the DFS traversal of state-space is being used.

View File

@ -0,0 +1,73 @@
# Regular Expression Matching
Given an input string `s` and a pattern `p`, implement regular
expression matching with support for `.` and `*`.
- `.` Matches any single character.
- `*` Matches zero or more of the preceding element.
The matching should cover the **entire** input string (not partial).
**Note**
- `s` could be empty and contains only lowercase letters `a-z`.
- `p` could be empty and contains only lowercase letters `a-z` and the two special characters `.` and `*`.
## Examples
**Example #1**
Input:
```
s = 'aa'
p = 'a'
```
Output: `false`
Explanation: `a` does not match the entire string `aa`.
**Example #2**
Input:
```
s = 'aa'
p = 'a*'
```
Output: `true`
Explanation: `*` means zero or more of the preceding element, `a`.
Therefore, by repeating `a` once, it becomes `aa`.
**Example #3**
Input:
```
s = 'ab'
p = '.*'
```
Output: `true`
Explanation: `.*` means "zero or more (`*`) of any character (`.`)".
**Example #4**
Input:
```
s = 'aab'
p = 'c*a*b'
```
Output: `true`
Explanation: `c*` matches zero occurrences of `c`, `a*` matches two
occurrences of `a`, and `b` matches itself. Therefore it matches `aab`.
## References
- [YouTube](https://www.youtube.com/watch?v=l3hda49XcDE&list=PLLXdhg_r2hKA7DPDsunoDZ-Z769jWn4R8&index=71&t=0s)
- [LeetCode](https://leetcode.com/problems/regular-expression-matching/description/)

View File

@ -0,0 +1,34 @@
import regularExpressionMatching from '../regularExpressionMatching';
describe('regularExpressionMatching', () => {
  it('should match regular expressions in a string', () => {
    // Each entry is a [string, pattern] pair where the pattern must cover
    // the entire string.
    const matchingPairs = [
      ['', ''],
      ['a', 'a'],
      ['aa', 'aa'],
      ['aab', 'aab'],
      ['aab', 'aa.'],
      ['aab', '.a.'],
      ['aab', '...'],
      ['a', 'a*'],
      ['aaa', 'a*'],
      ['aaab', 'a*b'],
      ['aaabb', 'a*b*'],
      ['aaabb', 'a*b*c*'],
      ['', 'a*'],
      ['xaabyc', 'xa*b.c'],
      ['aab', 'c*a*b*'],
      ['mississippi', 'mis*is*.p*.'],
      ['ab', '.*'],
    ];

    // Each entry is a [string, pattern] pair where the pattern must NOT
    // cover the entire string.
    const nonMatchingPairs = [
      ['', 'a'],
      ['a', ''],
      ['aab', 'aa'],
      ['aab', 'baa'],
      ['aabc', '...'],
      ['aaabbdd', 'a*b*c*'],
      ['mississippi', 'mis*is*p*.'],
      ['ab', 'a*'],
      ['abba', 'a*b*.c'],
      ['abba', '.*c'],
    ];

    matchingPairs.forEach(([text, regex]) => {
      expect(regularExpressionMatching(text, regex)).toBeTruthy();
    });

    nonMatchingPairs.forEach(([text, regex]) => {
      expect(regularExpressionMatching(text, regex)).toBeFalsy();
    });
  });
});

View File

@ -0,0 +1,135 @@
const ZERO_OR_MORE_CHARS = '*';
const ANY_CHAR = '.';

/**
 * Checks whether the ENTIRE string matches the pattern (dynamic programming approach).
 *
 * Supported special pattern characters:
 * - '.' matches any single character;
 * - '*' matches zero or more of the preceding pattern element.
 *
 * A '*' that has no preceding element (i.e. it is the first pattern character)
 * can't match anything, so such a pattern position is treated as "no-match".
 *
 * @param {string} string
 * @param {string} pattern
 * @return {boolean}
 */
export default function regularExpressionMatching(string, pattern) {
  /*
   * Let's initiate dynamic programming matrix for this string and pattern.
   * We will have pattern characters on top (as columns) and string characters
   * will be placed to the left of the table (as rows). The matrix has one extra
   * row and one extra column that stand for the empty string and empty pattern.
   *
   * Example:
   *
   *     a * b . b
   *   - - - - - -
   * a - - - - - -
   * a - - - - - -
   * b - - - - - -
   * y - - - - - -
   * b - - - - - -
   *
   * Cell [row][column] answers: "do the first `row` chars of the string match
   * the first `column` chars of the pattern?"
   */
  const matchMatrix = Array.from(
    { length: string.length + 1 },
    () => Array(pattern.length + 1).fill(false),
  );

  // Empty string '' matches empty pattern ''.
  matchMatrix[0][0] = true;

  // First row: empty string can't match a non-empty pattern.
  //
  // Example:
  // string: ''
  // pattern: 'a.z'
  //
  // The one exception here is patterns like a*b* that match the empty string:
  // every "x*" pair may be dropped, so we look two columns to the left.
  for (let columnIndex = 1; columnIndex <= pattern.length; columnIndex += 1) {
    const patternIndex = columnIndex - 1;

    // The columnIndex >= 2 guard prevents reading column -1 (which is undefined)
    // when the pattern starts with '*'.
    matchMatrix[0][columnIndex] = pattern[patternIndex] === ZERO_OR_MORE_CHARS
      && columnIndex >= 2
      && matchMatrix[0][columnIndex - 2];
  }

  // First column (rows 1..n) stays false: empty pattern can't match any
  // non-empty string.
  //
  // Example:
  // string: 'ab'
  // pattern: ''

  // Now let's go through every letter of the pattern and every letter of
  // the string and compare them one by one.
  for (let rowIndex = 1; rowIndex <= string.length; rowIndex += 1) {
    for (let columnIndex = 1; columnIndex <= pattern.length; columnIndex += 1) {
      // Take into account the fact that matrix contains one extra column and row.
      const stringIndex = rowIndex - 1;
      const patternIndex = columnIndex - 1;

      if (pattern[patternIndex] === ZERO_OR_MORE_CHARS) {
        if (columnIndex < 2) {
          // Leading '*' has no preceding element to repeat: treat as "no-match".
          matchMatrix[rowIndex][columnIndex] = false;
        } else {
          /*
           * In case if current pattern character is special '*' character we have
           * two options:
           *
           * 1. Since '*' allows its previous char to be absent from the string we
           * need to check if string matches the pattern without '*' char and without
           * the char that goes before '*'. That would mean to go two positions left
           * on the same row.
           *
           * 2. Since '*' allows its previous char to be present in the string many
           * times we need to check if char before '*' is the same as current string
           * char. If they are the same that would mean that current string matches
           * the current pattern in case if the string WITHOUT current char matches
           * the same pattern. This would mean to go one position up in the same column.
           */
          const precedingChar = pattern[patternIndex - 1];
          const matchesWithoutPrecedingChar = matchMatrix[rowIndex][columnIndex - 2];
          const matchesByRepeatingPrecedingChar = (
            precedingChar === string[stringIndex]
            || precedingChar === ANY_CHAR
          ) && matchMatrix[rowIndex - 1][columnIndex];

          matchMatrix[rowIndex][columnIndex] = matchesWithoutPrecedingChar
            || matchesByRepeatingPrecedingChar;
        }
      } else if (
        pattern[patternIndex] === string[stringIndex]
        || pattern[patternIndex] === ANY_CHAR
      ) {
        /*
         * In case if current pattern char is the same as current string char
         * or it may be any character (in case if pattern contains '.' char)
         * we need to check if there was a match for the pattern and for the
         * string WITHOUT current char. This would mean that we may copy
         * left-top diagonal value.
         *
         * Example:
         *
         *   a b
         * a 1 -
         * b - 1
         */
        matchMatrix[rowIndex][columnIndex] = matchMatrix[rowIndex - 1][columnIndex - 1];
      } else {
        /*
         * In case if pattern char and string char are different we may
         * treat this case as "no-match".
         *
         * Example:
         *
         *   a b
         * a - -
         * c - 0
         */
        matchMatrix[rowIndex][columnIndex] = false;
      }
    }
  }

  return matchMatrix[string.length][pattern.length];
}