diff --git a/README.md b/README.md
index fddf28a4..143d6d0a 100644
--- a/README.md
+++ b/README.md
@@ -38,6 +38,7 @@ the data.
* `A` [Fenwick Tree](src/data-structures/tree/fenwick-tree) (Binary Indexed Tree)
* `A` [Graph](src/data-structures/graph) (both directed and undirected)
* `A` [Disjoint Set](src/data-structures/disjoint-set)
+* `A` [Bloom Filter](src/data-structures/bloom-filter)
## Algorithms
@@ -231,6 +232,7 @@ Below is the list of some of the most used Big O notations and their performance
| **B-Tree** | log(n) | log(n) | log(n) | log(n) | |
| **Red-Black Tree** | log(n) | log(n) | log(n) | log(n) | |
| **AVL Tree** | log(n) | log(n) | log(n) | log(n) | |
+| **Bloom Filter** | | 1 | 1 | | |
### Array Sorting Algorithms Complexity
diff --git a/src/data-structures/bloom-filter/BloomFilter.js b/src/data-structures/bloom-filter/BloomFilter.js
new file mode 100644
index 00000000..465e6f58
--- /dev/null
+++ b/src/data-structures/bloom-filter/BloomFilter.js
@@ -0,0 +1,127 @@
+export default class BloomFilter {
+ /**
+ * @param {number} size - Number of boolean slots in the filter (defaults to 100).
+ */
+ constructor(size = 100) {
+ // Bloom filter size directly affects the likelihood of false positives.
+ // The bigger the size, the lower the likelihood of false positives.
+ this.size = size;
+ this.storage = this.createStore(size);
+ }
+
+ /**
+ * @param {string} item - Value to record; each index from getHashValues is set to true.
+ */
+ insert(item) {
+ const hashValues = this.getHashValues(item);
+
+ // Set each hashValue index to true
+ hashValues.forEach(val => this.storage.setValue(val));
+ }
+
+ /**
+ * @param {string} item - Value to look up.
+ * @return {boolean} false if the item was definitely not inserted; true if it may have been.
+ */
+ mayContain(item) {
+ const hashValues = this.getHashValues(item);
+
+ for (let i = 0; i < hashValues.length; i += 1) {
+ if (!this.storage.getValue(hashValues[i])) {
+ // One unset slot is enough: the item was definitely not inserted.
+ return false;
+ }
+ }
+
+ // Every slot is set, so the item may or may not have been inserted.
+ return true;
+ }
+
+ /**
+ * Creates the data store for our filter.
+ * We use this method to generate the store in order to
+ * encapsulate the data itself and only provide access
+ * to the necessary methods.
+ *
+ * @param {number} size - Number of slots; every slot starts as false.
+ * @return {Object} Object exposing getValue(index) and setValue(index) over a private array.
+ */
+ createStore(size) {
+ const storage = [];
+
+ // Initialize all indexes to false
+ for (let i = 0; i < size; i += 1) {
+ storage.push(false);
+ }
+
+ const storageInterface = {
+ getValue(index) {
+ return storage[index];
+ },
+ setValue(index) {
+ storage[index] = true;
+ },
+ };
+
+ return storageInterface;
+ }
+
+ /**
+ * @param {string} str - String to hash.
+ * @return {number} Non-negative hash reduced modulo this.size.
+ */
+ hash1(str) {
+ let hash = 0;
+
+ for (let i = 0; i < str.length; i += 1) {
+ const char = str.charCodeAt(i);
+ hash = (hash << 5) + hash + char;
+ hash &= hash; // Convert to 32bit integer
+ hash = Math.abs(hash);
+ }
+
+ return hash % this.size;
+ }
+
+ /**
+ * @param {string} str - String to hash.
+ * @return {number} Hash modulo this.size; may be negative (getHashValues applies Math.abs).
+ */
+ hash2(str) {
+ let hash = 5381;
+
+ for (let i = 0; i < str.length; i += 1) {
+ const char = str.charCodeAt(i);
+ hash = (hash << 5) + hash + char; /* hash * 33 + c */
+ }
+
+ return hash % this.size;
+ }
+
+ /**
+ * @param {string} str - String to hash.
+ * @return {number} Hash modulo this.size; may be negative (getHashValues applies Math.abs).
+ */
+ hash3(str) {
+ let hash = 0;
+
+ for (let i = 0; i < str.length; i += 1) {
+ const char = str.charCodeAt(i);
+ hash = (hash << 5) - hash;
+ hash += char;
+ hash &= hash; // Convert to 32bit integer
+ }
+
+ return hash % this.size;
+ }
+
+ /**
+ * Runs all 3 hash functions on the input and returns an array of results
+ *
+ * @param {string} item - String to hash.
+ * @return {number[]} Three indexes; the hash2 and hash3 results are made non-negative.
+ */
+ getHashValues(item) {
+ return [this.hash1(item), Math.abs(this.hash2(item)), Math.abs(this.hash3(item))];
+ }
+}
diff --git a/src/data-structures/bloom-filter/README.md b/src/data-structures/bloom-filter/README.md
new file mode 100644
index 00000000..07936a34
--- /dev/null
+++ b/src/data-structures/bloom-filter/README.md
@@ -0,0 +1,104 @@
+# Bloom Filter
+
+A bloom filter is a data structure designed to
+test whether an element is present in a set. It
+is designed to be blazingly fast and use minimal
+memory at the cost of potential false positives.
+
+![Bloom Filter](https://upload.wikimedia.org/wikipedia/commons/a/ac/Bloom_filter.svg)
+
+## Operations
+
+There are two main operations a bloom filter can
+perform: insertion and search. Search may result in
+false positives. Deletion is not possible.
+
+In other words, the filter can take in items. When
+we go to check if an item has previously been
+inserted, it can tell us either "no" or "maybe".
+
+Both insertion and search are O(1) operations.
+
+## Making the filter
+
+A bloom filter is created by allotting a certain size.
+In our example, we use 100 as a default length. All
+locations are initialized to `false`.
+
+### Insertion
+
+During insertion, a number of hash functions,
+in our case 3 hash functions, are used to create
+hashes of the input. These hash functions output
+indexes. At every index received, we simply change
+the value in our bloom filter to `true`.
+
+### Search
+
+During a search, the same hash functions are called
+and used to hash the input. We then check if the
+indexes received _all_ have a value of `true` inside
+our bloom filter. If they _all_ have a value of
+`true`, we know that the bloom filter may have had
+the value previously inserted.
+
+However, it's not certain, because it's possible
+that other values previously inserted flipped the
+values to `true`. The values aren't necessarily
+`true` due to the item currently being searched for.
+Absolute certainty is impossible unless only a single
+item has previously been inserted.
+
+While checking the bloom filter for the indexes
+returned by our hash functions, if even one of them
+has a value of `false`, we definitively know that the
+item was not previously inserted.
+
+## False Positives
+
+The probability of false positives is determined by
+three factors: the size of the bloom filter, the
+number of hash functions we use, and the number
+of items that have been inserted into the filter.
+
+The formula to calculate the probability of a false positive is:
+
+$$\left(1 - e^{-kn/m}\right)^{k}$$
+
+k = # hash functions
+
+m = size
+
+n = # items inserted
+
+These variables, k, m, and n, should be picked based
+on how acceptable false positives are. If the values
+are picked and the resulting probability is too high,
+the values should be tweaked and the probability
+re-calculated.
+
+## Applications
+
+A bloom filter can be used on a blogging website. If
+the goal is to show readers only articles that they
+have never seen before, a bloom filter is perfect.
+It can store hashed values based on the articles. After
+a user reads a few articles, they can be inserted into
+the filter. The next time the user visits the site,
+those articles can be filtered out of the results.
+
+Some articles will inevitably be filtered out by mistake,
+but the cost is acceptable. It's ok if a user never sees
+a few articles as long as they have other, brand new ones
+to see every time they visit the site.
+
+The popular blog site Medium does a version of this.
+Feel free to read [their article](https://blog.medium.com/what-are-bloom-filters-1ec2a50c68ff).
+
+## References
+
+- [Wikipedia](https://en.wikipedia.org/wiki/Bloom_filter)
+- [Tutorial](http://llimllib.github.io/bloomfilter-tutorial/)
+- [Calculating false positive probability](https://hur.st/bloomfilter/?n=4&p=&m=18&k=3)
+- [Medium blog](https://blog.medium.com/what-are-bloom-filters-1ec2a50c68ff)
+- [YouTube](https://www.youtube.com/watch?v=bEmBh1HtYrw)
diff --git a/src/data-structures/bloom-filter/__test__/BloomFilter.test.js b/src/data-structures/bloom-filter/__test__/BloomFilter.test.js
new file mode 100644
index 00000000..a8b9c233
--- /dev/null
+++ b/src/data-structures/bloom-filter/__test__/BloomFilter.test.js
@@ -0,0 +1,39 @@
+import BloomFilter from '../BloomFilter';
+
+describe('Bloom Filter', () => {
+ let bloomFilter;
+ const people = ['Bruce Wayne', 'Clark Kent', 'Barry Allen'];
+
+ beforeEach(() => {
+ bloomFilter = new BloomFilter(); // fresh default-size filter for every test
+ });
+
+ it('Should have methods named "insert" and "mayContain"', () => {
+ expect(typeof bloomFilter.insert).toBe('function');
+ expect(typeof bloomFilter.mayContain).toBe('function');
+ });
+
+ it('Should create a new filter store with the appropriate methods', () => {
+ const store = bloomFilter.createStore(18); // new store; this.storage is untouched
+ expect(typeof store.getValue).toBe('function');
+ expect(typeof store.setValue).toBe('function');
+ });
+
+ it('Should hash deterministically with all 3 hash functions', () => {
+ const str = 'abc';
+ expect(bloomFilter.hash1(str)).toEqual(bloomFilter.hash1(str)); // same input, same index
+ expect(bloomFilter.hash2(str)).toEqual(bloomFilter.hash2(str));
+ expect(bloomFilter.hash3(str)).toEqual(bloomFilter.hash3(str));
+ });
+
+ it('Should create an array with 3 hash values', () => {
+ expect(bloomFilter.getHashValues('abc').length).toEqual(3);
+ });
+
+ it('Should insert strings correctly and return true when checking for inserted values', () => {
+ people.forEach(person => bloomFilter.insert(person));
+ expect(bloomFilter.mayContain('Bruce Wayne')).toBe(true); // inserted items are never "no"
+ expect(bloomFilter.mayContain('Clark Kent')).toBe(true);
+ expect(bloomFilter.mayContain('Barry Allen')).toBe(true);
+ });
+});
diff --git a/src/data-structures/bloom-filter/__test__/BloomFilterFalsePositive.test.js b/src/data-structures/bloom-filter/__test__/BloomFilterFalsePositive.test.js
new file mode 100644
index 00000000..dd1ad73a
--- /dev/null
+++ b/src/data-structures/bloom-filter/__test__/BloomFilterFalsePositive.test.js
@@ -0,0 +1,86 @@
+import BloomFilter from '../BloomFilter';
+
+// Builds a random 10-letter ID. Adapted from http://stackoverflow.com/questions/1349404/generate-random-string-characters-in-javascript
+function makeID() {
+ const possible = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz';
+ let id = '';
+
+ for (let i = 0; i < 10; i += 1) {
+ const randomLength = Math.random() * possible.length; // float in [0, possible.length)
+ const randomIndex = Math.floor(randomLength);
+ id += possible.charAt(randomIndex);
+ }
+
+ return id;
+}
+
+function run10kTrials(numRandomTests = 1000) { // NOTE: the default is 1,000 lookups, not 10k
+ const bloomFilter = new BloomFilter();
+ const mockPeopleIDs = [];
+
+ for (let i = 0; i < 10; i += 1) { // n = 10 inserted items
+ mockPeopleIDs.push(makeID());
+ }
+
+ mockPeopleIDs.forEach(id => bloomFilter.insert(id));
+ let numFalsePositives = 0;
+
+ for (let index = 0; index < numRandomTests; index += 1) {
+ const randomID = makeID();
+ if (bloomFilter.mayContain(randomID)) { // assumes randomID is not one of the inserted IDs
+ numFalsePositives += 1;
+ }
+ }
+
+ return numFalsePositives; // "maybe" answers seen across numRandomTests random lookups
+}
+
+function testFilter(numTrials = 100) { // mean false-positive count over numTrials runs
+ const results = [];
+
+ for (let i = 0; i < numTrials; i += 1) {
+ results.push(run10kTrials()); // each call builds its own filter and random IDs
+ }
+
+ const sum = results.reduce((cumulative, next) => cumulative + next, 0);
+ return sum / numTrials;
+}
+
+describe('Bloom filter false positives', () => {
+ const falsePositiveProbability = 0.0174; // (1 - e^(-kn/m))^k for k = 3, m = 100, n = 10
+ const expectedFalsePositives = falsePositiveProbability * 1000; // 1000 lookups per run
+ const avgFalsePositives = testFilter(); // computed here so it can be shown in the title
+
+ it(`Should keep false positives close to an expected value:
+
+ # trials = 1000
+ k = 3 (hash functions)
+ m = 100 (size)
+ n = 10 (items inserted)
+
+ Using k, m, and n, plugged into https://hur.st/bloomfilter/?n=3&p=&m=18&k=3
+ Chance of false positive = 0.017
+
+ Expected false positives = # trials * chance of false positive
+ Expected false positives => 1000 * ${falsePositiveProbability}
+ Expected false positives => ${expectedFalsePositives}
+
+ **************************
+ EXPECTED = ${expectedFalsePositives}
+ ACTUAL AVG = ${avgFalsePositives}
+ **************************
+
+ If the expected and actual numbers are far off, something is wrong.
+ Inspect manually.`, () => {
+ // The +/-5 window is deliberately wide to avoid flaky failures; a
+ // correctly working filter should land well inside it.
+ // NOTE(review): the hur.st link in the title uses n=3, m=18, not n=10, m=100.
+
+ // Per the original comment: in over 1,000 test runs, none of them
+ // ever came close to falling outside of this range.
+ const upperLimit = expectedFalsePositives + 5;
+ const lowerLimit = expectedFalsePositives - 5;
+ expect(avgFalsePositives).toBeGreaterThan(lowerLimit);
+ expect(avgFalsePositives).toBeLessThan(upperLimit);
+ });
+});