Merge d2bb607d56 into ca3d16dcce

2024-09-20 07:43:04 +08:00 · 2024-07-17 10:37:17 +09:00 · 2024-07-17 10:37:17 +09:00 · 1f2b2c151d
commit 1f2b2c151d
parent ca3d16dcce d2bb607d56
3 changed files with 297 additions and 0 deletions
--- a/src/algorithms/uncategorized/huffman-coding/README.md
+++ b/src/algorithms/uncategorized/huffman-coding/README.md
@ -0,0 +1,45 @@
+# Huffman Coding Algorithm
+
+![Huffman Coding](https://upload.wikimedia.org/wikipedia/commons/thumb/8/82/Huffman_tree_2.svg/1920px-Huffman_tree_2.svg.png)
+
+
+In computer science and information theory, 
+a Huffman code is a particular type of optimal prefix code
+that is commonly used for lossless data compression. 
+The process of finding and/or using such a code proceeds by means of Huffman coding, an algorithm developed by David A.
+Huffman while he was a Sc.D. student at MIT, and published in the 1952 
+paper "A Method for the Construction of Minimum-Redundancy Codes".
+
+The output from Huffman's algorithm can be viewed as a variable-length code table 
+for encoding a source symbol (such as a character in a file). 
+The algorithm derives this table from the estimated probability or frequency of occurrence (weight) 
+for each possible value of the source symbol. 
+As in other entropy encoding methods, 
+more common symbols are generally represented using fewer bits than less common symbols. 
+Huffman's method can be efficiently implemented,
+finding a code in time linear in the number of input weights if these weights are already sorted.
+
+![Constructing a Huffman Tree](https://upload.wikimedia.org/wikipedia/commons/d/d8/HuffmanCodeAlg.png)
+
+
+
+## Encode : Compression
+
+![Visualization of encoding](https://upload.wikimedia.org/wikipedia/commons/thumb/a/a0/Huffman_coding_visualisation.svg/1920px-Huffman_coding_visualisation.svg.png)
+
+The simplest construction algorithm uses a priority queue where the node with lowest probability is given highest priority:
+
+1. Create a leaf node for each symbol and add it to the priority queue.
+2. While there is more than one node in the queue:
+    1. Remove the two nodes of highest priority (lowest probability) from the queue.
+    2. Create a new internal node with these two nodes as children and with probability equal to the sum of the two nodes' probabilities.
+    3. Add the new node to the queue.
+3. The remaining node is the root node and the tree is complete.
+
+Since efficient priority queue data structures require `O(log n)` time per insertion, and a tree with `n` leaves has `2n−1` nodes, this algorithm operates in `O(n log n)` time, where `n` is the number of symbols.
+
+
+
+## References
+
+- [Wikipedia](https://en.wikipedia.org/wiki/Huffman_coding)
+- [GitHub](https://gist.github.com/1995eaton/86f10f4d0247b4e4e65e)
--- a/src/algorithms/uncategorized/huffman-coding/test/huffmanCoding.test.js
+++ b/src/algorithms/uncategorized/huffman-coding/test/huffmanCoding.test.js
@ -0,0 +1,133 @@
/**
 * Binary max-heap stored in a flat array.
 *
 * Items are ordered by the key returned from `fn`; the item with the
 * GREATEST key sits at index 0 and is the one returned by `pop()`.
 *
 * @param {function(*): *} [fn] - Maps an item to its sort key. Defaults to
 *   the identity function, i.e. the items themselves are compared.
 */
var Heap = function(fn) {
  this.fn = fn || function(e) {
    return e;
  };
  this.items = [];
};

Heap.prototype = {
  /**
   * Exchange the items stored at positions `i` and `j`.
   *
   * @param {number} i
   * @param {number} j
   */
  swap: function(i, j) {
    var held = this.items[i];
    this.items[i] = this.items[j];
    this.items[j] = held;
  },

  /**
   * Move the item at `index` up towards the root until its parent's key is
   * no longer smaller than its own.
   *
   * @param {number} index - Position of the item to move up.
   */
  bubble: function(index) {
    while (index > 0) {
      var parent = (index - 1) >> 1;
      if (!(this.item(parent) < this.item(index))) {
        return;
      }
      this.swap(index, parent);
      index = parent;
    }
  },

  /**
   * Return the sort key (not the item itself) of the item at `index`.
   *
   * @param {number} index
   * @return {*} Result of applying `fn` to the stored item.
   */
  item: function(index) {
    return this.fn(this.items[index]);
  },

  /**
   * Remove and return the item with the greatest key.
   *
   * The previous implementation returned the LAST array slot, which is just
   * some leaf of the heap, so callers never received items in key order.
   *
   * @return {*} The root item, or `undefined` when the heap is empty.
   */
  pop: function() {
    var last = this.items.pop();
    if (this.items.length === 0) {
      // Heap was empty (last is undefined) or held exactly one item.
      return last;
    }
    var top = this.items[0];
    // Move the former last leaf to the root and restore the heap order.
    this.items[0] = last;
    this.sift(0, this.items.length);
    return top;
  },

  /**
   * Move the item at `index` down, swapping it with its larger child, until
   * neither child inside `[0, end)` has a greater key.
   *
   * @param {number} index - Position of the item to move down.
   * @param {number} end - Exclusive upper bound of the positions considered.
   */
  sift: function(index, end) {
    var child = index * 2 + 1;
    while (child < end) {
      if (child + 1 < end && this.item(child + 1) > this.item(child)) {
        child++;
      }
      if (!(this.item(index) < this.item(child))) {
        return;
      }
      this.swap(index, child);
      index = child;
      child = index * 2 + 1;
    }
  },

  /**
   * Insert every argument into the heap, one after another.
   */
  push: function() {
    for (var i = 0; i < arguments.length; i++) {
      this.items.push(arguments[i]);
      this.bubble(this.items.length - 1);
    }
  },

  /**
   * Number of items currently stored.
   */
  get length() {
    return this.items.length;
  }
};

/**
 * Huffman encoder/decoder working on JavaScript strings.
 *
 * The encoded form is itself a string: `<header>/<bits>`.
 *  - `<header>` is a `-`-separated list of `<charCode>|<code>` entries, where
 *    `<charCode>` is the decimal UTF-16 code unit of a symbol and `<code>` is
 *    its bit string.
 *  - `<bits>` is the concatenation of the codes of every input code unit,
 *    written with the characters `0` and `1` (the bits are not packed).
 */
var Huffman = {
  /**
   * Build a Huffman code for `data` and encode `data` with it.
   *
   * The result is deterministic: the same input always yields the same
   * output. An empty string encodes to `"/"`. An input made of a single
   * distinct symbol gets the one-bit code `0`, so that the number of
   * repetitions survives the round trip.
   *
   * @param {string} data - Text to encode; handled per UTF-16 code unit.
   * @return {string} Header and bit string joined by `/`.
   * @throws {TypeError} If `data` is not a string.
   */
  encode: function(data) {
    if (typeof data !== 'string') {
      throw new TypeError('Huffman.encode expects a string');
    }
    var hasOwn = Object.prototype.hasOwnProperty;
    var i;

    // Count how often every code unit occurs.
    var freq = {};
    for (i = 0; i < data.length; i++) {
      var symbol = data.charAt(i);
      freq[symbol] = hasOwn.call(freq, symbol) ? freq[symbol] + 1 : 1;
    }

    // Heap is a max-heap, so negate the weight to pop the LIGHTEST node first.
    var queue = new Heap(function(node) {
      return -node[0];
    });
    Object.keys(freq).forEach(function(key) {
      queue.push([freq[key], key]);
    });

    // Repeatedly merge the two lightest nodes; a node is [weight, subtree],
    // where subtree is either a symbol (leaf) or a [left, right] pair.
    while (queue.length > 1) {
      var first = queue.pop();
      var second = queue.pop();
      queue.push([first[0] + second[0], [first[1], second[1]]]);
    }

    var dict = {};
    var assign = function(node, code) {
      if (Array.isArray(node)) {
        assign(node[0], code + '0');
        assign(node[1], code + '1');
      } else {
        dict[node] = code;
      }
    };
    if (queue.length === 1) {
      var root = queue.pop()[1];
      if (Array.isArray(root)) {
        assign(root, '');
      } else {
        // Only one distinct symbol: the root is a leaf and would otherwise
        // receive the empty code, which cannot be decoded.
        dict[root] = '0';
      }
    }

    var bits = '';
    for (i = 0; i < data.length; i++) {
      bits += dict[data.charAt(i)];
    }
    var header = Object.keys(dict).map(function(key) {
      return key.charCodeAt(0) + '|' + dict[key];
    }).join('-') + '/';
    return header + bits;
  },

  /**
   * Decode a string produced by `encode`.
   *
   * Only the text before the first `/` (header) and between the first and
   * second `/` (bits) is used. Trailing bits that do not complete a code are
   * ignored.
   *
   * @param {string} string - Encoded text in `<header>/<bits>` form.
   * @return {string} The decoded text.
   * @throws {TypeError} If `string` is not a string or has no `/` separator.
   */
  decode: function(string) {
    if (typeof string !== 'string') {
      throw new TypeError('Huffman.decode expects a string');
    }
    var parts = string.split('/');
    if (parts.length < 2) {
      throw new TypeError('Huffman.decode expects "<header>/<bits>" input');
    }
    var hasOwn = Object.prototype.hasOwnProperty;

    // Map every bit string back to its symbol.
    var table = {};
    parts[0].split('-').forEach(function(entry) {
      var fields = entry.split('|');
      if (fields.length < 2 || fields[0] === '') {
        // Empty header (encoded empty string) or malformed entry.
        return;
      }
      table[fields[1]] = String.fromCharCode(Number(fields[0]));
    });

    // Codes are prefix-free, so the first match while scanning is the symbol.
    var bits = parts[1];
    var result = '';
    var cur = '';
    for (var i = 0; i < bits.length; i++) {
      cur += bits.charAt(i);
      if (hasOwn.call(table, cur)) {
        result += table[cur];
        cur = '';
      }
    }
    return result;
  }
};
+  
+
/**** test code ****/
// Round-trip smoke test: encode a sample string, decode it again, print each
// stage, and fail loudly if the decoded text differs from the input.
var test = 'OSS1234L1OSSTEST';
console.log("1. test string = ",test);

// test encode
var enc = Huffman.encode(test);
console.log("2. encoded string = ",enc);

// test decode
var dec = Huffman.decode(enc);
console.log("3. decoded string = ",dec);

// Printing alone cannot fail, so verify the round trip explicitly.
if (dec !== test) {
  throw new Error('Huffman round trip failed: expected "' + test + '" but got "' + dec + '"');
}
--- a/src/algorithms/uncategorized/huffman-coding/huffmanCoding.js
+++ b/src/algorithms/uncategorized/huffman-coding/huffmanCoding.js
@ -0,0 +1,119 @@
/**
 * Binary max-heap stored in a flat array.
 *
 * Items are ordered by the key returned from `fn`; the item with the
 * GREATEST key sits at index 0 and is the one returned by `pop()`.
 *
 * @param {function(*): *} [fn] - Maps an item to its sort key. Defaults to
 *   the identity function, i.e. the items themselves are compared.
 */
var Heap = function(fn) {
  this.fn = fn || function(e) {
    return e;
  };
  this.items = [];
};

Heap.prototype = {
  /**
   * Exchange the items stored at positions `i` and `j`.
   *
   * @param {number} i
   * @param {number} j
   */
  swap: function(i, j) {
    var held = this.items[i];
    this.items[i] = this.items[j];
    this.items[j] = held;
  },

  /**
   * Move the item at `index` up towards the root until its parent's key is
   * no longer smaller than its own.
   *
   * @param {number} index - Position of the item to move up.
   */
  bubble: function(index) {
    while (index > 0) {
      var parent = (index - 1) >> 1;
      if (!(this.item(parent) < this.item(index))) {
        return;
      }
      this.swap(index, parent);
      index = parent;
    }
  },

  /**
   * Return the sort key (not the item itself) of the item at `index`.
   *
   * @param {number} index
   * @return {*} Result of applying `fn` to the stored item.
   */
  item: function(index) {
    return this.fn(this.items[index]);
  },

  /**
   * Remove and return the item with the greatest key.
   *
   * The previous implementation returned the LAST array slot, which is just
   * some leaf of the heap, so callers never received items in key order.
   *
   * @return {*} The root item, or `undefined` when the heap is empty.
   */
  pop: function() {
    var last = this.items.pop();
    if (this.items.length === 0) {
      // Heap was empty (last is undefined) or held exactly one item.
      return last;
    }
    var top = this.items[0];
    // Move the former last leaf to the root and restore the heap order.
    this.items[0] = last;
    this.sift(0, this.items.length);
    return top;
  },

  /**
   * Move the item at `index` down, swapping it with its larger child, until
   * neither child inside `[0, end)` has a greater key.
   *
   * @param {number} index - Position of the item to move down.
   * @param {number} end - Exclusive upper bound of the positions considered.
   */
  sift: function(index, end) {
    var child = index * 2 + 1;
    while (child < end) {
      if (child + 1 < end && this.item(child + 1) > this.item(child)) {
        child++;
      }
      if (!(this.item(index) < this.item(child))) {
        return;
      }
      this.swap(index, child);
      index = child;
      child = index * 2 + 1;
    }
  },

  /**
   * Insert every argument into the heap, one after another.
   */
  push: function() {
    for (var i = 0; i < arguments.length; i++) {
      this.items.push(arguments[i]);
      this.bubble(this.items.length - 1);
    }
  },

  /**
   * Number of items currently stored.
   */
  get length() {
    return this.items.length;
  }
};

/**
 * Huffman encoder/decoder working on JavaScript strings.
 *
 * The encoded form is itself a string: `<header>/<bits>`.
 *  - `<header>` is a `-`-separated list of `<charCode>|<code>` entries, where
 *    `<charCode>` is the decimal UTF-16 code unit of a symbol and `<code>` is
 *    its bit string.
 *  - `<bits>` is the concatenation of the codes of every input code unit,
 *    written with the characters `0` and `1` (the bits are not packed).
 */
var Huffman = {
  /**
   * Build a Huffman code for `data` and encode `data` with it.
   *
   * The result is deterministic: the same input always yields the same
   * output. An empty string encodes to `"/"`. An input made of a single
   * distinct symbol gets the one-bit code `0`, so that the number of
   * repetitions survives the round trip.
   *
   * @param {string} data - Text to encode; handled per UTF-16 code unit.
   * @return {string} Header and bit string joined by `/`.
   * @throws {TypeError} If `data` is not a string.
   */
  encode: function(data) {
    if (typeof data !== 'string') {
      throw new TypeError('Huffman.encode expects a string');
    }
    var hasOwn = Object.prototype.hasOwnProperty;
    var i;

    // Count how often every code unit occurs.
    var freq = {};
    for (i = 0; i < data.length; i++) {
      var symbol = data.charAt(i);
      freq[symbol] = hasOwn.call(freq, symbol) ? freq[symbol] + 1 : 1;
    }

    // Heap is a max-heap, so negate the weight to pop the LIGHTEST node first.
    var queue = new Heap(function(node) {
      return -node[0];
    });
    Object.keys(freq).forEach(function(key) {
      queue.push([freq[key], key]);
    });

    // Repeatedly merge the two lightest nodes; a node is [weight, subtree],
    // where subtree is either a symbol (leaf) or a [left, right] pair.
    while (queue.length > 1) {
      var first = queue.pop();
      var second = queue.pop();
      queue.push([first[0] + second[0], [first[1], second[1]]]);
    }

    var dict = {};
    var assign = function(node, code) {
      if (Array.isArray(node)) {
        assign(node[0], code + '0');
        assign(node[1], code + '1');
      } else {
        dict[node] = code;
      }
    };
    if (queue.length === 1) {
      var root = queue.pop()[1];
      if (Array.isArray(root)) {
        assign(root, '');
      } else {
        // Only one distinct symbol: the root is a leaf and would otherwise
        // receive the empty code, which cannot be decoded.
        dict[root] = '0';
      }
    }

    var bits = '';
    for (i = 0; i < data.length; i++) {
      bits += dict[data.charAt(i)];
    }
    var header = Object.keys(dict).map(function(key) {
      return key.charCodeAt(0) + '|' + dict[key];
    }).join('-') + '/';
    return header + bits;
  },

  /**
   * Decode a string produced by `encode`.
   *
   * Only the text before the first `/` (header) and between the first and
   * second `/` (bits) is used. Trailing bits that do not complete a code are
   * ignored.
   *
   * @param {string} string - Encoded text in `<header>/<bits>` form.
   * @return {string} The decoded text.
   * @throws {TypeError} If `string` is not a string or has no `/` separator.
   */
  decode: function(string) {
    if (typeof string !== 'string') {
      throw new TypeError('Huffman.decode expects a string');
    }
    var parts = string.split('/');
    if (parts.length < 2) {
      throw new TypeError('Huffman.decode expects "<header>/<bits>" input');
    }
    var hasOwn = Object.prototype.hasOwnProperty;

    // Map every bit string back to its symbol.
    var table = {};
    parts[0].split('-').forEach(function(entry) {
      var fields = entry.split('|');
      if (fields.length < 2 || fields[0] === '') {
        // Empty header (encoded empty string) or malformed entry.
        return;
      }
      table[fields[1]] = String.fromCharCode(Number(fields[0]));
    });

    // Codes are prefix-free, so the first match while scanning is the symbol.
    var bits = parts[1];
    var result = '';
    var cur = '';
    for (var i = 0; i < bits.length; i++) {
      cur += bits.charAt(i);
      if (hasOwn.call(table, cur)) {
        result += table[cur];
        cur = '';
      }
    }
    return result;
  }
};