[include func] copy from lib.

2025-06-09 16:11:28 +08:00 · 2025-06-09 16:11:28 +08:00 · 44fbe96dcb
commit 44fbe96dcb
parent ab8afdcc89
1 changed files with 67 additions and 8 deletions
--- a/src/main.rs
+++ b/src/main.rs
@ -1,9 +1,72 @@
 #![doc = include_str!("../README.md")]

-use levenshtein::levenshtein;
 use std::fs::{File, read_to_string};
 use std::io::Write;

+/// Calculate the Levenshtein (edit) distance between `a` and `b`, counted in `char`s.
+///
+/// Origin: see https://crates.io/crates/levenshtein
+/// Copyright (c) 2016 Titus Wormer <tituswormer@gmail.com>
+fn levenshtein(a: &str, b: &str) -> usize {
+    let mut result = 0;
+
+    /* Shortcuts: equal inputs give 0; an empty side gives the other side's length. */
+    if a == b {
+        return result;
+    }
+
+    let length_a = a.chars().count();
+    let length_b = b.chars().count();
+
+    if length_a == 0 {
+        return length_b;
+    }
+
+    if length_b == 0 {
+        return length_a;
+    }
+
+    /* Initialize the vector with 1..=length_a, one slot per char of `a`.
+     *
+     * Normally a full matrix is used for this computation;
+     * here a single vector is reused for every char of `b`. */
+    let mut cache: Vec<usize> = (1..).take(length_a).collect();
+    let mut distance_a;
+    let mut distance_b;
+
+    /* Outer loop: one full pass over `a` for each char of `b`. */
+    for (index_b, code_b) in b.chars().enumerate() {
+        result = index_b; // seed for this pass: number of `b` chars already processed
+        distance_a = index_b; // same seed for the first comparison of this pass
+
+        for (index_a, code_a) in a.chars().enumerate() {
+            distance_b = if code_a == code_b { // previous distance_a, +1 when the chars differ
+                distance_a
+            } else {
+                distance_a + 1
+            };
+
+            distance_a = cache[index_a]; // value this slot held before the current pass
+
+            result = if distance_a > result { // overall: min(result + 1, distance_a + 1, distance_b)
+                if distance_b > result {
+                    result + 1
+                } else {
+                    distance_b
+                }
+            } else if distance_b > distance_a {
+                distance_a + 1
+            } else {
+                distance_b
+            };
+
+            cache[index_a] = result; // read back as distance_a on the next pass
+        }
+    }
+
+    result
+}
+
 /// open a specified file and return a vector of strings
 /// where each element is a line.
 fn parse_line(file: &str) -> Vec<String> {
@ -14,13 +77,12 @@ fn parse_line(file: &str) -> Vec<String> {
        .collect()
 }

-
 /// Lines of a words.txt are like
 /// ```plaintext
 /// 1234 hello I/am/a/test/you/can
 /// 1231 correrify my/posibily/orrer
 /// ```
-/// We want to parse a line into a vector 
+/// We want to parse a line into a vector
 /// which elements represents each words,
 /// include first number.
 fn parse_words(file: &str) -> Vec<Vec<String>> {
@ -39,7 +101,6 @@ fn parse_words(file: &str) -> Vec<Vec<String>> {
    words
 }

-
 /// Binary-search first. If the word is NOT in the dictionary,
 /// we will find the word with the minimum distance.
 fn correrify<'a>(word: &'a str, dict: &'a Vec<String>) -> &'a str {
@ -62,7 +123,6 @@ fn correrify<'a>(word: &'a str, dict: &'a Vec<String>) -> &'a str {
    temp_min.1
 }

-
 /// The words's shape is just like
 /// ```
 /// [
@ -75,7 +135,7 @@ fn correrify<'a>(word: &'a str, dict: &'a Vec<String>) -> &'a str {
 /// assert!(word_line[0].len() == 4);
 /// assert!(word_line[0].chars().all(|c| c.is_numeric()));
 /// ```
-/// We just skip the first word(4 digits number) 
+/// We just skip the first word(4 digits number)
 /// and correrify the rest of words.
 fn select_word_correrify(words: &Vec<Vec<String>>, dict: &Vec<String>) -> Vec<Vec<String>> {
    let mut word_correrified = Vec::new();
@ -100,8 +160,7 @@ fn select_word_correrify(words: &Vec<Vec<String>>, dict: &Vec<String>) -> Vec<Ve
    word_correrified
 }

-
-/// this function just write to the correrified_words.txt 
+/// this function just write to the correrified_words.txt
 /// with same format as words.txt
 fn write_correrified_words(words: &Vec<Vec<String>>) {
    let mut file = File::create("correrified_words.txt").expect("Unable to create file");