/// Calculate the Levenshtein (edit) distance between two strings.
///
/// The distance is the minimum number of single-character insertions,
/// deletions and substitutions needed to turn `a` into `b`. Characters are
/// compared as Unicode scalar values (`char`), not as bytes.
///
/// origin see https://crates.io/crates/levenshtein
/// Copyright (c) 2016 Titus Wormer
fn levenshtein(a: &str, b: &str) -> usize {
    // Shortcut optimizations / degenerate cases.
    if a == b {
        return 0;
    }

    let length_a = a.chars().count();
    let length_b = b.chars().count();

    if length_a == 0 {
        return length_b;
    }

    if length_b == 0 {
        return length_a;
    }

    // A single row is kept instead of the full matrix: before each pass
    // over `a`, `cache[i]` holds the distance computed for the first `i + 1`
    // characters of `a` on the previous pass (initially `i + 1`, i.e. the
    // cost of deleting them all).
    let mut cache: Vec<usize> = (1..=length_a).collect();
    let mut result = 0;

    for (index_b, code_b) in b.chars().enumerate() {
        // `diagonal` is the value of the cell up-and-left of the one being
        // computed; `result` plays the role of the cell to the left.
        let mut diagonal = index_b;
        result = index_b;

        for (slot, code_a) in cache.iter_mut().zip(a.chars()) {
            let substitution = if code_a == code_b {
                diagonal
            } else {
                diagonal + 1
            };

            // Read the previous pass's value before overwriting it; it is
            // the "above" cell now and the diagonal for the next column.
            diagonal = *slot;

            // Cheapest of: substitute/match, insert, delete.
            result = substitution.min(result + 1).min(diagonal + 1);

            *slot = result;
        }
    }

    result
}
fn parse_words(file: &str) -> Vec> { @@ -39,7 +101,6 @@ fn parse_words(file: &str) -> Vec> { words } - /// Binary-search first. If the word is NOT in the dictionary, /// we will find the word with the minimum distance. fn correrify<'a>(word: &'a str, dict: &'a Vec) -> &'a str { @@ -62,7 +123,6 @@ fn correrify<'a>(word: &'a str, dict: &'a Vec) -> &'a str { temp_min.1 } - /// The words's shape is just like /// ``` /// [ @@ -75,7 +135,7 @@ fn correrify<'a>(word: &'a str, dict: &'a Vec) -> &'a str { /// assert!(word_line[0].len() == 4); /// assert!(word_line[0].chars().all(|c| c.is_numeric())); /// ``` -/// We just skip the first word(4 digits number) +/// We just skip the first word(4 digits number) /// and correrify the rest of words. fn select_word_correrify(words: &Vec>, dict: &Vec) -> Vec> { let mut word_correrified = Vec::new(); @@ -100,8 +160,7 @@ fn select_word_correrify(words: &Vec>, dict: &Vec) -> Vec>) { let mut file = File::create("correrified_words.txt").expect("Unable to create file");