add rust program

This commit is contained in:
YinMo19 2025-02-13 01:44:20 +08:00
parent 9d4d4bef1c
commit 765f1f3d77
8 changed files with 15713 additions and 0 deletions

1
.gitignore vendored Normal file

@@ -0,0 +1 @@
/target

16
Cargo.lock generated Normal file

@@ -0,0 +1,16 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4

[[package]]
name = "levenshtein"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db13adb97ab515a3691f56e4dbab09283d0b86cb45abd991d8634a9d6f501760"

[[package]]
name = "word_correction"
version = "0.1.0"
dependencies = [
 "levenshtein",
]

7
Cargo.toml Normal file

@@ -0,0 +1,7 @@
[package]
name = "word_correction"
version = "0.1.0"
edition = "2024"

[dependencies]
levenshtein = "1.0.5"

18
README.md Normal file

@@ -0,0 +1,18 @@
# Word Correction
## YinMo19
A simple word-correction program written in Rust; it originates from an extra homework problem in a C course.
It uses the Levenshtein distance to find the dictionary word most similar to each input word.
Most of the work the program does is parsing the input files.
The two main parts of the program are the implementation of the Levenshtein distance algorithm and the sort/binary_search lookup of words in the dictionary (sketched below).
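The lookup strategy in a nutshell: the dictionary is sorted once, each word is first checked for an exact match with a binary search, and only on a miss does the program fall back to the closest word by edit distance. A minimal sketch of that idea, simplified from `src/main.rs` (the `lookup` helper name is illustrative, not part of the program, and the real code also short-circuits at distance 1):

```rust
use levenshtein::levenshtein;

// Sketch only: exact hit via binary search on a sorted dictionary,
// otherwise return the entry with the smallest edit distance.
fn lookup<'a>(word: &'a str, sorted_dict: &'a [String]) -> &'a str {
    if sorted_dict.binary_search(&word.to_string()).is_ok() {
        return word;
    }
    sorted_dict
        .iter()
        .min_by_key(|w| levenshtein(word, w.as_str()))
        .map(String::as_str)
        .unwrap_or(word)
}
```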
You can just run
```bash
cargo build --release
time ./target/release/word_correction
```
to test it. On a MacBook Air (M2), my test result is
```bash
> time ./target/release/word_correction
./target/release/word_correction 0.13s user 0.07s system 97% cpu 0.213 total
```

6167
correrified_words.txt Normal file

File diff suppressed because it is too large

95
src/main.rs Normal file

@@ -0,0 +1,95 @@
#![doc = include_str!("../README.md")]

use levenshtein::levenshtein;
use std::fs::{File, read_to_string};
use std::io::Write;

/// Read a file and return its lines as owned `String`s.
fn parse_line(file: &str) -> Vec<String> {
    read_to_string(file)
        .unwrap_or_else(|_| panic!("No {} found", file))
        .lines()
        .map(String::from)
        .collect()
}

/// Split every line of the word file on '/' and ' ' into its fields.
fn parse_words(file: &str) -> Vec<Vec<String>> {
    let words_line = parse_line(file);
    let mut words = Vec::new();
    for word in words_line.iter() {
        let word_chars = word
            .as_str()
            .split(&['/', ' '][..])
            .map(String::from)
            .collect::<Vec<_>>();
        words.push(word_chars);
    }
    words
}

/// Return `word` unchanged if it is in the (sorted) dictionary; otherwise
/// return the dictionary word with the smallest Levenshtein distance,
/// short-circuiting as soon as a word at distance 1 is found.
fn correrify<'a>(word: &'a str, dict: &'a Vec<String>) -> &'a str {
    if dict.binary_search(&word.to_string()).is_ok() {
        return word;
    }

    let mut temp_min = (usize::MAX, "");
    for check_word in dict.iter() {
        let distance = levenshtein(word, check_word.as_str());
        if distance <= 1 {
            return check_word;
        }
        if distance < temp_min.0 {
            temp_min = (distance, check_word);
        }
    }
    temp_min.1
}

/// Correct every word on every line, keeping the leading four-digit id as is.
fn select_word_correrify(words: &Vec<Vec<String>>, dict: &Vec<String>) -> Vec<Vec<String>> {
    let mut word_correrified = Vec::new();
    for word_line in words.iter() {
        // The first field of each line must be a four-digit numeric id.
        assert!(word_line[0].len() == 4);
        assert!(word_line[0].chars().all(|c| c.is_numeric()));

        word_correrified.push(
            vec![word_line[0].clone()]
                .into_iter()
                .chain(
                    word_line
                        .iter()
                        .skip(1)
                        .map(|word| correrify(word, dict).to_string())
                        .collect::<Vec<_>>(),
                )
                .collect::<Vec<String>>(),
        );
    }
    word_correrified
}

/// Write the corrected lines as `<id> <first word> <remaining words joined by '/'>`.
fn write_correrified_words(words: &Vec<Vec<String>>) {
    let mut file = File::create("correrified_words.txt").expect("Unable to create file");
    for word_line in words.iter() {
        writeln!(
            file,
            "{} {} {}",
            word_line[0],
            word_line[1],
            word_line[2..].join("/")
        )
        .expect("unable to write to file");
    }
}

fn main() {
    let mut dict = parse_line("vocabulary.txt");
    dict.sort_unstable(); // sort the dictionary to accelerate the binary search
    let words = parse_words("words.txt");
    let word_correrified = select_word_correrify(&words, &dict);
    write_correrified_words(&word_correrified);
}

3242
vocabulary.txt Normal file

File diff suppressed because it is too large

6167
words.txt Normal file

File diff suppressed because it is too large