add rust program

This commit is contained in:
YinMo19 2025-02-13 01:44:20 +08:00
parent 9d4d4bef1c
commit 765f1f3d77
8 changed files with 15713 additions and 0 deletions

1
.gitignore vendored Normal file

@@ -0,0 +1 @@
/target

16
Cargo.lock generated Normal file

@@ -0,0 +1,16 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4

[[package]]
name = "levenshtein"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db13adb97ab515a3691f56e4dbab09283d0b86cb45abd991d8634a9d6f501760"

[[package]]
name = "word_correction"
version = "0.1.0"
dependencies = [
 "levenshtein",
]

7
Cargo.toml Normal file

@@ -0,0 +1,7 @@
[package]
name = "word_correction"
version = "0.1.0"
edition = "2024"

[dependencies]
levenshtein = "1.0.5"

18
README.md Normal file

@@ -0,0 +1,18 @@
# Word Correction
## YinMo19
A simple word-correction program written in Rust; it originates from an extra homework problem in a C course.
It uses the Levenshtein distance to find the dictionary word most similar to each input word.
Most of the work the program does is parsing the input files.
The two main parts of the program are the implementation of the Levenshtein distance algorithm and the sort/binary_search lookup of words in the dictionary (sketched below).
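The lookup strategy in a nutshell: the dictionary is sorted once, each word is first checked for an exact match with a binary search, and only on a miss does the program fall back to the closest word by edit distance. A minimal sketch of that idea, simplified from `src/main.rs` (the `lookup` helper name is illustrative, not part of the program, and the real code also short-circuits at distance 1):

```rust
use levenshtein::levenshtein;

// Sketch only: exact hit via binary search on a sorted dictionary,
// otherwise return the entry with the smallest edit distance.
fn lookup<'a>(word: &'a str, sorted_dict: &'a [String]) -> &'a str {
    if sorted_dict.binary_search(&word.to_string()).is_ok() {
        return word;
    }
    sorted_dict
        .iter()
        .min_by_key(|w| levenshtein(word, w.as_str()))
        .map(String::as_str)
        .unwrap_or(word)
}
```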
You can just run
```bash
cargo build --release
time ./target/release/word_correction
```
to test it. On a MacBook Air (M2), my test result is
```bash
> time ./target/release/word_correction
./target/release/word_correction 0.13s user 0.07s system 97% cpu 0.213 total
```

6167
correrified_words.txt Normal file

File diff suppressed because it is too large

95
src/main.rs Normal file

@@ -0,0 +1,95 @@
#![doc = include_str!("../README.md")]

use levenshtein::levenshtein;
use std::fs::{File, read_to_string};
use std::io::Write;

/// Read a file and return its lines as owned `String`s.
fn parse_line(file: &str) -> Vec<String> {
    read_to_string(file)
        .unwrap_or_else(|_| panic!("No {} found", file))
        .lines()
        .map(String::from)
        .collect()
}

/// Split every line of the word file on '/' and ' ' into its fields.
fn parse_words(file: &str) -> Vec<Vec<String>> {
    let words_line = parse_line(file);
    let mut words = Vec::new();
    for word in words_line.iter() {
        let word_chars = word
            .as_str()
            .split(&['/', ' '][..])
            .map(String::from)
            .collect::<Vec<_>>();
        words.push(word_chars);
    }
    words
}

/// Return `word` unchanged if it is in the (sorted) dictionary; otherwise
/// return the dictionary word with the smallest Levenshtein distance,
/// short-circuiting as soon as a word at distance 1 is found.
fn correrify<'a>(word: &'a str, dict: &'a Vec<String>) -> &'a str {
    if dict.binary_search(&word.to_string()).is_ok() {
        return word;
    }

    let mut temp_min = (usize::MAX, "");
    for check_word in dict.iter() {
        let distance = levenshtein(word, check_word.as_str());
        if distance <= 1 {
            return check_word;
        }
        if distance < temp_min.0 {
            temp_min = (distance, check_word);
        }
    }
    temp_min.1
}

/// Correct every word on every line, keeping the leading four-digit id as is.
fn select_word_correrify(words: &Vec<Vec<String>>, dict: &Vec<String>) -> Vec<Vec<String>> {
    let mut word_correrified = Vec::new();
    for word_line in words.iter() {
        // The first field of each line must be a four-digit numeric id.
        assert!(word_line[0].len() == 4);
        assert!(word_line[0].chars().all(|c| c.is_numeric()));

        word_correrified.push(
            vec![word_line[0].clone()]
                .into_iter()
                .chain(
                    word_line
                        .iter()
                        .skip(1)
                        .map(|word| correrify(word, dict).to_string())
                        .collect::<Vec<_>>(),
                )
                .collect::<Vec<String>>(),
        );
    }
    word_correrified
}

/// Write the corrected lines as `<id> <first word> <remaining words joined by '/'>`.
fn write_correrified_words(words: &Vec<Vec<String>>) {
    let mut file = File::create("correrified_words.txt").expect("Unable to create file");
    for word_line in words.iter() {
        writeln!(
            file,
            "{} {} {}",
            word_line[0],
            word_line[1],
            word_line[2..].join("/")
        )
        .expect("unable to write to file");
    }
}

fn main() {
    let mut dict = parse_line("vocabulary.txt");
    dict.sort_unstable(); // sort the dictionary to accelerate the binary search
    let words = parse_words("words.txt");
    let word_correrified = select_word_correrify(&words, &dict);
    write_correrified_words(&word_correrified);
}

3242
vocabulary.txt Normal file

File diff suppressed because it is too large

6167
words.txt Normal file

File diff suppressed because it is too large