add rust program
This commit is contained in:
parent
9d4d4bef1c
commit
765f1f3d77
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
/target
|
||||
16
Cargo.lock
generated
Normal file
16
Cargo.lock
generated
Normal file
@ -0,0 +1,16 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 4
|
||||
|
||||
[[package]]
|
||||
name = "levenshtein"
|
||||
version = "1.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "db13adb97ab515a3691f56e4dbab09283d0b86cb45abd991d8634a9d6f501760"
|
||||
|
||||
[[package]]
|
||||
name = "word_correction"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"levenshtein",
|
||||
]
|
||||
7
Cargo.toml
Normal file
7
Cargo.toml
Normal file
@ -0,0 +1,7 @@
|
||||
[package]
|
||||
name = "word_correction"
|
||||
version = "0.1.0"
|
||||
edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
levenshtein = "1.0.5"
|
||||
18
README.md
18
README.md
@ -0,0 +1,18 @@
|
||||
# Word Correction
|
||||
## YinMo19
|
||||
|
||||
A simple word-correction program written in Rust, originally an extra homework problem from a C course.
|
||||
It uses an algorithm called Levenshtein distance to find the word most similar to the input word.
|
||||
Parsing the input file is the bulk of what the program does.
|
||||
The two main parts of the program are the implementation of the Levenshtein distance algorithm and the sort/binary search of the words in the dictionary.
|
||||
|
||||
You can just run
|
||||
```bash
|
||||
cargo build --release
|
||||
time ./target/release/word_correction
|
||||
```
|
||||
to test. On a MacBook Air M2, my test result is
|
||||
```bash
|
||||
> time ./target/release/word_correction
|
||||
./target/release/word_correction 0.13s user 0.07s system 97% cpu 0.213 total
|
||||
```
|
||||
6167
correrified_words.txt
Normal file
6167
correrified_words.txt
Normal file
File diff suppressed because it is too large
Load Diff
95
src/main.rs
Normal file
95
src/main.rs
Normal file
@ -0,0 +1,95 @@
|
||||
#![doc = include_str!("../README.md")]
|
||||
|
||||
use levenshtein::levenshtein;
|
||||
use std::fs::{File, read_to_string};
|
||||
use std::io::Write;
|
||||
|
||||
/// Read `file` and return its contents as one owned `String` per line.
///
/// # Panics
/// Panics with "No `<file>` found" when the file cannot be read
/// (missing, unreadable, or not valid UTF-8).
fn parse_line(file: &str) -> Vec<String> {
    // `unwrap_or_else` only builds the panic message on the error path;
    // the original `expect(format!(..).as_str())` allocated it on every
    // call, even on success (clippy: expect_fun_call).
    read_to_string(file)
        .unwrap_or_else(|_| panic!("No {file} found"))
        .lines()
        .map(String::from)
        .collect()
}
|
||||
|
||||
fn parse_words(file: &str) -> Vec<Vec<String>> {
|
||||
let words_line = parse_line(file);
|
||||
let mut words = Vec::new();
|
||||
|
||||
for word in words_line.iter() {
|
||||
let word_chars = word
|
||||
.as_str()
|
||||
.split(&['/', ' '][..])
|
||||
.map(String::from)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
words.push(word_chars);
|
||||
}
|
||||
words
|
||||
}
|
||||
|
||||
fn correrify<'a>(word: &'a str, dict: &'a Vec<String>) -> &'a str {
|
||||
if let Ok(_) = dict.binary_search(&word.to_string()) {
|
||||
return word;
|
||||
}
|
||||
|
||||
let mut temp_min = (usize::MAX, "");
|
||||
for check_word in dict.iter() {
|
||||
let distance = levenshtein(word, check_word.as_str());
|
||||
|
||||
if distance <= 1 {
|
||||
return check_word;
|
||||
}
|
||||
if distance < temp_min.0 {
|
||||
temp_min = (distance, check_word);
|
||||
}
|
||||
}
|
||||
|
||||
temp_min.1
|
||||
}
|
||||
|
||||
fn select_word_correrify(words: &Vec<Vec<String>>, dict: &Vec<String>) -> Vec<Vec<String>> {
|
||||
let mut word_correrified = Vec::new();
|
||||
for word_line in words.iter() {
|
||||
assert!(word_line[0].len() == 4);
|
||||
assert!(word_line[0].chars().all(|c| c.is_numeric()));
|
||||
|
||||
word_correrified.push(
|
||||
vec![word_line[0].clone()]
|
||||
.into_iter()
|
||||
.chain(
|
||||
word_line
|
||||
.iter()
|
||||
.skip(1)
|
||||
.map(|word| correrify(word, dict).to_string())
|
||||
.collect::<Vec<_>>(),
|
||||
)
|
||||
.collect::<Vec<String>>(),
|
||||
);
|
||||
}
|
||||
|
||||
word_correrified
|
||||
}
|
||||
|
||||
/// Write the corrected lines to `correrified_words.txt`, one entry per
/// line formatted as `<number> <first word> <rest joined by '/'>`.
///
/// # Panics
/// Panics when the file cannot be created or written, or when an entry
/// has fewer than two fields.
fn write_correrified_words(words: &Vec<Vec<String>>) {
    let file = File::create("correrified_words.txt").expect("Unable to create file");
    // Buffer the output: without it every `writeln!` is its own write
    // syscall, one per line.
    let mut out = std::io::BufWriter::new(file);
    for word_line in words.iter() {
        writeln!(
            out,
            "{} {} {}",
            word_line[0],
            word_line[1],
            word_line[2..].join("/")
        )
        .expect("unable to write to file");
    }
    // BufWriter's Drop flushes but silently swallows errors — flush
    // explicitly so a failed write is reported.
    out.flush().expect("unable to flush correrified_words.txt");
}
|
||||
|
||||
fn main() {
|
||||
let mut dict = parse_line("vocabulary.txt");
|
||||
dict.sort_unstable(); // sort the dictionary to accelerate the search
|
||||
let words = parse_words("words.txt");
|
||||
let word_correrified = select_word_correrify(&words, &dict);
|
||||
|
||||
write_correrified_words(&word_correrified);
|
||||
}
|
||||
3242
vocabulary.txt
Normal file
3242
vocabulary.txt
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user