[include func] copy from lib.

This commit is contained in:
YinMo19 2025-06-09 16:11:28 +08:00
parent ab8afdcc89
commit 44fbe96dcb

View File

@ -1,9 +1,72 @@
#![doc = include_str!("../README.md")]
use levenshtein::levenshtein;
use std::fs::{File, read_to_string};
use std::io::Write;
/// Compute the Levenshtein (edit) distance between `a` and `b`.
///
/// Lengths and comparisons are per `char`, not per byte. The result is the
/// minimum number of single-`char` insertions, deletions and substitutions
/// that turn one string into the other.
///
/// Adapted from https://crates.io/crates/levenshtein
/// Copyright (c) 2016 Titus Wormer <tituswormer@gmail.com>
fn levenshtein(a: &str, b: &str) -> usize {
    // Identical inputs need no edits at all.
    if a == b {
        return 0;
    }

    let len_a = a.chars().count();
    let len_b = b.chars().count();

    // If either side is empty, the distance is the length of the other side.
    match (len_a, len_b) {
        (0, n) | (n, 0) => return n,
        _ => {}
    }

    // One rolling row stands in for the full matrix: before any character of
    // `b` is consumed, `row[i]` is the cost of deleting the first `i + 1`
    // characters of `a`.
    let mut row: Vec<usize> = (1..=len_a).collect();
    let mut last = 0;

    for (j, ch_b) in b.chars().enumerate() {
        // `diag` is the value diagonally up-left of the current cell,
        // `left` is the value of the cell just written in this pass.
        let mut diag = j;
        let mut left = j;

        for (slot, ch_a) in row.iter_mut().zip(a.chars()) {
            let substitute = if ch_a == ch_b { diag } else { diag + 1 };
            // Before overwriting, the slot still holds the previous pass's
            // value; it becomes the diagonal for the next slot.
            diag = *slot;
            left = substitute.min(left + 1).min(diag + 1);
            *slot = left;
        }

        last = left;
    }

    last
}
/// open a specified file and return a vector of strings
/// where each element is a line.
fn parse_line(file: &str) -> Vec<String> {
@ -14,13 +77,12 @@ fn parse_line(file: &str) -> Vec<String> {
.collect()
}
/// Lines of a words.txt are like
/// ```plaintext
/// 1234 hello I/am/a/test/you/can
/// 1231 correrify my/posibily/orrer
/// ```
/// We want to parse a line into a vector
/// We want to parse a line into a vector
/// whose elements are the individual words,
/// including the leading number.
fn parse_words(file: &str) -> Vec<Vec<String>> {
@ -39,7 +101,6 @@ fn parse_words(file: &str) -> Vec<Vec<String>> {
words
}
/// Binary-search first. If the word is NOT in the dictionary,
/// we will find the word with the minimum distance.
fn correrify<'a>(word: &'a str, dict: &'a Vec<String>) -> &'a str {
@ -62,7 +123,6 @@ fn correrify<'a>(word: &'a str, dict: &'a Vec<String>) -> &'a str {
temp_min.1
}
/// The shape of `words` looks like
/// ```
/// [
@ -75,7 +135,7 @@ fn correrify<'a>(word: &'a str, dict: &'a Vec<String>) -> &'a str {
/// assert!(word_line[0].len() == 4);
/// assert!(word_line[0].chars().all(|c| c.is_numeric()));
/// ```
/// We just skip the first word(4 digits number)
/// We just skip the first word(4 digits number)
/// and correrify the rest of words.
fn select_word_correrify(words: &Vec<Vec<String>>, dict: &Vec<String>) -> Vec<Vec<String>> {
let mut word_correrified = Vec::new();
@ -100,8 +160,7 @@ fn select_word_correrify(words: &Vec<Vec<String>>, dict: &Vec<String>) -> Vec<Ve
word_correrified
}
/// this function just write to the correrified_words.txt
/// this function just write to the correrified_words.txt
/// with same format as words.txt
fn write_correrified_words(words: &Vec<Vec<String>>) {
let mut file = File::create("correrified_words.txt").expect("Unable to create file");